Index: src/test/org/apache/nutch/fetcher/TestFetcher.java
===================================================================
--- src/test/org/apache/nutch/fetcher/TestFetcher.java	(revision 1188268)
+++ src/test/org/apache/nutch/fetcher/TestFetcher.java	(working copy)
@@ -31,59 +31,57 @@
 import org.mortbay.jetty.Server;
 
 /**
- * Basic fetcher test
- * 1. generate seedlist
- * 2. inject
- * 3. generate
- * 3. fetch
- * 4. Verify contents
+ * Basic fetcher test 1. generate seedlist 2. inject 3. generate 4. fetch 5.
+ * Verify contents
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
- *
+ * 
  */
 public class TestFetcher extends AbstractNutchTest {
 
-  final static Path testdir=new Path("build/test/fetch-test");
+  final static Path testdir = new Path("build/test/fetch-test");
   Path urlPath;
   Server server;
 
   @Override
-  public void setUp() throws Exception{
+  public void setUp() throws Exception {
     super.setUp();
     urlPath = new Path(testdir, "urls");
-    server = CrawlTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site");
+    server = CrawlTestUtil.getServer(conf.getInt("content.server.port", 50000),
+        "build/test/data/fetch-test-site");
     server.start();
   }
 
   @Override
-  public void tearDown() throws Exception{
+  public void tearDown() throws Exception {
     server.stop();
     fs.delete(testdir, true);
   }
 
   public void testFetch() throws Exception {
 
-    //generate seedlist
+    // generate seedlist
     ArrayList<String> urls = new ArrayList<String>();
 
-    addUrl(urls,"index.html");
-    addUrl(urls,"pagea.html");
-    addUrl(urls,"pageb.html");
-    addUrl(urls,"dup_of_pagea.html");
-    addUrl(urls,"nested_spider_trap.html");
-    addUrl(urls,"exception.html");
+    addUrl(urls, "index.html");
+    addUrl(urls, "pagea.html");
+    addUrl(urls, "pageb.html");
+    addUrl(urls, "dup_of_pagea.html");
+    addUrl(urls, "nested_spider_trap.html");
+    addUrl(urls, "exception.html");
 
     CrawlTestUtil.generateSeedList(fs, urlPath, urls);
 
-    //inject
+    // inject
     InjectorJob injector = new InjectorJob(conf);
     injector.inject(urlPath);
 
-    //generate
+    // generate
     long time = System.currentTimeMillis();
     GeneratorJob g = new GeneratorJob(conf);
     String batchId = g.generate(Long.MAX_VALUE, time, false, false);
 
-    //fetch
+    // fetch
     time = System.currentTimeMillis();
     conf.setBoolean(FetcherJob.PARSE_KEY, true);
     FetcherJob fetcher = new FetcherJob(conf);
@@ -91,12 +89,13 @@
 
     time = System.currentTimeMillis() - time;
 
-    //verify politeness, time taken should be more than (num_of_pages +1)*delay
-    int minimumTime = (int) ((urls.size() + 1) * 1000 *
-        conf.getFloat("fetcher.server.delay", 5));
+    // verify politeness, time taken should be more than (num_of_pages +1)*delay
+    int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat(
+        "fetcher.server.delay", 5));
     assertTrue(time > minimumTime);
 
-    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
+    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore,
+        Mark.FETCH_MARK, (String[]) null);
     assertEquals(urls.size(), pages.size());
     List<String> handledurls = new ArrayList<String>();
     for (URLWebPage up : pages) {
@@ -105,23 +104,24 @@
         continue;
       }
       String content = new String(bb.array());
-      if (content.indexOf("Nutch fetcher test page")!=-1) {
+      if (content.indexOf("Nutch fetcher test page") != -1) {
         handledurls.add(up.getUrl());
       }
     }
     Collections.sort(urls);
     Collections.sort(handledurls);
 
-    //verify that enough pages were handled
+    // verify that enough pages were handled
     assertEquals(urls.size(), handledurls.size());
 
-    //verify that correct pages were handled
+    // verify that correct pages were handled
     assertTrue(handledurls.containsAll(urls));
     assertTrue(urls.containsAll(handledurls));
   }
 
   private void addUrl(ArrayList<String> urls, String page) {
-    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page);
+    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/"
+        + page);
   }
 
   public void testAgentNameCheck() {
Index: src/test/org/apache/nutch/metadata/TestMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestMetadata.java	(revision 1188268)
+++ src/test/org/apache/nutch/metadata/TestMetadata.java	(working copy)
@@ -45,7 +45,7 @@
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
-  
+
   /**
    * Test to ensure that only non-null values get written when the
    * {@link Metadata} object is written using a Writeable.
@@ -282,4 +282,3 @@
   }
 
 }
-
Index: src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(revision 1188268)
+++ src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(working copy)
@@ -30,7 +30,7 @@
 /**
  * JUnit based tests of class
  * {@link org.apache.nutch.metadata.SpellCheckedMetadata}.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
@@ -52,20 +52,20 @@
 
   /** Test for the <code>getNormalizedName(String)</code> method. */
   public void testGetNormalizedName() {
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-Type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("ContentType"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contenttype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contentype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contntype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-Type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("ContentType"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contenttype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contentype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contntype"));
   }
-  
+
   /** Test for the <code>add(String, String)</code> method. */
   public void testAdd() {
     String[] values = null;
@@ -256,8 +256,8 @@
   }
 
   /**
-   * IO Test method, usable only when you plan to do changes in metadata
-   * to measure relative performance impact.
+   * IO Test method, usable only when you plan to do changes in metadata to
+   * measure relative performance impact.
    */
   public final void testHandlingSpeed() {
     SpellCheckedMetadata result;
Index: src/test/org/apache/nutch/protocol/TestProtocolFactory.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(revision 1188268)
+++ src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(working copy)
@@ -26,55 +26,56 @@
 
   Configuration conf;
   ProtocolFactory factory;
-  
+
   protected void setUp() throws Exception {
     conf = NutchConfiguration.create();
     conf.set("plugin.includes", ".*");
     conf.set("http.agent.name", "test-bot");
-    factory=new ProtocolFactory(conf);
+    factory = new ProtocolFactory(conf);
   }
 
-  public void testGetProtocol(){
+  public void testGetProtocol() {
 
-    //non existing protocol
+    // non existing protocol
     try {
       factory.getProtocol("xyzxyz://somehost");
       fail("Must throw ProtocolNotFound");
     } catch (ProtocolNotFound e) {
-      //all is ok
-    } catch (Exception ex){
+      // all is ok
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
-    
-    Protocol httpProtocol=null;
-    
-    //existing protocol
+
+    Protocol httpProtocol = null;
+
+    // existing protocol
     try {
-      httpProtocol=factory.getProtocol("http://somehost");
+      httpProtocol = factory.getProtocol("http://somehost");
       assertNotNull(httpProtocol);
-    } catch (Exception ex){
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
 
-    //cache key
-    Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http");
+    // cache key
+    Object protocol = ObjectCache.get(conf).getObject(
+        Protocol.X_POINT_ID + "http");
     assertNotNull(protocol);
     assertEquals(httpProtocol, protocol);
-    
-    //test same object instance
+
+    // test same object instance
     try {
-      assertTrue(httpProtocol==factory.getProtocol("http://somehost"));
+      assertTrue(httpProtocol == factory.getProtocol("http://somehost"));
     } catch (ProtocolNotFound e) {
       fail("Must not throw any exception");
     }
   }
-  
-  public void testContains(){
+
+  public void testContains() {
     assertTrue(factory.contains("http", "http"));
     assertTrue(factory.contains("http", "http,ftp"));
     assertTrue(factory.contains("http", "   http ,   ftp"));
     assertTrue(factory.contains("smb", "ftp,smb,http"));
     assertFalse(factory.contains("smb", "smbb"));
   }
-  
+
 }
Index: src/test/org/apache/nutch/protocol/TestContent.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContent.java	(revision 1188268)
+++ src/test/org/apache/nutch/protocol/TestContent.java	(working copy)
@@ -26,14 +26,15 @@
 
 import junit.framework.TestCase;
 
-
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
 
-  public TestContent(String name) { super(name); }
+  public TestContent(String name) {
+    super(name);
+  }
 
   public void testContent() throws Exception {
 
@@ -46,7 +47,7 @@
     metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
-                            metaData, conf);
+        metaData, conf);
 
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
@@ -59,52 +60,36 @@
     Content c = null;
     Metadata p = new Metadata();
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "text/html; charset=UTF-8", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.png",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertNotNull(c.getContentType());
   }
 
Index: src/test/org/apache/nutch/storage/TestGoraStorage.java
===================================================================
--- src/test/org/apache/nutch/storage/TestGoraStorage.java	(revision 1188268)
+++ src/test/org/apache/nutch/storage/TestGoraStorage.java	(working copy)
@@ -51,7 +51,7 @@
     readWrite(id, webPageStore);
   }
 
-  private static void readWrite(String id, DataStore<String, WebPage> store) 
+  private static void readWrite(String id, DataStore<String, WebPage> store)
       throws IOException {
     WebPage page = new WebPage();
     int max = 1000;
@@ -121,9 +121,9 @@
       assertEquals(0, (int) result.get());
     }
   }
-  
+
   /**
-   * Tests multiple processes reading and writing to the same store backend, 
+   * Tests multiple processes reading and writing to the same store backend,
    * this is to simulate a multi process Nutch environment (i.e. MapReduce).
    * 
    * @throws Exception
@@ -131,7 +131,7 @@
   public void testMultiProcess() throws Exception {
     // create and start a hsql server, a stand-alone (memory backed) db
     // (important: a stand-alone server should be used because simple
-    //  file based access i.e. jdbc:hsqldb:file is NOT process-safe.)
+    // file based access i.e. jdbc:hsqldb:file is NOT process-safe.)
     Server server = new Server();
     server.setDaemon(true);
     server.setSilent(true); // disables LOTS of trace
@@ -139,11 +139,11 @@
     server.setDatabasePath(0, "mem:" + className);
     server.setDatabaseName(0, className);
     server.start();
-    
+
     // create a fixed thread pool
     int numThreads = 4;
     ExecutorService pool = Executors.newFixedThreadPool(numThreads);
-    
+
     // spawn multiple processes, each thread spawns own process
     Collection<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>();
     for (int i = 0; i < numThreads; i++) {
@@ -155,15 +155,16 @@
             String classpath = System.getProperty("java.class.path");
             String path = System.getProperty("java.home") + separator + "bin"
                 + separator + "java";
-            ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp", 
+            ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp",
                 classpath, className);
             processBuilder.redirectErrorStream(true);
             Process process = processBuilder.start();
             InputStream in = process.getInputStream();
             int exit = process.waitFor();
-            //print the output of the process
-            System.out.println("===Process stream for " + Thread.currentThread() 
-                + "\n" + IOUtils.toString(in) + "===End of process stream.");
+            // print the output of the process
+            System.out.println("===Process stream for "
+                + Thread.currentThread() + "\n" + IOUtils.toString(in)
+                + "===End of process stream.");
             in.close();
             // process should exit with zero code
             return exit;
@@ -183,8 +184,8 @@
     for (Future<Integer> result : results) {
       assertEquals(0, (int) result.get());
     }
-    
-    //stop db
+
+    // stop db
     server.stop();
   }
 
@@ -193,14 +194,16 @@
     System.out.println("Starting!");
 
     Configuration localConf = CrawlTestUtil.createConfiguration();
-    localConf.set("storage.data.store.class", "org.apache.gora.sql.store.SqlStore");
+    localConf.set("storage.data.store.class",
+        "org.apache.gora.sql.store.SqlStore");
 
-    //connect to local sql service
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.driver","org.hsqldb.jdbcDriver");
+    // connect to local sql service
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.driver",
+        "org.hsqldb.jdbcDriver");
     DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.url",
-        "jdbc:hsqldb:hsql://localhost/"+TestGoraStorage.class.getName());
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.user","sa");
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.password","");
+        "jdbc:hsqldb:hsql://localhost/" + TestGoraStorage.class.getName());
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.user", "sa");
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.password", "");
 
     DataStore<String, WebPage> store = StorageUtils.createWebStore(localConf,
         String.class, WebPage.class);
Index: src/test/org/apache/nutch/net/TestURLNormalizers.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLNormalizers.java	(revision 1188268)
+++ src/test/org/apache/nutch/net/TestURLNormalizers.java	(working copy)
@@ -30,30 +30,38 @@
     String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer";
     String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer";
     conf.set("urlnormalizer.order", clazz1 + " " + clazz2);
-    
-    URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT);
-    
+
+    URLNormalizers normalizers = new URLNormalizers(conf,
+        URLNormalizers.SCOPE_DEFAULT);
+
     assertNotNull(normalizers);
     try {
-      normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT);
+      normalizers.normalize("http://www.example.com/",
+          URLNormalizers.SCOPE_DEFAULT);
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // NUTCH-1011 - Get rid of superfluous slashes
     try {
-      String normalizedSlashes = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT);
-      assertEquals(normalizedSlashes, "http://www.example.org/path/to/somewhere.html");
+      String normalizedSlashes = normalizers.normalize(
+          "http://www.example.org//path/to//somewhere.html",
+          URLNormalizers.SCOPE_DEFAULT);
+      assertEquals(normalizedSlashes,
+          "http://www.example.org/path/to/somewhere.html");
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // check the order
     int pos1 = -1, pos2 = -1;
-    URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
+    URLNormalizer[] impls = normalizers
+        .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
     for (int i = 0; i < impls.length; i++) {
-      if (impls[i].getClass().getName().equals(clazz1)) pos1 = i;
-      if (impls[i].getClass().getName().equals(clazz2)) pos2 = i;
+      if (impls[i].getClass().getName().equals(clazz1))
+        pos1 = i;
+      if (impls[i].getClass().getName().equals(clazz2))
+        pos2 = i;
     }
     if (pos1 != -1 && pos2 != -1) {
       assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2);
Index: src/test/org/apache/nutch/net/TestURLFilters.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLFilters.java	(revision 1188268)
+++ src/test/org/apache/nutch/net/TestURLFilters.java	(working copy)
@@ -25,6 +25,7 @@
 
   /**
    * Testcase for NUTCH-325.
+   * 
    * @throws URLFilterException
    */
   public void testNonExistingUrlFilter() throws URLFilterException {
Index: src/test/org/apache/nutch/crawl/DummyWritable.java
===================================================================
--- src/test/org/apache/nutch/crawl/DummyWritable.java	(revision 1188268)
+++ src/test/org/apache/nutch/crawl/DummyWritable.java	(working copy)
@@ -21,12 +21,12 @@
 
 public class DummyWritable extends IntWritable {
 
-    public DummyWritable() {
+  public DummyWritable() {
 
-    }
+  }
 
-    public DummyWritable(int i) {
-        super(i);
-    }
+  public DummyWritable(int i) {
+    super(i);
+  }
 
 }
Index: src/test/org/apache/nutch/crawl/TestGenerator.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestGenerator.java	(revision 1188268)
+++ src/test/org/apache/nutch/crawl/TestGenerator.java	(working copy)
@@ -33,23 +33,21 @@
  * Basic generator test. 1. Insert entries in webtable 2. Generates entries to
  * fetch 3. Verifies that number of generated urls match 4. Verifies that
  * highest scoring urls are generated
- *
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
  * @param <URLWebPage>
- *
+ * 
  */
 public class TestGenerator extends AbstractNutchTest {
 
   public static final Logger LOG = LoggerFactory.getLogger(TestGenerator.class);
 
   private static String[] FIELDS = new String[] {
-    WebPage.Field.MARKERS.getName(),
-    WebPage.Field.SCORE.getName()
-  };
+      WebPage.Field.MARKERS.getName(), WebPage.Field.SCORE.getName() };
 
   /**
    * Test that generator generates fetchlist ordered by score (desc).
-   *
+   * 
    * @throws Exception
    */
   public void testGenerateHighest() throws Exception {
@@ -69,7 +67,8 @@
 
     generateFetchlist(NUM_RESULTS, conf, false);
 
-    ArrayList<URLWebPage> l = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> l = CrawlTestUtil.readContents(webPageStore,
+        Mark.GENERATE_MARK, FIELDS);
 
     // sort urls by score desc
     Collections.sort(l, new ScoreComparator());
@@ -108,7 +107,7 @@
 
   /**
    * Test that generator obeys the property "generate.max.per.host".
-   *
+   * 
    * @throws Exception
    */
   public void testGenerateHostLimit() throws Exception {
@@ -125,10 +124,12 @@
 
     Configuration myConfiguration = new Configuration(conf);
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
-    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore,
+        Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(1, fetchList.size());
@@ -137,7 +138,8 @@
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK,
+        FIELDS);
 
     // verify we got right amount of records
     assertEquals(2, fetchList.size());
@@ -146,7 +148,8 @@
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK,
+        FIELDS);
 
     // verify we got right amount of records
     assertEquals(3, fetchList.size());
@@ -155,7 +158,7 @@
   /**
    * Test that generator obeys the property "generator.max.count" and
    * "generator.count.value=domain".
-   *
+   * 
    * @throws Exception
    */
   public void testGenerateDomainLimit() throws Exception {
@@ -175,11 +178,13 @@
 
     Configuration myConfiguration = new Configuration(conf);
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
-    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN);
+    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN);
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore,
+        Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(1, fetchList.size());
@@ -188,7 +193,8 @@
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK,
+        FIELDS);
 
     // verify we got right amount of records
     assertEquals(2, fetchList.size());
@@ -197,7 +203,8 @@
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK,
+        FIELDS);
 
     // verify we got right amount of records
     assertEquals(3, fetchList.size());
@@ -205,7 +212,7 @@
 
   /**
    * Test generator obeys the filter setting.
-   *
+   * 
    * @throws Exception
    * @throws IOException
    */
@@ -227,13 +234,15 @@
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, true);
 
-    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore,
+        Mark.GENERATE_MARK, FIELDS);
 
     assertEquals(0, fetchList.size());
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK,
+        FIELDS);
 
     // verify nothing got filtered
     assertEquals(list.size(), fetchList.size());
@@ -242,7 +251,7 @@
 
   /**
    * Generate Fetchlist.
-   *
+   * 
    * @param numResults
    *          number of results to generate
    * @param config
@@ -255,14 +264,15 @@
     // generate segment
     GeneratorJob g = new GeneratorJob();
     g.setConf(config);
-    String batchId = g.generate(numResults, System.currentTimeMillis(), filter, false);
+    String batchId = g.generate(numResults, System.currentTimeMillis(), filter,
+        false);
     if (batchId == null)
       throw new RuntimeException("Generator failed");
   }
 
   /**
    * Constructs new {@link URLWebPage} from submitted parameters.
-   *
+   * 
    * @param url
    *          url to use
    * @param fetchInterval
Index: src/test/org/apache/nutch/crawl/TestSignatureFactory.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(revision 1188268)
+++ src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(working copy)
@@ -24,9 +24,9 @@
 public class TestSignatureFactory extends TestCase {
 
   public void testGetSignature() {
-    Configuration conf=NutchConfiguration.create();
-    Signature signature1=SignatureFactory.getSignature(conf);
-    Signature signature2=SignatureFactory.getSignature(conf);
+    Configuration conf = NutchConfiguration.create();
+    Signature signature1 = SignatureFactory.getSignature(conf);
+    Signature signature2 = SignatureFactory.getSignature(conf);
     assertNotNull(signature1);
     assertNotNull(signature2);
     assertEquals(signature1, signature2);
Index: src/test/org/apache/nutch/crawl/TestInjector.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestInjector.java	(revision 1188268)
+++ src/test/org/apache/nutch/crawl/TestInjector.java	(working copy)
@@ -33,7 +33,7 @@
  * Basic injector test: 1. Creates a text file with urls 2. Injects them into
  * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
  * into webdb 5. Reads crawldb entries and verifies contents
- *
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
  */
 public class TestInjector extends AbstractNutchTest {
@@ -95,21 +95,21 @@
   }
 
   private static final String[] fields = new String[] {
-    WebPage.Field.MARKERS.getName(),
-    WebPage.Field.METADATA.getName(),
-    WebPage.Field.SCORE.getName()
-  };
+      WebPage.Field.MARKERS.getName(), WebPage.Field.METADATA.getName(),
+      WebPage.Field.SCORE.getName() };
 
   private List<String> readDb() throws Exception {
-    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, null, fields);
+    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, null,
+        fields);
     ArrayList<String> read = new ArrayList<String>();
     for (URLWebPage up : pages) {
       WebPage page = up.getDatum();
       String representation = up.getUrl();
-      representation += "\tnutch.score=" + (int)page.getScore();
+      representation += "\tnutch.score=" + (int) page.getScore();
       ByteBuffer bb = page.getFromMetadata(new Utf8("custom.attribute"));
       if (bb != null) {
-        representation += "\tcustom.attribute=" + ByteUtils.toString(bb.array());
+        representation += "\tcustom.attribute="
+            + ByteUtils.toString(bb.array());
       }
       read.add(representation);
     }
Index: src/test/org/apache/nutch/api/TestAPI.java
===================================================================
--- src/test/org/apache/nutch/api/TestAPI.java	(revision 1188268)
+++ src/test/org/apache/nutch/api/TestAPI.java	(working copy)
@@ -32,18 +32,18 @@
 import org.restlet.resource.ClientResource;
 
 public class TestAPI {
-  
+
   private static NutchServer server;
   ClientResource cli;
-  
+
   private static String baseUrl = "http://localhost:8192/nutch/";
-  
+
   @BeforeClass
   public static void before() throws Exception {
     server = new NutchServer(8192);
     server.start();
   }
-  
+
   @AfterClass
   public static void after() throws Exception {
     if (!server.stop(false)) {
@@ -51,7 +51,9 @@
         System.err.println("Waiting for jobs to complete - " + i + "s");
         try {
           Thread.sleep(1000);
-        } catch (Exception e) {};
+        } catch (Exception e) {
+        }
+        ;
         server.stop(false);
         if (!server.isRunning()) {
           break;
@@ -63,7 +65,7 @@
       server.stop(true);
     }
   }
-  
+
   @Test
   public void testInfoAPI() throws Exception {
     ClientResource cli = new ClientResource(baseUrl);
@@ -71,20 +73,20 @@
     String got = cli.get().getText();
     assertEquals(expected, got);
   }
-  
+
   @SuppressWarnings("rawtypes")
   @Test
   public void testConfsAPI() throws Exception {
     ClientResource cli = new ClientResource(baseUrl + ConfResource.PATH);
     assertEquals("[\"default\"]", cli.get().getText());
     // create
-    Map<String,Object> map = new HashMap<String,Object>();
+    Map<String, Object> map = new HashMap<String, Object>();
     map.put(Params.CONF_ID, "test");
-    HashMap<String,String> props = new HashMap<String,String>();
+    HashMap<String, String> props = new HashMap<String, String>();
     props.put("testProp", "blurfl");
     map.put(Params.PROPS, props);
-    JacksonRepresentation<Map<String,Object>> jr =
-      new JacksonRepresentation<Map<String,Object>>(map);
+    JacksonRepresentation<Map<String, Object>> jr = new JacksonRepresentation<Map<String, Object>>(
+        map);
     System.out.println(cli.put(jr).getText());
     assertEquals("[\"default\",\"test\"]", cli.get().getText());
     cli = new ClientResource(baseUrl + ConfResource.PATH + "/test");
@@ -95,14 +97,14 @@
     cli = new ClientResource(baseUrl + ConfResource.PATH);
     assertEquals("[\"default\"]", cli.get().getText());
   }
-  
+
   @SuppressWarnings("rawtypes")
   @Test
   public void testJobsAPI() throws Exception {
     ClientResource cli = new ClientResource(baseUrl + JobResource.PATH);
     assertEquals("[]", cli.get().getText());
     // create
-    Map<String,Object> map = new HashMap<String,Object>();
+    Map<String, Object> map = new HashMap<String, Object>();
     map.put(Params.JOB_TYPE, JobType.READDB.toString());
     map.put(Params.CONF_ID, "default");
     Representation r = cli.put(map);
@@ -112,36 +114,39 @@
     // list
     Map[] list = cli.get(Map[].class);
     assertEquals(1, list.length);
-    String id = (String)list[0].get("id");
-    String state = (String)list[0].get("state");
+    String id = (String) list[0].get("id");
+    String state = (String) list[0].get("state");
     assertEquals(jobId, id);
     assertEquals(state, "RUNNING");
     int cnt = 10;
     do {
       try {
         Thread.sleep(2000);
-      } catch (Exception e) {};
+      } catch (Exception e) {
+      }
+      ;
       list = cli.get(Map[].class);
-      state = (String)list[0].get("state");
+      state = (String) list[0].get("state");
       if (!state.equals("RUNNING")) {
         break;
       }
     } while (--cnt > 0);
     assertTrue(cnt > 0);
-    if (list == null) return;
+    if (list == null)
+      return;
     for (Map m : list) {
       System.out.println(m);
     }
   }
-  
+
   @SuppressWarnings("unchecked")
   @Test
   public void testStopKill() throws Exception {
     ClientResource cli = new ClientResource(baseUrl + JobResource.PATH);
     // create
-    Map<String,Object> map = new HashMap<String,Object>();
+    Map<String, Object> map = new HashMap<String, Object>();
     map.put(Params.JOB_TYPE, JobType.CLASS.toString());
-    Map<String,Object> args = new HashMap<String,Object>();
+    Map<String, Object> args = new HashMap<String, Object>();
     map.put(Params.ARGS, args);
     args.put(Nutch.ARG_CLASS, SpinningJob.class.getName());
     map.put(Params.CONF_ID, "default");
@@ -151,15 +156,16 @@
     assertNotNull(jobId);
     System.out.println(jobId);
     assertTrue(jobId.startsWith("default-CLASS-"));
-    ClientResource stopCli = new ClientResource(baseUrl + JobResource.PATH +
-        "?job=" + jobId + "&cmd=stop");
+    ClientResource stopCli = new ClientResource(baseUrl + JobResource.PATH
+        + "?job=" + jobId + "&cmd=stop");
     r = stopCli.get();
     assertEquals("true", r.getText());
     stopCli.release();
     Thread.sleep(2000); // wait for the job to finish
-    ClientResource jobCli = new ClientResource(baseUrl + JobResource.PATH + "/" + jobId);
-    Map<String,Object> res = jobCli.get(Map.class);
-    res = (Map<String,Object>)res.get("result");
+    ClientResource jobCli = new ClientResource(baseUrl + JobResource.PATH + "/"
+        + jobId);
+    Map<String, Object> res = jobCli.get(Map.class);
+    res = (Map<String, Object>) res.get("result");
     assertEquals("stopped", res.get("res"));
     jobCli.release();
     // restart and kill
@@ -169,19 +175,19 @@
     assertNotNull(jobId);
     System.out.println(jobId);
     assertTrue(jobId.startsWith("default-CLASS-"));
-    ClientResource killCli = new ClientResource(baseUrl + JobResource.PATH +
-        "?job=" + jobId + "&cmd=abort");
+    ClientResource killCli = new ClientResource(baseUrl + JobResource.PATH
+        + "?job=" + jobId + "&cmd=abort");
     r = killCli.get();
     assertEquals("true", r.getText());
     killCli.release();
     Thread.sleep(2000); // wait for the job to finish
     jobCli = new ClientResource(baseUrl + JobResource.PATH + "/" + jobId);
     res = jobCli.get(Map.class);
-    res = (Map<String,Object>)res.get("result");
+    res = (Map<String, Object>) res.get("result");
     assertEquals("killed", res.get("res"));
     jobCli.release();
   }
-  
+
   public static class SpinningJob extends NutchTool {
     volatile boolean shouldStop = false;
 
@@ -211,6 +217,6 @@
       shouldStop = true;
       return true;
     }
-    
+
   }
 }
Index: src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
===================================================================
--- src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(revision 1188268)
+++ src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(working copy)
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.nutch.parse;
 
 import org.apache.nutch.parse.Outlink;
@@ -34,47 +34,57 @@
 public class TestOutlinkExtractor extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
+
   public void testGetNoOutlinks() {
-    Outlink[]  outlinks = null;
-            
+    Outlink[] outlinks = null;
+
     outlinks = OutlinkExtractor.getOutlinks(null, conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
-    
+
     outlinks = OutlinkExtractor.getOutlinks("", conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
   }
-  
+
   public void testGetOutlinksHttp() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
-  
+
   public void testGetOutlinksHttp2() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            "http://www.sybit.de", conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
+
   public void testGetOutlinksFtp() {
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with ftp://www.nutch.org is it found? " +
-        "What about www.google.com at ftp://www.google.de", conf);
-    
-    assertTrue("Url not found!", outlinks.length >1);
+        "Test with ftp://www.nutch.org is it found? "
+            + "What about www.google.com at ftp://www.google.de", conf);
+
+    assertTrue("Url not found!", outlinks.length > 1);
     assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl());
     assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl());
   }
Index: src/test/org/apache/nutch/parse/TestParserFactory.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParserFactory.java	(revision 1188268)
+++ src/test/org/apache/nutch/parse/TestParserFactory.java	(working copy)
@@ -27,76 +27,80 @@
 
 /**
  * Unit test for new parse plugin selection.
- *
+ * 
  * @author Sebastien Le Callonnec
  * @version 1.0
  */
 public class TestParserFactory extends TestCase {
-	
+
   private Configuration conf;
   private ParserFactory parserFactory;
-    
-  public TestParserFactory(String name) { super(name); }
 
+  public TestParserFactory(String name) {
+    super(name);
+  }
+
   /** Inits the Test Case with the test parse-plugin file */
   protected void setUp() throws Exception {
-      conf = NutchConfiguration.create();
-      conf.set("plugin.includes", ".*");
-      conf.set("parse.plugin.file",
-               "org/apache/nutch/parse/parse-plugin-test.xml");
-      parserFactory = new ParserFactory(conf);
+    conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    conf.set("parse.plugin.file",
+        "org/apache/nutch/parse/parse-plugin-test.xml");
+    parserFactory = new ParserFactory(conf);
   }
-    
+
   /** Unit test for <code>getExtensions(String)</code> method. */
   public void testGetExtensions() throws Exception {
-    Extension ext = (Extension)parserFactory.getExtensions("text/html").get(0);
+    Extension ext = (Extension) parserFactory.getExtensions("text/html").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension) parserFactory.getExtensions("text/html; charset=ISO-8859-1").get(0);
+    ext = (Extension) parserFactory.getExtensions(
+        "text/html; charset=ISO-8859-1").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension)parserFactory.getExtensions("foo/bar").get(0);
+    ext = (Extension) parserFactory.getExtensions("foo/bar").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
   }
-  
+
   /** Unit test to check <code>getParsers</code> method */
   public void testGetParsers() throws Exception {
-    Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com");
+    Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
 
     parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("application/x-javascript",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.js.JSParseFilter",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("text/plain", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0];
     Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0];
-   
+
     assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode());
-    
-    //test and make sure that the rss parser is loaded even though its plugin.xml
-    //doesn't claim to support text/rss, only application/rss+xml
-    parsers = parserFactory.getParsers("text/rss","http://foo.com");
+
+    // test and make sure that the rss parser is loaded even though its
+    // plugin.xml
+    // doesn't claim to support text/rss, only application/rss+xml
+    parsers = parserFactory.getParsers("text/rss", "http://foo.com");
     assertNotNull(parsers);
-    assertEquals(1,parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
   }
- 
+
 }
Index: src/test/org/apache/nutch/util/TestSuffixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for SuffixStringMatcher. */
 public class TestSuffixStringMatcher extends TestCase {
-  public TestSuffixStringMatcher(String name) { 
-    super(name); 
+  public TestSuffixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_SUFFIXES= 100;
-  private final static int MAX_SUFFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_SUFFIXES = 100;
+  private final static int MAX_SUFFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testSuffixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of suffixes
-      int numSuffixes= (int) (Math.random() * MAX_TEST_SUFFIXES);
-      String[] suffixes= new String[numSuffixes];
-      for (int i= 0; i < numSuffixes; i++) {
-        suffixes[i]= makeRandString(0, MAX_SUFFIX_LEN);
+      int numSuffixes = (int) (Math.random() * MAX_TEST_SUFFIXES);
+      String[] suffixes = new String[numSuffixes];
+      for (int i = 0; i < numSuffixes; i++) {
+        suffixes[i] = makeRandString(0, MAX_SUFFIX_LEN);
       }
 
-      SuffixStringMatcher sufmatcher= new SuffixStringMatcher(suffixes);
+      SuffixStringMatcher sufmatcher = new SuffixStringMatcher(suffixes);
 
       // test random strings for suffix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < suffixes.length; j++) {
+        for (int j = 0; j < suffixes.length; j++) {
 
-          if ((suffixes[j].length() > 0) 
-              && input.endsWith(suffixes[j])) {
+          if ((suffixes[j].length() > 0) && input.endsWith(suffixes[j])) {
 
-            matches= true;
-            int matchSize= suffixes[j].length();
+            matches = true;
+            int matchSize = suffixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == sufmatcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == sufmatcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == sufmatcher.shortestMatch(input).length());
-          assertTrue( input.substring(input.length() - shortestMatch).equals(
-                        sufmatcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == sufmatcher.shortestMatch(input).length());
+          assertTrue(input.substring(input.length() - shortestMatch).equals(
+              sufmatcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == sufmatcher.longestMatch(input).length());
-          assertTrue( input.substring(input.length() - longestMatch).equals(
-                        sufmatcher.longestMatch(input)) );
+          assertTrue(longestMatch == sufmatcher.longestMatch(input).length());
+          assertTrue(input.substring(input.length() - longestMatch).equals(
+              sufmatcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestEncodingDetector.java
===================================================================
--- src/test/org/apache/nutch/util/TestEncodingDetector.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestEncodingDetector.java	(working copy)
@@ -47,7 +47,7 @@
     // first disable auto detection
     conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, -1);
 
-    //Metadata metadata = new Metadata();
+    // Metadata metadata = new Metadata();
     EncodingDetector detector;
     // Content content;
     String encoding;
@@ -67,8 +67,9 @@
     page.setBaseUrl(new Utf8("http://www.example.com/"));
     page.setContentType(new Utf8("text/plain"));
     page.setContent(ByteBuffer.wrap(contentInOctets));
-    page.putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16"));
-    
+    page.putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8(
+        "text/plain; charset=UTF-16"));
+
     detector = new EncodingDetector(conf);
     detector.autoDetectClues(page, true);
     encoding = detector.guessEncoding(page, "windows-1252");
@@ -78,7 +79,7 @@
     page.setBaseUrl(new Utf8("http://www.example.com/"));
     page.setContentType(new Utf8("text/plain"));
     page.setContent(ByteBuffer.wrap(contentInOctets));
-    
+
     detector = new EncodingDetector(conf);
     detector.autoDetectClues(page, true);
     detector.addClue("windows-1254", "sniffed");
@@ -91,8 +92,9 @@
     page.setBaseUrl(new Utf8("http://www.example.com/"));
     page.setContentType(new Utf8("text/plain"));
     page.setContent(ByteBuffer.wrap(contentInOctets));
-    page.putToMetadata(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
-    
+    page.putToMetadata(new Utf8(Response.CONTENT_TYPE),
+        ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
+
     detector = new EncodingDetector(conf);
     detector.autoDetectClues(page, true);
     detector.addClue("utf-32", "sniffed");
Index: src/test/org/apache/nutch/util/TestURLUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestURLUtil.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestURLUtil.java	(working copy)
@@ -22,17 +22,14 @@
 import junit.framework.TestCase;
 
 /** Test class for URLUtil */
-public class TestURLUtil
-  extends TestCase {
+public class TestURLUtil extends TestCase {
 
   @Override
-  protected void setUp()
-    throws Exception {
+  protected void setUp() throws Exception {
     super.setUp();
   }
 
-  public void testGetDomainName()
-    throws Exception {
+  public void testGetDomainName() throws Exception {
 
     URL url = null;
 
@@ -81,8 +78,7 @@
 
   }
 
-  public void testGetDomainSuffix()
-    throws Exception {
+  public void testGetDomainSuffix() throws Exception {
     URL url = null;
 
     url = new URL("http://lucene.apache.org/nutch");
@@ -133,8 +129,7 @@
 
   }
 
-  public void testGetHostSegments()
-    throws Exception {
+  public void testGetHostSegments() throws Exception {
     URL url;
     String[] segments;
 
@@ -165,9 +160,8 @@
 
   }
 
-  public void testChooseRepr()
-    throws Exception {
-    
+  public void testChooseRepr() throws Exception {
+
     String aDotCom = "http://www.a.com";
     String bDotCom = "http://www.b.com";
     String aSubDotCom = "http://www.news.a.com";
@@ -175,40 +169,41 @@
     String aPath = "http://www.a.com/xyz/index.html";
     String aPath2 = "http://www.a.com/abc/page.html";
     String aPath3 = "http://www.news.a.com/abc/page.html";
-    
+
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, true));
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, false));
-    
+
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aQStr, false));
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, false));
-    
-    //3) permanent and not root and dest root, keep dest
-    //a.com/xyz/index.html -> a.com*
+
+    // 3) permanent and not root and dest root, keep dest
+    // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, false));
-    
-    //4) permanent and neither root keep dest
+
+    // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, false));
-    
-    //5) temp and root and dest not root keep src
-    //*a.com -> a.com/xyz/index.html
+
+    // 5) temp and root and dest not root keep src
+    // *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, true));
-    
-    //6) temp and not root and dest root keep dest
+
+    // 6) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, true));
 
-    //7) temp and neither root, keep shortest, if hosts equal by path else by hosts
-    //  a.com/xyz/index.html -> a.com/abc/page.html*
+    // 7) temp and neither root, keep shortest, if hosts equal by path else by
+    // hosts
+    // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, true));
     assertEquals(aPath, URLUtil.chooseRepr(aPath, aPath3, true));
 
-    //8) temp and both root keep shortest sub domain
+    // 8) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aSubDotCom, true));
   }
Index: src/test/org/apache/nutch/util/TestStringUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestStringUtil.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestStringUtil.java	(working copy)
@@ -21,41 +21,41 @@
 
 /** Unit tests for StringUtil methods. */
 public class TestStringUtil extends TestCase {
-  public TestStringUtil(String name) { 
-    super(name); 
+  public TestStringUtil(String name) {
+    super(name);
   }
 
   public void testRightPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.rightPad(s, 0);
+    String ps = StringUtil.rightPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 9);
+    ps = StringUtil.rightPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 10);
-    assertTrue( (s+" ").equals(ps) );
+    ps = StringUtil.rightPad(s, 10);
+    assertTrue((s + " ").equals(ps));
 
-    ps= StringUtil.rightPad(s, 15);
-    assertTrue( (s+"      ").equals(ps) );
+    ps = StringUtil.rightPad(s, 15);
+    assertTrue((s + "      ").equals(ps));
 
   }
 
   public void testLeftPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.leftPad(s, 0);
+    String ps = StringUtil.leftPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 9);
+    ps = StringUtil.leftPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 10);
-    assertTrue( (" "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 10);
+    assertTrue((" " + s).equals(ps));
 
-    ps= StringUtil.leftPad(s, 15);
-    assertTrue( ("      "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 15);
+    assertTrue(("      " + s).equals(ps));
 
   }
 
Index: src/test/org/apache/nutch/util/AbstractNutchTest.java
===================================================================
--- src/test/org/apache/nutch/util/AbstractNutchTest.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/AbstractNutchTest.java	(working copy)
@@ -58,11 +58,13 @@
     conf.set("storage.data.store.class", "org.apache.gora.sql.store.SqlStore");
     fs = FileSystem.get(conf);
     // using hsqldb in memory
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.driver","org.hsqldb.jdbcDriver");
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.driver",
+        "org.hsqldb.jdbcDriver");
     // use separate in-memory db-s for tests
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.url","jdbc:hsqldb:mem:" + getClass().getName());
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.user","sa");
-    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.password","");
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.url",
+        "jdbc:hsqldb:mem:" + getClass().getName());
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.user", "sa");
+    DataStoreFactory.properties.setProperty("gora.sqlstore.jdbc.password", "");
     webPageStore = StorageUtils.createWebStore(conf, String.class,
         WebPage.class);
   }
Index: src/test/org/apache/nutch/util/TestTableUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestTableUtil.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestTableUtil.java	(working copy)
@@ -40,10 +40,10 @@
     assertReverse(urlString1, reversedUrlString1);
     assertReverse(urlString2, reversedUrlString2);
     assertReverse(urlString3, reversedUrlString3);
-    assertReverse(urlString4, reversedUrlString4); 
-    assertReverse(urlString5, reversedUrlString5); 
-    assertReverse(urlString5, reversedUrlString5); 
-    assertReverse(urlString6, reversedUrlString6); 
+    assertReverse(urlString4, reversedUrlString4);
+    assertReverse(urlString5, reversedUrlString5);
+    assertReverse(urlString5, reversedUrlString5);
+    assertReverse(urlString6, reversedUrlString6);
   }
 
   public void testUnreverseUrl() throws Exception {
@@ -55,7 +55,8 @@
     assertUnreverse(reversedUrlString6, urlString6);
   }
 
-  private static void assertReverse(String url, String expectedReversedUrl) throws Exception {
+  private static void assertReverse(String url, String expectedReversedUrl)
+      throws Exception {
     String reversed = TableUtil.reverseUrl(url);
     assertEquals(expectedReversedUrl, reversed);
   }
Index: src/test/org/apache/nutch/util/CrawlTestUtil.java
===================================================================
--- src/test/org/apache/nutch/util/CrawlTestUtil.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/CrawlTestUtil.java	(working copy)
@@ -45,7 +45,8 @@
 
 public class CrawlTestUtil {
 
-  private static final Logger LOG = LoggerFactory.getLogger(CrawlTestUtil.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(CrawlTestUtil.class);
 
   /**
    * For now we need to manually construct our Configuration, because we need to
@@ -95,15 +96,16 @@
     out.flush();
     out.close();
   }
-  
+
   /**
    * Read entries from a data store
-   *
+   * 
    * @return list of matching {@link URLWebPage} objects
    * @throws IOException
    */
-  public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
-      Mark requiredMark, String... fields) throws IOException {
+  public static ArrayList<URLWebPage> readContents(
+      DataStore<String, WebPage> store, Mark requiredMark, String... fields)
+      throws IOException {
     ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
 
     Query<String, WebPage> query = store.newQuery();
@@ -122,13 +124,12 @@
       if (requiredMark != null && requiredMark.checkMark(page) == null)
         continue;
 
-      l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
+      l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage) page.clone()));
     }
 
     return l;
   }
 
-
   /**
    * Creates a new JettyServer with one static root context
    * 
@@ -144,7 +145,7 @@
     ResourceHandler handler = new ResourceHandler();
     handler.setResourceBase(staticContent);
     HandlerList handlers = new HandlerList();
-    handlers.setHandlers(new Handler[]{handler, new DefaultHandler()});
+    handlers.setHandlers(new Handler[] { handler, new DefaultHandler() });
     webServer.setHandler(handlers);
     return webServer;
   }
Index: src/test/org/apache/nutch/util/TestPrefixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for PrefixStringMatcher. */
 public class TestPrefixStringMatcher extends TestCase {
-  public TestPrefixStringMatcher(String name) { 
-    super(name); 
+  public TestPrefixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_PREFIXES= 100;
-  private final static int MAX_PREFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_PREFIXES = 100;
+  private final static int MAX_PREFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testPrefixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of prefixes
-      int numPrefixes= (int) (Math.random() * MAX_TEST_PREFIXES);
-      String[] prefixes= new String[numPrefixes];
-      for (int i= 0; i < numPrefixes; i++) {
-        prefixes[i]= makeRandString(0, MAX_PREFIX_LEN);
+      int numPrefixes = (int) (Math.random() * MAX_TEST_PREFIXES);
+      String[] prefixes = new String[numPrefixes];
+      for (int i = 0; i < numPrefixes; i++) {
+        prefixes[i] = makeRandString(0, MAX_PREFIX_LEN);
       }
 
-      PrefixStringMatcher prematcher= new PrefixStringMatcher(prefixes);
+      PrefixStringMatcher prematcher = new PrefixStringMatcher(prefixes);
 
       // test random strings for prefix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < prefixes.length; j++) {
+        for (int j = 0; j < prefixes.length; j++) {
 
-          if ((prefixes[j].length() > 0) 
-              && input.startsWith(prefixes[j])) {
+          if ((prefixes[j].length() > 0) && input.startsWith(prefixes[j])) {
 
-            matches= true;
-            int matchSize= prefixes[j].length();
+            matches = true;
+            int matchSize = prefixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == prematcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == prematcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == prematcher.shortestMatch(input).length());
-          assertTrue( input.substring(0, shortestMatch).equals(
-                        prematcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == prematcher.shortestMatch(input).length());
+          assertTrue(input.substring(0, shortestMatch).equals(
+              prematcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == prematcher.longestMatch(input).length());
-          assertTrue( input.substring(0, longestMatch).equals(
-                        prematcher.longestMatch(input)) );
+          assertTrue(longestMatch == prematcher.longestMatch(input).length());
+          assertTrue(input.substring(0, longestMatch).equals(
+              prematcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestGZIPUtils.java
===================================================================
--- src/test/org/apache/nutch/util/TestGZIPUtils.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestGZIPUtils.java	(working copy)
@@ -23,223 +23,216 @@
 
 /** Unit tests for GZIPUtils methods. */
 public class TestGZIPUtils extends TestCase {
-  public TestGZIPUtils(String name) { 
-    super(name); 
+  public TestGZIPUtils(String name) {
+    super(name);
   }
 
   /* a short, highly compressable, string */
-  String SHORT_TEST_STRING= 
-    "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
+  String SHORT_TEST_STRING = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
 
   /* a short, highly compressable, string */
-  String LONGER_TEST_STRING= 
-    SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
+  String LONGER_TEST_STRING = SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING;
 
   /* a snapshot of the nutch webpage */
-  String WEBPAGE= 
-  "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
-  + "<html>\n"
-  + "<head>\n"
-  + "  <meta http-equiv=\"content-type\"\n"
-  + " content=\"text/html; charset=ISO-8859-1\">\n"
-  + "  <title>Nutch</title>\n"
-  + "</head>\n"
-  + "<body>\n"
-  + "<h1\n"
-  + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
-  + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
-  + "<small>an open source web-search engine</small></h1>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<table\n"
-  + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
-  + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
-  + "  <tbody>\n"
-  + "    <tr>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"tutorial.html\">Tutorial</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"api/index.html\">Javadoc</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"policies.html\">Policies</a><br>\n"
-  + "      </td>\n"
-  + "    </tr>\n"
-  + "  </tbody>\n"
-  + "</table>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<h2>Introduction</h2>\n"
-  + "Nutch is a nascent effort to implement an open-source web search\n"
-  + "engine. Web search is a basic requirement for internet navigation, yet\n"
-  + "the number of web search engines is decreasing. Today's oligopoly could\n"
-  + "soon be a monopoly, with a single company controlling nearly all web\n"
-  + "search for its commercial gain. &nbsp;That would not be good for the\n"
-  + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
-  + "cost-effectively deploy a world-class web search engine.<br>\n"
-  + "<br>\n"
-  + "To succeed, the Nutch software must be able to:<br>\n"
-  + "<ul>\n"
-  + "  <li> crawl several billion pages per month</li>\n"
-  + "  <li>maintain an index of these pages</li>\n"
-  + "  <li>search that index up to 1000 times per second</li>\n"
-  + "  <li>provide very high quality search results</li>\n"
-  + "  <li>operate at minimal cost</li>\n"
-  + "</ul>\n"
-  + "<h2>Status</h2>\n"
-  + "Currently we're just a handful of developers working part-time to put\n"
-  + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
-  + "persistent data is written in well-documented formats so that modules\n"
-  + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
-  + "project progresses.<br>\n"
-  + "<br>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
-  + " href=\"http://sourceforge.net\"> </a>\n"
-  + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
-  + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
-  + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
-  + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
-  + "</body>\n"
-  + "</html>\n";
+  String WEBPAGE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
+      + "<html>\n"
+      + "<head>\n"
+      + "  <meta http-equiv=\"content-type\"\n"
+      + " content=\"text/html; charset=ISO-8859-1\">\n"
+      + "  <title>Nutch</title>\n"
+      + "</head>\n"
+      + "<body>\n"
+      + "<h1\n"
+      + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
+      + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
+      + "<small>an open source web-search engine</small></h1>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<table\n"
+      + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
+      + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
+      + "  <tbody>\n"
+      + "    <tr>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"tutorial.html\">Tutorial</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"api/index.html\">Javadoc</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"policies.html\">Policies</a><br>\n"
+      + "      </td>\n"
+      + "    </tr>\n"
+      + "  </tbody>\n"
+      + "</table>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<h2>Introduction</h2>\n"
+      + "Nutch is a nascent effort to implement an open-source web search\n"
+      + "engine. Web search is a basic requirement for internet navigation, yet\n"
+      + "the number of web search engines is decreasing. Today's oligopoly could\n"
+      + "soon be a monopoly, with a single company controlling nearly all web\n"
+      + "search for its commercial gain. &nbsp;That would not be good for the\n"
+      + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
+      + "cost-effectively deploy a world-class web search engine.<br>\n"
+      + "<br>\n"
+      + "To succeed, the Nutch software must be able to:<br>\n"
+      + "<ul>\n"
+      + "  <li> crawl several billion pages per month</li>\n"
+      + "  <li>maintain an index of these pages</li>\n"
+      + "  <li>search that index up to 1000 times per second</li>\n"
+      + "  <li>provide very high quality search results</li>\n"
+      + "  <li>operate at minimal cost</li>\n"
+      + "</ul>\n"
+      + "<h2>Status</h2>\n"
+      + "Currently we're just a handful of developers working part-time to put\n"
+      + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
+      + "persistent data is written in well-documented formats so that modules\n"
+      + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
+      + "project progresses.<br>\n"
+      + "<br>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
+      + " href=\"http://sourceforge.net\"> </a>\n"
+      + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
+      + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
+      + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
+      + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
+      + "</body>\n"
+      + "</html>\n";
 
   // tests
 
   public void testZipUnzip() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzip(testBytes);
   }
 
   public void testZipUnzipBestEffort() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzipBestEffort(testBytes);
   }
-  
+
   public void testTruncation() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testTruncation(testBytes);
   }
 
   public void testLimit() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testLimit(testBytes);
   }
 
   // helpers
 
   public void testZipUnzip(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= null;
+    byte[] uncompressedBytes = null;
     try {
-      uncompressedBytes= GZIPUtils.unzip(compressedBytes);
+      uncompressedBytes = GZIPUtils.unzip(compressedBytes);
     } catch (IOException e) {
       e.printStackTrace();
-      assertTrue("caught exception '" + e + "' during unzip()",
-		 false);
+      assertTrue("caught exception '" + e + "' during unzip()", false);
     }
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testZipUnzipBestEffort(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testTruncation(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     System.out.println("original data has len " + origBytes.length);
-    System.out.println("compressed data has len " 
-		       + compressedBytes.length);
+    System.out.println("compressed data has len " + compressedBytes.length);
 
-    for (int i= compressedBytes.length; i >= 0; i--) {
+    for (int i = compressedBytes.length; i >= 0; i--) {
 
-      byte[] truncCompressed= new byte[i];
+      byte[] truncCompressed = new byte[i];
 
-      for (int j= 0; j < i; j++)
-	truncCompressed[j]= compressedBytes[j];
+      for (int j = 0; j < i; j++)
+        truncCompressed[j] = compressedBytes[j];
 
-      byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
+      byte[] trunc = GZIPUtils.unzipBestEffort(truncCompressed);
 
       if (trunc == null) {
-	System.out.println("truncated to len "
-			   + i + ", trunc is null");
+        System.out.println("truncated to len " + i + ", trunc is null");
       } else {
-	System.out.println("truncated to len "
-			   + i + ", trunc.length=  " 
-			   + trunc.length);
+        System.out.println("truncated to len " + i + ", trunc.length=  "
+            + trunc.length);
 
-	for (int j= 0; j < trunc.length; j++)
-	  if (trunc[j] != origBytes[j]) 
-	    assertTrue("truncated/uncompressed array differs at pos "
-		       + j + " (compressed data had been truncated to len "
-		       + i + ")", false);
+        for (int j = 0; j < trunc.length; j++)
+          if (trunc[j] != origBytes[j])
+            assertTrue("truncated/uncompressed array differs at pos " + j
+                + " (compressed data had been truncated to len " + i + ")",
+                false);
       }
     }
   }
 
   public void testLimit(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-               compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) {
+    for (int i = 0; i < origBytes.length; i++) {
 
-      byte[] uncompressedBytes= 
-        GZIPUtils.unzipBestEffort(compressedBytes, i);
+      byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes, i);
 
-      assertTrue("uncompressedBytes is wrong size", 
-                 uncompressedBytes.length == i);
+      assertTrue("uncompressedBytes is wrong size",
+          uncompressedBytes.length == i);
 
-      for (int j= 0; j < i; j++) 
+      for (int j = 0; j < i; j++)
         if (origBytes[j] != uncompressedBytes[j])
           assertTrue("uncompressedBytes does not match origBytes", false);
     }
Index: src/test/org/apache/nutch/util/WritableTestUtils.java
===================================================================
--- src/test/org/apache/nutch/util/WritableTestUtils.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/WritableTestUtils.java	(working copy)
@@ -35,23 +35,22 @@
     TestCase.assertEquals(before, writeRead(before, conf));
   }
 
-  
   /** Utility method for testing writables. */
   public static Writable writeRead(Writable before, Configuration conf)
-    throws Exception {
-    
+      throws Exception {
+
     DataOutputBuffer dob = new DataOutputBuffer();
     before.write(dob);
-    
+
     DataInputBuffer dib = new DataInputBuffer();
     dib.reset(dob.getData(), dob.getLength());
-    
-    Writable after = (Writable)before.getClass().newInstance();
+
+    Writable after = (Writable) before.getClass().newInstance();
     if (conf != null) {
-      ((Configurable)after).setConf(conf);
+      ((Configurable) after).setConf(conf);
     }
     after.readFields(dib);
     return after;
   }
-  
+
 }
Index: src/test/org/apache/nutch/util/TestNodeWalker.java
===================================================================
--- src/test/org/apache/nutch/util/TestNodeWalker.java	(revision 1188268)
+++ src/test/org/apache/nutch/util/TestNodeWalker.java	(working copy)
@@ -24,49 +24,45 @@
 import org.w3c.dom.Node;
 import org.xml.sax.InputSource;
 
-
-
-
 /** Unit tests for NodeWalker methods. */
 public class TestNodeWalker extends TestCase {
-  public TestNodeWalker(String name) { 
-    super(name); 
+  public TestNodeWalker(String name) {
+    super(name);
   }
 
   /* a snapshot of the nutch webpage */
-  private final static String WEBPAGE= 
-  "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
-  + "<body>"
-  + "<ul>"
-  + "<li>crawl several billion pages per month</li>"
-  + "<li>maintain an index of these pages</li>"
-  + "<li>search that index up to 1000 times per second</li>"
-  + "<li>provide very high quality search results</li>"
-  + "<li>operate at minimal cost</li>"
-  + "</ul>"
-  + "</body>"
-  + "</html>";
+  private final static String WEBPAGE = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
+      + "<body>"
+      + "<ul>"
+      + "<li>crawl several billion pages per month</li>"
+      + "<li>maintain an index of these pages</li>"
+      + "<li>search that index up to 1000 times per second</li>"
+      + "<li>provide very high quality search results</li>"
+      + "<li>operate at minimal cost</li>" + "</ul>" + "</body>" + "</html>";
 
   private final static String[] ULCONTENT = new String[4];
-  
-  protected void setUp() throws Exception{
-    ULCONTENT[0]="crawl several billion pages per month" ;
-    ULCONTENT[1]="maintain an index of these pages" ;
-    ULCONTENT[2]="search that index up to 1000 times per second"  ;
-    ULCONTENT[3]="operate at minimal cost" ;
+
+  protected void setUp() throws Exception {
+    ULCONTENT[0] = "crawl several billion pages per month";
+    ULCONTENT[1] = "maintain an index of these pages";
+    ULCONTENT[2] = "search that index up to 1000 times per second";
+    ULCONTENT[3] = "operate at minimal cost";
   }
 
   public void testSkipChildren() {
-    DOMParser parser= new DOMParser();
-    
+    DOMParser parser = new DOMParser();
+
     try {
       parser.setFeature("http://xml.org/sax/features/validation", false);
-      parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
-      parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
+      parser.setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+          false);
+      parser
+          .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
     } catch (Exception e) {
       e.printStackTrace();
     }
-     
+
     StringBuffer sb = new StringBuffer();
     NodeWalker walker = new NodeWalker(parser.getDocument());
     while (walker.hasNext()) {
@@ -78,30 +74,33 @@
         sb.append(text);
       }
     }
-   assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString()));
-     
-   StringBuffer sbSkip = new StringBuffer();
-   NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
-   while (walkerSkip.hasNext()) {
-     Node currentNode = walkerSkip.nextNode();
-     String nodeName = currentNode.getNodeName();
-     short nodeType = currentNode.getNodeType();
-     if ("ul".equalsIgnoreCase(nodeName)) {
-       walkerSkip.skipChildren();
-     }
-     if (nodeType == Node.TEXT_NODE) {
-       String text = currentNode.getNodeValue();
-       text = text.replaceAll("\\s+", " ");
-       sbSkip.append(text);
-     }
-   }
-   assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString()));
+    assertTrue("UL Content can NOT be found in the node",
+        findSomeUlContent(sb.toString()));
+
+    StringBuffer sbSkip = new StringBuffer();
+    NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
+    while (walkerSkip.hasNext()) {
+      Node currentNode = walkerSkip.nextNode();
+      String nodeName = currentNode.getNodeName();
+      short nodeType = currentNode.getNodeType();
+      if ("ul".equalsIgnoreCase(nodeName)) {
+        walkerSkip.skipChildren();
+      }
+      if (nodeType == Node.TEXT_NODE) {
+        String text = currentNode.getNodeValue();
+        text = text.replaceAll("\\s+", " ");
+        sbSkip.append(text);
+      }
+    }
+    assertFalse("UL Content can be found in the node",
+        findSomeUlContent(sbSkip.toString()));
   }
-  
+
   public boolean findSomeUlContent(String str) {
-    for(int i=0; i<ULCONTENT.length ; i++){
-      if(str.contains(ULCONTENT[i])) return true;
-    }    
+    for (int i = 0; i < ULCONTENT.length; i++) {
+      if (str.contains(ULCONTENT[i]))
+        return true;
+    }
     return false;
   }
 }
Index: src/test/org/apache/nutch/indexer/TestIndexingFilters.java
===================================================================
--- src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(revision 1188268)
+++ src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(working copy)
@@ -27,6 +27,7 @@
 
   /**
    * Test behaviour when defined filter does not exist.
+   * 
    * @throws IndexingException
    */
   public void testNonExistingIndexingFilter() throws IndexingException {
@@ -36,13 +37,13 @@
     conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
 
     IndexingFilters filters = new IndexingFilters(conf);
-//    filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
-//        new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
-//        "http://www.example.com/"), new CrawlDatum(), new Inlinks());
+    // filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
+    // new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
+    // "http://www.example.com/"), new CrawlDatum(), new Inlinks());
     WebPage page = new WebPage();
     page.setText(new Utf8("text"));
     page.setTitle(new Utf8("title"));
-    filters.filter(new NutchDocument(),"http://www.example.com/",page);
+    filters.filter(new NutchDocument(), "http://www.example.com/", page);
   }
 
 }
Index: src/test/org/apache/nutch/plugin/TestPluginSystem.java
===================================================================
--- src/test/org/apache/nutch/plugin/TestPluginSystem.java	(revision 1188268)
+++ src/test/org/apache/nutch/plugin/TestPluginSystem.java	(working copy)
@@ -40,262 +40,256 @@
  * @author joa23
  */
 public class TestPluginSystem extends TestCase {
-    private int fPluginCount;
+  private int fPluginCount;
 
-    private LinkedList fFolders = new LinkedList();
-    private Configuration conf ;
-    private PluginRepository repository;
+  private LinkedList fFolders = new LinkedList();
+  private Configuration conf;
+  private PluginRepository repository;
 
-    protected void setUp() throws Exception {
-        this.conf = NutchConfiguration.create();
-        conf.set("plugin.includes", ".*");
-//        String string = this.conf.get("plugin.includes", "");
-//        conf.set("plugin.includes", string + "|Dummy*");
-        fPluginCount = 5;
-        createDummyPlugins(fPluginCount);
-        this.repository = PluginRepository.get(conf);
-    }
+  protected void setUp() throws Exception {
+    this.conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    // String string = this.conf.get("plugin.includes", "");
+    // conf.set("plugin.includes", string + "|Dummy*");
+    fPluginCount = 5;
+    createDummyPlugins(fPluginCount);
+    this.repository = PluginRepository.get(conf);
+  }
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see junit.framework.TestCase#tearDown()
-     */
-    protected void tearDown() throws Exception {
-        for (int i = 0; i < fFolders.size(); i++) {
-            File folder = (File) fFolders.get(i);
-            delete(folder);
-            folder.delete();
-        }
-
+  /*
+   * (non-Javadoc)
+   * 
+   * @see junit.framework.TestCase#tearDown()
+   */
+  protected void tearDown() throws Exception {
+    for (int i = 0; i < fFolders.size(); i++) {
+      File folder = (File) fFolders.get(i);
+      delete(folder);
+      folder.delete();
     }
 
-    /**
+  }
+
+  /**
      */
-    public void testPluginConfiguration() {
-        String string = getPluginFolder();
-        File file = new File(string);
-        if (!file.exists()) {
-            file.mkdir();
-        }
-        assertTrue(file.exists());
+  public void testPluginConfiguration() {
+    String string = getPluginFolder();
+    File file = new File(string);
+    if (!file.exists()) {
+      file.mkdir();
     }
+    assertTrue(file.exists());
+  }
 
-    /**
+  /**
      */
-    public void testLoadPlugins() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        int k = descriptors.length;
-        assertTrue(fPluginCount <= k);
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            assertEquals(1, descriptor.getExportedLibUrls().length);
-            assertEquals(1, descriptor.getNotExportedLibUrls().length);
-        }
+  public void testLoadPlugins() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    int k = descriptors.length;
+    assertTrue(fPluginCount <= k);
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      assertEquals(1, descriptor.getExportedLibUrls().length);
+      assertEquals(1, descriptor.getNotExportedLibUrls().length);
     }
+  }
 
-    public void testRepositoryCache() {
-      Configuration config = NutchConfiguration.create();
-      PluginRepository repo = PluginRepository.get(config);
-      JobConf job = new NutchJobConf(config);
-      PluginRepository repo1 = PluginRepository.get(job);
-      assertTrue(repo == repo1);
-      // now construct a config without UUID
-      config = new Configuration();
-      config.addResource("nutch-default.xml");
-      config.addResource("nutch-site.xml");
-      repo = PluginRepository.get(config);
-      job = new NutchJobConf(config);
-      repo1 = PluginRepository.get(job);
-      assertTrue(repo1 != repo);
-    }
+  public void testRepositoryCache() {
+    Configuration config = NutchConfiguration.create();
+    PluginRepository repo = PluginRepository.get(config);
+    JobConf job = new NutchJobConf(config);
+    PluginRepository repo1 = PluginRepository.get(job);
+    assertTrue(repo == repo1);
+    // now construct a config without UUID
+    config = new Configuration();
+    config.addResource("nutch-default.xml");
+    config.addResource("nutch-site.xml");
+    repo = PluginRepository.get(config);
+    job = new NutchJobConf(config);
+    repo1 = PluginRepository.get(job);
+    assertTrue(repo1 != repo);
+  }
 
-    /**
+  /**
      *  
      */
-    public void testGetExtensionAndAttributes() {
-        String xpId = " sdsdsd";
-        ExtensionPoint extensionPoint =repository
-                .getExtensionPoint(xpId);
-        assertEquals(extensionPoint, null);
-        Extension[] extension1 = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extension1.length, fPluginCount);
-        for (int i = 0; i < extension1.length; i++) {
-            Extension extension2 = extension1[i];
-            String string = extension2.getAttribute(getGetConfigElementName());
-            assertEquals(string, getParameterValue());
-        }
+  public void testGetExtensionAndAttributes() {
+    String xpId = " sdsdsd";
+    ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId);
+    assertEquals(extensionPoint, null);
+    Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extension1.length, fPluginCount);
+    for (int i = 0; i < extension1.length; i++) {
+      Extension extension2 = extension1[i];
+      String string = extension2.getAttribute(getGetConfigElementName());
+      assertEquals(string, getParameterValue());
     }
+  }
 
-    /**
-     * @throws PluginRuntimeException
-     */
-    public void testGetExtensionInstances() throws PluginRuntimeException {
-        Extension[] extensions = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extensions.length, fPluginCount);
-        for (int i = 0; i < extensions.length; i++) {
-            Extension extension = extensions[i];
-            Object object = extension.getExtensionInstance();
-            if (!(object instanceof HelloWorldExtension))
-                fail(" object is not a instance of HelloWorldExtension");
-            ((ITestExtension) object).testGetExtension("Bla ");
-            String string = ((ITestExtension) object).testGetExtension("Hello");
-            assertEquals("Hello World", string);
-        }
+  /**
+   * @throws PluginRuntimeException
+   */
+  public void testGetExtensionInstances() throws PluginRuntimeException {
+    Extension[] extensions = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extensions.length, fPluginCount);
+    for (int i = 0; i < extensions.length; i++) {
+      Extension extension = extensions[i];
+      Object object = extension.getExtensionInstance();
+      if (!(object instanceof HelloWorldExtension))
+        fail(" object is not a instance of HelloWorldExtension");
+      ((ITestExtension) object).testGetExtension("Bla ");
+      String string = ((ITestExtension) object).testGetExtension("Hello");
+      assertEquals("Hello World", string);
     }
+  }
 
-    /**
+  /**
      * 
      *  
      */
-    public void testGetClassLoader() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            assertNotNull(descriptor.getClassLoader());
-        }
+  public void testGetClassLoader() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      assertNotNull(descriptor.getClassLoader());
     }
+  }
 
-    /**
-     * @throws IOException
-     */
-    public void testGetResources() throws IOException {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            String value = descriptor.getResourceString("key", Locale.UK);
-            assertEquals("value", value);
-            value = descriptor.getResourceString("key",
-                    Locale.TRADITIONAL_CHINESE);
-            assertEquals("value", value);
+  /**
+   * @throws IOException
+   */
+  public void testGetResources() throws IOException {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      String value = descriptor.getResourceString("key", Locale.UK);
+      assertEquals("value", value);
+      value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE);
+      assertEquals("value", value);
 
-        }
     }
+  }
 
-    /**
-     * @return a PluginFolderPath
-     */
-    private String getPluginFolder() {
-        String[] strings = conf.getStrings("plugin.folders");
-        if (strings == null || strings.length == 0)
-            fail("no plugin directory setuped..");
+  /**
+   * @return a PluginFolderPath
+   */
+  private String getPluginFolder() {
+    String[] strings = conf.getStrings("plugin.folders");
+    if (strings == null || strings.length == 0)
+      fail("no plugin directory setuped..");
 
-        String name = strings[0];
-        return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString();
-    }
+    String name = strings[0];
+    return new PluginManifestParser(conf, this.repository)
+        .getPluginFolder(name).toString();
+  }
 
-    /**
-     * Creates some Dummy Plugins
-     * 
-     * @param pCount
-     */
-    private void createDummyPlugins(int pCount) {
-        String string = getPluginFolder();
-        try {
-            File folder = new File(string);
-            folder.mkdir();
-            for (int i = 0; i < pCount; i++) {
-                String pluginFolder = string + File.separator + "DummyPlugin"
-                        + i;
-                File file = new File(pluginFolder);
-                file.mkdir();
-                fFolders.add(file);
-                createPluginManifest(i, file.getAbsolutePath());
-                createResourceFile(file.getAbsolutePath());
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
+  /**
+   * Creates some Dummy Plugins
+   * 
+   * @param pCount
+   */
+  private void createDummyPlugins(int pCount) {
+    String string = getPluginFolder();
+    try {
+      File folder = new File(string);
+      folder.mkdir();
+      for (int i = 0; i < pCount; i++) {
+        String pluginFolder = string + File.separator + "DummyPlugin" + i;
+        File file = new File(pluginFolder);
+        file.mkdir();
+        fFolders.add(file);
+        createPluginManifest(i, file.getAbsolutePath());
+        createResourceFile(file.getAbsolutePath());
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
     }
+  }
 
-    /**
-     * Creates an ResourceFile
-     * 
-     * @param pFolderPath
-     * @throws FileNotFoundException
-     * @throws IOException
-     */
-    private void createResourceFile(String pFolderPath)
-            throws FileNotFoundException, IOException {
-        Properties properties = new Properties();
-        properties.setProperty("key", "value");
-        properties.store(new FileOutputStream(pFolderPath + File.separator
-                + "messages" + ".properties"), "");
-    }
+  /**
+   * Creates a ResourceFile
+   * 
+   * @param pFolderPath
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  private void createResourceFile(String pFolderPath)
+      throws FileNotFoundException, IOException {
+    Properties properties = new Properties();
+    properties.setProperty("key", "value");
+    properties.store(new FileOutputStream(pFolderPath + File.separator
+        + "messages" + ".properties"), "");
+  }
 
-    /**
-     * Deletes files in path
-     * 
-     * @param path
-     * @throws IOException
-     */
-    private void delete(File path) throws IOException {
-        File[] files = path.listFiles();
-        for (int i = 0; i < files.length; ++i) {
-            if (files[i].isDirectory())
-                delete(files[i]);
-            files[i].delete();
-        }
+  /**
+   * Deletes files in path
+   * 
+   * @param path
+   * @throws IOException
+   */
+  private void delete(File path) throws IOException {
+    File[] files = path.listFiles();
+    for (int i = 0; i < files.length; ++i) {
+      if (files[i].isDirectory())
+        delete(files[i]);
+      files[i].delete();
     }
+  }
 
-    /**
-     * Creates an Plugin Manifest File
-     * 
-     * @param i
-     * @param pFolderPath
-     * @throws IOException
-     */
-    private void createPluginManifest(int i, String pFolderPath)
-            throws IOException {
-        FileWriter out = new FileWriter(pFolderPath + File.separator
-                + "plugin.xml");
-        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" 
-                + "<!--this is just a simple plugin for testing issues.-->"
-                + "<plugin id=\"org.apache.nutch.plugin."
-                + i
-                + "\" name=\""
-                + i
-                + "\" version=\"1.0\" provider-name=\"joa23\" "
-                + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
-                + "<extension-point id=\"aExtensioID\" "
-                + "name=\"simple Parser Extension\" "
-                + "schema=\"schema/testExtensionPoint.exsd\"/>"
-                + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
-                + "<library name=\"libs/not_exported.jar\"/></runtime>"
-                + "<extension point=\"aExtensioID\">"
-                + "<implementation name=\"simple Parser Extension\" "
-                + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
-                + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
-                + "</implementation></extension></plugin>";
-        out.write(xml);
-        out.flush();
-        out.close();
-    }
+  /**
+   * Creates an Plugin Manifest File
+   * 
+   * @param i
+   * @param pFolderPath
+   * @throws IOException
+   */
+  private void createPluginManifest(int i, String pFolderPath)
+      throws IOException {
+    FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml");
+    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+        + "<!--this is just a simple plugin for testing issues.-->"
+        + "<plugin id=\"org.apache.nutch.plugin."
+        + i
+        + "\" name=\""
+        + i
+        + "\" version=\"1.0\" provider-name=\"joa23\" "
+        + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
+        + "<extension-point id=\"aExtensioID\" "
+        + "name=\"simple Parser Extension\" "
+        + "schema=\"schema/testExtensionPoint.exsd\"/>"
+        + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
+        + "<library name=\"libs/not_exported.jar\"/></runtime>"
+        + "<extension point=\"aExtensioID\">"
+        + "<implementation name=\"simple Parser Extension\" "
+        + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
+        + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
+        + "</implementation></extension></plugin>";
+    out.write(xml);
+    out.flush();
+    out.close();
+  }
 
-    private String getParameterValue() {
-        return "a simple param value";
-    }
+  private String getParameterValue() {
+    return "a simple param value";
+  }
 
-    private static String getGetExtensionId() {
-        return "aExtensioID";
-    }
+  private static String getGetExtensionId() {
+    return "aExtensioID";
+  }
 
-    private static String getGetConfigElementName() {
-        return "dummy-name";
-    }
+  private static String getGetConfigElementName() {
+    return "dummy-name";
+  }
 
-    public static void main(String[] args) throws IOException {
-        new TestPluginSystem().createPluginManifest(1, "/");
-    }
+  public static void main(String[] args) throws IOException {
+    new TestPluginSystem().createPluginManifest(1, "/");
+  }
 }
Index: src/test/org/apache/nutch/plugin/ITestExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/ITestExtension.java	(revision 1188268)
+++ src/test/org/apache/nutch/plugin/ITestExtension.java	(working copy)
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * A Simple Test Extension Interface.
  * 
  * @author joa23
- *
+ * 
  */
 public interface ITestExtension {
   public String testGetExtension(String hello);
Index: src/test/org/apache/nutch/plugin/HelloWorldExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(revision 1188268)
+++ src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(working copy)
@@ -24,8 +24,11 @@
  */
 public class HelloWorldExtension implements ITestExtension {
 
-  /* (non-Javadoc)
-   * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
    */
   public String testGetExtension(String hello) {
     return hello + " World";
Index: src/test/org/apache/nutch/plugin/SimpleTestPlugin.java
===================================================================
--- src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(revision 1188268)
+++ src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(working copy)
@@ -28,8 +28,8 @@
 public class SimpleTestPlugin extends Plugin {
 
   /**
-   * @param pDescriptor 
-   * @param conf 
+   * @param pDescriptor
+   * @param conf
    */
   public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) {
 
@@ -55,4 +55,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/fetcher/PartitionUrlByHost.java
===================================================================
--- src/java/org/apache/nutch/fetcher/PartitionUrlByHost.java	(revision 1188268)
+++ src/java/org/apache/nutch/fetcher/PartitionUrlByHost.java	(working copy)
@@ -22,17 +22,15 @@
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.nutch.util.TableUtil;
 
-public class PartitionUrlByHost
-extends Partitioner<IntWritable, FetchEntry> {
+public class PartitionUrlByHost extends Partitioner<IntWritable, FetchEntry> {
 
   @Override
-  public int getPartition(IntWritable key,
-      FetchEntry value, int numPartitions) {
+  public int getPartition(IntWritable key, FetchEntry value, int numPartitions) {
     String urlString = TableUtil.unreverseUrl(value.getKey());
 
     URL url = null;
 
-    int hashCode = (url==null ? urlString : url.getHost()).hashCode();
+    int hashCode = (url == null ? urlString : url.getHost()).hashCode();
 
     return (hashCode & Integer.MAX_VALUE) % numPartitions;
   }
Index: src/java/org/apache/nutch/fetcher/FetcherReducer.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherReducer.java	(revision 1188268)
+++ src/java/org/apache/nutch/fetcher/FetcherReducer.java	(working copy)
@@ -58,20 +58,23 @@
 import org.apache.nutch.util.URLUtil;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class FetcherReducer
-extends GoraReducer<IntWritable, FetchEntry, String, WebPage> {
+public class FetcherReducer extends
+    GoraReducer<IntWritable, FetchEntry, String, WebPage> {
 
   public static final Logger LOG = FetcherJob.LOG;
 
   private final AtomicInteger activeThreads = new AtomicInteger(0);
   private final AtomicInteger spinWaiting = new AtomicInteger(0);
 
-  private final long start = System.currentTimeMillis(); // start time of fetcher run
+  private final long start = System.currentTimeMillis(); // start time of
+                                                         // fetcher run
   private final AtomicLong lastRequestStart = new AtomicLong(start);
 
-  private final AtomicLong bytes = new AtomicLong(0);        // total bytes fetched
-  private final AtomicInteger pages = new AtomicInteger(0);  // total pages fetched
-  private final AtomicInteger errors = new AtomicInteger(0); // total pages errored
+  private final AtomicLong bytes = new AtomicLong(0); // total bytes fetched
+  private final AtomicInteger pages = new AtomicInteger(0); // total pages
+                                                            // fetched
+  private final AtomicInteger errors = new AtomicInteger(0); // total pages
+                                                             // errored
 
   private QueueFeeder feeder;
 
@@ -99,9 +102,10 @@
       this.queueID = queueID;
     }
 
-    /** Create an item. Queue id will be created based on <code>queueMode</code>
-     * argument, either as a protocol + hostname pair, protocol + IP
-     * address pair or protocol+domain pair.
+    /**
+     * Create an item. Queue id will be created based on <code>queueMode</code>
+     * argument, either as a protocol + hostname pair, protocol + IP address
+     * pair or protocol+domain pair.
      */
     public static FetchItem create(String url, WebPage page, String queueMode) {
       String queueID;
@@ -123,19 +127,18 @@
           LOG.warn("Unable to resolve: " + u.getHost() + ", skipping.");
           return null;
         }
-      }
-      else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)){
+      } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
         host = URLUtil.getDomainName(u);
         if (host == null) {
-          LOG.warn("Unknown domain for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          LOG.warn("Unknown domain for url: " + url
+              + ", using URL string as key");
+          host = u.toExternalForm();
         }
-      }
-      else {
+      } else {
         host = u.getHost();
         if (host == null) {
           LOG.warn("Unknown host for url: " + url + ", using URL string as key");
-          host=u.toExternalForm();
+          host = u.toExternalForm();
         }
       }
       queueID = proto + "://" + host.toLowerCase();
@@ -145,19 +148,22 @@
   }
 
   /**
-   * This class handles FetchItems which come from the same host ID (be it
-   * a proto/hostname or proto/IP pair). It also keeps track of requests in
+   * This class handles FetchItems which come from the same host ID (be it a
+   * proto/hostname or proto/IP pair). It also keeps track of requests in
    * progress and elapsed time between requests.
    */
   private static class FetchItemQueue {
-    List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());
-    Set<FetchItem>  inProgress = Collections.synchronizedSet(new HashSet<FetchItem>());
+    List<FetchItem> queue = Collections
+        .synchronizedList(new LinkedList<FetchItem>());
+    Set<FetchItem> inProgress = Collections
+        .synchronizedSet(new HashSet<FetchItem>());
     AtomicLong nextFetchTime = new AtomicLong();
     long crawlDelay;
     long minCrawlDelay;
     int maxThreads;
 
-    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay, long minCrawlDelay) {
+    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay,
+        long minCrawlDelay) {
       this.maxThreads = maxThreads;
       this.crawlDelay = crawlDelay;
       this.minCrawlDelay = minCrawlDelay;
@@ -181,27 +187,34 @@
     }
 
     public void addFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       queue.add(it);
     }
 
     @SuppressWarnings("unused")
     public void addInProgressFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       inProgress.add(it);
     }
 
     public FetchItem getFetchItem() {
-      if (inProgress.size() >= maxThreads) return null;
+      if (inProgress.size() >= maxThreads)
+        return null;
       final long now = System.currentTimeMillis();
-      if (nextFetchTime.get() > now) return null;
+      if (nextFetchTime.get() > now)
+        return null;
       FetchItem it = null;
-      if (queue.size() == 0) return null;
+      if (queue.size() == 0)
+        return null;
       try {
         it = queue.remove(0);
         inProgress.add(it);
       } catch (final Exception e) {
-        LOG.error("Cannot remove FetchItem from queue or cannot add it to inProgress queue", e);
+        LOG.error(
+            "Cannot remove FetchItem from queue or cannot add it to inProgress queue",
+            e);
       }
       return it;
     }
@@ -225,11 +238,12 @@
 
     private void setEndTime(long endTime, boolean asap) {
       if (!asap)
-        nextFetchTime.set(endTime + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
+        nextFetchTime.set(endTime
+            + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
       else
         nextFetchTime.set(endTime);
     }
-    
+
     public synchronized int emptyQueue() {
       int presize = queue.size();
       queue.clear();
@@ -262,14 +276,17 @@
       this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
       queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(QUEUE_MODE_IP)
+          && !queueMode.equals(QUEUE_MODE_DOMAIN)
           && !queueMode.equals(QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
+      LOG.info("Using queue mode : " + queueMode);
       this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
-      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
+      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay",
+          0.0f) * 1000);
       this.timelimit = conf.getLong("fetcher.timelimit", -1);
     }
 
@@ -283,7 +300,8 @@
 
     public void addFetchItem(String url, WebPage page) {
       final FetchItem it = FetchItem.create(url, page, queueMode);
-      if (it != null) addFetchItem(it);
+      if (it != null)
+        addFetchItem(it);
     }
 
     public synchronized void addFetchItem(FetchItem it) {
@@ -316,8 +334,8 @@
     }
 
     public synchronized FetchItem getFetchItem() {
-      final Iterator<Map.Entry<String, FetchItemQueue>> it =
-        queues.entrySet().iterator();
+      final Iterator<Map.Entry<String, FetchItemQueue>> it = queues.entrySet()
+          .iterator();
       while (it.hasNext()) {
         final FetchItemQueue fiq = it.next().getValue();
         // reap empty queues
@@ -334,14 +352,15 @@
       }
       return null;
     }
-    
+
     public synchronized int checkTimelimit() {
       int count = 0;
       if (System.currentTimeMillis() >= timelimit && timelimit != -1) {
         // emptying the queues
         for (String id : queues.keySet()) {
           FetchItemQueue fiq = queues.get(id);
-          if (fiq.getQueueSize() == 0) continue;
+          if (fiq.getQueueSize() == 0)
+            continue;
           LOG.info("* queue: " + id + " >> timelimit! ");
           int deleted = fiq.emptyQueue();
           for (int i = 0; i < deleted; i++) {
@@ -352,16 +371,17 @@
         // there might also be a case where totalsize !=0 but number of queues
         // == 0
         // in which case we simply force it to 0 to avoid blocking
-        if (totalSize.get() != 0 && queues.size() == 0) totalSize.set(0);
+        if (totalSize.get() != 0 && queues.size() == 0)
+          totalSize.set(0);
       }
       return count;
     }
-    
 
     public synchronized void dump() {
       for (final String id : queues.keySet()) {
         final FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id);
         fiq.dump();
       }
@@ -385,8 +405,8 @@
     private final Context context;
 
     public FetcherThread(Context context, int num) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread" + num);        // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread" + num); // use an informative name
       this.context = context;
       Configuration conf = context.getConfiguration();
       this.urlFilters = new URLFilters(conf);
@@ -410,13 +430,15 @@
           if (fit == null) {
             if (feeder.isAlive() || fetchQueues.getTotalSize() > 0) {
               if (LOG.isDebugEnabled()) {
-                LOG.debug(getName() + " fetchQueues.getFetchItem() was null, spin-waiting ...");
+                LOG.debug(getName()
+                    + " fetchQueues.getFetchItem() was null, spin-waiting ...");
               }
               // spin-wait.
               spinWaiting.incrementAndGet();
               try {
                 Thread.sleep(500);
-              } catch (final Exception e) {}
+              } catch (final Exception e) {
+              }
               spinWaiting.decrementAndGet();
               continue;
             } else {
@@ -441,8 +463,10 @@
                 LOG.debug("redirectCount=" + redirectCount);
               }
               redirecting = false;
-              final Protocol protocol = this.protocolFactory.getProtocol(fit.url);
-              final RobotRules rules = protocol.getRobotRules(fit.url, fit.page);
+              final Protocol protocol = this.protocolFactory
+                  .getProtocol(fit.url);
+              final RobotRules rules = protocol
+                  .getRobotRules(fit.url, fit.page);
               if (!rules.isAllowed(fit.u)) {
                 // unblock
                 fetchQueues.finishFetchItem(fit, true);
@@ -457,38 +481,44 @@
                 if (rules.getCrawlDelay() > maxCrawlDelay) {
                   // unblock
                   fetchQueues.finishFetchItem(fit, true);
-                  LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
-                  output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED, CrawlStatus.STATUS_GONE);
+                  LOG.debug("Crawl-Delay for " + fit.url + " too long ("
+                      + rules.getCrawlDelay() + "), skipping");
+                  output(fit, null, ProtocolStatusUtils.STATUS_ROBOTS_DENIED,
+                      CrawlStatus.STATUS_GONE);
                   continue;
                 } else {
-                  final FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
+                  final FetchItemQueue fiq = fetchQueues
+                      .getFetchItemQueue(fit.queueID);
                   fiq.crawlDelay = rules.getCrawlDelay();
                 }
               }
-              final ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.page);
+              final ProtocolOutput output = protocol.getProtocolOutput(fit.url,
+                  fit.page);
               final ProtocolStatus status = output.getStatus();
               final Content content = output.getContent();
               // unblock queue
               fetchQueues.finishFetchItem(fit);
 
-              context.getCounter("FetcherStatus", ProtocolStatusUtils.getName(status.getCode())).increment(1);
+              context.getCounter("FetcherStatus",
+                  ProtocolStatusUtils.getName(status.getCode())).increment(1);
 
               int length = 0;
-              if (content!=null && content.getContent()!=null) length= content.getContent().length;
+              if (content != null && content.getContent() != null)
+                length = content.getContent().length;
               updateStatus(length);
 
-              switch(status.getCode()) {
+              switch (status.getCode()) {
 
               case ProtocolStatusCodes.WOULDBLOCK:
                 // retry ?
                 fetchQueues.addFetchItem(fit);
                 break;
 
-              case ProtocolStatusCodes.SUCCESS:        // got a page
+              case ProtocolStatusCodes.SUCCESS: // got a page
                 output(fit, content, status, CrawlStatus.STATUS_FETCHED);
                 break;
 
-              case ProtocolStatusCodes.MOVED:         // redirect
+              case ProtocolStatusCodes.MOVED: // redirect
               case ProtocolStatusCodes.TEMP_MOVED:
                 byte code;
                 boolean temp;
@@ -501,18 +531,18 @@
                 }
                 output(fit, content, status, code);
                 final String newUrl = ProtocolStatusUtils.getMessage(status);
-                handleRedirect(fit.url, newUrl, temp,  FetcherJob.PROTOCOL_REDIR);
+                handleRedirect(fit.url, newUrl, temp, FetcherJob.PROTOCOL_REDIR);
                 redirecting = false;
                 break;
               case ProtocolStatusCodes.EXCEPTION:
                 logError(fit.url, ProtocolStatusUtils.getMessage(status));
                 /* FALLTHROUGH */
-              case ProtocolStatusCodes.RETRY:          // retry
+              case ProtocolStatusCodes.RETRY: // retry
               case ProtocolStatusCodes.BLOCKED:
                 output(fit, null, status, CrawlStatus.STATUS_RETRY);
                 break;
 
-              case ProtocolStatusCodes.GONE:           // gone
+              case ProtocolStatusCodes.GONE: // gone
               case ProtocolStatusCodes.NOTFOUND:
               case ProtocolStatusCodes.ACCESS_DENIED:
               case ProtocolStatusCodes.ROBOTS_DENIED:
@@ -539,7 +569,7 @@
 
             } while (redirecting && (redirectCount <= maxRedirect));
 
-          } catch (final Throwable t) {                 // unexpected exception
+          } catch (final Throwable t) { // unexpected exception
             // unblock
             fetchQueues.finishFetchItem(fit);
             logError(fit.url, t.toString());
@@ -550,18 +580,20 @@
         }
 
       } catch (final Throwable e) {
-        LOG.error("fetcher caught:"+e.toString());
+        LOG.error("fetcher caught:" + e.toString());
         e.printStackTrace(LogUtil.getFatalStream(LOG));
       } finally {
-        if (fit != null) fetchQueues.finishFetchItem(fit);
+        if (fit != null)
+          fetchQueues.finishFetchItem(fit);
         activeThreads.decrementAndGet(); // count threads
-        LOG.info("-finishing thread " + getName() + ", activeThreads=" + activeThreads);
+        LOG.info("-finishing thread " + getName() + ", activeThreads="
+            + activeThreads);
       }
     }
 
-    private void handleRedirect(String url, String newUrl,
-        boolean temp, String redirType)
-    throws URLFilterException, IOException, InterruptedException {
+    private void handleRedirect(String url, String newUrl, boolean temp,
+        String redirType) throws URLFilterException, IOException,
+        InterruptedException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
       if (newUrl == null || newUrl.equals(url)) {
@@ -573,11 +605,12 @@
       if (!reprUrl.equals(url)) {
         newWebPage.setReprUrl(new Utf8(reprUrl));
       }
-      newWebPage.putToMetadata(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
+      newWebPage.putToMetadata(FetcherJob.REDIRECT_DISCOVERED,
+          TableUtil.YES_VAL);
       context.write(reversedNewUrl, newWebPage);
       if (LOG.isDebugEnabled()) {
-        LOG.debug(" - " + redirType + " redirect to " +
-            reprUrl + " (fetching later)");
+        LOG.debug(" - " + redirType + " redirect to " + reprUrl
+            + " (fetching later)");
       }
 
     }
@@ -587,9 +620,8 @@
       bytes.addAndGet(bytesInPage);
     }
 
-    private void output(FetchItem fit, Content content,
-        ProtocolStatus pstatus, byte status)
-    throws IOException, InterruptedException {
+    private void output(FetchItem fit, Content content, ProtocolStatus pstatus,
+        byte status) throws IOException, InterruptedException {
       fit.page.setStatus(status);
       final long prevFetchTime = fit.page.getFetchTime();
       fit.page.setPrevFetchTime(prevFetchTime);
@@ -610,7 +642,7 @@
         URLWebPage redirectedPage = parseUtil.process(key, fit.page);
         if (redirectedPage != null) {
           context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
-                        redirectedPage.getDatum());
+              redirectedPage.getDatum());
         }
       }
       context.write(key, fit.page);
@@ -623,8 +655,8 @@
   }
 
   /**
-   * This class feeds the queues with input items, and re-fills them as
-   * items are consumed by FetcherThread-s.
+   * This class feeds the queues with input items, and re-fills them as items
+   * are consumed by FetcherThread-s.
    */
   private static class QueueFeeder extends Thread {
     private final Context context;
@@ -634,9 +666,8 @@
     boolean hasMore;
     private long timelimit = -1;
 
-    public QueueFeeder(Context context,
-        FetchItemQueues queues, int size)
-    throws IOException, InterruptedException {
+    public QueueFeeder(Context context, FetchItemQueues queues, int size)
+        throws IOException, InterruptedException {
       this.context = context;
       this.queues = queues;
       this.size = size;
@@ -646,8 +677,9 @@
       if (hasMore) {
         currentIter = context.getValues().iterator();
       }
-      // the value of the time limit is either -1 or the time where it should finish
-      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1); 
+      // the value of the time limit is either -1 or the time where it should
+      // finish
+      timelimit = context.getConfiguration().getLong("fetcher.timelimit", -1);
     }
 
     @Override
@@ -674,7 +706,9 @@
             // queues are full - spin-wait until they have some free space
             try {
               Thread.sleep(1000);
-            } catch (final Exception e) {};
+            } catch (final Exception e) {
+            }
+            ;
             continue;
           }
           if (LOG.isDebugEnabled()) {
@@ -682,8 +716,7 @@
           }
           while (feed > 0 && currentIter.hasNext()) {
             FetchEntry entry = currentIter.next();
-            final String url =
-              TableUtil.unreverseUrl(entry.getKey());
+            final String url = TableUtil.unreverseUrl(entry.getKey());
             queues.addFetchItem(url, entry.getWebPage());
             feed--;
             cnt++;
@@ -700,27 +733,30 @@
         LOG.error("QueueFeeder error reading input, record " + cnt, e);
         return;
       }
-      LOG.info("QueueFeeder finished: total " + cnt + " records. Hit by time limit :"
-          + timelimitcount);
-      context.getCounter("FetcherStatus","HitByTimeLimit-QueueFeeder").increment(timelimitcount);
+      LOG.info("QueueFeeder finished: total " + cnt
+          + " records. Hit by time limit :" + timelimitcount);
+      context.getCounter("FetcherStatus", "HitByTimeLimit-QueueFeeder")
+          .increment(timelimitcount);
     }
   }
 
   private void reportStatus(Context context) throws IOException {
     StringBuffer status = new StringBuffer();
-    long elapsed = (System.currentTimeMillis() - start)/1000;
-    status.append(spinWaiting).append("/").append(activeThreads).append(" threads spinwaiting\n");
+    long elapsed = (System.currentTimeMillis() - start) / 1000;
+    status.append(spinWaiting).append("/").append(activeThreads)
+        .append(" threads spinwaiting\n");
     status.append(pages).append(" pages, ").append(errors).append(" errors, ");
-    status.append(Math.round(((float)pages.get()*10)/elapsed)/10.0).append(" pages/s, ");
-    status.append(Math.round(((((float)bytes.get())*8)/1024)/elapsed)).append(" kb/s, ");
+    status.append(Math.round(((float) pages.get() * 10) / elapsed) / 10.0)
+        .append(" pages/s, ");
+    status.append(Math.round(((((float) bytes.get()) * 8) / 1024) / elapsed))
+        .append(" kb/s, ");
     status.append(this.fetchQueues.getTotalSize()).append(" URLs in ");
     status.append(this.fetchQueues.getQueueCount()).append(" queues");
     context.setStatus(status.toString());
   }
 
   @Override
-  public void run(Context context)
-  throws IOException, InterruptedException {
+  public void run(Context context) throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
     this.fetchQueues = new FetchItemQueues(conf);
     int threadCount = conf.getInt("fetcher.threads.fetch", 10);
@@ -733,34 +769,38 @@
     feeder = new QueueFeeder(context, fetchQueues, threadCount * 50);
     feeder.start();
 
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       FetcherThread ft = new FetcherThread(context, i);
       fetcherThreads.add(ft);
       ft.start();
     }
     // select a timeout that avoids a task timeout
-    final long timeout = conf.getInt("mapred.task.timeout", 10*60*1000)/2;
+    final long timeout = conf.getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
 
-    do {                                          // wait for threads to exit
+    do { // wait for threads to exit
       try {
         Thread.sleep(10000);
-      } catch (final InterruptedException e) {}
+      } catch (final InterruptedException e) {
+      }
 
       context.progress();
       reportStatus(context);
-      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting=" + spinWaiting.get()
-          + ", fetchQueues= " + fetchQueues.getQueueCount() +", fetchQueues.totalSize=" + fetchQueues.getTotalSize());
+      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting="
+          + spinWaiting.get() + ", fetchQueues= " + fetchQueues.getQueueCount()
+          + ", fetchQueues.totalSize=" + fetchQueues.getTotalSize());
 
       if (!feeder.isAlive() && fetchQueues.getTotalSize() < 5) {
         fetchQueues.dump();
       }
-      
+
       // check timelimit
       if (!feeder.isAlive()) {
         int hitByTimeLimit = fetchQueues.checkTimelimit();
-        if (hitByTimeLimit != 0) context.getCounter("FetcherStatus","HitByTimeLimit-Queues").increment(hitByTimeLimit);
+        if (hitByTimeLimit != 0)
+          context.getCounter("FetcherStatus", "HitByTimeLimit-Queues")
+              .increment(hitByTimeLimit);
       }
-      
+
       // some requests seem to hang, despite all intentions
       if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) {
         LOG.warn("Aborting with " + activeThreads + " hung threads.");
Index: src/java/org/apache/nutch/fetcher/FetcherJob.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/fetcher/FetcherJob.java	(working copy)
@@ -50,7 +50,7 @@
 
 /**
  * Multi-threaded fetcher.
- *
+ * 
  */
 public class FetcherJob extends NutchTool implements Tool {
 
@@ -76,8 +76,8 @@
    * Mapper class for Fetcher.
    * </p>
    * <p>
-   * This class reads the random integer written by {@link GeneratorJob} as its key
-   * while outputting the actual key and value arguments through a
+   * This class reads the random integer written by {@link GeneratorJob} as its
+   * key while outputting the actual key and value arguments through a
    * {@link FetchEntry} instance.
    * </p>
    * <p>
@@ -88,8 +88,8 @@
    * from other hosts as well.
    * </p>
    */
-  public static class FetcherMapper
-  extends GoraMapper<String, WebPage, IntWritable, FetchEntry> {
+  public static class FetcherMapper extends
+      GoraMapper<String, WebPage, IntWritable, FetchEntry> {
 
     private boolean shouldContinue;
 
@@ -101,7 +101,8 @@
     protected void setup(Context context) {
       Configuration conf = context.getConfiguration();
       shouldContinue = conf.getBoolean(RESUME_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
     }
 
     @Override
@@ -110,18 +111,20 @@
       Utf8 mark = Mark.GENERATE_MARK.checkMark(page);
       if (!NutchJob.shouldProcess(mark, batchId)) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+              + "; different batch id");
         }
         return;
       }
       if (shouldContinue && Mark.FETCH_MARK.checkMark(page) != null) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; already fetched");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+              + "; already fetched");
         }
         return;
       }
-      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(context
-          .getConfiguration(), key, page));
+      context.write(new IntWritable(random.nextInt(65536)), new FetchEntry(
+          context.getConfiguration(), key, page));
     }
   }
 
@@ -141,21 +144,22 @@
       ParserJob parserJob = new ParserJob();
       fields.addAll(parserJob.getFields(job));
     }
-    ProtocolFactory protocolFactory = new ProtocolFactory(job.getConfiguration());
+    ProtocolFactory protocolFactory = new ProtocolFactory(
+        job.getConfiguration());
     fields.addAll(protocolFactory.getFields());
 
     return fields;
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     checkConfiguration();
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Integer threads = (Integer)args.get(Nutch.ARG_THREADS);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Boolean parse = (Boolean)args.get(Nutch.ARG_PARSE);
-    Integer numTasks = (Integer)args.get(Nutch.ARG_NUMTASKS);
- 
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Integer threads = (Integer) args.get(Nutch.ARG_THREADS);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Boolean parse = (Boolean) args.get(Nutch.ARG_PARSE);
+    Integer numTasks = (Integer) args.get(Nutch.ARG_NUMTASKS);
+
     if (threads != null && threads > 0) {
       getConf().setInt(THREADS_KEY, threads);
     }
@@ -185,8 +189,8 @@
         FetchEntry.class, FetcherMapper.class, PartitionUrlByHost.class, false);
     StorageUtils.initReducerJob(currentJob, FetcherReducer.class);
     if (numTasks == null || numTasks < 1) {
-      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt("mapred.map.tasks",
-          currentJob.getNumReduceTasks()));
+      currentJob.setNumReduceTasks(currentJob.getConfiguration().getInt(
+          "mapred.map.tasks", currentJob.getNumReduceTasks()));
     } else {
       currentJob.setNumReduceTasks(numTasks);
     }
@@ -197,21 +201,28 @@
 
   /**
    * Run fetcher.
-   * @param batchId batchId (obtained from Generator) or null to fetch all generated fetchlists
-   * @param threads number of threads per map task
+   * 
+   * @param batchId
+   *          batchId (obtained from Generator) or null to fetch all generated
+   *          fetchlists
+   * @param threads
+   *          number of threads per map task
    * @param shouldResume
-   * @param parse if true, then parse content immediately, if false then a separate
-   * run of {@link ParserJob} will be needed.
-   * @param numTasks number of fetching tasks (reducers). If set to < 1 then use the default,
-   * which is mapred.map.tasks.
+   * @param parse
+   *          if true, then parse content immediately, if false then a separate
+   *          run of {@link ParserJob} will be needed.
+   * @param numTasks
+   *          number of fetching tasks (reducers). If set to < 1 then use the
+   *          default, which is mapred.map.tasks.
    * @return 0 on success
    * @throws Exception
    */
-  public int fetch(String batchId, int threads, boolean shouldResume, boolean parse, int numTasks)
-      throws Exception {
+  public int fetch(String batchId, int threads, boolean shouldResume,
+      boolean parse, int numTasks) throws Exception {
     LOG.info("FetcherJob: starting");
 
-    LOG.info("FetcherJob : timelimit set for : " + getConf().getLong("fetcher.timelimit", -1));
+    LOG.info("FetcherJob : timelimit set for : "
+        + getConf().getLong("fetcher.timelimit", -1));
     LOG.info("FetcherJob: threads: " + getConf().getInt(THREADS_KEY, 10));
     LOG.info("FetcherJob: parsing: " + getConf().getBoolean(PARSE_KEY, true));
     LOG.info("FetcherJob: resuming: " + getConf().getBoolean(RESUME_KEY, false));
@@ -221,11 +232,8 @@
       LOG.info("FetcherJob: batchId: " + batchId);
     }
 
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_THREADS, threads,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_PARSE, parse,
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_THREADS, threads,
+        Nutch.ARG_RESUME, shouldResume, Nutch.ARG_PARSE, parse,
         Nutch.ARG_NUMTASKS, numTasks));
     LOG.info("FetcherJob: done");
     return 0;
@@ -271,14 +279,14 @@
     boolean parse = getConf().getBoolean(PARSE_KEY, false);
     String batchId;
 
-    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] " +
-      "[-threads N] [-parse] [-resume] [-numTasks N]\n" +
-      "\tbatchId\tcrawl identifier returned by Generator, or -all for all generated batchId-s\n" +
-      "\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)\n" +
-      "\t-threads N\tnumber of fetching threads per task\n" +
-      "\t-parse\tif specified then fetcher will immediately parse fetched content\n" +
-      "\t-resume\tresume interrupted job\n" +
-      "\t-numTasks N\tif N > 0 then use this many reduce tasks for fetching (default: mapred.map.tasks)";
+    String usage = "Usage: FetcherJob (<batchId> | -all) [-crawlId <id>] "
+        + "[-threads N] [-parse] [-resume] [-numTasks N]\n"
+        + "\tbatchId\tcrawl identifier returned by Generator, or -all for all generated batchId-s\n"
+        + "\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)\n"
+        + "\t-threads N\tnumber of fetching threads per task\n"
+        + "\t-parse\tif specified then fetcher will immediately parse fetched content\n"
+        + "\t-resume\tresume interrupted job\n"
+        + "\t-numTasks N\tif N > 0 then use this many reduce tasks for fetching (default: mapred.map.tasks)";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -306,13 +314,16 @@
       }
     }
 
-    int fetchcode = fetch(batchId, threads, shouldResume, parse, numTasks); // run the Fetcher
+    int fetchcode = fetch(batchId, threads, shouldResume, parse, numTasks); // run
+                                                                            // the
+                                                                            // Fetcher
 
     return fetchcode;
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new FetcherJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(working copy)
@@ -27,23 +27,21 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-
 /**
  * A multi-valued metadata container.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  */
-public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Office, Feed {
+public class Metadata implements Writable, CreativeCommons, DublinCore,
+    HttpHeaders, Nutch, Office, Feed {
 
   /**
    * A map of all metadata attributes.
    */
   private Map<String, String[]> metadata = null;
 
-
   /**
    * Constructs a new, empty metadata.
    */
@@ -53,9 +51,10 @@
 
   /**
    * Returns true if named value is multivalued.
-   * @param name name of metadata
-   * @return true is named value is multivalued, false if single
-   * value or null
+   * 
+   * @param name
+   *          name of metadata
+   * @return true if named value is multivalued, false if single value or null
    */
   public boolean isMultiValued(final String name) {
     return metadata.get(name) != null && metadata.get(name).length > 1;
@@ -63,6 +62,7 @@
 
   /**
    * Returns an array of the names contained in the metadata.
+   * 
    * @return Metadata names
    */
   public String[] names() {
@@ -70,11 +70,11 @@
   }
 
   /**
-   * Get the value associated to a metadata name.
-   * If many values are assiociated to the specified name, then the first
-   * one is returned.
-   *
-   * @param name of the metadata.
+   * Get the value associated to a metadata name. If many values are associated
+   * to the specified name, then the first one is returned.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the value associated to the specified metadata name.
    */
   public String get(final String name) {
@@ -88,13 +88,15 @@
 
   /**
    * Get the values associated to a metadata name.
-   * @param name of the metadata.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the values associated to a metadata name.
    */
   public String[] getValues(final String name) {
     return _getValues(name);
   }
-  
+
   private String[] _getValues(final String name) {
     String[] values = metadata.get(name);
     if (values == null) {
@@ -104,12 +106,13 @@
   }
 
   /**
-   * Add a metadata name/value mapping.
-   * Add the specified value to the list of values associated to the
-   * specified metadata name.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Add a metadata name/value mapping. Add the specified value to the list of
+   * values associated to the specified metadata name.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void add(final String name, final String value) {
     String[] values = metadata.get(name);
@@ -125,31 +128,37 @@
 
   /**
    * Copy All key-value pairs from properties.
-   * @param properties properties to copy from
+   * 
+   * @param properties
+   *          properties to copy from
    */
   public void setAll(Properties properties) {
     Enumeration names = properties.propertyNames();
     while (names.hasMoreElements()) {
       String name = (String) names.nextElement();
-      metadata.put(name, new String[]{properties.getProperty(name)});
+      metadata.put(name, new String[] { properties.getProperty(name) });
     }
   }
 
   /**
-   * Set metadata name/value.
-   * Associate the specified value to the specified metadata name. If some
-   * previous values were associated to this name, they are removed.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Set metadata name/value. Associate the specified value to the specified
+   * metadata name. If some previous values were associated to this name, they
+   * are removed.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void set(String name, String value) {
-    metadata.put(name, new String[]{value});
+    metadata.put(name, new String[] { value });
   }
 
   /**
    * Remove a metadata and all its associated values.
-   * @param name metadata name to remove
+   * 
+   * @param name
+   *          metadata name to remove
    */
   public void remove(String name) {
     metadata.remove(name);
@@ -157,12 +166,13 @@
 
   /**
    * Returns the number of metadata names in this metadata.
+   * 
    * @return number of metadata names
    */
   public int size() {
     return metadata.size();
   }
-  
+
   /** Remove all mappings from metadata. */
   public void clear() {
     metadata.clear();
@@ -170,7 +180,9 @@
 
   public boolean equals(Object o) {
 
-    if (o == null) { return false; }
+    if (o == null) {
+      return false;
+    }
 
     Metadata other = null;
     try {
@@ -179,7 +191,9 @@
       return false;
     }
 
-    if (other.size() != size()) { return false; }
+    if (other.size() != size()) {
+      return false;
+    }
 
     String[] names = names();
     for (int i = 0; i < names.length; i++) {
@@ -203,10 +217,7 @@
     for (int i = 0; i < names.length; i++) {
       String[] values = _getValues(names[i]);
       for (int j = 0; j < values.length; j++) {
-        buf.append(names[i])
-           .append("=")
-           .append(values[j])
-           .append(" ");
+        buf.append(names[i]).append("=").append(values[j]).append(" ");
       }
     }
     return buf.toString();
Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(working copy)
@@ -19,20 +19,17 @@
 import org.apache.avro.util.Utf8;
 import org.apache.hadoop.io.Text;
 
-
 /**
  * A collection of Nutch internal metadata constants.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Nutch {
 
-  public static final String ORIGINAL_CHAR_ENCODING =
-          "OriginalCharEncoding";
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
 
-  public static final String CHAR_ENCODING_FOR_CONVERSION =
-          "CharEncodingForConversion";
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
 
   public static final String SIGNATURE_KEY = "nutch.content.digest";
 
@@ -42,20 +39,26 @@
 
   public static final String GENERATE_TIME_KEY = "_ngt_";
 
-  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY);
+  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
+      GENERATE_TIME_KEY);
 
   public static final String PROTO_STATUS_KEY = "_pst_";
 
-  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY);
+  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(
+      PROTO_STATUS_KEY);
 
   public static final String FETCH_TIME_KEY = "_ftk_";
 
   public static final String FETCH_STATUS_KEY = "_fst_";
 
-  /** Sites may request that search engines don't provide access to cached documents. */
+  /**
+   * Sites may request that search engines don't provide access to cached
+   * documents.
+   */
   public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden";
 
-  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(CACHING_FORBIDDEN_KEY);
+  public static final Utf8 CACHING_FORBIDDEN_KEY_UTF8 = new Utf8(
+      CACHING_FORBIDDEN_KEY);
 
   /** Show both original forbidden content and summaries (default). */
   public static final String CACHING_FORBIDDEN_NONE = "none";
@@ -75,8 +78,7 @@
   public static final Utf8 ALL_CRAWL_ID = new Utf8(ALL_BATCH_ID_STR);
 
   public static final String CRAWL_ID_KEY = "storage.crawl.id";
-  
-  
+
   // short constants for cmd-line args
   /** Batch id to select. */
   public static final String ARG_BATCH = "batch";
@@ -112,7 +114,7 @@
   public static final String ARG_CLASS = "class";
   /** Depth (number of cycles) of a crawl. */
   public static final String ARG_DEPTH = "depth";
-  
+
   // short constants for status / results fields
   /** Status / result message. */
   public static final String STAT_MESSAGE = "msg";
Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(working copy)
@@ -16,149 +16,146 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Dublin Core metadata names.
- *
- * @see <a href="http://dublincore.org">dublincore.org</a> 
- *
+ * 
+ * @see <a href="http://dublincore.org">dublincore.org</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface DublinCore {
-  
-    
+
   /**
-   * Typically, Format may include the media-type or dimensions of the
-   * resource. Format may be used to determine the software, hardware or other
-   * equipment needed to display or operate the resource. Examples of
-   * dimensions include size and duration. Recommended best practice is to
-   * select a value from a controlled vocabulary (for example, the list of
-   * Internet Media Types [MIME] defining computer media formats).
+   * Typically, Format may include the media-type or dimensions of the resource.
+   * Format may be used to determine the software, hardware or other equipment
+   * needed to display or operate the resource. Examples of dimensions include
+   * size and duration. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+   * defining computer media formats).
    */
   public static final String FORMAT = "format";
-  
+
   /**
-   * Recommended best practice is to identify the resource by means of a
-   * string or number conforming to a formal identification system. Example
-   * formal identification systems include the Uniform Resource Identifier
-   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Recommended best practice is to identify the resource by means of a string
+   * or number conforming to a formal identification system. Example formal
+   * identification systems include the Uniform Resource Identifier (URI)
+   * (including the Uniform Resource Locator (URL)), the Digital Object
    * Identifier (DOI) and the International Standard Book Number (ISBN).
    */
   public static final String IDENTIFIER = "identifier";
-  
+
   /**
    * Date on which the resource was changed.
    */
   public static final String MODIFIED = "modified";
-  
+
   /**
    * An entity responsible for making contributions to the content of the
-   * resource. Examples of a Contributor include a person, an organisation, or
-   * a service. Typically, the name of a Contributor should be used to
-   * indicate the entity.
+   * resource. Examples of a Contributor include a person, an organisation, or a
+   * service. Typically, the name of a Contributor should be used to indicate
+   * the entity.
    */
   public static final String CONTRIBUTOR = "contributor";
-  
+
   /**
-   * The extent or scope of the content of the resource. Coverage will
-   * typically include spatial location (a place name or geographic
-   * coordinates), temporal period (a period label, date, or date range) or
-   * jurisdiction (such as a named administrative entity). Recommended best
-   * practice is to select a value from a controlled vocabulary (for example,
-   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
-   * named places or time periods be used in preference to numeric identifiers
-   * such as sets of coordinates or date ranges.
+   * The extent or scope of the content of the resource. Coverage will typically
+   * include spatial location (a place name or geographic coordinates), temporal
+   * period (a period label, date, or date range) or jurisdiction (such as a
+   * named administrative entity). Recommended best practice is to select a
+   * value from a controlled vocabulary (for example, the Thesaurus of
+   * Geographic Names [TGN]) and that, where appropriate, named places or time
+   * periods be used in preference to numeric identifiers such as sets of
+   * coordinates or date ranges.
    */
   public static final String COVERAGE = "coverage";
-  
+
   /**
    * An entity primarily responsible for making the content of the resource.
    * Examples of a Creator include a person, an organisation, or a service.
    * Typically, the name of a Creator should be used to indicate the entity.
    */
   public static final String CREATOR = "creator";
-  
+
   /**
    * A date associated with an event in the life cycle of the resource.
-   * Typically, Date will be associated with the creation or availability of
-   * the resource. Recommended best practice for encoding the date value is
-   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
-   * format.
+   * Typically, Date will be associated with the creation or availability of the
+   * resource. Recommended best practice for encoding the date value is defined
+   * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
    */
   public static final String DATE = "date";
-  
+
   /**
    * An account of the content of the resource. Description may include but is
    * not limited to: an abstract, table of contents, reference to a graphical
    * representation of content or a free-text account of the content.
    */
   public static final String DESCRIPTION = "description";
-  
+
   /**
    * A language of the intellectual content of the resource. Recommended best
    * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
-   * [ISO639], defines two- and three-letter primary language tags with
-   * optional subtags. Examples include "en" or "eng" for English, "akk" for
-   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   * [ISO639], defines two- and three-letter primary language tags with optional
+   * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+   * and "en-GB" for English used in the United Kingdom.
    */
   public static final String LANGUAGE = "language";
-  
+
   /**
    * An entity responsible for making the resource available. Examples of a
    * Publisher include a person, an organisation, or a service. Typically, the
    * name of a Publisher should be used to indicate the entity.
    */
   public static final String PUBLISHER = "publisher";
-  
+
   /**
    * A reference to a related resource. Recommended best practice is to
    * reference the resource by means of a string or number conforming to a
    * formal identification system.
    */
   public static final String RELATION = "relation";
-  
+
   /**
-   * Information about rights held in and over the resource. Typically, a
-   * Rights element will contain a rights management statement for the
-   * resource, or reference a service providing such information. Rights
-   * information often encompasses Intellectual Property Rights (IPR),
-   * Copyright, and various Property Rights. If the Rights element is absent,
-   * no assumptions can be made about the status of these and other rights
-   * with respect to the resource.
+   * Information about rights held in and over the resource. Typically, a Rights
+   * element will contain a rights management statement for the resource, or
+   * reference a service providing such information. Rights information often
+   * encompasses Intellectual Property Rights (IPR), Copyright, and various
+   * Property Rights. If the Rights element is absent, no assumptions can be
+   * made about the status of these and other rights with respect to the
+   * resource.
    */
   public static final String RIGHTS = "rights";
-  
+
   /**
    * A reference to a resource from which the present resource is derived. The
    * present resource may be derived from the Source resource in whole or in
-   * part. Recommended best practice is to reference the resource by means of
-   * a string or number conforming to a formal identification system.
+   * part. Recommended best practice is to reference the resource by means of a
+   * string or number conforming to a formal identification system.
    */
   public static final String SOURCE = "source";
-  
+
   /**
    * The topic of the content of the resource. Typically, a Subject will be
-   * expressed as keywords, key phrases or classification codes that describe
-   * a topic of the resource. Recommended best practice is to select a value
-   * from a controlled vocabulary or formal classification scheme.
+   * expressed as keywords, key phrases or classification codes that describe a
+   * topic of the resource. Recommended best practice is to select a value from
+   * a controlled vocabulary or formal classification scheme.
    */
   public static final String SUBJECT = "subject";
-  
+
   /**
    * A name given to the resource. Typically, a Title will be a name by which
    * the resource is formally known.
    */
   public static final String TITLE = "title";
-  
+
   /**
    * The nature or genre of the content of the resource. Type includes terms
-   * describing general categories, functions, genres, or aggregation levels
-   * for content. Recommended best practice is to select a value from a
-   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
-   * To describe the physical or digital manifestation of the resource, use
-   * the Format element.
+   * describing general categories, functions, genres, or aggregation levels for
+   * content. Recommended best practice is to select a value from a controlled
+   * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+   * the physical or digital manifestation of the resource, use the Format
+   * element.
    */
   public static final String TYPE = "type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/MetaWrapper.java
===================================================================
--- src/java/org/apache/nutch/metadata/MetaWrapper.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/MetaWrapper.java	(working copy)
@@ -28,28 +28,29 @@
 /**
  * This is a simple decorator that adds metadata to any Writable-s that can be
  * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be
- * temporarily enriched during processing, but this
- * temporary metadata doesn't need to be permanently stored after the job is done.
+ * temporarily enriched during processing, but this temporary metadata doesn't
+ * need to be permanently stored after the job is done.
  * 
  * @author Andrzej Bialecki
  */
 public class MetaWrapper extends NutchWritable {
   private Metadata metadata;
-  
+
   public MetaWrapper() {
     super();
     metadata = new Metadata();
   }
-  
+
   public MetaWrapper(Writable instance, Configuration conf) {
     super(instance);
     metadata = new Metadata();
     setConf(conf);
   }
-  
+
   public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) {
     super(instance);
-    if (metadata == null) metadata = new Metadata();
+    if (metadata == null)
+      metadata = new Metadata();
     this.metadata = metadata;
     setConf(conf);
   }
@@ -60,43 +61,52 @@
   public Metadata getMetadata() {
     return metadata;
   }
-  
+
   /**
-   * Add metadata. See {@link Metadata#add(String, String)} for more information.
-   * @param name metadata name
-   * @param value metadata value
+   * Add metadata. See {@link Metadata#add(String, String)} for more
+   * information.
+   * 
+   * @param name
+   *          metadata name
+   * @param value
+   *          metadata value
    */
   public void addMeta(String name, String value) {
     metadata.add(name, value);
   }
-  
+
   /**
-   * Set metadata. See {@link Metadata#set(String, String)} for more information.
+   * Set metadata. See {@link Metadata#set(String, String)} for more
+   * information.
+   * 
    * @param name
    * @param value
    */
   public void setMeta(String name, String value) {
     metadata.set(name, value);
   }
-  
+
   /**
    * Get metadata. See {@link Metadata#get(String)} for more information.
+   * 
    * @param name
    * @return metadata value
    */
   public String getMeta(String name) {
     return metadata.get(name);
   }
-  
+
   /**
-   * Get multiple metadata. See {@link Metadata#getValues(String)} for more information.
+   * Get multiple metadata. See {@link Metadata#getValues(String)} for more
+   * information.
+   * 
    * @param name
    * @return multiple values
    */
   public String[] getMetaValues(String name) {
     return metadata.getValues(name);
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
     metadata = new Metadata();
Index: src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(working copy)
@@ -33,7 +33,7 @@
 
   /**
    * Treshold divider.
-   *
+   * 
    * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code>
    */
   private static final int TRESHOLD_DIVIDER = 3;
@@ -52,7 +52,7 @@
 
     // Uses following array to fill the metanames index and the
     // metanames list.
-    Class[] spellthese = {HttpHeaders.class};
+    Class[] spellthese = { HttpHeaders.class };
 
     for (Class spellCheckedNames : spellthese) {
       for (Field field : spellCheckedNames.getFields()) {
@@ -73,7 +73,7 @@
 
   /**
    * Normalizes String.
-   *
+   * 
    * @param str
    *          the string to normalize
    * @return normalized String
@@ -102,7 +102,7 @@
    * </ul>
    * If no matching with a well-known metadata name is found, then the original
    * name is returned.
-   *
+   * 
    * @param name
    *          Name to normalize
    * @return normalized name
Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(working copy)
@@ -16,14 +16,12 @@
  */
 package org.apache.nutch.metadata;
 
-
-
 /**
  * A collection of HTTP header names.
- *
- * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
- *      Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
+ * 
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol
+ *      -- HTTP/1.1 (RFC 2616)</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/metadata/Office.java
===================================================================
--- src/java/org/apache/nutch/metadata/Office.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/Office.java	(working copy)
@@ -16,37 +16,36 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of <i>"Office"</i> documents properties names.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Office {
-    
+
   public static final String KEYWORDS = "Keywords";
-  
+
   public static final String COMMENTS = "Comments";
-  
+
   public static final String LAST_AUTHOR = "Last-Author";
-  
+
   public static final String APPLICATION_NAME = "Application-Name";
-  
+
   public static final String CHARACTER_COUNT = "Character Count";
-  
+
   public static final String LAST_PRINTED = "Last-Printed";
-  
+
   public static final String LAST_SAVED = "Last-Save-Date";
-  
+
   public static final String PAGE_COUNT = "Page-Count";
-  
+
   public static final String REVISION_NUMBER = "Revision-Number";
-  
+
   public static final String WORD_COUNT = "Word-Count";
-  
+
   public static final String TEMPLATE = "Template";
-  
+
   public static final String AUTHOR = "Author";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 1188268)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(working copy)
@@ -16,21 +16,20 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Creative Commons properties names.
- *
+ * 
  * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface CreativeCommons {
-  
+
   public final static String LICENSE_URL = "License-Url";
-  
+
   public final static String LICENSE_LOCATION = "License-Location";
-  
+
   public final static String WORK_TYPE = "Work-Type";
-  
+
 }
Index: src/java/org/apache/nutch/tools/proxy/FakeHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -44,9 +45,14 @@
 import org.mortbay.jetty.Request;
 
 public class FakeHandler extends AbstractTestbedHandler {
-  /** Create links to hosts generated from a pool of numHosts/numPages random names. */
-  public static enum Mode {UNIQUE, RANDOM};
-    
+  /**
+   * Create links to hosts generated from a pool of numHosts/numPages random
+   * names.
+   */
+  public static enum Mode {
+    UNIQUE, RANDOM
+  };
+
   int numInternalLinks;
   int numExternalLinks;
   Mode hostMode;
@@ -55,34 +61,36 @@
   AtomicLong pageSeq = new AtomicLong(0);
   int numHosts;
   int numPages;
-  
+
   Random r = new Random(1234567890L); // predictable
   Random pageR;
 
-  private static final String testA = 
-    "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n" + 
-    "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>" +
-    "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>" +
-    "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
-  private static final String testB =
-    "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>" +
-    "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>" +
-    "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
+  private static final String testA = "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n"
+      + "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>"
+      + "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>"
+      + "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
+  private static final String testB = "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>"
+      + "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>"
+      + "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
 
   /**
    * Create fake pages.
-   * @param hostMode if UNIQUE then each external outlink will use a unique host name. If
-   * RANDOM then each outlink will use a host name allocated from pool of numHosts.
-   * @param pageMode if UNIQUE then each internal outlinks will use a unique page name.
-   * if RANDOM then each outlink will use a page name allocated from pool of numPages.
+   * 
+   * @param hostMode
+   *          if UNIQUE then each external outlink will use a unique host name.
+   *          If RANDOM then each outlink will use a host name allocated from
+   *          pool of numHosts.
+   * @param pageMode
+   *          if UNIQUE then each internal outlinks will use a unique page name.
+   *          if RANDOM then each outlink will use a page name allocated from
+   *          pool of numPages.
    * @param numInternalLinks
    * @param numExternalLinks
    * @param numHosts
    * @param numPages
    */
-  public FakeHandler(Mode hostMode, Mode pageMode,
-      int numInternalLinks, int numExternalLinks,
-      int numHosts, int numPages) {
+  public FakeHandler(Mode hostMode, Mode pageMode, int numInternalLinks,
+      int numExternalLinks, int numHosts, int numPages) {
     this.numExternalLinks = numExternalLinks;
     this.numInternalLinks = numInternalLinks;
     this.numHosts = numHosts;
@@ -90,10 +98,10 @@
     this.hostMode = hostMode;
     this.pageMode = pageMode;
   }
-  
+
   @Override
-  public void handle(Request req, HttpServletResponse res, String target, 
-          int dispatch) throws IOException, ServletException {
+  public void handle(Request req, HttpServletResponse res, String target,
+      int dispatch) throws IOException, ServletException {
     HttpURI u = req.getUri();
     String uri = u.toString();
     addMyHeader(res, "URI", uri);
@@ -126,7 +134,7 @@
       for (int i = 0; i < numInternalLinks; i++) {
         String link = "<p><a href='";
         if (pageMode.equals(Mode.RANDOM)) {
-          link += pageR.nextInt (numPages) + ".html'>";
+          link += pageR.nextInt(numPages) + ".html'>";
         } else {
           if (!basePath.endsWith("/")) {
             link += "/";
@@ -157,13 +165,14 @@
       }
       // fake a link to the root URL
       link = "<p><a href='" + u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) link += ":" + u.getPort();
+      if (u.getPort() != 80 && u.getPort() != -1)
+        link += ":" + u.getPort();
       link += "/'>site " + u.getHost() + "</a></p>\r\n";
       os.write(link.getBytes());
       os.write(testB.getBytes());
       res.flushBuffer();
     } catch (IOException ioe) {
-    }    
+    }
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -47,29 +48,33 @@
 import org.mortbay.jetty.Request;
 
 public class LogDebugHandler extends AbstractTestbedHandler implements Filter {
-  private static final Logger LOG = LoggerFactory.getLogger(LogDebugHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LogDebugHandler.class);
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
-    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n" + req.getConnection().getRequestFields());
+      int dispatch) throws IOException, ServletException {
+    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n"
+        + req.getConnection().getRequestFields());
   }
 
   @Override
   public void doFilter(ServletRequest req, ServletResponse res,
-          FilterChain chain) throws IOException, ServletException {
-    ((HttpServletResponse)res).addHeader("X-Handled-By", "AsyncProxyHandler");
-    ((HttpServletResponse)res).addHeader("X-TestbedHandlers", "AsyncProxyHandler");
+      FilterChain chain) throws IOException, ServletException {
+    ((HttpServletResponse) res).addHeader("X-Handled-By", "AsyncProxyHandler");
+    ((HttpServletResponse) res).addHeader("X-TestbedHandlers",
+        "AsyncProxyHandler");
     try {
       chain.doFilter(req, res);
     } catch (Throwable e) {
-      ((HttpServletResponse)res).sendError(HttpServletResponse.SC_BAD_REQUEST, e.toString());
+      ((HttpServletResponse) res).sendError(HttpServletResponse.SC_BAD_REQUEST,
+          e.toString());
     }
   }
 
   @Override
   public void init(FilterConfig arg0) throws ServletException {
     // TODO Auto-generated method stub
-    
+
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -43,13 +44,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     // don't pass it down the chain
     req.setHandled(true);
     res.addHeader("X-Handled-By", getClass().getSimpleName());
     addMyHeader(res, "URI", req.getUri().toString());
-    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: " +
-            req.getUri().toString());
+    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: "
+        + req.getUri().toString());
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -46,16 +47,17 @@
 
   @Override
   public void handle(String target, HttpServletRequest req,
-          HttpServletResponse res, int dispatch) throws IOException,
-          ServletException {
-    Request base_request = (req instanceof Request) ? (Request)req : HttpConnection.getCurrentConnection().getRequest();
+      HttpServletResponse res, int dispatch) throws IOException,
+      ServletException {
+    Request base_request = (req instanceof Request) ? (Request) req
+        : HttpConnection.getCurrentConnection().getRequest();
     res.addHeader("X-TestbedHandlers", this.getClass().getSimpleName());
     handle(base_request, res, target, dispatch);
   }
-  
-  public abstract void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException;
-  
+
+  public abstract void handle(Request req, HttpServletResponse res,
+      String target, int dispatch) throws IOException, ServletException;
+
   public void addMyHeader(HttpServletResponse res, String name, String value) {
     name = "X-" + this.getClass().getSimpleName() + "-" + name;
     res.addHeader(name, value);
Index: src/java/org/apache/nutch/tools/proxy/DelayHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -41,13 +42,13 @@
 import org.mortbay.jetty.Request;
 
 public class DelayHandler extends AbstractTestbedHandler {
-  
+
   public static final long DEFAULT_DELAY = 2000;
-  
+
   private int delay;
   private boolean random;
   private Random r;
-  
+
   public DelayHandler(int delay) {
     if (delay < 0) {
       delay = -delay;
@@ -59,13 +60,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       int del = random ? r.nextInt(delay) : delay;
       Thread.sleep(del);
       addMyHeader(res, "Delay", String.valueOf(del));
     } catch (Exception e) {
-      
+
     }
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  ******************************************************************************/
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -61,19 +62,32 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
-      System.err.println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
-      System.err.println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
-      System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
-      System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
-      System.err.println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
-      System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
-      System.err.println("\nAdditional options for -fake handler (all optional):");
-      System.err.println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
-      System.err.println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
-      System.err.println("\t-numHosts N\ttotal number of hosts when using hostMode r");
-      System.err.println("\t-numPages N\ttotal number of pages per host when using pageMode r");
-      System.err.println("\t-intLinks N\tnumber of internal (same host) links per page");
-      System.err.println("\t-extLinks N\tnumber of external (other host) links per page");
+      System.err
+          .println("TestbedProxy [-port <nnn>] [-forward] [-fake [...]] [-delay nnn] [-debug]");
+      System.err
+          .println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
+      System.err
+          .println("-forward\tif specified, requests to all unknown urls will be passed to");
+      System.err
+          .println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
+      System.err
+          .println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
+      System.err
+          .println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
+      System.err
+          .println("\nAdditional options for -fake handler (all optional):");
+      System.err
+          .println("\t-hostMode (u | r)\tcreate unique host names, or pick random from a pool");
+      System.err
+          .println("\t-pageMode (u | r)\tcreate unique page names, or pick random from a pool");
+      System.err
+          .println("\t-numHosts N\ttotal number of hosts when using hostMode r");
+      System.err
+          .println("\t-numPages N\ttotal number of pages per host when using pageMode r");
+      System.err
+          .println("\t-intLinks N\tnumber of internal (same host) links per page");
+      System.err
+          .println("\t-extLinks N\tnumber of external (other host) links per page");
       System.err.println("\nDefaults for -fake handler:");
       System.err.println("\t-hostMode r");
       System.err.println("\t-pageMode r");
@@ -83,7 +97,7 @@
       System.err.println("\t-extLinks 5");
       System.exit(-1);
     }
-    
+
     Configuration conf = NutchConfiguration.create();
     int port = conf.getInt("segment.proxy.port", 8181);
     boolean forward = false;
@@ -97,7 +111,7 @@
     int numPages = 10000;
     int intLinks = 10;
     int extLinks = 5;
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-port")) {
         port = Integer.parseInt(args[++i]);
@@ -131,28 +145,30 @@
         System.exit(-1);
       }
     }
-    
+
     // Create the server
     Server server = new Server();
     SocketConnector connector = new SocketConnector();
     connector.setPort(port);
     connector.setResolveNames(false);
     server.addConnector(connector);
-    
+
     // create a list of handlers
     HandlerList list = new HandlerList();
     server.addHandler(list);
-    
+
     if (debug) {
       LOG.info("* Added debug handler.");
       list.addHandler(new LogDebugHandler());
     }
- 
+
     if (delay) {
-      LOG.info("* Added delay handler: " + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
+      LOG.info("* Added delay handler: "
+          + (delayVal < 0 ? "random delay up to " + (-delayVal)
+              : "constant delay of " + delayVal));
       list.addHandler(new DelayHandler(delayVal));
     }
-    
+
     // XXX alternatively, we can add the DispatchHandler as the first one,
     // XXX to activate handler plugins and redirect requests to appropriate
     // XXX handlers ... Here we always load these handlers
Index: src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(working copy)
@@ -34,23 +34,29 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * <p>The <code>ArchRecordReader</code> class provides a record reader which 
- * reads records from arc files.</p>
+ * <p>
+ * The <code>ArchRecordReader</code> class provides a record reader which reads
+ * records from arc files.
+ * </p>
  * 
- * <p>Arc files are essentially tars of gzips.  Each record in an arc file is
- * a compressed gzip.  Multiple records are concatenated together to form a
- * complete arc.  For more information on the arc file format see
- * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.</p>
+ * <p>
+ * Arc files are essentially tars of gzips. Each record in an arc file is a
+ * compressed gzip. Multiple records are concatenated together to form a
+ * complete arc. For more information on the arc file format see
+ * <a href="http://www.archive.org/web/researcher/ArcFileFormat.php">the ARC file format</a>.
+ * </p>
  * 
- * <p>Arc files are used by the internet archive and grub projects.</p>
+ * <p>
+ * Arc files are used by the internet archive and grub projects.
+ * </p>
  * 
  * @see http://www.archive.org/
  * @see http://www.grub.org/
  */
-public class ArcRecordReader
-  implements RecordReader<Text, BytesWritable> {
+public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcRecordReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcRecordReader.class);
 
   protected Configuration conf;
   protected long splitStart = 0;
@@ -60,30 +66,32 @@
   protected long fileLen = 0;
   protected FSDataInputStream in;
 
-  private static byte[] MAGIC = {(byte)0x1F, (byte)0x8B};
+  private static byte[] MAGIC = { (byte) 0x1F, (byte) 0x8B };
 
   /**
-   * <p>Returns true if the byte array passed matches the gzip header magic 
-   * number.</p>
+   * <p>
+   * Returns true if the byte array passed matches the gzip header magic number.
+   * </p>
    * 
-   * @param input The byte array to check.
+   * @param input
+   *          The byte array to check.
    * 
    * @return True if the byte array matches the gzip header magic number.
    */
   public static boolean isMagic(byte[] input) {
 
-	// check for null and incorrect length
+    // check for null and incorrect length
     if (input == null || input.length != MAGIC.length) {
       return false;
     }
-    
+
     // check byte by byte
     for (int i = 0; i < MAGIC.length; i++) {
       if (MAGIC[i] != input[i]) {
         return false;
       }
     }
-    
+
     // must match
     return true;
   }
@@ -91,13 +99,16 @@
   /**
    * Constructor that sets the configuration and file split.
    * 
-   * @param conf The job configuration.
-   * @param split The file split to read from.
+   * @param conf
+   *          The job configuration.
+   * @param split
+   *          The file split to read from.
    * 
-   * @throws IOException  If an IO error occurs while initializing file split.
+   * @throws IOException
+   *           If an IO error occurs while initializing file split.
    */
   public ArcRecordReader(Configuration conf, FileSplit split)
-    throws IOException {
+      throws IOException {
 
     Path path = split.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -113,8 +124,7 @@
   /**
    * Closes the record reader resources.
    */
-  public void close()
-    throws IOException {
+  public void close() throws IOException {
     this.in.close();
   }
 
@@ -122,14 +132,15 @@
    * Creates a new instance of the <code>Text</code> object for the key.
    */
   public Text createKey() {
-    return (Text)ReflectionUtils.newInstance(Text.class, conf);
+    return (Text) ReflectionUtils.newInstance(Text.class, conf);
   }
 
   /**
    * Creates a new instance of the <code>BytesWritable</code> object for the key
    */
   public BytesWritable createValue() {
-    return (BytesWritable)ReflectionUtils.newInstance(BytesWritable.class, conf);
+    return (BytesWritable) ReflectionUtils.newInstance(BytesWritable.class,
+        conf);
   }
 
   /**
@@ -137,63 +148,64 @@
    * 
    * @return The long of the current position in the file.
    */
-  public long getPos()
-    throws IOException {
+  public long getPos() throws IOException {
     return in.getPos();
   }
 
   /**
-   * Returns the percentage of progress in processing the file.  This will be
+   * Returns the percentage of progress in processing the file. This will be
    * represented as a float from 0 to 1 with 1 being 100% completed.
    * 
    * @return The percentage of progress as a float from 0 to 1.
    */
-  public float getProgress()
-    throws IOException {
-	  
+  public float getProgress() throws IOException {
+
     // if we haven't even started
     if (splitEnd == splitStart) {
       return 0.0f;
+    } else {
+      // the progress is current pos - where we started / length of the split
+      return Math.min(1.0f, (getPos() - splitStart) / (float) splitLen);
     }
-    else {
-      // the progress is current pos - where we started  / length of the split
-      return Math.min(1.0f, (getPos() - splitStart) / (float)splitLen);
-    }
   }
 
   /**
-   * <p>Returns true if the next record in the split is read into the key and 
-   * value pair.  The key will be the arc record header and the values will be
-   * the raw content bytes of the arc record.</p>
+   * <p>
+   * Returns true if the next record in the split is read into the key and value
+   * pair. The key will be the arc record header and the values will be the raw
+   * content bytes of the arc record.
+   * </p>
    * 
-   * @param key The record key
-   * @param value The record value
+   * @param key
+   *          The record key
+   * @param value
+   *          The record value
    * 
    * @return True if the next record is read.
    * 
-   * @throws IOException If an error occurs while reading the record value.
+   * @throws IOException
+   *           If an error occurs while reading the record value.
    */
-  public boolean next(Text key, BytesWritable value)
-    throws IOException {
+  public boolean next(Text key, BytesWritable value) throws IOException {
 
     try {
-      
+
       // get the starting position on the input stream
       long startRead = in.getPos();
       byte[] magicBuffer = null;
-      
+
       // we need this loop to handle false positives in reading of gzip records
       while (true) {
-        
+
         // while we haven't passed the end of the split
         if (startRead >= splitEnd) {
           return false;
         }
-        
+
         // scanning for the gzip header
         boolean foundStart = false;
         while (!foundStart) {
-          
+
           // start at the current file position and scan for 1K at time, break
           // if there is no more to read
           startRead = in.getPos();
@@ -202,13 +214,13 @@
           if (read < 0) {
             break;
           }
-          
-          // scan the byte array for the gzip header magic number.  This happens
+
+          // scan the byte array for the gzip header magic number. This happens
           // byte by byte
           for (int i = 0; i < read - 1; i++) {
             byte[] testMagic = new byte[2];
-            System.arraycopy(magicBuffer, i, testMagic, 0, 2);            
-            if (isMagic(testMagic)) {              
+            System.arraycopy(magicBuffer, i, testMagic, 0, 2);
+            if (isMagic(testMagic)) {
               // set the next start to the current gzip header
               startRead += i;
               foundStart = true;
@@ -216,14 +228,14 @@
             }
           }
         }
-        
+
         // seek to the start of the gzip header
         in.seek(startRead);
         ByteArrayOutputStream baos = null;
         int totalRead = 0;
 
         try {
-          
+
           // read 4K of the gzip at a time putting into a byte array
           byte[] buffer = new byte[4096];
           GZIPInputStream zin = new GZIPInputStream(in);
@@ -233,9 +245,8 @@
             baos.write(buffer, 0, gzipRead);
             totalRead += gzipRead;
           }
-        }
-        catch (Exception e) {
-          
+        } catch (Exception e) {
+
           // there are times we get false positives where the gzip header exists
           // but it is not an actual gzip record, so we ignore it and start
           // over seeking
@@ -248,7 +259,7 @@
 
         // change the output stream to a byte array
         byte[] content = baos.toByteArray();
-        
+
         // the first line of the raw content in arc files is the header
         int eol = 0;
         for (int i = 0; i < content.length; i++) {
@@ -257,34 +268,33 @@
             break;
           }
         }
-        
+
         // create the header and the raw content minus the header
         String header = new String(content, 0, eol).trim();
         byte[] raw = new byte[(content.length - eol) - 1];
         System.arraycopy(content, eol + 1, raw, 0, raw.length);
-        
+
         // populate key and values with the header and raw content.
-        Text keyText = (Text)key;
+        Text keyText = (Text) key;
         keyText.set(header);
-        BytesWritable valueBytes = (BytesWritable)value;
+        BytesWritable valueBytes = (BytesWritable) value;
         valueBytes.set(raw, 0, raw.length);
 
-        // TODO: It would be best to start at the end of the gzip read but 
-        // the bytes read in gzip don't match raw bytes in the file so we 
-        // overshoot the next header.  With this current method you get
+        // TODO: It would be best to start at the end of the gzip read but
+        // the bytes read in gzip don't match raw bytes in the file so we
+        // overshoot the next header. With this current method you get
         // some false positives but don't miss records.
         if (startRead + 1 < fileLen) {
           in.seek(startRead + 1);
         }
-        
+
         // populated the record, now return
         return true;
       }
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
     }
-    catch (Exception e) {
-      LOG.equals(StringUtils.stringifyException(e));      
-    }
-    
+
     // couldn't populate the record or there is no next record to read
     return false;
   }
Index: src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(working copy)
@@ -30,21 +30,22 @@
 /**
  * A input format the reads arc files.
  */
-public class ArcInputFormat
-  extends FileInputFormat<Text, BytesWritable> {
+public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> {
 
   /**
    * Returns the <code>RecordReader</code> for reading the arc file.
    * 
-   * @param split The InputSplit of the arc file to process.
-   * @param job The job configuration.
-   * @param reporter The progress reporter.
+   * @param split
+   *          The InputSplit of the arc file to process.
+   * @param job
+   *          The job configuration.
+   * @param reporter
+   *          The progress reporter.
    */
   public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
-      JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
     reporter.setStatus(split.toString());
-    return new ArcRecordReader(job, (FileSplit)split);
+    return new ArcRecordReader(job, (FileSplit) split);
   }
 
 }
Index: src/java/org/apache/nutch/tools/ResolveUrls.java
===================================================================
--- src/java/org/apache/nutch/tools/ResolveUrls.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/ResolveUrls.java	(working copy)
@@ -59,8 +59,7 @@
   /**
    * A Thread which gets the ip address of a single host by name.
    */
-  private static class ResolverThread
-    extends Thread {
+  private static class ResolverThread extends Thread {
 
     private String url = null;
 
@@ -74,14 +73,13 @@
       String host = URLUtil.getHost(url);
       long start = System.currentTimeMillis();
       try {
-        
-        // get the address by name and if no error is thrown then it 
+
+        // get the address by name and if no error is thrown then it
         // is resolved successfully
         InetAddress ia = InetAddress.getByName(host);
         LOG.info("Resolved: " + host);
         numResolved.incrementAndGet();
-      }
-      catch (Exception uhe) {
+      } catch (Exception uhe) {
         LOG.info("Error Resolving: " + host);
         numErrored.incrementAndGet();
       }
@@ -93,8 +91,8 @@
   }
 
   /**
-   * Creates a thread pool for resolving urls.  Reads in the url file on the
-   * local filesystem.  For each url it attempts to resolve it keeping a total
+   * Creates a thread pool for resolving urls. Reads in the url file on the
+   * local filesystem. For each url it attempts to resolve it keeping a total
    * account of the number resolved, errored, and the amount of time.
    */
   public void resolveUrls() {
@@ -103,13 +101,13 @@
 
       // create a thread pool with a fixed number of threads
       pool = Executors.newFixedThreadPool(numThreads);
-      
+
       // read in the urls file and loop through each line, one url per line
       BufferedReader buffRead = new BufferedReader(new FileReader(new File(
-        urlsFile)));
+          urlsFile)));
       String urlStr = null;
       while ((urlStr = buffRead.readLine()) != null) {
-        
+
         // spin up a resolver thread per url
         LOG.info("Starting: " + urlStr);
         pool.execute(new ResolverThread(urlStr));
@@ -119,9 +117,8 @@
       // the thread pool to give urls time to finish resolving
       buffRead.close();
       pool.awaitTermination(60, TimeUnit.SECONDS);
-    }
-    catch (Exception e) {
-      
+    } catch (Exception e) {
+
       // on error shutdown the thread pool immediately
       pool.shutdownNow();
       LOG.info(StringUtils.stringifyException(e));
@@ -129,15 +126,16 @@
 
     // shutdown the thread pool and log totals
     pool.shutdown();
-    LOG.info("Total: " + numTotal.get() + ", Resovled: "
-      + numResolved.get() + ", Errored: " + numErrored.get()
-      + ", Average Time: " + totalTime.get() / numTotal.get());
+    LOG.info("Total: " + numTotal.get() + ", Resolved: " + numResolved.get()
+        + ", Errored: " + numErrored.get() + ", Average Time: "
+        + (numTotal.get() == 0 ? 0 : totalTime.get() / numTotal.get()));
   }
 
   /**
    * Create a new ResolveUrls with a file from the local file system.
-   *
-   * @param urlsFile The local urls file, one url per line.
+   * 
+   * @param urlsFile
+   *          The local urls file, one url per line.
    */
   public ResolveUrls(String urlsFile) {
     this(urlsFile, 100);
@@ -145,10 +143,12 @@
 
   /**
    * Create a new ResolveUrls with a urls file and a number of threads for the
-   * Thread pool.  Number of threads is 100 by default.
+   * Thread pool. Number of threads is 100 by default.
    * 
-   * @param urlsFile The local urls file, one url per line.
-   * @param numThreads The number of threads used to resolve urls in parallel.
+   * @param urlsFile
+   *          The local urls file, one url per line.
+   * @param numThreads
+   *          The number of threads used to resolve urls in parallel.
    */
   public ResolveUrls(String urlsFile, int numThreads) {
     this.urlsFile = urlsFile;
@@ -161,12 +161,12 @@
   public static void main(String[] args) {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option urlOpts = OptionBuilder.withArgName("urls").hasArg().withDescription(
-      "the urls file to check").create("urls");
-    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs().withDescription(
-      "the number of threads to use").create("numThreads");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option urlOpts = OptionBuilder.withArgName("urls").hasArg()
+        .withDescription("the urls file to check").create("urls");
+    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs()
+        .withDescription("the number of threads to use").create("numThreads");
     options.addOption(helpOpts);
     options.addOption(urlOpts);
     options.addOption(numThreadOpts);
@@ -191,8 +191,7 @@
       }
       ResolveUrls resolve = new ResolveUrls(urls, numThreads);
       resolve.resolveUrls();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
     }
   }
Index: src/java/org/apache/nutch/tools/DmozParser.java
===================================================================
--- src/java/org/apache/nutch/tools/DmozParser.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/DmozParser.java	(working copy)
@@ -36,16 +36,15 @@
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
   public static final Logger LOG = LoggerFactory.getLogger(DmozParser.class);
-  
-    long pages = 0;
 
+  long pages = 0;
+
   /**
-   * This filter fixes characters that might offend our parser.
-   * This lets us be tolerant of errors that might appear in the input XML.
+   * This filter fixes characters that might offend our parser. This lets us be
+   * tolerant of errors that might appear in the input XML.
    */
   private static class XMLCharFilter extends FilterReader {
     private boolean lastBad = false;
@@ -57,9 +56,9 @@
     public int read() throws IOException {
       int c = in.read();
       int value = c;
-      if (c != -1 && !(XMLChar.isValid(c)))     // fix invalid characters
+      if (c != -1 && !(XMLChar.isValid(c))) // fix invalid characters
         value = 'X';
-      else if (lastBad && c == '<') {           // fix mis-matched brackets
+      else if (lastBad && c == '<') { // fix mis-matched brackets
         in.mark(1);
         if (in.read() != '/')
           value = 'X';
@@ -70,37 +69,35 @@
       return value;
     }
 
-    public int read(char[] cbuf, int off, int len)
-      throws IOException {
+    public int read(char[] cbuf, int off, int len) throws IOException {
       int n = in.read(cbuf, off, len);
       if (n != -1) {
         for (int i = 0; i < n; i++) {
-          char c = cbuf[off+i];
+          char c = cbuf[off + i];
           char value = c;
-          if (!(XMLChar.isValid(c)))            // fix invalid characters
+          if (!(XMLChar.isValid(c))) // fix invalid characters
             value = 'X';
-          else if (lastBad && c == '<') {       // fix mis-matched brackets
-            if (i != n-1 && cbuf[off+i+1] != '/')
+          else if (lastBad && c == '<') { // fix mis-matched brackets
+            if (i != n - 1 && cbuf[off + i + 1] != '/')
               value = 'X';
           }
           lastBad = (c == 65533);
-          cbuf[off+i] = value;
+          cbuf[off + i] = value;
         }
       }
       return n;
     }
   }
 
-
   /**
-   * The RDFProcessor receives tag messages during a parse
-   * of RDF XML data.  We build whatever structures we need
-   * from these messages.
+   * The RDFProcessor receives tag messages during a parse of RDF XML data. We
+   * build whatever structures we need from these messages.
    */
   private class RDFProcessor extends DefaultHandler {
     String curURL = null, curSection = null;
-    boolean titlePending = false, descPending = false, insideAdultSection = false;
-    Pattern topicPattern = null; 
+    boolean titlePending = false, descPending = false,
+        insideAdultSection = false;
+    Pattern topicPattern = null;
     StringBuffer title = new StringBuffer(), desc = new StringBuffer();
     XMLReader reader;
     int subsetDenom;
@@ -109,10 +106,12 @@
     Locator location;
 
     /**
-     * Pass in an XMLReader, plus a flag as to whether we 
-     * should include adult material.
+     * Pass in an XMLReader, plus a flag as to whether we should include adult
+     * material.
      */
-    public RDFProcessor(XMLReader reader, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern) throws IOException {
+    public RDFProcessor(XMLReader reader, int subsetDenom,
+        boolean includeAdult, int skew, Pattern topicPattern)
+        throws IOException {
       this.reader = reader;
       this.subsetDenom = subsetDenom;
       this.includeAdult = includeAdult;
@@ -128,20 +127,21 @@
     /**
      * Start of an XML elt
      */
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+    public void startElement(String namespaceURI, String localName,
+        String qName, Attributes atts) throws SAXException {
       if ("Topic".equals(qName)) {
         curSection = atts.getValue("r:id");
       } else if ("ExternalPage".equals(qName)) {
         // Porn filter
-        if ((! includeAdult) && curSection.startsWith("Top/Adult")) {
+        if ((!includeAdult) && curSection.startsWith("Top/Adult")) {
           return;
         }
-          
+
         if (topicPattern != null && !topicPattern.matcher(curSection).matches()) {
           return;
         }
 
-        // Subset denominator filter.  
+        // Subset denominator filter.
         // Only emit with a chance of 1/denominator.
         String url = atts.getValue("about");
         int hashValue = MD5Hash.digest(url).hashCode();
@@ -174,18 +174,18 @@
      * Termination of XML elt
      */
     public void endElement(String namespaceURI, String localName, String qName)
-      throws SAXException {
+        throws SAXException {
       if (curURL != null) {
         if ("ExternalPage".equals(qName)) {
           //
-          // Inc the number of pages, insert the page, and 
+          // Inc the number of pages, insert the page, and
           // possibly print status.
           //
-          System.out.println(curURL); 
+          System.out.println(curURL);
           pages++;
 
           //
-          // Clear out the link text.  This is what
+          // Clear out the link text. This is what
           // you would use for adding to the linkdb.
           //
           if (title.length() > 0) {
@@ -220,15 +220,13 @@
     }
 
     /**
-     * From time to time the Parser will set the "current location"
-     * by calling this function.  It's useful for emitting locations
-     * for error messages.
+     * From time to time the Parser will set the "current location" by calling
+     * this function. It's useful for emitting locations for error messages.
      */
     public void setDocumentLocator(Locator locator) {
       location = locator;
     }
 
-
     //
     // Interface ErrorHandler
     //
@@ -249,12 +247,12 @@
     public void fatalError(SAXParseException spe) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Fatal err: " + spe.toString() + ": " + spe.getMessage());
-        LOG.error("Last known line is " + location.getLineNumber() +
-                  ", column " + location.getColumnNumber());
+        LOG.error("Last known line is " + location.getLineNumber()
+            + ", column " + location.getColumnNumber());
         spe.printStackTrace(LogUtil.getFatalStream(LOG));
       }
     }
-        
+
     /**
      * Emit exception warning message
      */
@@ -267,34 +265,33 @@
   }
 
   /**
-   * Iterate through all the items in this structured DMOZ file.
-   * Add each URL to the web db.
+   * Iterate through all the items in this structured DMOZ file. Add each URL to
+   * the web db.
    */
   public void parseDmozFile(File dmozFile, int subsetDenom,
-                            boolean includeAdult,
-                            int skew,
-                            Pattern topicPattern)
+      boolean includeAdult, int skew, Pattern topicPattern)
 
-    throws IOException, SAXException, ParserConfigurationException {
+  throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
     SAXParser parser = parserFactory.newSAXParser();
     XMLReader reader = parser.getXMLReader();
 
     // Create our own processor to receive SAX events
-    RDFProcessor rp =
-      new RDFProcessor(reader, subsetDenom, includeAdult,
-                       skew, topicPattern);
+    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,
+        topicPattern);
     reader.setContentHandler(rp);
     reader.setErrorHandler(rp);
     LOG.info("skew = " + rp.hashSkew);
 
     //
-    // Open filtered text stream.  The TextFilter makes sure that
+    // Open filtered text stream. The TextFilter makes sure that
     // only appropriate XML-approved Text characters are received.
     // Any non-conforming characters are silently skipped.
     //
-    XMLCharFilter in = new XMLCharFilter(new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
+    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(new FileInputStream(
+            dmozFile)), "UTF-8")));
     try {
       InputSource is = new InputSource(in);
       reader.parse(is);
@@ -309,18 +306,17 @@
     }
   }
 
-  private static void addTopicsFromFile(String topicFile,
-                                        Vector<String> topics)
-  throws IOException {
+  private static void addTopicsFromFile(String topicFile, Vector<String> topics)
+      throws IOException {
     BufferedReader in = null;
     try {
-      in = new BufferedReader(new InputStreamReader(new FileInputStream(topicFile), "UTF-8"));
+      in = new BufferedReader(new InputStreamReader(new FileInputStream(
+          topicFile), "UTF-8"));
       String line = null;
       while ((line = in.readLine()) != null) {
         topics.addElement(new String(line));
       }
-    } 
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error(e.toString());
         e.printStackTrace(LogUtil.getFatalStream(LOG));
@@ -330,18 +326,19 @@
       in.close();
     }
   }
-    
+
   /**
-   * Command-line access.  User may add URLs via a flat text file
-   * or the structured DMOZ file.  By default, we ignore Adult
-   * material (as categorized by DMOZ).
+   * Command-line access. User may add URLs via a flat text file or the
+   * structured DMOZ file. By default, we ignore Adult material (as categorized
+   * by DMOZ).
    */
   public static void main(String argv[]) throws Exception {
     if (argv.length < 1) {
-      System.err.println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
+      System.err
+          .println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
       return;
     }
-    
+
     //
     // Parse the command line, figure out what kind of
     // URL file we need to load
@@ -350,9 +347,9 @@
     int skew = 0;
     String dmozFile = argv[0];
     boolean includeAdult = false;
-    Pattern topicPattern = null; 
+    Pattern topicPattern = null;
     Vector<String> topics = new Vector<String>();
-    
+
     Configuration conf = NutchConfiguration.create();
     FileSystem fs = FileSystem.get(conf);
     try {
@@ -360,16 +357,16 @@
         if ("-includeAdultMaterial".equals(argv[i])) {
           includeAdult = true;
         } else if ("-subset".equals(argv[i])) {
-          subsetDenom = Integer.parseInt(argv[i+1]);
+          subsetDenom = Integer.parseInt(argv[i + 1]);
           i++;
         } else if ("-topic".equals(argv[i])) {
-          topics.addElement(argv[i+1]); 
+          topics.addElement(argv[i + 1]);
           i++;
         } else if ("-topicFile".equals(argv[i])) {
-          addTopicsFromFile(argv[i+1], topics);
+          addTopicsFromFile(argv[i + 1], topics);
           i++;
         } else if ("-skew".equals(argv[i])) {
-          skew = Integer.parseInt(argv[i+1]);
+          skew = Integer.parseInt(argv[i + 1]);
           i++;
         }
       }
@@ -377,21 +374,21 @@
       DmozParser parser = new DmozParser();
 
       if (!topics.isEmpty()) {
-        String regExp = new String("^("); 
+        String regExp = new String("^(");
         int j = 0;
-        for ( ; j < topics.size() - 1; ++j) {
+        for (; j < topics.size() - 1; ++j) {
           regExp = regExp.concat(topics.get(j));
           regExp = regExp.concat("|");
         }
         regExp = regExp.concat(topics.get(j));
-        regExp = regExp.concat(").*"); 
+        regExp = regExp.concat(").*");
         LOG.info("Topic selection pattern = " + regExp);
-        topicPattern = Pattern.compile(regExp); 
+        topicPattern = Pattern.compile(regExp);
       }
 
-      parser.parseDmozFile(new File(dmozFile), subsetDenom,
-                           includeAdult, skew, topicPattern);
-      
+      parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew,
+          topicPattern);
+
     } finally {
       fs.close();
     }
Index: src/java/org/apache/nutch/tools/Benchmark.java
===================================================================
--- src/java/org/apache/nutch/tools/Benchmark.java	(revision 1188268)
+++ src/java/org/apache/nutch/tools/Benchmark.java	(working copy)
@@ -50,7 +50,8 @@
     System.exit(res);
   }
 
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
+  private void createSeeds(FileSystem fs, Path seedsDir, int count)
+      throws Exception {
     OutputStream os = fs.create(new Path(seedsDir, "seeds"));
     for (int i = 0; i < count; i++) {
       String url = "http://www.test-" + i + ".com/\r\n";
@@ -61,7 +62,7 @@
   }
 
   public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
+    Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
     List<String> runs = new ArrayList<String>();
     List<String> stages = new ArrayList<String>();
     int seeds, depth, threads;
@@ -76,9 +77,9 @@
       if (!stages.contains(stage)) {
         stages.add(stage);
       }
-      Map<String,Long> t = timings.get(stage);
+      Map<String, Long> t = timings.get(stage);
       if (t == null) {
-        t = new HashMap<String,Long>();
+        t = new HashMap<String, Long>();
         timings.put(stage, t);
       }
       t.put(run, timing);
@@ -94,8 +95,9 @@
       sb.append("* TopN:\t" + topN + "\n");
       sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
       for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
+        Map<String, Long> timing = timings.get(stage);
+        if (timing == null)
+          continue;
         sb.append("- stage: " + stage + "\n");
         for (String r : runs) {
           Long Time = timing.get(r);
@@ -111,6 +113,7 @@
     public List<String> getStages() {
       return stages;
     }
+
     public List<String> getRuns() {
       return runs;
     }
@@ -121,21 +124,28 @@
     int seeds = 1;
     int depth = 10;
     int threads = 10;
-    //boolean delete = true;
+    // boolean delete = true;
     long topN = Long.MAX_VALUE;
 
     if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err
+          .println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
+      System.err
+          .println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err
+          .println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
       System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      System.err
+          .println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
       // XXX what is the equivalent here? not an additional job...
       // System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
       System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      System.err.println("\tNOTE: if not specified, this is reset to: "
+          + plugins);
+      System.err
+          .println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err
+          .println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
       return -1;
     }
     int maxPerHost = Integer.MAX_VALUE;
@@ -157,13 +167,14 @@
         return -1;
       }
     }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, plugins);
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN,
+        plugins);
     System.out.println(res);
     return 0;
   }
 
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, String plugins) throws Exception {
+  public BenchmarkResults benchmark(int seeds, int depth, int threads,
+      int maxPerHost, long topN, String plugins) throws Exception {
     Configuration conf = getConf();
     conf.set("http.proxy.host", "localhost");
     conf.setInt("http.proxy.port", 8181);
@@ -173,11 +184,12 @@
       conf.set("plugin.includes", plugins);
     }
     conf.setInt(GeneratorJob.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    conf.set(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     Job job = new NutchJob(conf);
     FileSystem fs = FileSystem.get(job.getConfiguration());
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"), "bench-"
+        + System.currentTimeMillis());
     fs.mkdirs(dir);
     Path rootUrlDir = new Path(dir, "seed");
     fs.mkdirs(rootUrlDir);
@@ -204,7 +216,7 @@
     ParserJob parseSegment = new ParserJob(conf);
     DbUpdaterJob crawlDbTool = new DbUpdaterJob(conf);
     // not needed in the new API
-    //LinkDb linkDbTool = new LinkDb(getConf());
+    // LinkDb linkDbTool = new LinkDb(getConf());
 
     long start = System.currentTimeMillis();
     // initialize crawlDb
@@ -212,10 +224,10 @@
     long delta = System.currentTimeMillis() - start;
     res.addTiming("inject", "0", delta);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
+    for (i = 0; i < depth; i++) { // generate new segment
       start = System.currentTimeMillis();
       String batchId = generator.generate(topN, System.currentTimeMillis(),
-              false, false);
+          false, false);
       delta = System.currentTimeMillis() - start;
       res.addTiming("generate", i + "", delta);
       if (batchId == null) {
@@ -224,12 +236,12 @@
       }
       boolean isParsing = getConf().getBoolean("fetcher.parse", true);
       start = System.currentTimeMillis();
-      fetcher.fetch(batchId, threads, false, isParsing, -1);  // fetch it
+      fetcher.fetch(batchId, threads, false, isParsing, -1); // fetch it
       delta = System.currentTimeMillis() - start;
       res.addTiming("fetch", i + "", delta);
       if (!isParsing) {
         start = System.currentTimeMillis();
-        parseSegment.parse(batchId, false, false);    // parse it, if needed
+        parseSegment.parse(batchId, false, false); // parse it, if needed
         delta = System.currentTimeMillis() - start;
         res.addTiming("parse", i + "", delta);
       }
@@ -241,7 +253,9 @@
     if (i == 0) {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     res.elapsed = System.currentTimeMillis() - res.elapsed;
     WebTableReader dbreader = new WebTableReader();
     dbreader.setConf(conf);
Index: src/java/org/apache/nutch/protocol/RobotRules.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRules.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/RobotRules.java	(working copy)
@@ -35,9 +35,8 @@
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -23,7 +23,7 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-/** A retriever of url content.  Implemented by protocol extensions. */
+/** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
@@ -46,13 +46,16 @@
    */
   public final static String CHECK_ROBOTS = "protocol.plugin.check.robots";
 
-  /** Returns the {@link Content} for a fetchlist entry.
+  /**
+   * Returns the {@link Content} for a fetchlist entry.
    */
   ProtocolOutput getProtocolOutput(String url, WebPage page);
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
+   * 
+   * @param url
+   *          url to check
    * @param page
    * @return robot rules (specific for this url or default), never null
    */
Index: src/java/org/apache/nutch/protocol/ProtocolOutput.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolOutput.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/ProtocolOutput.java	(working copy)
@@ -17,10 +17,10 @@
 
 package org.apache.nutch.protocol;
 
-
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate to pass from protocol plugins both content and protocol
+ * status.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ProtocolOutput {
Index: src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusCodes.java	(working copy)
@@ -19,38 +19,42 @@
 public interface ProtocolStatusCodes {
 
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
+  public static final int FAILED = 2;
 
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occurred. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
+  public static final int BLOCKED = 23;
 }
Index: src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java	(working copy)
@@ -99,7 +99,7 @@
     }
     return TableUtil.toString(args.iterator().next());
   }
-  
+
   public static String toString(ProtocolStatus status) {
     if (status == null) {
       return "(null)";
@@ -112,7 +112,8 @@
       int i = 0;
       Iterator<Utf8> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/protocol/ProtocolFactory.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolFactory.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/ProtocolFactory.java	(working copy)
@@ -41,7 +41,8 @@
  */
 public class ProtocolFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ProtocolFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ProtocolFactory.class);
 
   private final ExtensionPoint extensionPoint;
 
@@ -59,10 +60,11 @@
 
   /**
    * Returns the appropriate {@link Protocol} implementation for a url.
-   *
+   * 
    * @param urlString
    *          Url String
-   * @return The appropriate {@link Protocol} implementation for a given {@link URL}.
+   * @return The appropriate {@link Protocol} implementation for a given
+   *         {@link URL}.
    * @throws ProtocolNotFound
    *           when Protocol can not be found for urlString
    */
@@ -109,10 +111,11 @@
     return null;
   }
 
-  boolean contains(String what, String where){
-    String parts[]=where.split("[, ]");
-    for(int i=0;i<parts.length;i++) {
-      if(parts[i].equals(what)) return true;
+  boolean contains(String what, String where) {
+    String parts[] = where.split("[, ]");
+    for (int i = 0; i < parts.length; i++) {
+      if (parts[i].equals(what))
+        return true;
     }
     return false;
   }
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -42,7 +42,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -86,7 +86,7 @@
     this.mimeTypes = new MimeUtil(conf);
     this.contentType = getContentType(contentType, url, content);
   }
-  
+
   public Content(String url, String base, byte[] content, String contentType,
       Metadata metadata, MimeUtil mimeTypes) {
 
@@ -142,11 +142,11 @@
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -164,14 +164,14 @@
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -205,8 +205,9 @@
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -221,7 +222,9 @@
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
    * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -277,9 +280,9 @@
       System.out.println("usage:" + usage);
       return;
     }
-    
-    GenericOptionsParser optParser =
-      new GenericOptionsParser(NutchConfiguration.create(), args);
+
+    GenericOptionsParser optParser = new GenericOptionsParser(
+        NutchConfiguration.create(), args);
     String[] argv = optParser.getRemainingArgs();
     Configuration conf = optParser.getConfiguration();
 
Index: src/java/org/apache/nutch/protocol/ProtocolNotFound.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(revision 1188268)
+++ src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(working copy)
@@ -22,7 +22,7 @@
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }
Index: src/java/org/apache/nutch/html/Entities.java
===================================================================
--- src/java/org/apache/nutch/html/Entities.java	(revision 1188268)
+++ src/java/org/apache/nutch/html/Entities.java	(working copy)
@@ -20,28 +20,29 @@
 import java.util.*;
 
 public class Entities {
-  static final Hashtable<String, String> decoder =
-    new Hashtable<String, String>(300);
-  static final String[]  encoder = new String[0x100];
+  static final Hashtable<String, String> decoder = new Hashtable<String, String>(
+      300);
+  static final String[] encoder = new String[0x100];
 
   static final String decode(String entity) {
-    if (entity.charAt(entity.length()-1) == ';')  // remove trailing semicolon
-      entity = entity.substring(0, entity.length()-1);
+    if (entity.charAt(entity.length() - 1) == ';') // remove trailing semicolon
+      entity = entity.substring(0, entity.length() - 1);
     if (entity.charAt(1) == '#') {
       int start = 2;
       int radix = 10;
       if (entity.charAt(2) == 'X' || entity.charAt(2) == 'x') {
-	start++;
-	radix = 16;
+        start++;
+        radix = 16;
       }
-      Character c =
-	new Character((char)Integer.parseInt(entity.substring(start), radix));
+      Character c = new Character((char) Integer.parseInt(
+          entity.substring(start), radix));
       return c.toString();
     } else {
       String s = decoder.get(entity);
       if (s != null)
-	return s;
-      else return "";
+        return s;
+      else
+        return "";
     }
   }
 
@@ -50,280 +51,280 @@
     StringBuffer buffer = new StringBuffer(length * 2);
     for (int i = 0; i < length; i++) {
       char c = s.charAt(i);
-      int j = (int)c;
+      int j = (int) c;
       if (j < 0x100 && encoder[j] != null) {
-	buffer.append(encoder[j]);		  // have a named encoding
-	buffer.append(';');
+        buffer.append(encoder[j]); // have a named encoding
+        buffer.append(';');
       } else if (j < 0x80) {
-	buffer.append(c);			  // use ASCII value
+        buffer.append(c); // use ASCII value
       } else {
-	buffer.append("&#");			  // use numeric encoding
-	buffer.append((int)c);
-	buffer.append(';');
+        buffer.append("&#"); // use numeric encoding
+        buffer.append((int) c);
+        buffer.append(';');
       }
     }
     return buffer.toString();
   }
 
   static final void add(String entity, int value) {
-    decoder.put(entity, (new Character((char)value)).toString());
+    decoder.put(entity, (new Character((char) value)).toString());
     if (value < 0x100)
       encoder[value] = entity;
   }
 
   static {
-    add("&nbsp",   160);
-    add("&iexcl",  161);
-    add("&cent",   162);
-    add("&pound",  163);
+    add("&nbsp", 160);
+    add("&iexcl", 161);
+    add("&cent", 162);
+    add("&pound", 163);
     add("&curren", 164);
-    add("&yen",    165);
+    add("&yen", 165);
     add("&brvbar", 166);
-    add("&sect",   167);
-    add("&uml",    168);
-    add("&copy",   169);
-    add("&ordf",   170);
-    add("&laquo",  171);
-    add("&not",    172);
-    add("&shy",    173);
-    add("&reg",    174);
-    add("&macr",   175);
-    add("&deg",    176);
+    add("&sect", 167);
+    add("&uml", 168);
+    add("&copy", 169);
+    add("&ordf", 170);
+    add("&laquo", 171);
+    add("&not", 172);
+    add("&shy", 173);
+    add("&reg", 174);
+    add("&macr", 175);
+    add("&deg", 176);
     add("&plusmn", 177);
-    add("&sup2",   178);
-    add("&sup3",   179);
-    add("&acute",  180);
-    add("&micro",  181);
-    add("&para",   182);
+    add("&sup2", 178);
+    add("&sup3", 179);
+    add("&acute", 180);
+    add("&micro", 181);
+    add("&para", 182);
     add("&middot", 183);
-    add("&cedil",  184);
-    add("&sup1",   185);
-    add("&ordm",   186);
-    add("&raquo",  187);
+    add("&cedil", 184);
+    add("&sup1", 185);
+    add("&ordm", 186);
+    add("&raquo", 187);
     add("&frac14", 188);
     add("&frac12", 189);
     add("&frac34", 190);
     add("&iquest", 191);
     add("&Agrave", 192);
     add("&Aacute", 193);
-    add("&Acirc",  194);
+    add("&Acirc", 194);
     add("&Atilde", 195);
-    add("&Auml",   196);
-    add("&Aring",  197);
-    add("&AElig",  198);
+    add("&Auml", 196);
+    add("&Aring", 197);
+    add("&AElig", 198);
     add("&Ccedil", 199);
     add("&Egrave", 200);
     add("&Eacute", 201);
-    add("&Ecirc",  202);
-    add("&Euml",   203);
+    add("&Ecirc", 202);
+    add("&Euml", 203);
     add("&Igrave", 204);
     add("&Iacute", 205);
-    add("&Icirc",  206);
-    add("&Iuml",   207);
-    add("&ETH",    208);
+    add("&Icirc", 206);
+    add("&Iuml", 207);
+    add("&ETH", 208);
     add("&Ntilde", 209);
     add("&Ograve", 210);
     add("&Oacute", 211);
-    add("&Ocirc",  212);
+    add("&Ocirc", 212);
     add("&Otilde", 213);
-    add("&Ouml",   214);
-    add("&times",  215);
+    add("&Ouml", 214);
+    add("&times", 215);
     add("&Oslash", 216);
     add("&Ugrave", 217);
     add("&Uacute", 218);
-    add("&Ucirc",  219);
-    add("&Uuml",   220);
+    add("&Ucirc", 219);
+    add("&Uuml", 220);
     add("&Yacute", 221);
-    add("&THORN",  222);
-    add("&szlig",  223);
+    add("&THORN", 222);
+    add("&szlig", 223);
     add("&agrave", 224);
     add("&aacute", 225);
-    add("&acirc",  226);
+    add("&acirc", 226);
     add("&atilde", 227);
-    add("&auml",   228);
-    add("&aring",  229);
-    add("&aelig",  230);
+    add("&auml", 228);
+    add("&aring", 229);
+    add("&aelig", 230);
     add("&ccedil", 231);
     add("&egrave", 232);
     add("&eacute", 233);
-    add("&ecirc",  234);
-    add("&euml",   235);
+    add("&ecirc", 234);
+    add("&euml", 235);
     add("&igrave", 236);
     add("&iacute", 237);
-    add("&icirc",  238);
-    add("&iuml",   239);
-    add("&eth",    240);
+    add("&icirc", 238);
+    add("&iuml", 239);
+    add("&eth", 240);
     add("&ntilde", 241);
     add("&ograve", 242);
     add("&oacute", 243);
-    add("&ocirc",  244);
+    add("&ocirc", 244);
     add("&otilde", 245);
-    add("&ouml",   246);
+    add("&ouml", 246);
     add("&divide", 247);
     add("&oslash", 248);
     add("&ugrave", 249);
     add("&uacute", 250);
-    add("&ucirc",  251);
-    add("&uuml",   252);
+    add("&ucirc", 251);
+    add("&uuml", 252);
     add("&yacute", 253);
-    add("&thorn",  254);
-    add("&yuml",   255);
-    add("&fnof",   402);
-    add("&Alpha",  913);
-    add("&Beta",   914);
-    add("&Gamma",  915);
-    add("&Delta",  916);
-    add("&Epsilon",917);
-    add("&Zeta",   918);
-    add("&Eta",    919);
-    add("&Theta",  920);
-    add("&Iota",   921);
-    add("&Kappa",  922);
+    add("&thorn", 254);
+    add("&yuml", 255);
+    add("&fnof", 402);
+    add("&Alpha", 913);
+    add("&Beta", 914);
+    add("&Gamma", 915);
+    add("&Delta", 916);
+    add("&Epsilon", 917);
+    add("&Zeta", 918);
+    add("&Eta", 919);
+    add("&Theta", 920);
+    add("&Iota", 921);
+    add("&Kappa", 922);
     add("&Lambda", 923);
-    add("&Mu",     924);
-    add("&Nu",     925);
-    add("&Xi",     926);
-    add("&Omicron",927);
-    add("&Pi",     928);
-    add("&Rho",    929);
-    add("&Sigma",  931);
-    add("&Tau",    932);
-    add("&Upsilon",933);
-    add("&Phi",    934);
-    add("&Chi",    935);
-    add("&Psi",    936);
-    add("&Omega",  937);
-    add("&alpha",  945);
-    add("&beta",   946);
-    add("&gamma",  947);
-    add("&delta",  948);
-    add("&epsilon",949);
-    add("&zeta",   950);
-    add("&eta",    951);
-    add("&theta",  952);
-    add("&iota",   953);
-    add("&kappa",  954);
+    add("&Mu", 924);
+    add("&Nu", 925);
+    add("&Xi", 926);
+    add("&Omicron", 927);
+    add("&Pi", 928);
+    add("&Rho", 929);
+    add("&Sigma", 931);
+    add("&Tau", 932);
+    add("&Upsilon", 933);
+    add("&Phi", 934);
+    add("&Chi", 935);
+    add("&Psi", 936);
+    add("&Omega", 937);
+    add("&alpha", 945);
+    add("&beta", 946);
+    add("&gamma", 947);
+    add("&delta", 948);
+    add("&epsilon", 949);
+    add("&zeta", 950);
+    add("&eta", 951);
+    add("&theta", 952);
+    add("&iota", 953);
+    add("&kappa", 954);
     add("&lambda", 955);
-    add("&mu",     956);
-    add("&nu",     957);
-    add("&xi",     958);
-    add("&omicron",959);
-    add("&pi",     960);
-    add("&rho",    961);
+    add("&mu", 956);
+    add("&nu", 957);
+    add("&xi", 958);
+    add("&omicron", 959);
+    add("&pi", 960);
+    add("&rho", 961);
     add("&sigmaf", 962);
-    add("&sigma",  963);
-    add("&tau",    964);
-    add("&upsilon",965);
-    add("&phi",    966);
-    add("&chi",    967);
-    add("&psi",    968);
-    add("&omega",  969);
-    add("&thetasym",977);
-    add("&upsih",  978);
-    add("&piv",    982);
-    add("&bull",   8226);
+    add("&sigma", 963);
+    add("&tau", 964);
+    add("&upsilon", 965);
+    add("&phi", 966);
+    add("&chi", 967);
+    add("&psi", 968);
+    add("&omega", 969);
+    add("&thetasym", 977);
+    add("&upsih", 978);
+    add("&piv", 982);
+    add("&bull", 8226);
     add("&hellip", 8230);
-    add("&prime",  8242);
-    add("&Prime",  8243);
-    add("&oline",  8254);
-    add("&frasl",  8260);
+    add("&prime", 8242);
+    add("&Prime", 8243);
+    add("&oline", 8254);
+    add("&frasl", 8260);
     add("&weierp", 8472);
-    add("&image",  8465);
-    add("&real",   8476);
-    add("&trade",  8482);
-    add("&alefsym",8501);
-    add("&larr",   8592);
-    add("&uarr",   8593);
-    add("&rarr",   8594);
-    add("&darr",   8595);
-    add("&harr",   8596);
-    add("&crarr",  8629);
-    add("&lArr",   8656);
-    add("&uArr",   8657);
-    add("&rArr",   8658);
-    add("&dArr",   8659);
-    add("&hArr",   8660);
+    add("&image", 8465);
+    add("&real", 8476);
+    add("&trade", 8482);
+    add("&alefsym", 8501);
+    add("&larr", 8592);
+    add("&uarr", 8593);
+    add("&rarr", 8594);
+    add("&darr", 8595);
+    add("&harr", 8596);
+    add("&crarr", 8629);
+    add("&lArr", 8656);
+    add("&uArr", 8657);
+    add("&rArr", 8658);
+    add("&dArr", 8659);
+    add("&hArr", 8660);
     add("&forall", 8704);
-    add("&part",   8706);
-    add("&exist",  8707);
-    add("&empty",  8709);
-    add("&nabla",  8711);
-    add("&isin",   8712);
-    add("&notin",  8713);
-    add("&ni",     8715);
-    add("&prod",   8719);
-    add("&sum",    8721);
-    add("&minus",  8722);
+    add("&part", 8706);
+    add("&exist", 8707);
+    add("&empty", 8709);
+    add("&nabla", 8711);
+    add("&isin", 8712);
+    add("&notin", 8713);
+    add("&ni", 8715);
+    add("&prod", 8719);
+    add("&sum", 8721);
+    add("&minus", 8722);
     add("&lowast", 8727);
-    add("&radic",  8730);
-    add("&prop",   8733);
-    add("&infin",  8734);
-    add("&ang",    8736);
-    add("&and",    8743);
-    add("&or",     8744);
-    add("&cap",    8745);
-    add("&cup",    8746);
-    add("&int",    8747);
+    add("&radic", 8730);
+    add("&prop", 8733);
+    add("&infin", 8734);
+    add("&ang", 8736);
+    add("&and", 8743);
+    add("&or", 8744);
+    add("&cap", 8745);
+    add("&cup", 8746);
+    add("&int", 8747);
     add("&there4", 8756);
-    add("&sim",    8764);
-    add("&cong",   8773);
-    add("&asymp",  8776);
-    add("&ne",     8800);
-    add("&equiv",  8801);
-    add("&le",     8804);
-    add("&ge",     8805);
-    add("&sub",    8834);
-    add("&sup",    8835);
-    add("&nsub",   8836);
-    add("&sube",   8838);
-    add("&supe",   8839);
-    add("&oplus",  8853);
+    add("&sim", 8764);
+    add("&cong", 8773);
+    add("&asymp", 8776);
+    add("&ne", 8800);
+    add("&equiv", 8801);
+    add("&le", 8804);
+    add("&ge", 8805);
+    add("&sub", 8834);
+    add("&sup", 8835);
+    add("&nsub", 8836);
+    add("&sube", 8838);
+    add("&supe", 8839);
+    add("&oplus", 8853);
     add("&otimes", 8855);
-    add("&perp",   8869);
-    add("&sdot",   8901);
-    add("&lceil",  8968);
-    add("&rceil",  8969);
+    add("&perp", 8869);
+    add("&sdot", 8901);
+    add("&lceil", 8968);
+    add("&rceil", 8969);
     add("&lfloor", 8970);
     add("&rfloor", 8971);
-    add("&lang",   9001);
-    add("&rang",   9002);
-    add("&loz",    9674);
+    add("&lang", 9001);
+    add("&rang", 9002);
+    add("&loz", 9674);
     add("&spades", 9824);
-    add("&clubs",  9827);
+    add("&clubs", 9827);
     add("&hearts", 9829);
-    add("&diams",  9830);
-    add("&quot",   34);
-    add("&amp",    38);
-    add("&lt",     60);
-    add("&gt",     62);
-    add("&OElig",  338);
-    add("&oelig",  339);
+    add("&diams", 9830);
+    add("&quot", 34);
+    add("&amp", 38);
+    add("&lt", 60);
+    add("&gt", 62);
+    add("&OElig", 338);
+    add("&oelig", 339);
     add("&Scaron", 352);
     add("&scaron", 353);
-    add("&Yuml",   376);
-    add("&circ",   710);
-    add("&tilde",  732);
-    add("&ensp",   8194);
-    add("&emsp",   8195);
+    add("&Yuml", 376);
+    add("&circ", 710);
+    add("&tilde", 732);
+    add("&ensp", 8194);
+    add("&emsp", 8195);
     add("&thinsp", 8201);
-    add("&zwnj",   8204);
-    add("&zwj",    8205);
-    add("&lrm",    8206);
-    add("&rlm",    8207);
-    add("&ndash",  8211);
-    add("&mdash",  8212);
-    add("&lsquo",  8216);
-    add("&rsquo",  8217);
-    add("&sbquo",  8218);
-    add("&ldquo",  8220);
-    add("&rdquo",  8221);
-    add("&bdquo",  8222);
+    add("&zwnj", 8204);
+    add("&zwj", 8205);
+    add("&lrm", 8206);
+    add("&rlm", 8207);
+    add("&ndash", 8211);
+    add("&mdash", 8212);
+    add("&lsquo", 8216);
+    add("&rsquo", 8217);
+    add("&sbquo", 8218);
+    add("&ldquo", 8220);
+    add("&rdquo", 8221);
+    add("&bdquo", 8222);
     add("&dagger", 8224);
     add("&Dagger", 8225);
     add("&permil", 8240);
     add("&lsaquo", 8249);
     add("&rsaquo", 8250);
-    add("&euro",   8364);
+    add("&euro", 8364);
 
   }
 }
Index: src/java/org/apache/nutch/scoring/ScoreDatum.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoreDatum.java	(revision 1188268)
+++ src/java/org/apache/nutch/scoring/ScoreDatum.java	(working copy)
@@ -34,9 +34,10 @@
   private String url;
   private String anchor;
   private Map<String, byte[]> metaData = new HashMap<String, byte[]>();
-  
-  public ScoreDatum() { }
-  
+
+  public ScoreDatum() {
+  }
+
   public ScoreDatum(float score, String url, String anchor) {
     this.score = score;
     this.url = url;
@@ -49,7 +50,7 @@
     url = Text.readString(in);
     anchor = Text.readString(in);
     metaData.clear();
-    
+
     int size = WritableUtils.readVInt(in);
     for (int i = 0; i < size; i++) {
       String key = Text.readString(in);
@@ -63,30 +64,30 @@
     out.writeFloat(score);
     Text.writeString(out, url);
     Text.writeString(out, anchor);
-    
+
     WritableUtils.writeVInt(out, metaData.size());
     for (Entry<String, byte[]> e : metaData.entrySet()) {
       Text.writeString(out, e.getKey());
       Bytes.writeByteArray(out, e.getValue());
     }
   }
-  
+
   public byte[] getMeta(String key) {
     return metaData.get(key);
   }
-  
+
   public void setMeta(String key, byte[] value) {
     metaData.put(key, value);
   }
-  
+
   public byte[] deleteMeta(String key) {
     return metaData.remove(key);
   }
-  
+
   public float getScore() {
     return score;
   }
-  
+
   public void setScore(float score) {
     this.score = score;
   }
@@ -94,7 +95,7 @@
   public String getUrl() {
     return url;
   }
-  
+
   public void setUrl(String url) {
     this.url = url;
   }
Index: src/java/org/apache/nutch/scoring/ScoringFilterException.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilterException.java	(revision 1188268)
+++ src/java/org/apache/nutch/scoring/ScoringFilterException.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**
Index: src/java/org/apache/nutch/scoring/ScoringFilter.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilter.java	(revision 1188268)
+++ src/java/org/apache/nutch/scoring/ScoringFilter.java	(working copy)
@@ -26,11 +26,11 @@
 
 /**
  * A contract defining behavior of scoring plugins.
- *
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
- *
+ * 
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
+ * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, FieldPluggable {
@@ -39,74 +39,101 @@
 
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param page new page. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          new page. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(String url, WebPage page) throws ScoringFilterException;
+  public void injectedScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered pages
-   * have at least one inlink with its score contribution, so filter implementations
-   * may choose to set initial score to zero (unknown value), and then the inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
    * @param page
    * @throws ScoringFilterException
    */
-  public void initialScore(String url, WebPage page) throws ScoringFilterException;
+  public void initialScore(String url, WebPage page)
+      throws ScoringFilterException;
 
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page row. Modifications will be persisted.
-   * @param initSort initial sort value, or a value from previous filters in chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param page
+   *          page row. Modifications will be persisted.
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(String url, WebPage page, float initSort) throws ScoringFilterException;
+  public float generatorSortValue(String url, WebPage page, float initSort)
+      throws ScoringFilterException;
 
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param row page row
-   * @param scoreData A list of {@link OutlinkedScoreDatum}s for every outlink.
-   * These {@link OutlinkedScoreDatum}s will be passed to
-   * {@link #updateScore(String, OldWebTableRow, List)}
-   * for every outlinked URL.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param page
+   *          page row
+   * @param scoreData
+   *          A list of {@link OutlinkedScoreDatum}s for every outlink. These
+   *          {@link OutlinkedScoreDatum}s will be passed to
+   *          {@link #updateScore(String, OldWebTableRow, List)} for every
+   *          outlinked URL.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @throws ScoringFilterException
    */
-  public void distributeScoreToOutlinks(String fromUrl,
-      WebPage page, Collection<ScoreDatum> scoreData,
-      int allCount) throws ScoringFilterException;
+  public void distributeScoreToOutlinks(String fromUrl, WebPage page,
+      Collection<ScoreDatum> scoreData, int allCount)
+      throws ScoringFilterException;
 
   /**
-   * This method calculates a new score during table update, based on the values contributed
-   * by inlinked pages.
-   * @param url url of the page
+   * This method calculates a new score during table update, based on the values
+   * contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param inlinked list of {@link OutlinkedScoreDatum}s for all inlinks pointing to this URL.
+   * @param inlinkedScoreData
+   *          list of {@link OutlinkedScoreDatum}s for all inlinks pointing to
+   *          this URL.
    * @throws ScoringFilterException
    */
-  public void updateScore(String url, WebPage page, List<ScoreDatum> inlinkedScoreData)
-  throws ScoringFilterException;
+  public void updateScore(String url, WebPage page,
+      List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException;
 
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc document. NOTE: this already contains all information collected
-   * by indexing filters. Implementations may modify this instance, in order to store/remove
-   * some information.
-   * @param row page row
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an argument
-   * to the next scoring filter in chain. NOTE: implementations may also express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          document. NOTE: this already contains all information collected by
+   *          indexing filters. Implementations may modify this instance, in
+   *          order to store/remove some information.
+   * @param page
+   *          page row
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
-  public float indexerScore(String url, NutchDocument doc, WebPage page, float initScore)
-  throws ScoringFilterException;
+  public float indexerScore(String url, NutchDocument doc, WebPage page,
+      float initScore) throws ScoringFilterException;
 }
Index: src/java/org/apache/nutch/scoring/ScoringFilters.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilters.java	(revision 1188268)
+++ src/java/org/apache/nutch/scoring/ScoringFilters.java	(working copy)
@@ -35,7 +35,7 @@
 
 /**
  * Creates and caches {@link ScoringFilter} implementing plugins.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class ScoringFilters extends Configured implements ScoringFilter {
@@ -46,7 +46,8 @@
     super(conf);
     ObjectCache objectCache = ObjectCache.get(conf);
     String order = conf.get("scoring.filter.order");
-    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -55,20 +56,23 @@
       }
 
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ScoringFilter.X_POINT_ID);
-        if (point == null) throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ScoringFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, ScoringFilter> filterMap =
-          new HashMap<String, ScoringFilter>();
+        HashMap<String, ScoringFilter> filterMap = new HashMap<String, ScoringFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ScoringFilter filter = (ScoringFilter) extension.getExtensionInstance();
+          ScoringFilter filter = (ScoringFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(filter.getClass().getName())) {
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(ScoringFilter.class.getName(), filterMap.values().toArray(new ScoringFilter[0]));
+          objectCache.setObject(ScoringFilter.class.getName(), filterMap
+              .values().toArray(new ScoringFilter[0]));
         } else {
           ScoringFilter[] filter = new ScoringFilter[orderedFilters.length];
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -79,14 +83,15 @@
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+      this.filters = (ScoringFilter[]) objectCache
+          .getObject(ScoringFilter.class.getName());
     }
   }
 
   /** Calculate a sort value for Generate. */
   @Override
   public float generatorSortValue(String url, WebPage row, float initSort)
-  throws ScoringFilterException {
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       initSort = filter.generatorSortValue(url, row, initSort);
     }
@@ -95,7 +100,8 @@
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
   @Override
-  public void initialScore(String url, WebPage row) throws ScoringFilterException {
+  public void initialScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.initialScore(url, row);
     }
@@ -103,7 +109,8 @@
 
   /** Calculate a new initial score, used when injecting new pages. */
   @Override
-  public void injectedScore(String url, WebPage row) throws ScoringFilterException {
+  public void injectedScore(String url, WebPage row)
+      throws ScoringFilterException {
     for (ScoringFilter filter : filters) {
       filter.injectedScore(url, row);
     }
Index: src/java/org/apache/nutch/storage/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ProtocolStatus.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/ProtocolStatus.java	(working copy)
@@ -38,72 +38,111 @@
 
 @SuppressWarnings("all")
 public class ProtocolStatus extends PersistentBase {
-  public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}");
+  public static final Schema _SCHEMA = Schema
+      .parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}");
+
   public static enum Field {
-    CODE(0,"code"),
-    ARGS(1,"args"),
-    LAST_MODIFIED(2,"lastModified"),
-    ;
+    CODE(0, "code"), ARGS(1, "args"), LAST_MODIFIED(2, "lastModified"), ;
     private int index;
     private String name;
-    Field(int index, String name) {this.index=index;this.name=name;}
-    public int getIndex() {return index;}
-    public String getName() {return name;}
-    public String toString() {return name;}
+
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
+
+    public int getIndex() {
+      return index;
+    }
+
+    public String getName() {
+      return name;
+    }
+
+    public String toString() {
+      return name;
+    }
   };
-  public static final String[] _ALL_FIELDS = {"code","args","lastModified",};
+
+  public static final String[] _ALL_FIELDS = { "code", "args", "lastModified", };
   static {
     PersistentBase.registerFields(ProtocolStatus.class, _ALL_FIELDS);
   }
   private int code;
   private GenericArray<Utf8> args;
   private long lastModified;
+
   public ProtocolStatus() {
     this(new StateManagerImpl());
   }
+
   public ProtocolStatus(StateManager stateManager) {
     super(stateManager);
     args = new ListGenericArray<Utf8>(getSchema().getField("args").schema());
   }
+
   public ProtocolStatus newInstance(StateManager stateManager) {
     return new ProtocolStatus(stateManager);
   }
-  public Schema getSchema() { return _SCHEMA; }
+
+  public Schema getSchema() {
+    return _SCHEMA;
+  }
+
   public Object get(int _field) {
     switch (_field) {
-    case 0: return code;
-    case 1: return args;
-    case 2: return lastModified;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      return code;
+    case 1:
+      return args;
+    case 2:
+      return lastModified;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
-  @SuppressWarnings(value="unchecked")
+
+  @SuppressWarnings(value = "unchecked")
   public void put(int _field, Object _value) {
-    if(isFieldEqual(_field, _value)) return;
+    if (isFieldEqual(_field, _value))
+      return;
     getStateManager().setDirty(this, _field);
     switch (_field) {
-    case 0:code = (Integer)_value; break;
-    case 1:args = (GenericArray<Utf8>)_value; break;
-    case 2:lastModified = (Long)_value; break;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      code = (Integer) _value;
+      break;
+    case 1:
+      args = (GenericArray<Utf8>) _value;
+      break;
+    case 2:
+      lastModified = (Long) _value;
+      break;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
+
   public int getCode() {
     return (Integer) get(0);
   }
+
   public void setCode(int value) {
     put(0, value);
   }
+
   public GenericArray<Utf8> getArgs() {
     return (GenericArray<Utf8>) get(1);
   }
+
   public void addToArgs(Utf8 element) {
     getStateManager().setDirty(this, 1);
     args.add(element);
   }
+
   public long getLastModified() {
     return (Long) get(2);
   }
+
   public void setLastModified(long value) {
     put(2, value);
   }
Index: src/java/org/apache/nutch/storage/StorageUtils.java
===================================================================
--- src/java/org/apache/nutch/storage/StorageUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/StorageUtils.java	(working copy)
@@ -36,17 +36,18 @@
 public class StorageUtils {
 
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> DataStore<K, V> createDataStore(Configuration conf,
-      Class<K> keyClass, Class<V> persistentClass) throws ClassNotFoundException, GoraException {
-    Class<? extends DataStore<K, V>> dataStoreClass =
-      (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
-    return DataStoreFactory.createDataStore(dataStoreClass,
-            keyClass, persistentClass);
+  public static <K, V extends Persistent> DataStore<K, V> createDataStore(
+      Configuration conf, Class<K> keyClass, Class<V> persistentClass)
+      throws ClassNotFoundException, GoraException {
+    Class<? extends DataStore<K, V>> dataStoreClass = (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
+    return DataStoreFactory.createDataStore(dataStoreClass, keyClass,
+        persistentClass);
   }
 
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> DataStore<K, V> createWebStore(Configuration conf,
-      Class<K> keyClass, Class<V> persistentClass) throws ClassNotFoundException, GoraException {
+  public static <K, V extends Persistent> DataStore<K, V> createWebStore(
+      Configuration conf, Class<K> keyClass, Class<V> persistentClass)
+      throws ClassNotFoundException, GoraException {
     String schema = conf.get("storage.schema", "webpage");
     String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
 
@@ -54,70 +55,69 @@
       schema = crawlId + "_" + schema;
     }
 
-    Class<? extends DataStore<K, V>> dataStoreClass =
-      (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
-    return DataStoreFactory.createDataStore(dataStoreClass,
-            keyClass, persistentClass, schema);
+    Class<? extends DataStore<K, V>> dataStoreClass = (Class<? extends DataStore<K, V>>) getDataStoreClass(conf);
+    return DataStoreFactory.createDataStore(dataStoreClass, keyClass,
+        persistentClass, schema);
   }
 
   @SuppressWarnings("unchecked")
-  public static <K, V extends Persistent> Class<? extends DataStore<K, V>>
-  getDataStoreClass(Configuration conf)  throws ClassNotFoundException {
-    return (Class<? extends DataStore<K, V>>)
-      Class.forName(conf.get("storage.data.store.class",
-          "org.apache.gora.sql.store.SqlStore"));
+  public static <K, V extends Persistent> Class<? extends DataStore<K, V>> getDataStoreClass(
+      Configuration conf) throws ClassNotFoundException {
+    return (Class<? extends DataStore<K, V>>) Class.forName(conf.get(
+        "storage.data.store.class", "org.apache.gora.sql.store.SqlStore"));
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
-      Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass, boolean reuseObjects)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, null, reuseObjects);
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
+      Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
+      boolean reuseObjects) throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass, null,
+        reuseObjects);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, null, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass, null,
+        true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass)
-  throws ClassNotFoundException, IOException {
-    initMapperJob(job, fields, outKeyClass, outValueClass,
-        mapperClass, partitionerClass, true);
+      throws ClassNotFoundException, IOException {
+    initMapperJob(job, fields, outKeyClass, outValueClass, mapperClass,
+        partitionerClass, true);
   }
 
   public static <K, V> void initMapperJob(Job job,
-      Collection<WebPage.Field> fields,
-      Class<K> outKeyClass, Class<V> outValueClass,
+      Collection<WebPage.Field> fields, Class<K> outKeyClass,
+      Class<V> outValueClass,
       Class<? extends GoraMapper<String, WebPage, K, V>> mapperClass,
       Class<? extends Partitioner<K, V>> partitionerClass, boolean reuseObjects)
-  throws ClassNotFoundException, IOException {
+      throws ClassNotFoundException, IOException {
     DataStore<String, WebPage> store = createWebStore(job.getConfiguration(),
         String.class, WebPage.class);
-    if (store==null) throw new RuntimeException("Could not create datastore");
+    if (store == null)
+      throw new RuntimeException("Could not create datastore");
     Query<String, WebPage> query = store.newQuery();
     query.setFields(toStringArray(fields));
-    GoraMapper.initMapperJob(job, query, store,
-        outKeyClass, outValueClass, mapperClass, partitionerClass, reuseObjects);
+    GoraMapper.initMapperJob(job, query, store, outKeyClass, outValueClass,
+        mapperClass, partitionerClass, reuseObjects);
     GoraOutputFormat.setOutput(job, store, true);
   }
 
   public static <K, V> void initReducerJob(Job job,
       Class<? extends GoraReducer<K, V, String, WebPage>> reducerClass)
-  throws ClassNotFoundException, GoraException {
+      throws ClassNotFoundException, GoraException {
     Configuration conf = job.getConfiguration();
-    DataStore<String, WebPage> store =
-      StorageUtils.createWebStore(conf, String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(conf,
+        String.class, WebPage.class);
     GoraReducer.initReducerJob(job, store, reducerClass);
     GoraOutputFormat.setOutput(job, store, true);
   }
Index: src/java/org/apache/nutch/storage/WebTableCreator.java
===================================================================
--- src/java/org/apache/nutch/storage/WebTableCreator.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/WebTableCreator.java	(working copy)
@@ -21,9 +21,8 @@
 
 public class WebTableCreator {
   public static void main(String[] args) throws Exception {
-    DataStore<String, WebPage> store =
-      StorageUtils.createDataStore(NutchConfiguration.create(), String.class,
-        WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createDataStore(
+        NutchConfiguration.create(), String.class, WebPage.class);
 
     System.out.println(store);
   }
Index: src/java/org/apache/nutch/storage/Mark.java
===================================================================
--- src/java/org/apache/nutch/storage/Mark.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/Mark.java	(working copy)
@@ -19,8 +19,8 @@
 import org.apache.avro.util.Utf8;
 
 public enum Mark {
-  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"),
-  PARSE_MARK("__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
+  INJECT_MARK("_injmrk_"), GENERATE_MARK("_gnmrk_"), FETCH_MARK("_ftcmrk_"), PARSE_MARK(
+      "__prsmrk__"), UPDATEDB_MARK("_updmrk_"), INDEX_MARK("_idxmrk_");
 
   private Utf8 name;
 
Index: src/java/org/apache/nutch/storage/WebPage.java
===================================================================
--- src/java/org/apache/nutch/storage/WebPage.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/WebPage.java	(working copy)
@@ -38,39 +38,45 @@
 
 @SuppressWarnings("all")
 public class WebPage extends PersistentBase {
-  public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"baseUrl\",\"type\":\"string\"},{\"name\":\"status\",\"type\":\"int\"},{\"name\":\"fetchTime\",\"type\":\"long\"},{\"name\":\"prevFetchTime\",\"type\":\"long\"},{\"name\":\"fetchInterval\",\"type\":\"int\"},{\"name\":\"retriesSinceFetch\",\"type\":\"int\"},{\"name\":\"modifiedTime\",\"type\":\"long\"},{\"name\":\"protocolStatus\",\"type\":{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}},{\"name\":\"content\",\"type\":\"bytes\"},{\"name\":\"contentType\",\"type\":\"string\"},{\"name\":\"prevSignature\",\"type\":\"bytes\"},{\"name\":\"signature\",\"type\":\"bytes\"},{\"name\":\"title\",\"type\":\"string\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"parseStatus\",\"type\":{\"type\":\"record\",\"name\":\"ParseStatus\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"reprUrl\",\"type\":\"string\"},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"bytes\"}}]}");
+  public static final Schema _SCHEMA = Schema
+      .parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"baseUrl\",\"type\":\"string\"},{\"name\":\"status\",\"type\":\"int\"},{\"name\":\"fetchTime\",\"type\":\"long\"},{\"name\":\"prevFetchTime\",\"type\":\"long\"},{\"name\":\"fetchInterval\",\"type\":\"int\"},{\"name\":\"retriesSinceFetch\",\"type\":\"int\"},{\"name\":\"modifiedTime\",\"type\":\"long\"},{\"name\":\"protocolStatus\",\"type\":{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}},{\"name\":\"content\",\"type\":\"bytes\"},{\"name\":\"contentType\",\"type\":\"string\"},{\"name\":\"prevSignature\",\"type\":\"bytes\"},{\"name\":\"signature\",\"type\":\"bytes\"},{\"name\":\"title\",\"type\":\"string\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"parseStatus\",\"type\":{\"type\":\"record\",\"name\":\"ParseStatus\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"reprUrl\",\"type\":\"string\"},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"bytes\"}}]}");
+
   public static enum Field {
-    BASE_URL(0,"baseUrl"),
-    STATUS(1,"status"),
-    FETCH_TIME(2,"fetchTime"),
-    PREV_FETCH_TIME(3,"prevFetchTime"),
-    FETCH_INTERVAL(4,"fetchInterval"),
-    RETRIES_SINCE_FETCH(5,"retriesSinceFetch"),
-    MODIFIED_TIME(6,"modifiedTime"),
-    PROTOCOL_STATUS(7,"protocolStatus"),
-    CONTENT(8,"content"),
-    CONTENT_TYPE(9,"contentType"),
-    PREV_SIGNATURE(10,"prevSignature"),
-    SIGNATURE(11,"signature"),
-    TITLE(12,"title"),
-    TEXT(13,"text"),
-    PARSE_STATUS(14,"parseStatus"),
-    SCORE(15,"score"),
-    REPR_URL(16,"reprUrl"),
-    HEADERS(17,"headers"),
-    OUTLINKS(18,"outlinks"),
-    INLINKS(19,"inlinks"),
-    MARKERS(20,"markers"),
-    METADATA(21,"metadata"),
-    ;
+    BASE_URL(0, "baseUrl"), STATUS(1, "status"), FETCH_TIME(2, "fetchTime"), PREV_FETCH_TIME(
+        3, "prevFetchTime"), FETCH_INTERVAL(4, "fetchInterval"), RETRIES_SINCE_FETCH(
+        5, "retriesSinceFetch"), MODIFIED_TIME(6, "modifiedTime"), PROTOCOL_STATUS(
+        7, "protocolStatus"), CONTENT(8, "content"), CONTENT_TYPE(9,
+        "contentType"), PREV_SIGNATURE(10, "prevSignature"), SIGNATURE(11,
+        "signature"), TITLE(12, "title"), TEXT(13, "text"), PARSE_STATUS(14,
+        "parseStatus"), SCORE(15, "score"), REPR_URL(16, "reprUrl"), HEADERS(
+        17, "headers"), OUTLINKS(18, "outlinks"), INLINKS(19, "inlinks"), MARKERS(
+        20, "markers"), METADATA(21, "metadata"), ;
     private int index;
     private String name;
-    Field(int index, String name) {this.index=index;this.name=name;}
-    public int getIndex() {return index;}
-    public String getName() {return name;}
-    public String toString() {return name;}
+
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
+
+    public int getIndex() {
+      return index;
+    }
+
+    public String getName() {
+      return name;
+    }
+
+    public String toString() {
+      return name;
+    }
   };
-  public static final String[] _ALL_FIELDS = {"baseUrl","status","fetchTime","prevFetchTime","fetchInterval","retriesSinceFetch","modifiedTime","protocolStatus","content","contentType","prevSignature","signature","title","text","parseStatus","score","reprUrl","headers","outlinks","inlinks","markers","metadata",};
+
+  public static final String[] _ALL_FIELDS = { "baseUrl", "status",
+      "fetchTime", "prevFetchTime", "fetchInterval", "retriesSinceFetch",
+      "modifiedTime", "protocolStatus", "content", "contentType",
+      "prevSignature", "signature", "title", "text", "parseStatus", "score",
+      "reprUrl", "headers", "outlinks", "inlinks", "markers", "metadata", };
   static {
     PersistentBase.registerFields(WebPage.class, _ALL_FIELDS);
   }
@@ -91,262 +97,413 @@
   private ParseStatus parseStatus;
   private float score;
   private Utf8 reprUrl;
-  private Map<Utf8,Utf8> headers;
-  private Map<Utf8,Utf8> outlinks;
-  private Map<Utf8,Utf8> inlinks;
-  private Map<Utf8,Utf8> markers;
-  private Map<Utf8,ByteBuffer> metadata;
+  private Map<Utf8, Utf8> headers;
+  private Map<Utf8, Utf8> outlinks;
+  private Map<Utf8, Utf8> inlinks;
+  private Map<Utf8, Utf8> markers;
+  private Map<Utf8, ByteBuffer> metadata;
+
   public WebPage() {
     this(new StateManagerImpl());
   }
+
   public WebPage(StateManager stateManager) {
     super(stateManager);
-    headers = new StatefulHashMap<Utf8,Utf8>();
-    outlinks = new StatefulHashMap<Utf8,Utf8>();
-    inlinks = new StatefulHashMap<Utf8,Utf8>();
-    markers = new StatefulHashMap<Utf8,Utf8>();
-    metadata = new StatefulHashMap<Utf8,ByteBuffer>();
+    headers = new StatefulHashMap<Utf8, Utf8>();
+    outlinks = new StatefulHashMap<Utf8, Utf8>();
+    inlinks = new StatefulHashMap<Utf8, Utf8>();
+    markers = new StatefulHashMap<Utf8, Utf8>();
+    metadata = new StatefulHashMap<Utf8, ByteBuffer>();
   }
+
   public WebPage newInstance(StateManager stateManager) {
     return new WebPage(stateManager);
   }
-  public Schema getSchema() { return _SCHEMA; }
+
+  public Schema getSchema() {
+    return _SCHEMA;
+  }
+
   public Object get(int _field) {
     switch (_field) {
-    case 0: return baseUrl;
-    case 1: return status;
-    case 2: return fetchTime;
-    case 3: return prevFetchTime;
-    case 4: return fetchInterval;
-    case 5: return retriesSinceFetch;
-    case 6: return modifiedTime;
-    case 7: return protocolStatus;
-    case 8: return content;
-    case 9: return contentType;
-    case 10: return prevSignature;
-    case 11: return signature;
-    case 12: return title;
-    case 13: return text;
-    case 14: return parseStatus;
-    case 15: return score;
-    case 16: return reprUrl;
-    case 17: return headers;
-    case 18: return outlinks;
-    case 19: return inlinks;
-    case 20: return markers;
-    case 21: return metadata;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      return baseUrl;
+    case 1:
+      return status;
+    case 2:
+      return fetchTime;
+    case 3:
+      return prevFetchTime;
+    case 4:
+      return fetchInterval;
+    case 5:
+      return retriesSinceFetch;
+    case 6:
+      return modifiedTime;
+    case 7:
+      return protocolStatus;
+    case 8:
+      return content;
+    case 9:
+      return contentType;
+    case 10:
+      return prevSignature;
+    case 11:
+      return signature;
+    case 12:
+      return title;
+    case 13:
+      return text;
+    case 14:
+      return parseStatus;
+    case 15:
+      return score;
+    case 16:
+      return reprUrl;
+    case 17:
+      return headers;
+    case 18:
+      return outlinks;
+    case 19:
+      return inlinks;
+    case 20:
+      return markers;
+    case 21:
+      return metadata;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
-  @SuppressWarnings(value="unchecked")
+
+  @SuppressWarnings(value = "unchecked")
   public void put(int _field, Object _value) {
-    if(isFieldEqual(_field, _value)) return;
+    if (isFieldEqual(_field, _value))
+      return;
     getStateManager().setDirty(this, _field);
     switch (_field) {
-    case 0:baseUrl = (Utf8)_value; break;
-    case 1:status = (Integer)_value; break;
-    case 2:fetchTime = (Long)_value; break;
-    case 3:prevFetchTime = (Long)_value; break;
-    case 4:fetchInterval = (Integer)_value; break;
-    case 5:retriesSinceFetch = (Integer)_value; break;
-    case 6:modifiedTime = (Long)_value; break;
-    case 7:protocolStatus = (ProtocolStatus)_value; break;
-    case 8:content = (ByteBuffer)_value; break;
-    case 9:contentType = (Utf8)_value; break;
-    case 10:prevSignature = (ByteBuffer)_value; break;
-    case 11:signature = (ByteBuffer)_value; break;
-    case 12:title = (Utf8)_value; break;
-    case 13:text = (Utf8)_value; break;
-    case 14:parseStatus = (ParseStatus)_value; break;
-    case 15:score = (Float)_value; break;
-    case 16:reprUrl = (Utf8)_value; break;
-    case 17:headers = (Map<Utf8,Utf8>)_value; break;
-    case 18:outlinks = (Map<Utf8,Utf8>)_value; break;
-    case 19:inlinks = (Map<Utf8,Utf8>)_value; break;
-    case 20:markers = (Map<Utf8,Utf8>)_value; break;
-    case 21:metadata = (Map<Utf8,ByteBuffer>)_value; break;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      baseUrl = (Utf8) _value;
+      break;
+    case 1:
+      status = (Integer) _value;
+      break;
+    case 2:
+      fetchTime = (Long) _value;
+      break;
+    case 3:
+      prevFetchTime = (Long) _value;
+      break;
+    case 4:
+      fetchInterval = (Integer) _value;
+      break;
+    case 5:
+      retriesSinceFetch = (Integer) _value;
+      break;
+    case 6:
+      modifiedTime = (Long) _value;
+      break;
+    case 7:
+      protocolStatus = (ProtocolStatus) _value;
+      break;
+    case 8:
+      content = (ByteBuffer) _value;
+      break;
+    case 9:
+      contentType = (Utf8) _value;
+      break;
+    case 10:
+      prevSignature = (ByteBuffer) _value;
+      break;
+    case 11:
+      signature = (ByteBuffer) _value;
+      break;
+    case 12:
+      title = (Utf8) _value;
+      break;
+    case 13:
+      text = (Utf8) _value;
+      break;
+    case 14:
+      parseStatus = (ParseStatus) _value;
+      break;
+    case 15:
+      score = (Float) _value;
+      break;
+    case 16:
+      reprUrl = (Utf8) _value;
+      break;
+    case 17:
+      headers = (Map<Utf8, Utf8>) _value;
+      break;
+    case 18:
+      outlinks = (Map<Utf8, Utf8>) _value;
+      break;
+    case 19:
+      inlinks = (Map<Utf8, Utf8>) _value;
+      break;
+    case 20:
+      markers = (Map<Utf8, Utf8>) _value;
+      break;
+    case 21:
+      metadata = (Map<Utf8, ByteBuffer>) _value;
+      break;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
+
   public Utf8 getBaseUrl() {
     return (Utf8) get(0);
   }
+
   public void setBaseUrl(Utf8 value) {
     put(0, value);
   }
+
   public int getStatus() {
     return (Integer) get(1);
   }
+
   public void setStatus(int value) {
     put(1, value);
   }
+
   public long getFetchTime() {
     return (Long) get(2);
   }
+
   public void setFetchTime(long value) {
     put(2, value);
   }
+
   public long getPrevFetchTime() {
     return (Long) get(3);
   }
+
   public void setPrevFetchTime(long value) {
     put(3, value);
   }
+
   public int getFetchInterval() {
     return (Integer) get(4);
   }
+
   public void setFetchInterval(int value) {
     put(4, value);
   }
+
   public int getRetriesSinceFetch() {
     return (Integer) get(5);
   }
+
   public void setRetriesSinceFetch(int value) {
     put(5, value);
   }
+
   public long getModifiedTime() {
     return (Long) get(6);
   }
+
   public void setModifiedTime(long value) {
     put(6, value);
   }
+
   public ProtocolStatus getProtocolStatus() {
     return (ProtocolStatus) get(7);
   }
+
   public void setProtocolStatus(ProtocolStatus value) {
     put(7, value);
   }
+
   public ByteBuffer getContent() {
     return (ByteBuffer) get(8);
   }
+
   public void setContent(ByteBuffer value) {
     put(8, value);
   }
+
   public Utf8 getContentType() {
     return (Utf8) get(9);
   }
+
   public void setContentType(Utf8 value) {
     put(9, value);
   }
+
   public ByteBuffer getPrevSignature() {
     return (ByteBuffer) get(10);
   }
+
   public void setPrevSignature(ByteBuffer value) {
     put(10, value);
   }
+
   public ByteBuffer getSignature() {
     return (ByteBuffer) get(11);
   }
+
   public void setSignature(ByteBuffer value) {
     put(11, value);
   }
+
   public Utf8 getTitle() {
     return (Utf8) get(12);
   }
+
   public void setTitle(Utf8 value) {
     put(12, value);
   }
+
   public Utf8 getText() {
     return (Utf8) get(13);
   }
+
   public void setText(Utf8 value) {
     put(13, value);
   }
+
   public ParseStatus getParseStatus() {
     return (ParseStatus) get(14);
   }
+
   public void setParseStatus(ParseStatus value) {
     put(14, value);
   }
+
   public float getScore() {
     return (Float) get(15);
   }
+
   public void setScore(float value) {
     put(15, value);
   }
+
   public Utf8 getReprUrl() {
     return (Utf8) get(16);
   }
+
   public void setReprUrl(Utf8 value) {
     put(16, value);
   }
+
   public Map<Utf8, Utf8> getHeaders() {
     return (Map<Utf8, Utf8>) get(17);
   }
+
   public Utf8 getFromHeaders(Utf8 key) {
-    if (headers == null) { return null; }
+    if (headers == null) {
+      return null;
+    }
     return headers.get(key);
   }
+
   public void putToHeaders(Utf8 key, Utf8 value) {
     getStateManager().setDirty(this, 17);
     headers.put(key, value);
   }
+
   public Utf8 removeFromHeaders(Utf8 key) {
-    if (headers == null) { return null; }
+    if (headers == null) {
+      return null;
+    }
     getStateManager().setDirty(this, 17);
     return headers.remove(key);
   }
+
   public Map<Utf8, Utf8> getOutlinks() {
     return (Map<Utf8, Utf8>) get(18);
   }
+
   public Utf8 getFromOutlinks(Utf8 key) {
-    if (outlinks == null) { return null; }
+    if (outlinks == null) {
+      return null;
+    }
     return outlinks.get(key);
   }
+
   public void putToOutlinks(Utf8 key, Utf8 value) {
     getStateManager().setDirty(this, 18);
     outlinks.put(key, value);
   }
+
   public Utf8 removeFromOutlinks(Utf8 key) {
-    if (outlinks == null) { return null; }
+    if (outlinks == null) {
+      return null;
+    }
     getStateManager().setDirty(this, 18);
     return outlinks.remove(key);
   }
+
   public Map<Utf8, Utf8> getInlinks() {
     return (Map<Utf8, Utf8>) get(19);
   }
+
   public Utf8 getFromInlinks(Utf8 key) {
-    if (inlinks == null) { return null; }
+    if (inlinks == null) {
+      return null;
+    }
     return inlinks.get(key);
   }
+
   public void putToInlinks(Utf8 key, Utf8 value) {
     getStateManager().setDirty(this, 19);
     inlinks.put(key, value);
   }
+
   public Utf8 removeFromInlinks(Utf8 key) {
-    if (inlinks == null) { return null; }
+    if (inlinks == null) {
+      return null;
+    }
     getStateManager().setDirty(this, 19);
     return inlinks.remove(key);
   }
+
   public Map<Utf8, Utf8> getMarkers() {
     return (Map<Utf8, Utf8>) get(20);
   }
+
   public Utf8 getFromMarkers(Utf8 key) {
-    if (markers == null) { return null; }
+    if (markers == null) {
+      return null;
+    }
     return markers.get(key);
   }
+
   public void putToMarkers(Utf8 key, Utf8 value) {
     getStateManager().setDirty(this, 20);
     markers.put(key, value);
   }
+
   public Utf8 removeFromMarkers(Utf8 key) {
-    if (markers == null) { return null; }
+    if (markers == null) {
+      return null;
+    }
     getStateManager().setDirty(this, 20);
     return markers.remove(key);
   }
+
   public Map<Utf8, ByteBuffer> getMetadata() {
     return (Map<Utf8, ByteBuffer>) get(21);
   }
+
   public ByteBuffer getFromMetadata(Utf8 key) {
-    if (metadata == null) { return null; }
+    if (metadata == null) {
+      return null;
+    }
     return metadata.get(key);
   }
+
   public void putToMetadata(Utf8 key, ByteBuffer value) {
     getStateManager().setDirty(this, 21);
     metadata.put(key, value);
   }
+
   public ByteBuffer removeFromMetadata(Utf8 key) {
-    if (metadata == null) { return null; }
+    if (metadata == null) {
+      return null;
+    }
     getStateManager().setDirty(this, 21);
     return metadata.remove(key);
   }
Index: src/java/org/apache/nutch/storage/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ParseStatus.java	(revision 1188268)
+++ src/java/org/apache/nutch/storage/ParseStatus.java	(working copy)
@@ -38,71 +38,111 @@
 
 @SuppressWarnings("all")
 public class ParseStatus extends PersistentBase {
-  public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
+  public static final Schema _SCHEMA = Schema
+      .parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
+
   public static enum Field {
-    MAJOR_CODE(0,"majorCode"),
-    MINOR_CODE(1,"minorCode"),
-    ARGS(2,"args"),
-    ;
+    MAJOR_CODE(0, "majorCode"), MINOR_CODE(1, "minorCode"), ARGS(2, "args"), ;
     private int index;
     private String name;
-    Field(int index, String name) {this.index=index;this.name=name;}
-    public int getIndex() {return index;}
-    public String getName() {return name;}
-    public String toString() {return name;}
+
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
+
+    public int getIndex() {
+      return index;
+    }
+
+    public String getName() {
+      return name;
+    }
+
+    public String toString() {
+      return name;
+    }
   };
-  public static final String[] _ALL_FIELDS = {"majorCode","minorCode","args",};
+
+  public static final String[] _ALL_FIELDS = { "majorCode", "minorCode",
+      "args", };
   static {
     PersistentBase.registerFields(ParseStatus.class, _ALL_FIELDS);
   }
   private int majorCode;
   private int minorCode;
   private GenericArray<Utf8> args;
+
   public ParseStatus() {
     this(new StateManagerImpl());
   }
+
   public ParseStatus(StateManager stateManager) {
     super(stateManager);
     args = new ListGenericArray<Utf8>(getSchema().getField("args").schema());
   }
+
   public ParseStatus newInstance(StateManager stateManager) {
     return new ParseStatus(stateManager);
   }
-  public Schema getSchema() { return _SCHEMA; }
+
+  public Schema getSchema() {
+    return _SCHEMA;
+  }
+
   public Object get(int _field) {
     switch (_field) {
-    case 0: return majorCode;
-    case 1: return minorCode;
-    case 2: return args;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      return majorCode;
+    case 1:
+      return minorCode;
+    case 2:
+      return args;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
-  @SuppressWarnings(value="unchecked")
+
+  @SuppressWarnings(value = "unchecked")
   public void put(int _field, Object _value) {
-    if(isFieldEqual(_field, _value)) return;
+    if (isFieldEqual(_field, _value))
+      return;
     getStateManager().setDirty(this, _field);
     switch (_field) {
-    case 0:majorCode = (Integer)_value; break;
-    case 1:minorCode = (Integer)_value; break;
-    case 2:args = (GenericArray<Utf8>)_value; break;
-    default: throw new AvroRuntimeException("Bad index");
+    case 0:
+      majorCode = (Integer) _value;
+      break;
+    case 1:
+      minorCode = (Integer) _value;
+      break;
+    case 2:
+      args = (GenericArray<Utf8>) _value;
+      break;
+    default:
+      throw new AvroRuntimeException("Bad index");
     }
   }
+
   public int getMajorCode() {
     return (Integer) get(0);
   }
+
   public void setMajorCode(int value) {
     put(0, value);
   }
+
   public int getMinorCode() {
     return (Integer) get(1);
   }
+
   public void setMinorCode(int value) {
     put(1, value);
   }
+
   public GenericArray<Utf8> getArgs() {
     return (GenericArray<Utf8>) get(2);
   }
+
   public void addToArgs(Utf8 element) {
     getStateManager().setDirty(this, 2);
     args.add(element);
Index: src/java/org/apache/nutch/net/protocols/ProtocolException.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/ProtocolException.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/protocols/ProtocolException.java	(working copy)
@@ -21,12 +21,12 @@
 
 /**
  * Base exception for all protocol handlers
+ * 
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
 @SuppressWarnings("serial")
 public class ProtocolException extends Exception implements Serializable {
 
-
   public ProtocolException() {
     super();
   }
Index: src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(working copy)
@@ -26,15 +26,15 @@
 
 /**
  * class to handle HTTP dates.
- *
+ * 
  * Modified from FastHttpDateFormat.java in jakarta-tomcat.
- *
+ * 
  * @author John Xing
  */
 public class HttpDateFormat {
 
-  protected static SimpleDateFormat format = 
-    new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+  protected static SimpleDateFormat format = new SimpleDateFormat(
+      "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
   /**
    * HTTP date uses TimeZone GMT
@@ -43,29 +43,29 @@
     format.setTimeZone(TimeZone.getTimeZone("GMT"));
   }
 
-  //HttpDate (long t) {
-  //}
+  // HttpDate (long t) {
+  // }
 
-  //HttpDate (String s) {
-  //}
+  // HttpDate (String s) {
+  // }
 
-//  /**
-//   * Get the current date in HTTP format.
-//   */
-//  public static String getCurrentDate() {
-//
-//    long now = System.currentTimeMillis();
-//    if ((now - currentDateGenerated) > 1000) {
-//        synchronized (format) {
-//            if ((now - currentDateGenerated) > 1000) {
-//                currentDateGenerated = now;
-//                currentDate = format.format(new Date(now));
-//            }
-//        }
-//    }
-//    return currentDate;
-//
-//  }
+  // /**
+  // * Get the current date in HTTP format.
+  // */
+  // public static String getCurrentDate() {
+  //
+  // long now = System.currentTimeMillis();
+  // if ((now - currentDateGenerated) > 1000) {
+  // synchronized (format) {
+  // if ((now - currentDateGenerated) > 1000) {
+  // currentDateGenerated = now;
+  // currentDate = format.format(new Date(now));
+  // }
+  // }
+  // }
+  // return currentDate;
+  //
+  // }
 
   /**
    * Get the HTTP format of the specified date.
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -23,12 +23,11 @@
 import org.apache.nutch.metadata.HttpHeaders;
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * A response inteface.  Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
-  
+
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -40,7 +39,7 @@
 
   /** Returns all the headers. */
   public Metadata getHeaders();
-  
+
   /** Returns the full content of the response. */
   public byte[] getContent();
 
Index: src/java/org/apache/nutch/net/URLNormalizer.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizer.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLNormalizer.java	(working copy)
@@ -21,13 +21,17 @@
 
 import org.apache.hadoop.conf.Configurable;
 
-/** Interface used to convert URLs to normal form and optionally perform substitutions */
+/**
+ * Interface used to convert URLs to normal form and optionally perform
+ * substitutions
+ */
 public interface URLNormalizer extends Configurable {
-  
+
   /* Extension ID */
   public static final String X_POINT_ID = URLNormalizer.class.getName();
-  
+
   /* Interface for URL normalization */
-  public String normalize(String urlString, String scope) throws MalformedURLException;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException;
 
 }
Index: src/java/org/apache/nutch/net/URLFilter.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilter.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLFilter.java	(working copy)
@@ -23,17 +23,18 @@
 // Nutch imports
 import org.apache.nutch.plugin.Pluggable;
 
-
 /**
- * Interface used to limit which URLs enter Nutch.
- * Used by the injector and the db updater.
+ * Interface used to limit which URLs enter Nutch. Used by the injector and the
+ * db updater.
  */
 
 public interface URLFilter extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = URLFilter.class.getName();
 
-  /* Interface for a filter that transforms a URL: it can pass the
-     original URL through or "delete" the URL by returning null */
+  /*
+   * Interface for a filter that transforms a URL: it can pass the original URL
+   * through or "delete" the URL by returning null
+   */
   public String filter(String urlString);
 }
Index: src/java/org/apache/nutch/net/URLNormalizers.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizers.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLNormalizers.java	(working copy)
@@ -43,47 +43,63 @@
  * contexts where they are used (note however that they need to be activated
  * first through <tt>plugin.include</tt> property).
  * 
- * <p>There is one global scope defined by default, which consists of all
- * active normalizers. The order in which these normalizers
- * are executed may be defined in "urlnormalizer.order" property, which lists
- * space-separated implementation classes (if this property is missing normalizers
- * will be run in random order). If there are more
- * normalizers activated than explicitly named on this list, the remaining ones
- * will be run in random order after the ones specified on the list are executed.</p>
- * <p>You can define a set of contexts (or scopes) in which normalizers may be
+ * <p>
+ * There is one global scope defined by default, which consists of all active
+ * normalizers. The order in which these normalizers are executed may be defined
+ * in "urlnormalizer.order" property, which lists space-separated implementation
+ * classes (if this property is missing normalizers will be run in random
+ * order). If there are more normalizers activated than explicitly named on this
+ * list, the remaining ones will be run in random order after the ones specified
+ * on the list are executed.
+ * </p>
+ * <p>
+ * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
  * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
  * "urlnormalizer.order.<scope_name>" property). If any of these properties are
- * missing, default settings are used for the global scope.</p>
- * <p>In case no normalizers are required for any given scope, a
- * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p>
- * <p>Each normalizer may further select among many configurations, depending on
- * the scope in which it is called, because the scope name is passed as a parameter
- * to each normalizer. You can also use the same normalizer for many scopes.</p>
- * <p>Several scopes have been defined, and various Nutch tools will attempt using
- * scope-specific normalizers first (and fall back to default config if scope-specific
- * configuration is missing).</p>
- * <p>Normalizers may be run several times, to ensure that modifications introduced
+ * missing, default settings are used for the global scope.
+ * </p>
+ * <p>
+ * In case no normalizers are required for any given scope, a
+ * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should
+ * be used.
+ * </p>
+ * <p>
+ * Each normalizer may further select among many configurations, depending on
+ * the scope in which it is called, because the scope name is passed as a
+ * parameter to each normalizer. You can also use the same normalizer for many
+ * scopes.
+ * </p>
+ * <p>
+ * Several scopes have been defined, and various Nutch tools will attempt using
+ * scope-specific normalizers first (and fall back to default config if
+ * scope-specific configuration is missing).
+ * </p>
+ * <p>
+ * Normalizers may be run several times, to ensure that modifications introduced
  * by normalizers at the end of the list can be further reduced by normalizers
- * executed at the beginning. By default this loop is executed just once - if you want
- * to ensure that all possible combinations have been applied you may want to run
- * this loop up to the number of activated normalizers. This loop count can be configured
- * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is
- * unchanged the loop will stop and return the result.</p>
+ * executed at the beginning. By default this loop is executed just once - if
+ * you want to ensure that all possible combinations have been applied you may
+ * want to run this loop up to the number of activated normalizers. This loop
+ * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
+ * As soon as the url is unchanged the loop will stop and return the result.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
 public final class URLNormalizers {
-  
-  /** Default scope. If no scope properties are defined then the configuration for
-   * this scope will be used.
+
+  /**
+   * Default scope. If no scope properties are defined then the configuration
+   * for this scope will be used.
    */
   public static final String SCOPE_DEFAULT = "default";
   /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */
   public static final String SCOPE_PARTITION = "partition";
   /** Scope used by {@link org.apache.nutch.crawl.GeneratorJob}. */
   public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
-  /** Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
+  /**
+   * Scope used by {@link org.apache.nutch.fetcher.FetcherJob} when processing
    * redirect URLs.
    */
   public static final String SCOPE_FETCHER = "fetcher";
@@ -93,15 +109,18 @@
   public static final String SCOPE_LINKDB = "linkdb";
   /** Scope used by {@link org.apache.nutch.crawl.InjectorJob}. */
   public static final String SCOPE_INJECT = "inject";
-  /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */
+  /**
+   * Scope used when constructing new {@link org.apache.nutch.parse.Outlink}
+   * instances.
+   */
   public static final String SCOPE_OUTLINK = "outlink";
-  
 
-  public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(URLNormalizers.class);
 
   /* Empty extension list for caching purposes. */
   private final List<Extension> EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
-  
+
   private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
 
   private Configuration conf;
@@ -109,37 +128,39 @@
   private ExtensionPoint extensionPoint;
 
   private URLNormalizer[] normalizers;
-  
+
   private int loopCount;
 
   public URLNormalizers(Configuration conf, String scope) {
     this.conf = conf;
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
-            URLNormalizer.X_POINT_ID);
+        URLNormalizer.X_POINT_ID);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (this.extensionPoint == null) {
       throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
-              + " not found.");
+          + " not found.");
     }
 
-    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
+    normalizers = (URLNormalizer[]) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_" + scope);
     if (normalizers == null) {
       normalizers = getURLNormalizers(scope);
     }
     if (normalizers == EMPTY_NORMALIZERS) {
-      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
+      normalizers = (URLNormalizer[]) objectCache
+          .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
       if (normalizers == null) {
         normalizers = getURLNormalizers(SCOPE_DEFAULT);
       }
     }
-    
+
     loopCount = conf.getInt("urlnormalizer.loop.count", 1);
   }
 
   /**
-   * Function returns an array of {@link URLNormalizer}s for a given scope,
-   * with a specified order.
+   * Function returns an array of {@link URLNormalizer}s for a given scope, with
+   * a specified order.
    * 
    * @param scope
    *          The scope to return the <code>Array</code> of
@@ -151,13 +172,14 @@
   URLNormalizer[] getURLNormalizers(String scope) {
     List<Extension> extensions = getExtensions(scope);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (extensions == EMPTY_EXTENSION_LIST) {
       return EMPTY_NORMALIZERS;
     }
-    
-    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());
 
+    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(
+        extensions.size());
+
     Iterator<Extension> it = extensions.iterator();
     while (it.hasNext()) {
       Extension ext = it.next();
@@ -174,14 +196,13 @@
       } catch (PluginRuntimeException e) {
         e.printStackTrace();
         LOG.warn("URLNormalizers:PluginRuntimeException when "
-                + "initializing url normalizer plugin "
-                + ext.getDescriptor().getPluginId()
-                + " instance in getURLNormalizers "
-                + "function: attempting to continue instantiating plugins");
+            + "initializing url normalizer plugin "
+            + ext.getDescriptor().getPluginId()
+            + " instance in getURLNormalizers "
+            + "function: attempting to continue instantiating plugins");
       }
     }
-    return normalizers.toArray(new URLNormalizer[normalizers
-            .size()]);
+    return normalizers.toArray(new URLNormalizer[normalizers.size()]);
   }
 
   /**
@@ -195,9 +216,8 @@
    */
   private List<Extension> getExtensions(String scope) {
     ObjectCache objectCache = ObjectCache.get(conf);
-    List<Extension> extensions = 
-      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
-                                                + scope);
+    List<Extension> extensions = (List<Extension>) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope);
 
     // Just compare the reference:
     // if this is the empty list, we know we will find no extension.
@@ -208,11 +228,13 @@
     if (extensions == null) {
       extensions = findExtensions(scope);
       if (extensions != null) {
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            extensions);
       } else {
         // Put the empty extension list into cache
         // to remember we don't know any related extension.
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            EMPTY_EXTENSION_LIST);
         extensions = EMPTY_EXTENSION_LIST;
       }
     }
@@ -232,7 +254,8 @@
 
     String[] orders = null;
     String orderlist = conf.get("urlnormalizer.order." + scope);
-    if (orderlist == null) orderlist = conf.get("urlnormalizer.order");
+    if (orderlist == null)
+      orderlist = conf.get("urlnormalizer.order");
     if (orderlist != null && !orderlist.trim().equals("")) {
       orders = orderlist.split("\\s+");
     }
@@ -270,13 +293,17 @@
 
   /**
    * Normalize
-   * @param urlString The URL string to normalize.
-   * @param scope The given scope.
+   * 
+   * @param urlString
+   *          The URL string to normalize.
+   * @param scope
+   *          The given scope.
    * @return A normalized String, using the given <code>scope</code>
-   * @throws MalformedURLException If the given URL string is malformed.
+   * @throws MalformedURLException
+   *           If the given URL string is malformed.
    */
   public String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     // optionally loop several times, and break if no further changes
     String initialString = urlString;
     for (int k = 0; k < loopCount; k++) {
@@ -285,7 +312,8 @@
           return null;
         urlString = this.normalizers[i].normalize(urlString, scope);
       }
-      if (initialString.equals(urlString)) break;
+      if (initialString.equals(urlString))
+        break;
       initialString = urlString;
     }
     return urlString;
Index: src/java/org/apache/nutch/net/URLNormalizerChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizerChecker.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLNormalizerChecker.java	(working copy)
@@ -36,23 +36,23 @@
   private Configuration conf;
 
   public URLNormalizerChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String normalizerName, String scope) throws Exception {
     URLNormalizer normalizer = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLNormalizer.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      normalizer = (URLNormalizer)extension.getExtensionInstance();
+      normalizer = (URLNormalizer) extension.getExtensionInstance();
       if (normalizer.getClass().getName().equals(normalizerName)) {
         break;
       } else {
@@ -61,7 +61,8 @@
     }
 
     if (normalizer == null)
-      throw new RuntimeException("URLNormalizer "+normalizerName+" not found.");
+      throw new RuntimeException("URLNormalizer " + normalizerName
+          + " not found.");
 
     System.out.println("Checking URLNormalizer " + normalizerName);
 
@@ -79,7 +80,7 @@
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
     URLNormalizers normalizers = new URLNormalizers(conf, scope);
-    while((line = in.readLine()) != null) {
+    while ((line = in.readLine()) != null) {
       String out = normalizers.normalize(line, scope);
       System.out.println(out);
     }
@@ -88,7 +89,7 @@
   public static void main(String[] args) throws Exception {
 
     String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
-      + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
+        + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
 
     String normalizerName = null;
     String scope = URLNormalizers.SCOPE_DEFAULT;
@@ -103,7 +104,8 @@
       }
     }
 
-    URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create());
+    URLNormalizerChecker checker = new URLNormalizerChecker(
+        NutchConfiguration.create());
     if (normalizerName != null) {
       checker.checkOne(normalizerName, scope);
     } else {
Index: src/java/org/apache/nutch/net/URLFilters.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilters.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLFilters.java	(working copy)
@@ -28,7 +28,8 @@
 import org.apache.nutch.util.ObjectCache;
 
 import org.apache.hadoop.conf.Configuration;
-/** Creates and caches {@link URLFilter} implementing plugins.*/
+
+/** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
 
   public static final String URLFILTER_ORDER = "urlfilter.order";
@@ -37,7 +38,8 @@
   public URLFilters(Configuration conf) {
     String order = conf.get(URLFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,8 +62,8 @@
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(URLFilter.class.getName(), filterMap.values().toArray(
-              new URLFilter[0]));
+          objectCache.setObject(URLFilter.class.getName(), filterMap.values()
+              .toArray(new URLFilter[0]));
         } else {
           ArrayList<URLFilter> filters = new ArrayList<URLFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -70,13 +72,14 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(URLFilter.class.getName(), filters
-              .toArray(new URLFilter[filters.size()]));
+          objectCache.setObject(URLFilter.class.getName(),
+              filters.toArray(new URLFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+          .getName());
     }
   }
 
Index: src/java/org/apache/nutch/net/URLFilterChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilterChecker.java	(revision 1188268)
+++ src/java/org/apache/nutch/net/URLFilterChecker.java	(working copy)
@@ -38,23 +38,23 @@
   private Configuration conf;
 
   public URLFilterChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String filterName) throws Exception {
     URLFilter filter = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLFilter.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLFilter.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      filter = (URLFilter)extension.getExtensionInstance();
+      filter = (URLFilter) extension.getExtensionInstance();
       if (filter.getClass().getName().equals(filterName)) {
         break;
       } else {
@@ -63,19 +63,19 @@
     }
 
     if (filter == null)
-      throw new RuntimeException("Filter "+filterName+" not found.");
+      throw new RuntimeException("Filter " + filterName + " not found.");
 
     // jerome : should we keep this behavior?
-    //if (LogFormatter.hasLoggedSevere())
-    //  throw new RuntimeException("Severe error encountered.");
+    // if (LogFormatter.hasLoggedSevere())
+    // throw new RuntimeException("Severe error encountered.");
 
-    System.out.println("Checking URLFilter "+filterName);
+    System.out.println("Checking URLFilter " + filterName);
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
@@ -90,10 +90,10 @@
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       URLFilters filters = new URLFilters(this.conf);
       String out = filters.filter(line);
-      if(out!=null) {
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
Index: src/java/org/apache/nutch/crawl/Crawler.java
===================================================================
--- src/java/org/apache/nutch/crawl/Crawler.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/Crawler.java	(working copy)
@@ -45,29 +45,28 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class Crawler extends NutchTool implements Tool {
   private static final Logger LOG = LoggerFactory.getLogger(Crawler.class);
-  
+
   private boolean cleanSeedDir = false;
   private String tmpSeedDir = null;
-  private HashMap<String,Object> results = new HashMap<String,Object>();
-  private Map<String,Object> status =
-    Collections.synchronizedMap(new HashMap<String,Object>());
+  private HashMap<String, Object> results = new HashMap<String, Object>();
+  private Map<String, Object> status = Collections
+      .synchronizedMap(new HashMap<String, Object>());
   private NutchTool currentTool = null;
   private boolean shouldStop = false;
-  
+
   @Override
-  public Map<String,Object> getStatus() {
+  public Map<String, Object> getStatus() {
     return status;
   }
-  
-  private Map<String,Object> runTool(Class<? extends NutchTool> toolClass,
-      Map<String,Object> args) throws Exception {
-    currentTool = (NutchTool)ReflectionUtils.newInstance(toolClass, getConf());
+
+  private Map<String, Object> runTool(Class<? extends NutchTool> toolClass,
+      Map<String, Object> args) throws Exception {
+    currentTool = (NutchTool) ReflectionUtils.newInstance(toolClass, getConf());
     return currentTool.run(args);
   }
-  
+
   @Override
   public boolean stopJob() throws Exception {
     shouldStop = true;
@@ -87,20 +86,20 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String, Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     results.clear();
     status.clear();
-    String crawlId = (String)args.get(Nutch.ARG_CRAWL);
+    String crawlId = (String) args.get(Nutch.ARG_CRAWL);
     if (crawlId != null) {
       getConf().set(Nutch.CRAWL_ID_KEY, crawlId);
     }
     String seedDir = null;
-    String seedList = (String)args.get(Nutch.ARG_SEEDLIST);    
+    String seedList = (String) args.get(Nutch.ARG_SEEDLIST);
     if (seedList != null) { // takes precedence
       String[] seeds = seedList.split("\\s+");
       // create tmp. dir
-      String tmpSeedDir = getConf().get("hadoop.tmp.dir") + "/seed-" +
-        System.currentTimeMillis();
+      String tmpSeedDir = getConf().get("hadoop.tmp.dir") + "/seed-"
+          + System.currentTimeMillis();
       FileSystem fs = FileSystem.get(getConf());
       Path p = new Path(tmpSeedDir);
       fs.mkdirs(p);
@@ -115,22 +114,25 @@
       cleanSeedDir = true;
       seedDir = tmpSeedDir;
     } else {
-      seedDir = (String)args.get(Nutch.ARG_SEEDDIR);
+      seedDir = (String) args.get(Nutch.ARG_SEEDDIR);
     }
-    Integer depth = (Integer)args.get(Nutch.ARG_DEPTH);
-    if (depth == null) depth = 1;
-    Boolean parse = (Boolean)args.get(Nutch.ARG_PARSE);
+    Integer depth = (Integer) args.get(Nutch.ARG_DEPTH);
+    if (depth == null)
+      depth = 1;
+    Boolean parse = (Boolean) args.get(Nutch.ARG_PARSE);
     if (parse == null) {
       parse = getConf().getBoolean(FetcherJob.PARSE_KEY, false);
     }
-    String solrUrl = (String)args.get(Nutch.ARG_SOLR);
+    String solrUrl = (String) args.get(Nutch.ARG_SOLR);
     int onePhase = 3;
-    if (!parse) onePhase++;
+    if (!parse)
+      onePhase++;
     float totalPhases = depth * onePhase;
-    if (seedDir != null) totalPhases++;
+    if (seedDir != null)
+      totalPhases++;
     float phase = 0;
-    Map<String,Object> jobRes = null;
-    LinkedHashMap<String,Object> subTools = new LinkedHashMap<String,Object>();
+    Map<String, Object> jobRes = null;
+    LinkedHashMap<String, Object> subTools = new LinkedHashMap<String, Object>();
     status.put(Nutch.STAT_JOBS, subTools);
     results.put(Nutch.STAT_JOBS, subTools);
     // inject phase
@@ -202,15 +204,17 @@
 
   @Override
   public float getProgress() {
-    Float p = (Float)status.get(Nutch.STAT_PROGRESS);
-    if (p == null) return 0;
+    Float p = (Float) status.get(Nutch.STAT_PROGRESS);
+    if (p == null)
+      return 0;
     return p;
   }
 
   @Override
   public int run(String[] args) throws Exception {
     if (args.length == 0) {
-      System.out.println("Usage: Crawler (<seedDir> | -continue) [-solr <solrURL>] [-threads n] [-depth i] [-topN N]");
+      System.out
+          .println("Usage: Crawler (<seedDir> | -continue) [-solr <solrURL>] [-threads n] [-depth i] [-topN N]");
       return -1;
     }
     // parse most common arguments here
@@ -219,17 +223,17 @@
     int depth = 5;
     long topN = Long.MAX_VALUE;
     String solrUrl = null;
-    
+
     for (int i = 0; i < args.length; i++) {
       if ("-threads".equals(args[i])) {
-        threads = Integer.parseInt(args[i+1]);
+        threads = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-depth".equals(args[i])) {
-        depth = Integer.parseInt(args[i+1]);
+        depth = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-topN".equals(args[i])) {
-          topN = Integer.parseInt(args[i+1]);
-          i++;
+        topN = Integer.parseInt(args[i + 1]);
+        i++;
       } else if ("-solr".equals(args[i])) {
         solrUrl = StringUtils.lowerCase(args[i + 1]);
         i++;
@@ -239,16 +243,13 @@
         seedDir = args[i];
       }
     }
-    Map<String,Object> argMap = ToolUtil.toArgMap(
-        Nutch.ARG_THREADS, threads,
-        Nutch.ARG_DEPTH, depth,
-        Nutch.ARG_TOPN, topN,
-        Nutch.ARG_SOLR, solrUrl,
+    Map<String, Object> argMap = ToolUtil.toArgMap(Nutch.ARG_THREADS, threads,
+        Nutch.ARG_DEPTH, depth, Nutch.ARG_TOPN, topN, Nutch.ARG_SOLR, solrUrl,
         Nutch.ARG_SEEDDIR, seedDir);
     run(argMap);
     return 0;
   }
-  
+
   public static void main(String[] args) throws Exception {
     Crawler c = new Crawler();
     Configuration conf = NutchConfiguration.create();
Index: src/java/org/apache/nutch/crawl/GeneratorReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorReducer.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/GeneratorReducer.java	(working copy)
@@ -30,14 +30,15 @@
 import org.apache.nutch.util.URLUtil;
 import org.apache.gora.mapreduce.GoraReducer;
 
-/** Reduce class for generate
- *
- * The #reduce() method write a random integer to all generated URLs. This random
- * number is then used by {@link FetcherMapper}.
- *
+/**
+ * Reduce class for the generate phase.
+ * 
+ * The #reduce() method writes a random integer to all generated URLs. This
+ * random number is then used by {@link FetcherMapper}.
+ * 
  */
-public class GeneratorReducer
-extends GoraReducer<SelectorEntry, WebPage, String, WebPage> {
+public class GeneratorReducer extends
+    GoraReducer<SelectorEntry, WebPage, String, WebPage> {
 
   private long limit;
   private long maxCount;
@@ -80,10 +81,11 @@
   }
 
   @Override
-  protected void setup(Context context)
-      throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
-    long totalLimit = conf.getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
+    long totalLimit = conf
+        .getLong(GeneratorJob.GENERATOR_TOP_N, Long.MAX_VALUE);
     if (totalLimit == Long.MAX_VALUE) {
       limit = Long.MAX_VALUE;
     } else {
@@ -91,8 +93,8 @@
     }
     maxCount = conf.getLong(GeneratorJob.GENERATOR_MAX_COUNT, -2);
     batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID));
-    String countMode =
-      conf.get(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    String countMode = conf.get(GeneratorJob.GENERATOR_COUNT_MODE,
+        GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     if (countMode.equals(GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN)) {
       byDomain = true;
     }
Index: src/java/org/apache/nutch/crawl/DbUpdateReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/DbUpdateReducer.java	(working copy)
@@ -36,11 +36,11 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class DbUpdateReducer
-extends GoraReducer<String, NutchWritable, String, WebPage> {
+public class DbUpdateReducer extends
+    GoraReducer<String, NutchWritable, String, WebPage> {
 
-  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";	
-	
+  public static final String CRAWLDB_ADDITIONS_ALLOWED = "db.update.additions.allowed";
+
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private int retryMax;
@@ -51,11 +51,12 @@
   private List<ScoreDatum> inlinkedScoreData = new ArrayList<ScoreDatum>();
 
   @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
+  protected void setup(Context context) throws IOException,
+      InterruptedException {
     Configuration conf = context.getConfiguration();
     retryMax = conf.getInt("db.fetch.retry.max", 3);
     additionsAllowed = conf.getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
     scoringFilters = new ScoringFilters(conf);
   }
@@ -99,7 +100,7 @@
       }
     } else {
       if (page.getMetadata().containsKey(FetcherJob.REDIRECT_DISCOVERED)
-            && !page.isReadable(WebPage.Field.STATUS.getIndex())) {
+          && !page.isReadable(WebPage.Field.STATUS.getIndex())) {
         // this row is marked during fetch as the destination of a redirect
         // but does not contain anything else, so we initialize it.
         page.setStatus(CrawlStatus.STATUS_UNFETCHED);
@@ -110,12 +111,12 @@
           page.setScore(0.0f);
         }
       } else { // update row
-        byte status = (byte)page.getStatus();
+        byte status = (byte) page.getStatus();
         switch (status) {
-        case CrawlStatus.STATUS_FETCHED:         // succesful fetch
-        case CrawlStatus.STATUS_REDIR_TEMP:      // successful fetch, redirected
+        case CrawlStatus.STATUS_FETCHED: // successful fetch
+        case CrawlStatus.STATUS_REDIR_TEMP: // successful fetch, redirected
         case CrawlStatus.STATUS_REDIR_PERM:
-        case CrawlStatus.STATUS_NOTMODIFIED:     // successful fetch, notmodified
+        case CrawlStatus.STATUS_NOTMODIFIED: // successful fetch, notmodified
           int modified = FetchSchedule.STATUS_UNKNOWN;
           if (status == CrawlStatus.STATUS_NOTMODIFIED) {
             modified = FetchSchedule.STATUS_NOTMODIFIED;
@@ -133,8 +134,8 @@
           long prevFetchTime = page.getPrevFetchTime();
           long modifiedTime = page.getModifiedTime();
 
-          schedule.setFetchSchedule(url, page, prevFetchTime, 0L,
-              fetchTime, modifiedTime, modified);
+          schedule.setFetchSchedule(url, page, prevFetchTime, 0L, fetchTime,
+              modifiedTime, modified);
           if (maxInterval < page.getFetchInterval())
             schedule.forceRefetch(url, page, false);
           break;
@@ -163,8 +164,8 @@
     try {
       scoringFilters.updateScore(url, page, inlinkedScoreData);
     } catch (ScoringFilterException e) {
-      LOG.warn("Scoring filters failed with exception " +
-                StringUtils.stringifyException(e));
+      LOG.warn("Scoring filters failed with exception "
+          + StringUtils.stringifyException(e));
     }
 
     // clear markers
Index: src/java/org/apache/nutch/crawl/DbUpdaterJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/DbUpdaterJob.java	(working copy)
@@ -45,10 +45,8 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(DbUpdaterJob.class);
 
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-  private static final Collection<WebPage.Field> FIELDS =
-    new HashSet<WebPage.Field>();
-
   static {
     FIELDS.add(WebPage.Field.OUTLINKS);
     FIELDS.add(WebPage.Field.INLINKS);
@@ -72,22 +70,23 @@
   public DbUpdaterJob(Configuration conf) {
     setConf(conf);
   }
-    
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String crawlId = (String)args.get(Nutch.ARG_CRAWL);
+
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String crawlId = (String) args.get(Nutch.ARG_CRAWL);
     numJobs = 1;
     currentJobNum = 0;
     currentJob = new NutchJob(getConf(), "update-table");
     if (crawlId != null) {
       currentJob.getConfiguration().set(Nutch.CRAWL_ID_KEY, crawlId);
     }
-    //job.setBoolean(ALL, updateAll);
+    // job.setBoolean(ALL, updateAll);
     ScoringFilters scoringFilters = new ScoringFilters(getConf());
     HashSet<WebPage.Field> fields = new HashSet<WebPage.Field>(FIELDS);
     fields.addAll(scoringFilters.getFields());
     // TODO: Figure out why this needs to be here
-    currentJob.getConfiguration().setClass("mapred.output.key.comparator.class",
-        StringComparator.class, RawComparator.class);
+    currentJob.getConfiguration().setClass(
+        "mapred.output.key.comparator.class", StringComparator.class,
+        RawComparator.class);
     StorageUtils.initMapperJob(currentJob, fields, String.class,
         NutchWritable.class, DbUpdateMapper.class);
     StorageUtils.initReducerJob(currentJob, DbUpdateReducer.class);
@@ -95,7 +94,7 @@
     ToolUtil.recordJobStatus(null, currentJob, results);
     return results;
   }
-  
+
   private int updateTable(String crawlId) throws Exception {
     LOG.info("DbUpdaterJob: starting");
     run(ToolUtil.toArgMap(Nutch.ARG_CRAWL, crawlId));
@@ -112,7 +111,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new DbUpdaterJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/FetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchSchedule.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/FetchSchedule.java	(working copy)
@@ -24,115 +24,142 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This interface defines the contract for implementations that manipulate
- * fetch times and re-fetch intervals.
- *
+ * This interface defines the contract for implementations that manipulate fetch
+ * times and re-fetch intervals.
+ * 
  * @author Andrzej Bialecki
  */
 public interface FetchSchedule extends Configurable {
 
   /** It is unknown whether page was changed since our last visit. */
-  public static final int STATUS_UNKNOWN       = 0;
+  public static final int STATUS_UNKNOWN = 0;
   /** Page is known to have been modified since our last visit. */
-  public static final int STATUS_MODIFIED      = 1;
+  public static final int STATUS_MODIFIED = 1;
   /** Page is known to remain unmodified since our last visit. */
-  public static final int STATUS_NOTMODIFIED    = 2;
+  public static final int STATUS_NOTMODIFIED = 2;
 
   public static final int SECONDS_PER_DAY = 3600 * 24;
 
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation set the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation set the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   public void initializeSchedule(String url, WebPage page);
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page.
-   * Implementations may use supplied arguments to support different re-fetching
-   * schedules.
-   *
-   * @param url url of the page
+   * successfully fetched page. Implementations may use supplied arguments to
+   * support different re-fetching schedules.
+   * 
+   * @param url
+   *          url of the page
    * @param page
-   * @param prevFetchTime previous value of fetch time, or -1 if not available
-   * @param prevModifiedTime previous value of modifiedTime, or -1 if not available
-   * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@param datum} to something greater than this value.
-   * @param modifiedTime last time the content was modified. This information comes from
-   * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@param datum} to this value.
-   * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
-   * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
-   * This information may be obtained by comparing page signatures before and after fetching. If this
-   * is set to {@link #STATUS_UNKNOWN}, then it is unknown whether the page was changed; implementations
-   * are free to follow a sensible default behavior.
+   * @param prevFetchTime
+   *          previous value of fetch time, or -1 if not available
+   * @param prevModifiedTime
+   *          previous value of modifiedTime, or -1 if not available
+   * @param fetchTime
+   *          the latest time, when the page was recently re-fetched. Most
+   *          FetchSchedule implementations should update the fetch time
+   *          stored in <code>page</code> to something greater than this
+   *          value.
+   * @param modifiedTime
+   *          last time the content was modified. This information comes
+   *          from the protocol implementations, or is set to a value
+   *          &lt; 0 if not available. Most FetchSchedule implementations
+   *          should update the modified time stored in <code>page</code>
+   *          to this value.
+   * @param state
+   *          if {@link #STATUS_MODIFIED}, then the content is considered to be
+   *          "changed" before the <code>fetchTime</code>, if
+   *          {@link #STATUS_NOTMODIFIED} then the content is known to be
+   *          unchanged. This information may be obtained by comparing page
+   *          signatures before and after fetching. If this is set to
+   *          {@link #STATUS_UNKNOWN}, then it is unknown whether the page was
+   *          changed; implementations are free to follow a sensible default
+   *          behavior.
    */
-  public void setFetchSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime,
-      long fetchTime, long modifiedTime, int state);
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state);
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    */
-  public void setPageGoneSchedule(String url, WebPage page,
-      long prevFetchTime, long prevModifiedTime, long fetchTime);
+  public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime);
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases the
-   * retry counter.Set
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   public void setPageRetrySchedule(String url, WebPage page,
       long prevFetchTime, long prevModifiedTime, long fetchTime);
 
   /**
    * Calculates last fetch time of the given CrawlDatum.
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(WebPage page);
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param row url's row
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information whether the page is suitable for
+   * selection in the current fetchlist. NOTE: a true return value does not
+   * guarantee that the page will be fetched, it just allows it to be included
+   * in the further selection process based on scores. The default
+   * implementation checks <code>fetchTime</code>: if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will also
+   * check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param row
+   *          url's row
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(String url, WebPage page, long curTime);
 
   /**
-   * This method resets fetchTime, fetchInterval, modifiedTime and
-   * page signature, so that it forces refetching.
-   * @param url URL of the page
+   * This method resets fetchTime, fetchInterval, modifiedTime and page
+   * signature, so that it forces refetching.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   public void forceRefetch(String url, WebPage row, boolean asap);
 
Index: src/java/org/apache/nutch/crawl/MD5Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/MD5Signature.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/MD5Signature.java	(working copy)
@@ -24,10 +24,10 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * Default implementation of a page signature. It calculates an MD5 hash
- * of the raw binary content of a page. In case there is no content, it
- * calculates a hash from the page's URL.
- *
+ * Default implementation of a page signature. It calculates an MD5 hash of the
+ * raw binary content of a page. In case there is no content, it calculates a
+ * hash from the page's URL.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class MD5Signature extends Signature {
@@ -41,7 +41,8 @@
   @Override
   public byte[] calculate(WebPage page) {
     byte[] data = page.getContent().array();
-    if (data == null && page.getBaseUrl()!=null) data = page.getBaseUrl().getBytes();
+    if (data == null && page.getBaseUrl() != null)
+      data = page.getBaseUrl().getBytes();
     return MD5Hash.digest(data).getDigest();
   }
 
Index: src/java/org/apache/nutch/crawl/InjectorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/InjectorJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/InjectorJob.java	(working copy)
@@ -54,14 +54,17 @@
 import org.apache.nutch.util.TableUtil;
 import org.apache.nutch.util.ToolUtil;
 
-/** This class takes a flat file of URLs and adds them to the of pages to be
- * crawled.  Useful for bootstrapping the system.
- * The URL files contain one URL per line, optionally followed by custom metadata
- * separated by tabs with the metadata key separated from the corresponding value by '='. <br>
+/**
+ * This class takes a flat file of URLs and adds them to the table of pages to be
+ * crawled. Useful for bootstrapping the system. The URL files contain one URL
+ * per line, optionally followed by custom metadata separated by tabs with the
+ * metadata key separated from the corresponding value by '='. <br>
  * Note that some metadata keys are reserved : <br>
  * - <i>nutch.score</i> : allows to set a custom score for a specific URL <br>
- * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a specific URL <br>
- * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
+ * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a
+ * specific URL <br>
+ * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000
+ * \t userType=open_source
  **/
 public class InjectorJob extends NutchTool implements Tool {
 
@@ -161,32 +164,32 @@
       // now add the metadata
       Iterator<String> keysIter = metadata.keySet().iterator();
       while (keysIter.hasNext()) {
-          String keymd = keysIter.next();
-          String valuemd = metadata.get(keymd);
-          row.putToMetadata(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
+        String keymd = keysIter.next();
+        String valuemd = metadata.get(keymd);
+        row.putToMetadata(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
       }
 
       if (customScore != -1)
-    	  row.setScore(customScore);
+        row.setScore(customScore);
       else
-    	  row.setScore(scoreInjected);
+        row.setScore(scoreInjected);
 
       try {
-    	  scfilters.injectedScore(url, row);
+        scfilters.injectedScore(url, row);
       } catch (ScoringFilterException e) {
-    	  if (LOG.isWarnEnabled()) {
-    		  LOG.warn("Cannot filter injected score for url " + url
-    				  + ", using default (" + e.getMessage() + ")");
-    	  }
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("Cannot filter injected score for url " + url
+              + ", using default (" + e.getMessage() + ")");
+        }
       }
 
       Mark.INJECT_MARK.putMark(row, YES_STRING);
       context.write(reversedUrl, row);
     }
   }
-  
-  public static class InjectorMapper 
-      extends GoraMapper<String, WebPage, String, WebPage> {
+
+  public static class InjectorMapper extends
+      GoraMapper<String, WebPage, String, WebPage> {
     private FetchSchedule schedule;
 
     @Override
@@ -211,7 +214,7 @@
 
       context.write(key, row);
     }
-        
+
   }
 
   public InjectorJob() {
@@ -222,12 +225,12 @@
     setConf(conf);
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     getConf().setLong("injector.current.time", System.currentTimeMillis());
     Path input;
     Object path = args.get(Nutch.ARG_SEEDDIR);
     if (path instanceof Path) {
-      input = (Path)path;
+      input = (Path) path;
     } else {
       input = new Path(path.toString());
     }
@@ -240,8 +243,8 @@
     currentJob.setMapOutputKeyClass(String.class);
     currentJob.setMapOutputValueClass(WebPage.class);
     currentJob.setOutputFormatClass(GoraOutputFormat.class);
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob.getConfiguration(),
-        String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     GoraOutputFormat.setOutput(currentJob, store, true);
     currentJob.setReducerClass(Reducer.class);
     currentJob.setNumReduceTasks(0);
@@ -253,8 +256,8 @@
     status.put(Nutch.STAT_PROGRESS, 0.5f);
     currentJobNum = 1;
     currentJob = new NutchJob(getConf(), "inject-p2 " + input);
-    StorageUtils.initMapperJob(currentJob, FIELDS, String.class,
-        WebPage.class, InjectorMapper.class);
+    StorageUtils.initMapperJob(currentJob, FIELDS, String.class, WebPage.class,
+        InjectorMapper.class);
     currentJob.setNumReduceTasks(0);
     ToolUtil.recordJobStatus(null, currentJob, results);
     status.put(Nutch.STAT_PROGRESS, 1.0f);
@@ -289,7 +292,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new InjectorJob(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/crawl/WebTableReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/WebTableReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/WebTableReader.java	(working copy)
@@ -65,7 +65,8 @@
 
 public class WebTableReader extends NutchTool implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(WebTableReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(WebTableReader.class);
 
   public static class WebTableStatMapper extends
       GoraMapper<String, WebPage, Text, LongWritable> {
@@ -216,16 +217,17 @@
       LOG.info("WebTable statistics start");
     }
     run(ToolUtil.toArgMap(Nutch.ARG_SORT, sort));
-    for (Entry<String,Object> e : results.entrySet()) {
+    for (Entry<String, Object> e : results.entrySet()) {
       LOG.info(e.getKey() + ":\t" + e.getValue());
     }
   }
 
   /** Prints out the entry to the standard out **/
   private void read(String key, boolean dumpContent, boolean dumpHeaders,
-      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException, IOException {
-    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(getConf(),
-        String.class, WebPage.class);
+      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException,
+      IOException {
+    DataStore<String, WebPage> datastore = StorageUtils.createWebStore(
+        getConf(), String.class, WebPage.class);
 
     Query<String, WebPage> query = datastore.newQuery();
     String reversedUrl = TableUtil.reverseUrl(key);
@@ -276,9 +278,10 @@
       // checks whether the Key passes the regex
       String url = TableUtil.unreverseUrl(key.toString());
       if (regex.matcher(url).matches()) {
-        context.write(new Text(url),
-            new Text(getPageRepresentation(key, value, dumpContent, dumpHeaders,
-                dumpLinks, dumpText)));
+        context.write(
+            new Text(url),
+            new Text(getPageRepresentation(key, value, dumpContent,
+                dumpHeaders, dumpLinks, dumpText)));
       }
     }
 
@@ -288,8 +291,10 @@
         throws IOException, InterruptedException {
       regex = Pattern.compile(context.getConfiguration().get(regexParamName,
           ".+"));
-      dumpContent = context.getConfiguration().getBoolean(contentParamName, false);
-      dumpHeaders = context.getConfiguration().getBoolean(headersParamName, false);
+      dumpContent = context.getConfiguration().getBoolean(contentParamName,
+          false);
+      dumpHeaders = context.getConfiguration().getBoolean(headersParamName,
+          false);
       dumpLinks = context.getConfiguration().getBoolean(linksParamName, false);
       dumpText = context.getConfiguration().getBoolean(textParamName, false);
     }
@@ -313,8 +318,8 @@
     cfg.setBoolean(WebTableRegexMapper.linksParamName, links);
     cfg.setBoolean(WebTableRegexMapper.textParamName, text);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(job
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        job.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
     query.setFields(WebPage._ALL_FIELDS);
 
@@ -335,26 +340,31 @@
   }
 
   private static String getPageRepresentation(String key, WebPage page,
-      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks, boolean dumpText) {
+      boolean dumpContent, boolean dumpHeaders, boolean dumpLinks,
+      boolean dumpText) {
     StringBuffer sb = new StringBuffer();
     sb.append("key:\t" + key).append("\n");
     sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
-    sb.append("status:\t").append(page.getStatus()).append(" (").append(
-        CrawlStatus.getName((byte) page.getStatus())).append(")\n");
+    sb.append("status:\t").append(page.getStatus()).append(" (")
+        .append(CrawlStatus.getName((byte) page.getStatus())).append(")\n");
     sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
     sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
     sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
     sb.append("retries:\t" + page.getRetriesSinceFetch()).append("\n");
     sb.append("modifiedTime:\t" + page.getModifiedTime()).append("\n");
-    sb.append("protocolStatus:\t" +
-        ProtocolStatusUtils.toString(page.getProtocolStatus())).append("\n");
-    sb.append("parseStatus:\t" +
-        ParseStatusUtils.toString(page.getParseStatus())).append("\n");
+    sb.append(
+        "protocolStatus:\t"
+            + ProtocolStatusUtils.toString(page.getProtocolStatus())).append(
+        "\n");
+    sb.append(
+        "parseStatus:\t" + ParseStatusUtils.toString(page.getParseStatus()))
+        .append("\n");
     sb.append("title:\t" + page.getTitle()).append("\n");
     sb.append("score:\t" + page.getScore()).append("\n");
     ByteBuffer sig = page.getSignature();
     if (sig != null) {
-      sb.append("signature:\t" + StringUtil.toHexString(sig.array())).append("\n");
+      sb.append("signature:\t" + StringUtil.toHexString(sig.array())).append(
+          "\n");
     }
     Map<Utf8, Utf8> markers = page.getMarkers();
     sb.append("markers:\t" + markers).append("\n");
@@ -370,23 +380,23 @@
       }
     }
     if (dumpLinks) {
-      Map<Utf8,Utf8> inlinks = page.getInlinks();
-      Map<Utf8,Utf8> outlinks = page.getOutlinks();
+      Map<Utf8, Utf8> inlinks = page.getInlinks();
+      Map<Utf8, Utf8> outlinks = page.getOutlinks();
       if (outlinks != null) {
-        for (Entry<Utf8,Utf8> e : outlinks.entrySet()) {
+        for (Entry<Utf8, Utf8> e : outlinks.entrySet()) {
           sb.append("outlink:\t" + e.getKey() + "\t" + e.getValue() + "\n");
         }
       }
       if (inlinks != null) {
-        for (Entry<Utf8,Utf8> e : inlinks.entrySet()) {
+        for (Entry<Utf8, Utf8> e : inlinks.entrySet()) {
           sb.append("inlink:\t" + e.getKey() + "\t" + e.getValue() + "\n");
         }
       }
     }
     if (dumpHeaders) {
-      Map<Utf8,Utf8> headers = page.getHeaders();
+      Map<Utf8, Utf8> headers = page.getHeaders();
       if (headers != null) {
-        for (Entry<Utf8,Utf8> e : headers.entrySet()) {
+        for (Entry<Utf8, Utf8> e : headers.entrySet()) {
           sb.append("header:\t" + e.getKey() + "\t" + e.getValue() + "\n");
         }
       }
@@ -414,14 +424,17 @@
     System.exit(res);
   }
 
-  private static enum Op {READ, STAT, DUMP};
+  private static enum Op {
+    READ, STAT, DUMP
+  };
 
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
       System.err
           .println("Usage: WebTableReader (-stats | -url [url] | -dump <out_dir> [-regex regex]) [-crawlId <id>] [-content] [-headers] [-links] [-text]");
-      System.err.println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
       System.err
+          .println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err
           .println("\t-stats [-sort] \tprint overall statistics to System.out");
       System.err.println("\t\t[-sort]\tlist status sorted by host");
       System.err
@@ -449,8 +462,8 @@
         if (args[i].equals("-url")) {
           param = args[++i];
           op = Op.READ;
-          //read(param);
-          //return 0;
+          // read(param);
+          // return 0;
         } else if (args[i].equals("-stats")) {
           op = Op.STAT;
         } else if (args[i].equals("-sort")) {
@@ -495,24 +508,25 @@
 
   // for now handles only -stat
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     Path tmpFolder = new Path(getConf().get("mapred.temp.dir", ".")
         + "stat_tmp" + System.currentTimeMillis());
 
     numJobs = 1;
     currentJob = new NutchJob(getConf(), "db_stats");
 
-    Boolean sort = (Boolean)args.get(Nutch.ARG_SORT);
-    if (sort == null) sort = Boolean.FALSE;
+    Boolean sort = (Boolean) args.get(Nutch.ARG_SORT);
+    if (sort == null)
+      sort = Boolean.FALSE;
     currentJob.getConfiguration().setBoolean("db.reader.stats.sort", sort);
 
-    DataStore<String, WebPage> store = StorageUtils.createWebStore(currentJob
-        .getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createWebStore(
+        currentJob.getConfiguration(), String.class, WebPage.class);
     Query<String, WebPage> query = store.newQuery();
     query.setFields(WebPage._ALL_FIELDS);
 
-    GoraMapper.initMapperJob(currentJob, query, store, Text.class, LongWritable.class,
-        WebTableStatMapper.class, null, true);
+    GoraMapper.initMapperJob(currentJob, query, store, Text.class,
+        LongWritable.class, WebTableStatMapper.class, null, true);
 
     currentJob.setCombinerClass(WebTableStatCombiner.class);
     currentJob.setReducerClass(WebTableStatReducer.class);
@@ -569,7 +583,8 @@
     }
 
     LongWritable totalCnt = stats.get("T");
-    if (totalCnt==null)totalCnt=new LongWritable(0);
+    if (totalCnt == null)
+      totalCnt = new LongWritable(0);
     stats.remove("T");
     results.put("TOTAL urls", totalCnt.get());
     for (Map.Entry<String, LongWritable> entry : stats.entrySet()) {
@@ -588,8 +603,9 @@
         if (st.length > 2)
           results.put(st[2], val.get());
         else
-          results.put(st[0] + " " + code + " ("
-              + CrawlStatus.getName((byte) code) + ")", val.get());
+          results.put(
+              st[0] + " " + code + " (" + CrawlStatus.getName((byte) code)
+                  + ")", val.get());
       } else
         results.put(k, val.get());
     }
@@ -597,7 +613,7 @@
     fileSystem.delete(tmpFolder, true);
     if (LOG.isInfoEnabled()) {
       LOG.info("Statistics for WebTable: ");
-      for (Entry<String,Object> e : results.entrySet()) {
+      for (Entry<String, Object> e : results.entrySet()) {
         LOG.info(e.getKey() + ":\t" + e.getValue());
       }
       LOG.info("WebTable statistics: done");
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -24,13 +24,12 @@
   private static Class<? extends Writable>[] CLASSES = null;
 
   static {
-    CLASSES = new Class[] {
-      org.apache.nutch.scoring.ScoreDatum.class,
-      org.apache.nutch.util.WebPageWritable.class
-    };
+    CLASSES = new Class[] { org.apache.nutch.scoring.ScoreDatum.class,
+        org.apache.nutch.util.WebPageWritable.class };
   }
 
-  public NutchWritable() { }
+  public NutchWritable() {
+  }
 
   public NutchWritable(Writable instance) {
     set(instance);
Index: src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(working copy)
@@ -20,19 +20,18 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * This class implements the default re-fetch schedule. That is, no matter
- * if the page was changed or not, the <code>fetchInterval</code> remains
+ * This class implements the default re-fetch schedule. That is, no matter if
+ * the page was changed or not, the <code>fetchInterval</code> remains
  * unchanged, and the updated page fetchTime will always be set to
  * <code>fetchTime + fetchInterval * 1000</code>.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class DefaultFetchSchedule extends AbstractFetchSchedule {
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     page.setFetchTime(fetchTime + page.getFetchInterval() * 1000L);
Index: src/java/org/apache/nutch/crawl/CrawlStatus.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlStatus.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/CrawlStatus.java	(working copy)
@@ -21,22 +21,22 @@
 
 public class CrawlStatus {
   /** Page was not fetched yet. */
-  public static final byte STATUS_UNFETCHED      = 0x01;
+  public static final byte STATUS_UNFETCHED = 0x01;
   /** Page was successfully fetched. */
-  public static final byte STATUS_FETCHED        = 0x02;
+  public static final byte STATUS_FETCHED = 0x02;
   /** Page no longer exists. */
-  public static final byte STATUS_GONE           = 0x03;
+  public static final byte STATUS_GONE = 0x03;
   /** Page temporarily redirects to other page. */
-  public static final byte STATUS_REDIR_TEMP     = 0x04;
+  public static final byte STATUS_REDIR_TEMP = 0x04;
   /** Page permanently redirects to other page. */
-  public static final byte STATUS_REDIR_PERM     = 0x05;
+  public static final byte STATUS_REDIR_PERM = 0x05;
   /** Fetching unsuccessful, needs to be retried (transient errors). */
-  public static final byte STATUS_RETRY          = 0x22;
+  public static final byte STATUS_RETRY = 0x22;
   /** Fetching successful - page is not modified. */
-  public static final byte STATUS_NOTMODIFIED    = 0x26;
-  
+  public static final byte STATUS_NOTMODIFIED = 0x26;
+
   private static final Map<Byte, String> NAMES = new HashMap<Byte, String>();
-  
+
   static {
     NAMES.put(STATUS_UNFETCHED, "status_unfetched");
     NAMES.put(STATUS_FETCHED, "status_fetched");
@@ -46,9 +46,9 @@
     NAMES.put(STATUS_RETRY, "status_retry");
     NAMES.put(STATUS_NOTMODIFIED, "status_notmodified");
   }
-  
+
   public static String getName(byte status) {
     return NAMES.get(status);
   }
- 
+
 }
Index: src/java/org/apache/nutch/crawl/GeneratorJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/GeneratorJob.java	(working copy)
@@ -72,13 +72,14 @@
 
   public static final Logger LOG = LoggerFactory.getLogger(GeneratorJob.class);
 
-  public static class SelectorEntry
-  implements WritableComparable<SelectorEntry> {
+  public static class SelectorEntry implements
+      WritableComparable<SelectorEntry> {
 
     String url;
     float score;
 
-    public SelectorEntry() {  }
+    public SelectorEntry() {
+    }
 
     public SelectorEntry(String url, float score) {
       this.url = url;
@@ -107,7 +108,7 @@
     public int hashCode() {
       final int prime = 31;
       int result = 1;
-      result = prime * result +  url.hashCode();
+      result = prime * result + url.hashCode();
       result = prime * result + Float.floatToIntBits(score);
       return result;
     }
@@ -131,7 +132,7 @@
 
   static {
     WritableComparator.define(SelectorEntry.class,
-                              new SelectorEntryComparator());
+        new SelectorEntryComparator());
   }
 
   public GeneratorJob() {
@@ -142,15 +143,15 @@
     setConf(conf);
   }
 
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
     // map to inverted subset due for fetch, sort by score
-    Long topN = (Long)args.get(Nutch.ARG_TOPN);
-    Long curTime = (Long)args.get(Nutch.ARG_CURTIME);
+    Long topN = (Long) args.get(Nutch.ARG_TOPN);
+    Long curTime = (Long) args.get(Nutch.ARG_CURTIME);
     if (curTime == null) {
       curTime = System.currentTimeMillis();
     }
-    Boolean filter = (Boolean)args.get(Nutch.ARG_FILTER);
-    Boolean norm = (Boolean)args.get(Nutch.ARG_NORMALIZE);
+    Boolean filter = (Boolean) args.get(Nutch.ARG_FILTER);
+    Boolean norm = (Boolean) args.get(Nutch.ARG_NORMALIZE);
     // map to inverted subset due for fetch, sort by score
     getConf().setLong(GENERATOR_CUR_TIME, curTime);
     if (topN != null)
@@ -164,15 +165,20 @@
     getConf().setLong(Nutch.GENERATE_TIME_KEY, System.currentTimeMillis());
     if (norm != null)
       getConf().setBoolean(GENERATOR_NORMALISE, norm);
-    String mode = getConf().get(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
+    String mode = getConf().get(GENERATOR_COUNT_MODE,
+        GENERATOR_COUNT_VALUE_HOST);
     if (GENERATOR_COUNT_VALUE_HOST.equalsIgnoreCase(mode)) {
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     } else if (GENERATOR_COUNT_VALUE_DOMAIN.equalsIgnoreCase(mode)) {
-        getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_DOMAIN);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_DOMAIN);
     } else {
-      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode=" + GENERATOR_COUNT_VALUE_HOST);
+      LOG.warn("Unknown generator.max.count mode '" + mode + "', using mode="
+          + GENERATOR_COUNT_VALUE_HOST);
       getConf().set(GENERATOR_COUNT_MODE, GENERATOR_COUNT_VALUE_HOST);
-      getConf().set(URLPartitioner.PARTITION_MODE_KEY, URLPartitioner.PARTITION_MODE_HOST);
+      getConf().set(URLPartitioner.PARTITION_MODE_KEY,
+          URLPartitioner.PARTITION_MODE_HOST);
     }
     numJobs = 1;
     currentJobNum = 0;
@@ -185,11 +191,12 @@
     results.put(BATCH_ID, batchId);
     return results;
   }
-  
+
   private String batchId;
-  
+
   /**
    * Mark URLs ready for fetching.
+   * 
    * @throws ClassNotFoundException
    * @throws InterruptedException
    * */
@@ -202,12 +209,9 @@
     if (topN != Long.MAX_VALUE) {
       LOG.info("GeneratorJob: topN: " + topN);
     }
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_TOPN, topN,
-        Nutch.ARG_CURTIME, curTime,
-        Nutch.ARG_FILTER, filter,
-        Nutch.ARG_NORMALIZE, norm));
-    batchId =  getConf().get(BATCH_ID);
+    run(ToolUtil.toArgMap(Nutch.ARG_TOPN, topN, Nutch.ARG_CURTIME, curTime,
+        Nutch.ARG_FILTER, filter, Nutch.ARG_NORMALIZE, norm));
+    batchId = getConf().get(BATCH_ID);
     LOG.info("GeneratorJob: done");
     LOG.info("GeneratorJob: generated batch id: " + batchId);
     return batchId;
@@ -238,7 +242,8 @@
   }
 
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new GeneratorJob(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/crawl/URLPartitioner.java
===================================================================
--- src/java/org/apache/nutch/crawl/URLPartitioner.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/URLPartitioner.java	(working copy)
@@ -36,10 +36,10 @@
  * Partition urls by host, domain name or IP depending on the value of the
  * parameter 'partition.url.mode' which can be 'byHost', 'byDomain' or 'byIP'
  */
-public class URLPartitioner
-extends Partitioner<SelectorEntry, WebPage>
-implements Configurable {
-  private static final Logger LOG = LoggerFactory.getLogger(URLPartitioner.class);
+public class URLPartitioner extends Partitioner<SelectorEntry, WebPage>
+    implements Configurable {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(URLPartitioner.class);
 
   public static final String PARTITION_MODE_KEY = "partition.url.mode";
 
@@ -82,15 +82,16 @@
     URL url = null;
     int hashCode = urlString.hashCode();
     try {
-      urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_PARTITION);
+      urlString = normalizers.normalize(urlString,
+          URLNormalizers.SCOPE_PARTITION);
       url = new URL(urlString);
       hashCode = url.getHost().hashCode();
     } catch (MalformedURLException e) {
       LOG.warn("Malformed URL: '" + urlString + "'");
     }
 
-    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null) hashCode = URLUtil
-        .getDomainName(url).hashCode();
+    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null)
+      hashCode = URLUtil.getDomainName(url).hashCode();
     else if (mode.equals(PARTITION_MODE_IP)) {
       try {
         InetAddress address = InetAddress.getByName(url.getHost());
Index: src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(working copy)
@@ -30,11 +30,12 @@
  * If SYNC_DELTA property is true, then:
  * <ul>
  * <li>calculate a <code>delta = fetchTime - modifiedTime</code></li>
- * <li>try to synchronize with the time of change, by shifting the next fetchTime
- * by a fraction of the difference between the last modification time and the last
- * fetch time. I.e. the next fetch time will be set to
+ * <li>try to synchronize with the time of change, by shifting the next
+ * fetchTime by a fraction of the difference between the last modification time
+ * and the last fetch time. I.e. the next fetch time will be set to
  * <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li>
- * <li>if the adjusted fetch interval is bigger than the delta, then <code>fetchInterval = delta</code>.</li>
+ * <li>if the adjusted fetch interval is bigger than the delta, then
+ * <code>fetchInterval = delta</code>.</li>
  * </ul>
  * </li>
  * <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL
@@ -42,10 +43,13 @@
  * <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL
  * (default is 365 days).</li>
  * </ul>
- * <p>NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize the algorithm,
- * so that the fetch interval either increases or decreases infinitely, with little
- * relevance to the page changes. Please use {@link #main(String[])} method to
- * test the values before applying them in a production system.</p>
+ * <p>
+ * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
+ * the algorithm, so that the fetch interval either increases or decreases
+ * infinitely, with little relevance to the page changes. Please use
+ * {@link #main(String[])} method to test the values before applying them in a
+ * production system.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
@@ -58,54 +62,59 @@
   private int MAX_INTERVAL;
 
   private int MIN_INTERVAL;
-  
+
   private boolean SYNC_DELTA;
 
   private double SYNC_DELTA_RATE;
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
     DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
     MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60);
-    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval", SECONDS_PER_DAY * 365 ); // 1 year
+    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval",
+        SECONDS_PER_DAY * 365); // 1 year
     SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
-    SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
+    SYNC_DELTA_RATE = conf.getFloat(
+        "db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
   }
 
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     long refTime = fetchTime;
-    if (modifiedTime <= 0) modifiedTime = fetchTime;
+    if (modifiedTime <= 0)
+      modifiedTime = fetchTime;
     int interval = page.getFetchInterval();
     switch (state) {
-      case FetchSchedule.STATUS_MODIFIED:
-        interval *= (1.0f - DEC_RATE);
-        break;
-      case FetchSchedule.STATUS_NOTMODIFIED:
-        interval *= (1.0f + INC_RATE);
-        break;
-      case FetchSchedule.STATUS_UNKNOWN:
-        break;
+    case FetchSchedule.STATUS_MODIFIED:
+      interval *= (1.0f - DEC_RATE);
+      break;
+    case FetchSchedule.STATUS_NOTMODIFIED:
+      interval *= (1.0f + INC_RATE);
+      break;
+    case FetchSchedule.STATUS_UNKNOWN:
+      break;
     }
     page.setFetchInterval(interval);
     if (SYNC_DELTA) {
       // try to synchronize with the time of change
       // TODO: different from normal class (is delta in seconds)?
-      int delta = (int) ((fetchTime - modifiedTime) / 1000L) ;
-      if (delta > interval) interval = delta;
+      int delta = (int) ((fetchTime - modifiedTime) / 1000L);
+      if (delta > interval)
+        interval = delta;
       refTime = fetchTime - Math.round(delta * SYNC_DELTA_RATE);
     }
-    if (interval < MIN_INTERVAL) interval = MIN_INTERVAL;
-    if (interval > MAX_INTERVAL) interval = MAX_INTERVAL;
+    if (interval < MIN_INTERVAL)
+      interval = MIN_INTERVAL;
+    if (interval > MAX_INTERVAL)
+      interval = MAX_INTERVAL;
     page.setFetchTime(refTime + interval * 1000L);
     page.setModifiedTime(modifiedTime);
   }
 
-
 }
Index: src/java/org/apache/nutch/crawl/TextProfileSignature.java
===================================================================
--- src/java/org/apache/nutch/crawl/TextProfileSignature.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/TextProfileSignature.java	(working copy)
@@ -29,28 +29,33 @@
 import org.apache.nutch.storage.WebPage;
 
 /**
- * <p>An implementation of a page signature. It calculates an MD5 hash
- * of a plain text "profile" of a page. In case there is no text, it
- * calculates a hash using the {@link MD5Signature}.</p>
- * <p>The algorithm to calculate a page "profile" takes the plain text version of
- * a page and performs the following steps:
+ * <p>
+ * An implementation of a page signature. It calculates an MD5 hash of a plain
+ * text "profile" of a page. In case there is no text, it calculates a hash
+ * using the {@link MD5Signature}.
+ * </p>
+ * <p>
+ * The algorithm to calculate a page "profile" takes the plain text version of a
+ * page and performs the following steps:
  * <ul>
  * <li>remove all characters except letters and digits, and bring all characters
  * to lower case,</li>
  * <li>split the text into tokens (all consecutive non-whitespace characters),</li>
- * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
+ * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2
+ * characters),</li>
  * <li>sort the list of tokens by decreasing frequency,</li>
- * <li>round down the counts of tokens to the nearest multiple of QUANT
- * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
- * by default, and <code>maxFreq</code> is the maximum token frequency). If
- * <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2 (which
- * means that tokens with frequency 1 are always discarded).</li>
- * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
- * <li>create a list of tokens and their quantized frequency, separated by spaces,
- * in the order of decreasing frequency.</li>
+ * <li>round down the counts of tokens to the nearest multiple of QUANT (
+ * <code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is
+ * 0.01f by default, and <code>maxFreq</code> is the maximum token frequency).
+ * If <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2
+ * (which means that tokens with frequency 1 are always discarded).</li>
+ * <li>tokens, which frequency after quantization falls below QUANT, are
+ * discarded.</li>
+ * <li>create a list of tokens and their quantized frequency, separated by
+ * spaces, in the order of decreasing frequency.</li>
  * </ul>
  * This list is then submitted to an MD5 hash calculation.
- *
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class TextProfileSignature extends Signature {
@@ -65,12 +70,16 @@
 
   @Override
   public byte[] calculate(WebPage page) {
-    int MIN_TOKEN_LEN = getConf().getInt("db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat("db.signature.text_profile.quant_rate", 0.01f);
+    int MIN_TOKEN_LEN = getConf().getInt(
+        "db.signature.text_profile.min_token_len", 2);
+    float QUANT_RATE = getConf().getFloat(
+        "db.signature.text_profile.quant_rate", 0.01f);
     HashMap<String, Token> tokens = new HashMap<String, Token>();
     String text = null;
-    if (page.getText() != null) text = page.getText().toString();
-    if (text == null || text.length() == 0) return fallback.calculate(page);
+    if (page.getText() != null)
+      text = page.getText().toString();
+    if (text == null || text.length() == 0)
+      return fallback.calculate(page);
     StringBuffer curToken = new StringBuffer();
     int maxFreq = 0;
     for (int i = 0; i < text.length(); i++) {
@@ -88,7 +97,8 @@
               tokens.put(s, tok);
             }
             tok.cnt++;
-            if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+            if (tok.cnt > maxFreq)
+              maxFreq = tok.cnt;
           }
           curToken.setLength(0);
         }
@@ -104,17 +114,20 @@
         tokens.put(s, tok);
       }
       tok.cnt++;
-      if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+      if (tok.cnt > maxFreq)
+        maxFreq = tok.cnt;
     }
     Iterator<Token> it = tokens.values().iterator();
     ArrayList<Token> profile = new ArrayList<Token>();
     // calculate the QUANT value
     int QUANT = Math.round(maxFreq * QUANT_RATE);
     if (QUANT < 2) {
-      if (maxFreq > 1) QUANT = 2;
-      else QUANT = 1;
+      if (maxFreq > 1)
+        QUANT = 2;
+      else
+        QUANT = 1;
     }
-    while(it.hasNext()) {
+    while (it.hasNext()) {
       Token t = it.next();
       // round down to the nearest QUANT
       t.cnt = (t.cnt / QUANT) * QUANT;
@@ -129,7 +142,8 @@
     it = profile.iterator();
     while (it.hasNext()) {
       Token t = it.next();
-      if (newText.length() > 0) newText.append("\n");
+      if (newText.length() > 0)
+        newText.append("\n");
       newText.append(t.toString());
     }
     return MD5Hash.digest(newText.toString()).getDigest();
Index: src/java/org/apache/nutch/crawl/SignatureComparator.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureComparator.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/SignatureComparator.java	(working copy)
@@ -19,19 +19,26 @@
 
 public class SignatureComparator {
   public static int compare(byte[] data1, byte[] data2) {
-    if (data1 == null && data2 == null) return 0;
-    if (data1 == null) return -1;
-    if (data2 == null) return 1;
-    return _compare(data1, 0, data1.length, data2, 0, data2.length);  }
-  
-  
-  public static int _compare(byte[] data1, int s1, int l1, byte[] data2, int s2, int l2) {
-    if (l2 > l1) return -1;
-    if (l2 < l1) return 1;
+    if (data1 == null && data2 == null)
+      return 0;
+    if (data1 == null)
+      return -1;
+    if (data2 == null)
+      return 1;
+    return _compare(data1, 0, data1.length, data2, 0, data2.length);
+  }
+
+  public static int _compare(byte[] data1, int s1, int l1, byte[] data2,
+      int s2, int l2) {
+    if (l2 > l1)
+      return -1;
+    if (l2 < l1)
+      return 1;
     int res = 0;
     for (int i = 0; i < l1; i++) {
       res = (data1[s1 + i] - data2[s2 + i]);
-      if (res != 0) return res;
+      if (res != 0)
+        return res;
     }
     return 0;
   }
Index: src/java/org/apache/nutch/crawl/SignatureFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureFactory.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/SignatureFactory.java	(working copy)
@@ -28,26 +28,28 @@
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current Configuration configuration. This newly created instance is cached in the
- * Configuration instance, so that it could be later retrieved.
- *
+ * current Configuration configuration. This newly created instance is cached in
+ * the Configuration instance, so that it could be later retrieved.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class SignatureFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(SignatureFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SignatureFactory.class);
 
-  private SignatureFactory() {}                   // no public ctor
+  private SignatureFactory() {
+  } // no public ctor
 
   /** Return the default Signature implementation. */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    Signature impl = (Signature)objectCache.getObject(clazz);
+    Signature impl = (Signature) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using Signature impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (Signature)implClass.newInstance();
+        impl = (Signature) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(working copy)
@@ -29,13 +29,13 @@
 /**
  * This class provides common methods for implementations of
  * {@link FetchSchedule}.
- *
+ * 
  * @author Andrzej Bialecki
  */
-public abstract class AbstractFetchSchedule
-extends Configured
-implements FetchSchedule {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractFetchSchedule.class);
+public abstract class AbstractFetchSchedule extends Configured implements
+    FetchSchedule {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AbstractFetchSchedule.class);
 
   protected int defaultInterval;
   protected int maxInterval;
@@ -59,24 +59,28 @@
   @Override
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0);
     defaultInterval = conf.getInt("db.fetch.interval.default", 0);
-    if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
+    if (oldDefaultInterval > 0 && defaultInterval == 0)
+      defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
     int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
-    if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
+    if (oldMaxInterval > 0 && maxInterval == 0)
+      maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
     LOG.info("defaultInterval=" + defaultInterval);
     LOG.info("maxInterval=" + maxInterval);
   }
-  
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation sets the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
-   *
-   * @param url URL of the page.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
+   * 
+   * @param url
+   *          URL of the page.
    * @param page
    */
   @Override
@@ -88,60 +92,70 @@
 
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page. NOTE: this implementation resets the
-   * retry counter - extending classes should call super.setFetchSchedule() to
+   * successfully fetched page. NOTE: this implementation resets the retry
+   * counter - extending classes should call super.setFetchSchedule() to
    * preserve this behavior.
    */
   @Override
-  public void setFetchSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+  public void setFetchSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
     page.setRetriesSinceFetch(0);
   }
 
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: the returned value may be a different instance than the
+   *         <code>page</code> argument, but implementations should make
+   *         sure that it contains at least all of the information from the
+   *         original <code>page</code> argument, so that no data is lost
+   *         when the adjusted page information replaces the original
+   *         entry.
    */
   @Override
-  public void setPageGoneSchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+  public void setPageGoneSchedule(String url, WebPage page, long prevFetchTime,
+      long prevModifiedTime, long fetchTime) {
     // no page is truly GONE ... just increase the interval by 50%
     // and try much later.
     int newFetchInterval = (int) (page.getFetchInterval() * 1.5f);
     page.setFetchInterval(newFetchInterval);
     page.setFetchTime(fetchTime + newFetchInterval * 1000L);
-    if (maxInterval < newFetchInterval) forceRefetch(url, page, false);
+    if (maxInterval < newFetchInterval)
+      forceRefetch(url, page, false);
   }
 
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases
-   * the retry counter.
-   * @param url URL of the page
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    */
   @Override
   public void setPageRetrySchedule(String url, WebPage page,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
     page.setFetchTime(fetchTime + SECONDS_PER_DAY * 1000L);
     page.setRetriesSinceFetch(page.getRetriesSinceFetch() + 1);
   }
 
   /**
    * This method return the last fetch time of the CrawlDatum
+   * 
    * @return the date as a long.
    */
   @Override
@@ -150,20 +164,24 @@
   }
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
+   * This method provides information whether the page is suitable for selection
+   * in the current fetchlist. NOTE: a true return value does not guarantee that
+   * the page will be fetched, it just allows it to be included in the further
+   * selection process based on scores. The default implementation checks
+   * <code>fetchTime</code>; if it is higher than the reference time
+   * <code>curTime</code> it returns false, and true otherwise. It will
+   * also check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   @Override
   public boolean shouldFetch(String url, WebPage page, long curTime) {
@@ -183,11 +201,14 @@
   /**
    * This method resets fetchTime, fetchInterval, modifiedTime,
    * retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
+   * 
+   * @param url
+   *          URL of the page
    * @param page
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
   @Override
   public void forceRefetch(String url, WebPage page, boolean asap) {
@@ -198,10 +219,10 @@
     page.setRetriesSinceFetch(0);
     // TODO: row.setSignature(null) ??
     page.setModifiedTime(0L);
-    if (asap) page.setFetchTime(System.currentTimeMillis());
+    if (asap)
+      page.setFetchTime(System.currentTimeMillis());
   }
 
-
   public Set<WebPage.Field> getFields() {
     return FIELDS;
   }
Index: src/java/org/apache/nutch/crawl/GeneratorMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorMapper.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/GeneratorMapper.java	(working copy)
@@ -30,8 +30,8 @@
 import org.apache.nutch.util.TableUtil;
 import org.apache.gora.mapreduce.GoraMapper;
 
-public class GeneratorMapper
-extends GoraMapper<String, WebPage, SelectorEntry, WebPage> {
+public class GeneratorMapper extends
+    GoraMapper<String, WebPage, SelectorEntry, WebPage> {
 
   private URLFilters filters;
   private URLNormalizers normalizers;
@@ -42,8 +42,8 @@
   private long curTime;
 
   @Override
-  public void map(String reversedUrl, WebPage page,
-      Context context) throws IOException, InterruptedException {
+  public void map(String reversedUrl, WebPage page, Context context)
+      throws IOException, InterruptedException {
     String url = TableUtil.unreverseUrl(reversedUrl);
 
     if (Mark.GENERATE_MARK.checkMark(page) != null) {
@@ -55,20 +55,22 @@
     // If filtering is on don't generate URLs that don't pass URLFilters
     try {
       if (normalise) {
-        url = normalizers.normalize(url, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+        url = normalizers.normalize(url,
+            URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
       }
       if (filter && filters.filter(url) == null)
         return;
     } catch (URLFilterException e) {
-      GeneratorJob.LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
+      GeneratorJob.LOG.warn("Couldn't filter url: " + url + " ("
+          + e.getMessage() + ")");
       return;
     }
 
     // check fetch schedule
     if (!schedule.shouldFetch(url, page, curTime)) {
       if (GeneratorJob.LOG.isDebugEnabled()) {
-        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url + "', fetchTime=" +
-            page.getFetchTime() + ", curTime=" + curTime);
+        GeneratorJob.LOG.debug("-shouldFetch rejected '" + url
+            + "', fetchTime=" + page.getFetchTime() + ", curTime=" + curTime);
       }
       return;
     }
@@ -76,7 +78,7 @@
     try {
       score = scoringFilters.generatorSortValue(url, page, score);
     } catch (ScoringFilterException e) {
-      //ignore
+      // ignore
     }
     SelectorEntry entry = new SelectorEntry(url, score);
     context.write(entry, page);
@@ -86,10 +88,10 @@
   public void setup(Context context) {
     Configuration conf = context.getConfiguration();
     filters = new URLFilters(conf);
-    curTime =
-      conf.getLong(GeneratorJob.GENERATOR_CUR_TIME, System.currentTimeMillis());
-    normalizers =
-      new URLNormalizers(conf, URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+    curTime = conf.getLong(GeneratorJob.GENERATOR_CUR_TIME,
+        System.currentTimeMillis());
+    normalizers = new URLNormalizers(conf,
+        URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
     filter = conf.getBoolean(GeneratorJob.GENERATOR_FILTER, true);
     normalise = conf.getBoolean(GeneratorJob.GENERATOR_NORMALISE, true);
     schedule = FetchScheduleFactory.getFetchSchedule(conf);
Index: src/java/org/apache/nutch/crawl/DbUpdateMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/DbUpdateMapper.java	(working copy)
@@ -33,8 +33,8 @@
 import org.apache.nutch.util.WebPageWritable;
 import org.apache.gora.mapreduce.GoraMapper;
 
-public class DbUpdateMapper
-extends GoraMapper<String, WebPage, String, NutchWritable> {
+public class DbUpdateMapper extends
+    GoraMapper<String, WebPage, String, NutchWritable> {
   public static final Logger LOG = DbUpdaterJob.LOG;
 
   private ScoringFilters scoringFilters;
@@ -43,7 +43,7 @@
 
   @Override
   public void map(String key, WebPage page, Context context)
-  throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
 
     String url = TableUtil.unreverseUrl(key);
 
@@ -51,20 +51,24 @@
     Map<Utf8, Utf8> outlinks = page.getOutlinks();
     if (outlinks != null) {
       for (Entry<Utf8, Utf8> e : outlinks.entrySet()) {
-        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), e.getValue().toString()));
+        scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(), e.getValue()
+            .toString()));
       }
     }
 
     // TODO: Outlink filtering (i.e. "only keep the first n outlinks")
     try {
-      scoringFilters.distributeScoreToOutlinks(url, page, scoreData, (outlinks == null ? 0 : outlinks.size()));
+      scoringFilters.distributeScoreToOutlinks(url, page, scoreData,
+          (outlinks == null ? 0 : outlinks.size()));
     } catch (ScoringFilterException e) {
-      LOG.warn("Distributing score failed for URL: " + key +
-          " exception:" + StringUtils.stringifyException(e));
+      LOG.warn("Distributing score failed for URL: " + key + " exception:"
+          + StringUtils.stringifyException(e));
     }
 
-    context.write(key,
-        new NutchWritable(new WebPageWritable(context.getConfiguration(), page)));
+    context
+        .write(key,
+            new NutchWritable(new WebPageWritable(context.getConfiguration(),
+                page)));
 
     for (ScoreDatum scoreDatum : scoreData) {
       String reversedOut = TableUtil.reverseUrl(scoreDatum.getUrl());
Index: src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(revision 1188268)
+++ src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(working copy)
@@ -25,20 +25,23 @@
 /** Creates and caches a {@link FetchSchedule} implementation. */
 public class FetchScheduleFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(FetchScheduleFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FetchScheduleFactory.class);
 
-  private FetchScheduleFactory() {}                   // no public ctor
+  private FetchScheduleFactory() {
+  } // no public ctor
 
   /** Return the FetchSchedule implementation. */
   public static FetchSchedule getFetchSchedule(Configuration conf) {
-    String clazz = conf.get("db.fetch.schedule.class", DefaultFetchSchedule.class.getName());
+    String clazz = conf.get("db.fetch.schedule.class",
+        DefaultFetchSchedule.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    FetchSchedule impl = (FetchSchedule)objectCache.getObject(clazz);
+    FetchSchedule impl = (FetchSchedule) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using FetchSchedule impl: " + clazz);
         Class<?> implClass = Class.forName(clazz);
-        impl = (FetchSchedule)implClass.newInstance();
+        impl = (FetchSchedule) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/api/AdminResource.java
===================================================================
--- src/java/org/apache/nutch/api/AdminResource.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/AdminResource.java	(working copy)
@@ -26,19 +26,20 @@
 import org.slf4j.LoggerFactory;
 
 public class AdminResource extends ServerResource {
-  private static final Logger LOG = LoggerFactory.getLogger(AdminResource.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AdminResource.class);
 
   public static final String PATH = "admin";
   public static final String DESCR = "Service admin actions";
 
   @Get("json")
   public Object execute() throws Exception {
-    String cmd = (String)getRequestAttributes().get(Params.CMD);
+    String cmd = (String) getRequestAttributes().get(Params.CMD);
     if ("status".equalsIgnoreCase(cmd)) {
       // status
-      Map<String,Object> res = new HashMap<String,Object>();
+      Map<String, Object> res = new HashMap<String, Object>();
       res.put("started", NutchApp.started);
-      Map<String,Object> jobs = new HashMap<String,Object>();      
+      Map<String, Object> jobs = new HashMap<String, Object>();
       jobs.put("all", NutchApp.jobMgr.list(null, State.ANY));
       jobs.put("running", NutchApp.jobMgr.list(null, State.RUNNING));
       res.put("jobs", jobs);
@@ -55,7 +56,7 @@
              LOG.info("Service stopped.");
            } catch (Exception e) {
              LOG.error("Error stopping", e);
-            };
+            }
           }
         };
         t.setDaemon(true);
Index: src/java/org/apache/nutch/api/ConfManager.java
===================================================================
--- src/java/org/apache/nutch/api/ConfManager.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/ConfManager.java	(working copy)
@@ -24,14 +24,16 @@
 public interface ConfManager {
 
   public Set<String> list() throws Exception;
-  
+
   public Configuration get(String confId);
-  
-  public Map<String,String> getAsMap(String confId);
-  
+
+  public Map<String, String> getAsMap(String confId);
+
   public void delete(String confId);
-  
-  public void create(String confId, Map<String,String> props, boolean force) throws Exception;
-  
-  public void setProperty(String confId, String propName, String propValue) throws Exception;
+
+  public void create(String confId, Map<String, String> props, boolean force)
+      throws Exception;
+
+  public void setProperty(String confId, String propName, String propValue)
+      throws Exception;
 }
Index: src/java/org/apache/nutch/api/impl/RAMConfManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMConfManager.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/impl/RAMConfManager.java	(working copy)
@@ -29,54 +29,57 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 public class RAMConfManager implements ConfManager {
-  Map<String,Configuration> configs = new HashMap<String,Configuration>();
-  
+  Map<String, Configuration> configs = new HashMap<String, Configuration>();
+
   public RAMConfManager() {
     configs.put(ConfResource.DEFAULT_CONF, NutchConfiguration.create());
   }
-  
+
   public Set<String> list() {
     return configs.keySet();
   }
-  
+
   public Configuration get(String confId) {
     return configs.get(confId);
   }
-  
-  public Map<String,String> getAsMap(String confId) {
+
+  public Map<String, String> getAsMap(String confId) {
     Configuration cfg = configs.get(confId);
-    if (cfg == null) return null;
-    Iterator<Entry<String,String>> it = cfg.iterator();
-    TreeMap<String,String> res = new TreeMap<String,String>();
+    if (cfg == null)
+      return null;
+    Iterator<Entry<String, String>> it = cfg.iterator();
+    TreeMap<String, String> res = new TreeMap<String, String>();
     while (it.hasNext()) {
-      Entry<String,String> e = it.next();
+      Entry<String, String> e = it.next();
       res.put(e.getKey(), e.getValue());
     }
     return res;
   }
-  
-  public void create(String confId, Map<String,String> props, boolean force) throws Exception {
+
+  public void create(String confId, Map<String, String> props, boolean force)
+      throws Exception {
     if (configs.containsKey(confId) && !force) {
       throw new Exception("Config name '" + confId + "' already exists.");
     }
     Configuration conf = NutchConfiguration.create();
     // apply overrides
     if (props != null) {
-      for (Entry<String,String> e : props.entrySet()) {
+      for (Entry<String, String> e : props.entrySet()) {
         conf.set(e.getKey(), e.getValue());
       }
     }
     configs.put(confId, conf);
   }
-  
-  public void setProperty(String confId, String propName, String propValue) throws Exception {
+
+  public void setProperty(String confId, String propName, String propValue)
+      throws Exception {
     if (!configs.containsKey(confId)) {
       throw new Exception("Unknown configId '" + confId + "'");
     }
     Configuration conf = configs.get(confId);
     conf.set(propName, propValue);
   }
-  
+
   public void delete(String confId) {
     configs.remove(confId);
   }
Index: src/java/org/apache/nutch/api/impl/RAMJobManager.java
===================================================================
--- src/java/org/apache/nutch/api/impl/RAMJobManager.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/impl/RAMJobManager.java	(working copy)
@@ -51,7 +51,7 @@
   int CAPACITY = 100;
   ThreadPoolExecutor exec = new MyPoolExecutor(10, CAPACITY, 1, TimeUnit.HOURS,
       new ArrayBlockingQueue<Runnable>(CAPACITY));
-  
+
   private class MyPoolExecutor extends ThreadPoolExecutor {
 
     public MyPoolExecutor(int corePoolSize, int maximumPoolSize,
@@ -63,19 +63,19 @@
     protected void beforeExecute(Thread t, Runnable r) {
       // TODO Auto-generated method stub
       super.beforeExecute(t, r);
-      synchronized(jobRunning) {
-        jobRunning.offer(((JobWorker)r).jobStatus);
+      synchronized (jobRunning) {
+        jobRunning.offer(((JobWorker) r).jobStatus);
       }
     }
 
     @Override
     protected void afterExecute(Runnable r, Throwable t) {
       super.afterExecute(r, t);
-      synchronized(jobRunning) {
-        jobRunning.remove(((JobWorker)r).jobStatus);
+      synchronized (jobRunning) {
+        jobRunning.remove(((JobWorker) r).jobStatus);
       }
-      JobStatus status = ((JobWorker)r).jobStatus;
-      synchronized(jobHistory) {
+      JobStatus status = ((JobWorker) r).jobStatus;
+      synchronized (jobHistory) {
         if (!jobHistory.offer(status)) {
           jobHistory.poll();
           jobHistory.add(status);
@@ -83,12 +83,14 @@
       }
     }
   }
-  
-  ArrayBlockingQueue<JobStatus> jobHistory = new ArrayBlockingQueue<JobStatus>(CAPACITY);
-  ArrayBlockingQueue<JobStatus> jobRunning = new ArrayBlockingQueue<JobStatus>(CAPACITY);
-  
-  private static Map<JobType,Class<? extends NutchTool>> typeToClass = new HashMap<JobType,Class<? extends NutchTool>>();
-  
+
+  ArrayBlockingQueue<JobStatus> jobHistory = new ArrayBlockingQueue<JobStatus>(
+      CAPACITY);
+  ArrayBlockingQueue<JobStatus> jobRunning = new ArrayBlockingQueue<JobStatus>(
+      CAPACITY);
+
+  private static Map<JobType, Class<? extends NutchTool>> typeToClass = new HashMap<JobType, Class<? extends NutchTool>>();
+
   static {
     typeToClass.put(JobType.FETCH, FetcherJob.class);
     typeToClass.put(JobType.GENERATE, GeneratorJob.class);
@@ -101,19 +103,20 @@
   }
 
   private void addFinishedStatus(JobStatus status) {
-    synchronized(jobHistory) {
+    synchronized (jobHistory) {
       if (!jobHistory.offer(status)) {
         jobHistory.poll();
         jobHistory.add(status);
       }
     }
   }
-  
+
   @Override
   public List<JobStatus> list(String crawlId, State state) throws Exception {
     List<JobStatus> res = new ArrayList<JobStatus>();
-    if (state == null) state = State.ANY;
-    switch(state) {
+    if (state == null)
+      state = State.ANY;
+    switch (state) {
     case ANY:
       res.addAll(jobHistory);
       /* FALLTHROUGH */
@@ -144,8 +147,9 @@
 
   @Override
   public String create(String crawlId, JobType type, String confId,
-      Map<String,Object> args) throws Exception {
-    if (args == null) args = Collections.emptyMap();
+      Map<String, Object> args) throws Exception {
+    if (args == null)
+      args = Collections.emptyMap();
     JobWorker worker = new JobWorker(crawlId, type, confId, args);
     String id = worker.getId();
     exec.execute(worker);
@@ -179,16 +183,17 @@
     }
     return false;
   }
-  
+
   private class JobWorker implements Runnable {
     String id;
     JobType type;
     String confId;
     NutchTool tool;
-    Map<String,Object> args;
+    Map<String, Object> args;
     JobStatus jobStatus;
-    
-    JobWorker(String crawlId, JobType type, String confId, Map<String,Object> args) throws Exception {
+
+    JobWorker(String crawlId, JobType type, String confId,
+        Map<String, Object> args) throws Exception {
       if (confId == null) {
         confId = ConfResource.DEFAULT_CONF;
       }
@@ -208,30 +213,31 @@
       }
       Class<? extends NutchTool> clz = typeToClass.get(type);
       if (clz == null) {
-        clz = (Class<? extends NutchTool>)Class.forName((String)args.get(Nutch.ARG_CLASS));
+        clz = (Class<? extends NutchTool>) Class.forName((String) args
+            .get(Nutch.ARG_CLASS));
       }
       tool = ReflectionUtils.newInstance(clz, conf);
       jobStatus = new JobStatus(id, type, confId, args, State.IDLE, "idle");
       jobStatus.tool = tool;
     }
-    
+
     public String getId() {
       return id;
     }
-    
+
     public float getProgress() {
       return tool.getProgress();
     }
-    
+
     public State getState() {
       return jobStatus.state;
     }
-    
-    public Map<String,Object> getResult() {
+
+    public Map<String, Object> getResult() {
       return jobStatus.result;
     }
-    
-    public Map<String,Object> getStatus() {
+
+    public Map<String, Object> getStatus() {
       return tool.getStatus();
     }
 
Index: src/java/org/apache/nutch/api/JobResource.java
===================================================================
--- src/java/org/apache/nutch/api/JobResource.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/JobResource.java	(working copy)
@@ -29,23 +29,26 @@
 public class JobResource extends ServerResource {
   public static final String PATH = "jobs";
   public static final String DESCR = "Job manager";
-  
+
   @Get("json")
   public Object retrieve() throws Exception {
     String cid = null;
     String jid = null;
     String cmd = null;
     Form form = getQuery();
-    cid = (String)getRequestAttributes().get(Params.CRAWL_ID);
-    jid = (String)getRequestAttributes().get(Params.JOB_ID);
-    cmd = (String)getRequestAttributes().get(Params.CMD);
+    cid = (String) getRequestAttributes().get(Params.CRAWL_ID);
+    jid = (String) getRequestAttributes().get(Params.JOB_ID);
+    cmd = (String) getRequestAttributes().get(Params.CMD);
     if (form != null) {
       String v = form.getFirstValue(Params.CRAWL_ID);
-      if (v != null) cid = v;
+      if (v != null)
+        cid = v;
       v = form.getFirstValue(Params.JOB_ID);
-      if (v != null) jid = v;
+      if (v != null)
+        jid = v;
       v = form.getFirstValue(Params.CMD);
-      if (v != null) cmd = v;
+      if (v != null)
+        cmd = v;
     }
     if (jid == null) {
       return NutchApp.jobMgr.list(cid, State.ANY);
@@ -65,20 +68,17 @@
       }
     }
   }
-  
+
   /*
-   * String crawlId
-   * String type
-   * String confId
-   * Object[] args
+   * String crawlId String type String confId Object[] args
    */
   @Put("json")
-  public Object create(Map<String,Object> args) throws Exception {
-    String cid = (String)args.get(Params.CRAWL_ID);
-    String typeString = (String)args.get(Params.JOB_TYPE);
+  public Object create(Map<String, Object> args) throws Exception {
+    String cid = (String) args.get(Params.CRAWL_ID);
+    String typeString = (String) args.get(Params.JOB_TYPE);
     JobType type = JobType.valueOf(typeString.toUpperCase());
-    String confId = (String)args.get(Params.CONF_ID);
-    Map<String,Object> cmdArgs = (Map<String,Object>)args.get(Params.ARGS);
+    String confId = (String) args.get(Params.CONF_ID);
+    Map<String, Object> cmdArgs = (Map<String, Object>) args.get(Params.ARGS);
     String jobId = NutchApp.jobMgr.create(cid, type, confId, cmdArgs);
     return jobId;
   }
Index: src/java/org/apache/nutch/api/NutchApp.java
===================================================================
--- src/java/org/apache/nutch/api/NutchApp.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/NutchApp.java	(working copy)
@@ -27,40 +27,40 @@
   public static JobManager jobMgr;
   public static NutchServer server;
   public static long started;
-  
+
   static {
     confMgr = new RAMConfManager();
     jobMgr = new RAMJobManager();
   }
-  
+
   /**
    * Creates a root Restlet that will receive all incoming calls.
    */
   @Override
   public synchronized Restlet createInboundRoot() {
-      getTunnelService().setEnabled(true);
-      getTunnelService().setExtensionsTunnel(true);
-      Router router = new Router(getContext());
-      //router.getLogger().setLevel(Level.FINEST);
-      // configs
-      router.attach("/", APIInfoResource.class);
-      router.attach("/" + AdminResource.PATH, AdminResource.class);
-      router.attach("/" + AdminResource.PATH + "/{" + Params.CMD + 
-          "}", AdminResource.class);
-      router.attach("/" + ConfResource.PATH, ConfResource.class);
-      router.attach("/" + ConfResource.PATH + "/{"+ Params.CONF_ID +
-          "}", ConfResource.class);
-      router.attach("/" + ConfResource.PATH + "/{" + Params.CONF_ID +
-          "}/{" + Params.PROP_NAME + "}", ConfResource.class);
-      // db
-      router.attach("/" + DbResource.PATH, DbResource.class);
-      // jobs
-      router.attach("/" + JobResource.PATH, JobResource.class);
-      router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}",
-          JobResource.class);
-      router.attach("/" + JobResource.PATH, JobResource.class);
-      router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}/{" +
-          Params.CMD + "}", JobResource.class);
-      return router;
+    getTunnelService().setEnabled(true);
+    getTunnelService().setExtensionsTunnel(true);
+    Router router = new Router(getContext());
+    // router.getLogger().setLevel(Level.FINEST);
+    // configs
+    router.attach("/", APIInfoResource.class);
+    router.attach("/" + AdminResource.PATH, AdminResource.class);
+    router.attach("/" + AdminResource.PATH + "/{" + Params.CMD + "}",
+        AdminResource.class);
+    router.attach("/" + ConfResource.PATH, ConfResource.class);
+    router.attach("/" + ConfResource.PATH + "/{" + Params.CONF_ID + "}",
+        ConfResource.class);
+    router.attach("/" + ConfResource.PATH + "/{" + Params.CONF_ID + "}/{"
+        + Params.PROP_NAME + "}", ConfResource.class);
+    // db
+    router.attach("/" + DbResource.PATH, DbResource.class);
+    // jobs
+    router.attach("/" + JobResource.PATH, JobResource.class);
+    router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}",
+        JobResource.class);
+    router.attach("/" + JobResource.PATH, JobResource.class);
+    router.attach("/" + JobResource.PATH + "/{" + Params.JOB_ID + "}/{"
+        + Params.CMD + "}", JobResource.class);
+    return router;
   }
 }
Index: src/java/org/apache/nutch/api/Params.java
===================================================================
--- src/java/org/apache/nutch/api/Params.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/Params.java	(working copy)
@@ -17,7 +17,7 @@
 package org.apache.nutch.api;
 
 public interface Params {
-  
+
   public static final String CONF_ID = "conf";
   public static final String PROP_NAME = "prop";
   public static final String PROP_VALUE = "value";
@@ -28,8 +28,7 @@
   public static final String ARGS = "args";
   public static final String CMD = "cmd";
   public static final String FORCE = "force";
-  
-  
+
   public static final String JOB_CMD_STOP = "stop";
   public static final String JOB_CMD_ABORT = "abort";
   public static final String JOB_CMD_GET = "get";
Index: src/java/org/apache/nutch/api/JobManager.java
===================================================================
--- src/java/org/apache/nutch/api/JobManager.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/JobManager.java	(working copy)
@@ -23,17 +23,19 @@
 import org.apache.nutch.api.JobStatus.State;
 
 public interface JobManager {
-  
-  public static enum JobType {INJECT, GENERATE, FETCH, PARSE, UPDATEDB, INDEX, CRAWL, READDB, CLASS};
 
+  public static enum JobType {
+    INJECT, GENERATE, FETCH, PARSE, UPDATEDB, INDEX, CRAWL, READDB, CLASS
+  };
+
   public List<JobStatus> list(String crawlId, State state) throws Exception;
-  
+
   public JobStatus get(String crawlId, String id) throws Exception;
-  
+
   public String create(String crawlId, JobType type, String confId,
-      Map<String,Object> args) throws Exception;
-  
+      Map<String, Object> args) throws Exception;
+
   public boolean abort(String crawlId, String id) throws Exception;
-  
+
   public boolean stop(String crawlId, String id) throws Exception;
 }
Index: src/java/org/apache/nutch/api/DbReader.java
===================================================================
--- src/java/org/apache/nutch/api/DbReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/DbReader.java	(working copy)
@@ -49,9 +49,9 @@
 public class DbReader {
   private static final Logger LOG = LoggerFactory.getLogger(DbReader.class);
 
-  DataStore<String,WebPage> store;
+  DataStore<String, WebPage> store;
   Configuration conf;
-  
+
   public DbReader(Configuration conf, String crawlId) {
     conf = new Configuration(conf);
     if (crawlId != null) {
@@ -64,17 +64,17 @@
       store = null;
     }
   }
-  
-  public Iterator<Map<String,Object>> iterator(String[] fields, String startKey, String endKey,
-      String batchId) throws Exception {
-    Query<String,WebPage> q = store.newQuery();
+
+  public Iterator<Map<String, Object>> iterator(String[] fields,
+      String startKey, String endKey, String batchId) throws Exception {
+    Query<String, WebPage> q = store.newQuery();
     String[] qFields = fields;
     if (fields != null) {
       HashSet<String> flds = new HashSet<String>(Arrays.asList(fields));
       // remove "url"
       flds.remove("url");
       if (flds.size() > 0) {
-        qFields = (String[])flds.toArray(new String[flds.size()]);
+        qFields = (String[]) flds.toArray(new String[flds.size()]);
       } else {
         qFields = null;
       }
@@ -86,26 +86,27 @@
         q.setEndKey(endKey);
       }
     }
-    Result<String,WebPage> res = store.execute(q);
+    Result<String, WebPage> res = store.execute(q);
     // XXX we should add the filtering capability to Query
     return new DbIterator(res, fields, batchId);
   }
-  
+
   public void close() throws IOException {
     if (store != null) {
       store.close();
     }
   }
-  
-  private class DbIterator implements Iterator<Map<String,Object>> {
-    private Result<String,WebPage> res;
+
+  private class DbIterator implements Iterator<Map<String, Object>> {
+    private Result<String, WebPage> res;
     private boolean hasNext;
     private String url;
     private WebPage page;
     private Utf8 batchId;
     private TreeSet<String> fields;
 
-    DbIterator(Result<String,WebPage> res, String[] fields, String batchId) throws IOException {
+    DbIterator(Result<String, WebPage> res, String[] fields, String batchId)
+        throws IOException {
       this.res = res;
       if (batchId != null) {
         this.batchId = new Utf8(batchId);
@@ -115,7 +116,7 @@
       }
       advance();
     }
-    
+
     private void advance() throws IOException {
       hasNext = res.next();
       if (hasNext && batchId != null) {
@@ -126,8 +127,8 @@
             return;
           } else {
             if (LOG.isDebugEnabled()) {
-              LOG.debug("Skipping " + 
-                TableUtil.unreverseUrl(res.getKey()) + "; different batch id");
+              LOG.debug("Skipping " + TableUtil.unreverseUrl(res.getKey())
+                  + "; different batch id");
             }
             hasNext = res.next();
           }
@@ -139,9 +140,9 @@
       return hasNext;
     }
 
-    public Map<String,Object> next() {
+    public Map<String, Object> next() {
       url = res.getKey();
-      page = (WebPage)res.get().clone();
+      page = (WebPage) res.get().clone();
       try {
         advance();
         if (!hasNext) {
@@ -155,15 +156,15 @@
       return pageAsMap(url, page);
     }
 
-    private Map<String,Object> pageAsMap(String url, WebPage page) {
-      HashMap<String,Object> res = new HashMap<String,Object>();
+    private Map<String, Object> pageAsMap(String url, WebPage page) {
+      HashMap<String, Object> res = new HashMap<String, Object>();
       if (fields == null || fields.contains("url")) {
         res.put("url", TableUtil.unreverseUrl(url));
       }
       String[] pfields = page.getFields();
       TreeSet<String> flds = null;
       if (fields != null) {
-        flds = (TreeSet<String>)fields.clone();
+        flds = (TreeSet<String>) fields.clone();
       } else {
         flds = new TreeSet<String>(Arrays.asList(pfields));
       }
@@ -179,13 +180,13 @@
         }
         if ("metadata".equals(f)) {
           Map<Utf8, ByteBuffer> metadata = page.getMetadata();
-          Map<String,String> simpleMeta = new HashMap<String,String>();
+          Map<String, String> simpleMeta = new HashMap<String, String>();
           if (metadata != null) {
             Iterator<Entry<Utf8, ByteBuffer>> iterator = metadata.entrySet()
                 .iterator();
             while (iterator.hasNext()) {
               Entry<Utf8, ByteBuffer> entry = iterator.next();
-              simpleMeta.put(entry.getKey().toString(), 
+              simpleMeta.put(entry.getKey().toString(),
                   Bytes.toStringBinary(entry.getValue().array()));
             }
           }
@@ -212,23 +213,23 @@
           if (val instanceof Utf8) {
             val = val.toString();
           } else if (val instanceof ByteBuffer) {
-            val = Bytes.toStringBinary(((ByteBuffer)val).array());
+            val = Bytes.toStringBinary(((ByteBuffer) val).array());
           }
           res.put(f, val);
         }
       }
       return res;
     }
-    
-    private Map<String,String> convertMap(Map map) {
-      Map<String,String> res = new HashMap<String,String>();
+
+    private Map<String, String> convertMap(Map map) {
+      Map<String, String> res = new HashMap<String, String>();
       for (Object o : map.entrySet()) {
-        Entry e = (Entry)o;
+        Entry e = (Entry) o;
         res.put(e.getKey().toString(), e.getValue().toString());
       }
       return res;
     }
-    
+
     public void remove() {
       throw new UnsupportedOperationException();
     }
Index: src/java/org/apache/nutch/api/NutchServer.java
===================================================================
--- src/java/org/apache/nutch/api/NutchServer.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/NutchServer.java	(working copy)
@@ -27,31 +27,31 @@
 
 public class NutchServer {
   private static final Logger LOG = LoggerFactory.getLogger(NutchServer.class);
-  
+
   private Component component;
   private NutchApp app;
   private int port;
   private boolean running;
-  
+
   public NutchServer(int port) {
     this.port = port;
-    // Create a new Component. 
+    // Create a new Component.
     component = new Component();
-    //component.getLogger().setLevel(Level.FINEST);
-   
-    // Add a new HTTP server listening on port 8182. 
-    component.getServers().add(Protocol.HTTP, port); 
-   
-    // Attach the application. 
+    // component.getLogger().setLevel(Level.FINEST);
+
+    // Add a new HTTP server listening on port 8182.
+    component.getServers().add(Protocol.HTTP, port);
+
+    // Attach the application.
     app = new NutchApp();
     component.getDefaultHost().attach("/nutch", app);
     NutchApp.server = this;
   }
-  
+
   public boolean isRunning() {
     return running;
   }
-  
+
   public void start() throws Exception {
     LOG.info("Starting NutchServer on port " + port + "...");
     component.start();
@@ -59,7 +59,7 @@
     running = true;
     NutchApp.started = System.currentTimeMillis();
   }
-  
+
   public boolean canStop() throws Exception {
     List<JobStatus> jobs = NutchApp.jobMgr.list(null, State.RUNNING);
     if (!jobs.isEmpty()) {
@@ -67,7 +67,7 @@
     }
     return true;
   }
-  
+
   public boolean stop(boolean force) throws Exception {
     if (!running) {
       return true;
@@ -83,7 +83,7 @@
     return true;
   }
 
-  public static void main(String[] args) throws Exception { 
+  public static void main(String[] args) throws Exception {
     if (args.length == 0) {
       System.err.println("Usage: NutchServer <port>");
       System.exit(-1);
Index: src/java/org/apache/nutch/api/APIInfoResource.java
===================================================================
--- src/java/org/apache/nutch/api/APIInfoResource.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/APIInfoResource.java	(working copy)
@@ -25,12 +25,12 @@
 
 public class APIInfoResource extends ServerResource {
   private static final List<String[]> info = new ArrayList<String[]>();
-  
+
   static {
-    info.add(new String[]{AdminResource.PATH, AdminResource.DESCR});
-    info.add(new String[]{ConfResource.PATH, ConfResource.DESCR});
-    info.add(new String[]{DbResource.PATH, DbResource.DESCR});
-    info.add(new String[]{JobResource.PATH, JobResource.DESCR});
+    info.add(new String[] { AdminResource.PATH, AdminResource.DESCR });
+    info.add(new String[] { ConfResource.PATH, ConfResource.DESCR });
+    info.add(new String[] { DbResource.PATH, DbResource.DESCR });
+    info.add(new String[] { JobResource.PATH, JobResource.DESCR });
   }
 
   @Get("json")
Index: src/java/org/apache/nutch/api/ConfResource.java
===================================================================
--- src/java/org/apache/nutch/api/ConfResource.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/ConfResource.java	(working copy)
@@ -27,24 +27,24 @@
 import org.restlet.resource.ServerResource;
 
 public class ConfResource extends ServerResource {
-  
+
   public static final String PATH = "confs";
   public static final String DESCR = "Configuration manager";
   public static final String DEFAULT_CONF = "default";
-  
+
   private AtomicInteger seqId = new AtomicInteger();
-  
+
   @Get("json")
   public Object retrieve() throws Exception {
-    String id = (String)getRequestAttributes().get(Params.CONF_ID);
+    String id = (String) getRequestAttributes().get(Params.CONF_ID);
     if (id == null) {
       return NutchApp.confMgr.list();
     } else {
-      String prop = (String)getRequestAttributes().get(Params.PROP_NAME);
+      String prop = (String) getRequestAttributes().get(Params.PROP_NAME);
       if (prop == null) {
         return NutchApp.confMgr.getAsMap(id);
       } else {
-        Map<String,String> cfg = NutchApp.confMgr.getAsMap(id);
+        Map<String, String> cfg = NutchApp.confMgr.getAsMap(id);
         if (cfg == null) {
           return null;
         } else {
@@ -53,43 +53,45 @@
       }
     }
   }
-  
+
   @Put("json")
-  public String create(Map<String,Object> args) throws Exception {
+  public String create(Map<String, Object> args) throws Exception {
     System.out.println("args=" + args);
-    String id = (String)args.get(Params.CONF_ID); 
+    String id = (String) args.get(Params.CONF_ID);
     if (id == null) {
       id = String.valueOf(seqId.incrementAndGet());
     }
-    Map<String,String> props = (Map<String,String>)args.get(Params.PROPS);
-    Boolean force = (Boolean)args.get(Params.FORCE);
+    Map<String, String> props = (Map<String, String>) args.get(Params.PROPS);
+    Boolean force = (Boolean) args.get(Params.FORCE);
     boolean f = force != null ? force : false;
     NutchApp.confMgr.create(id, props, f);
     return id;
   }
-  
+
   @Post("json")
-  public void update(Map<String,Object> args) throws Exception {
-    String id = (String)args.get(Params.CONF_ID); 
-    if (id == null) id = (String)getRequestAttributes().get(Params.CONF_ID);
+  public void update(Map<String, Object> args) throws Exception {
+    String id = (String) args.get(Params.CONF_ID);
+    if (id == null)
+      id = (String) getRequestAttributes().get(Params.CONF_ID);
     if (id == null) {
       throw new Exception("Missing config id");
     }
-    String prop = (String)args.get(Params.PROP_NAME);
-    if (prop == null) prop = (String)getRequestAttributes().get(Params.PROP_NAME);
+    String prop = (String) args.get(Params.PROP_NAME);
+    if (prop == null)
+      prop = (String) getRequestAttributes().get(Params.PROP_NAME);
     if (prop == null) {
       throw new Exception("Missing property name prop");
     }
-    String value = (String)args.get(Params.PROP_VALUE);
+    String value = (String) args.get(Params.PROP_VALUE);
     if (value == null) {
       throw new Exception("Missing property value");
     }
     NutchApp.confMgr.setProperty(id, prop, value);
   }
-  
+
   @Delete
   public void remove() throws Exception {
-    String id = (String)getRequestAttributes().get(Params.CONF_ID);
+    String id = (String) getRequestAttributes().get(Params.CONF_ID);
     if (id == null) {
       throw new Exception("Missing config id");
     }
Index: src/java/org/apache/nutch/api/DbResource.java
===================================================================
--- src/java/org/apache/nutch/api/DbResource.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/DbResource.java	(working copy)
@@ -38,8 +38,8 @@
   public static final String DESCR = "DB data streaming";
 
   static JacksonConverter cnv = new JacksonConverter();
-  WeakHashMap<String,DbReader> readers = new WeakHashMap<String,DbReader>();
-  
+  WeakHashMap<String, DbReader> readers = new WeakHashMap<String, DbReader>();
+
   @Override
   protected void doInit() throws ResourceException {
     super.doInit();
@@ -68,12 +68,16 @@
         if (startKey != null) {
           try {
             startKey = TableUtil.reverseUrl(startKey);
-          } catch (MalformedURLException e) { /*ignore */};
+          } catch (MalformedURLException e) { /* ignore */
+          }
+          ;
         }
         if (endKey != null) {
           try {
             endKey = TableUtil.reverseUrl(endKey);
-          } catch (MalformedURLException e) { /*ignore */};
+          } catch (MalformedURLException e) { /* ignore */
+          }
+          ;
         }
       }
       batchId = form.getFirstValue("batch");
@@ -95,16 +99,17 @@
         startKey, endKey, batchId);
     return res;
   }
-  
+
   private static class DbRepresentation extends OutputRepresentation {
     private DbReader r;
     private Variant variant;
     private String[] fields;
     private String startKey, endKey, batchId;
     private DbResource resource;
-    
-    public DbRepresentation(DbResource resource, Variant variant, DbReader reader,
-        String[] fields, String startKey, String endKey, String batchId) {
+
+    public DbRepresentation(DbResource resource, Variant variant,
+        DbReader reader, String[] fields, String startKey, String endKey,
+        String batchId) {
       super(variant.getMediaType());
       this.resource = resource;
       this.r = reader;
@@ -119,7 +124,8 @@
     public void write(OutputStream out) throws IOException {
       try {
         out.write('[');
-        Iterator<Map<String,Object>> it = r.iterator(fields, startKey, endKey, batchId);
+        Iterator<Map<String, Object>> it = r.iterator(fields, startKey, endKey,
+            batchId);
         boolean first = true;
         while (it.hasNext()) {
           if (!first) {
@@ -127,7 +133,7 @@
           } else {
             first = false;
           }
-          Map<String,Object> item = it.next();
+          Map<String, Object> item = it.next();
           Representation repr = cnv.toRepresentation(item, variant, resource);
           repr.write(out);
           out.flush();
Index: src/java/org/apache/nutch/api/JobStatus.java
===================================================================
--- src/java/org/apache/nutch/api/JobStatus.java	(revision 1188268)
+++ src/java/org/apache/nutch/api/JobStatus.java	(working copy)
@@ -22,19 +22,21 @@
 import org.apache.nutch.util.NutchTool;
 
 public class JobStatus {
-  public static enum State {IDLE, RUNNING, FINISHED, FAILED, KILLED,
-    STOPPING, KILLING, ANY};
+  public static enum State {
+    IDLE, RUNNING, FINISHED, FAILED, KILLED, STOPPING, KILLING, ANY
+  };
+
   public String id;
   public JobType type;
   public String confId;
-  public Map<String,Object> args;
-  public Map<String,Object> result;
+  public Map<String, Object> args;
+  public Map<String, Object> result;
   public NutchTool tool;
   public State state;
   public String msg;
-  
-  public JobStatus(String id, JobType type, String confId, Map<String,Object> args,
-      State state, String msg) {
+
+  public JobStatus(String id, JobType type, String confId,
+      Map<String, Object> args, State state, String msg) {
     this.id = id;
     this.type = type;
     this.confId = confId;
Index: src/java/org/apache/nutch/parse/ParseStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseStatusUtils.java	(working copy)
@@ -28,7 +28,7 @@
 public class ParseStatusUtils {
 
   public static ParseStatus STATUS_SUCCESS = new ParseStatus();
-  public static final HashMap<Short,String> minorCodes = new HashMap<Short,String>();
+  public static final HashMap<Short, String> minorCodes = new HashMap<Short, String>();
 
   static {
     STATUS_SUCCESS.setMajorCode(ParseStatusCodes.SUCCESS);
@@ -48,8 +48,9 @@
     return status.getMajorCode() == ParseStatusCodes.SUCCESS;
   }
 
-  /** A convenience method. Return a String representation of the first
-   * argument, or null.
+  /**
+   * A convenience method. Return a String representation of the first argument,
+   * or null.
    */
   public static String getMessage(ParseStatus status) {
     GenericArray<Utf8> args = status.getArgs();
@@ -83,7 +84,8 @@
     return new Parse("", "", new Outlink[0], status);
   }
 
-  public static Parse getEmptyParse(int minorCode, String message, Configuration conf) {
+  public static Parse getEmptyParse(int minorCode, String message,
+      Configuration conf) {
     ParseStatus status = new ParseStatus();
     status.setMajorCode(ParseStatusCodes.FAILED);
     status.setMinorCode(minorCode);
@@ -91,14 +93,14 @@
 
     return new Parse("", "", new Outlink[0], status);
   }
-  
+
   public static String toString(ParseStatus status) {
     if (status == null) {
       return "(null)";
     }
     StringBuilder sb = new StringBuilder();
-    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] +
-        "/" + minorCodes.get((short)status.getMinorCode()));
+    sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] + "/"
+        + minorCodes.get((short) status.getMinorCode()));
     sb.append(" (" + status.getMajorCode() + "/" + status.getMinorCode() + ")");
     sb.append(", args=[");
     GenericArray<Utf8> args = status.getArgs();
@@ -106,7 +108,8 @@
       int i = 0;
       Iterator<Utf8> it = args.iterator();
       while (it.hasNext()) {
-        if (i > 0) sb.append(',');
+        if (i > 0)
+          sb.append(',');
         sb.append(it.next());
         i++;
       }
Index: src/java/org/apache/nutch/parse/ParsePluginsReader.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginsReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParsePluginsReader.java	(working copy)
@@ -42,50 +42,50 @@
 // Nutch imports
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * A reader to load the information stored in the
  * <code>$NUTCH_HOME/conf/parse-plugins.xml</code> file.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginsReader {
-  
+
   /* our log stream */
-  public static final Logger LOG = LoggerFactory.getLogger(ParsePluginsReader.class);
-  
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ParsePluginsReader.class);
+
   /** The property name of the parse-plugins location */
   private static final String PP_FILE_PROP = "parse.plugin.file";
 
   /** the parse-plugins file */
   private String fParsePluginsFile = null;
 
-  
   /**
    * Constructs a new ParsePluginsReader
    */
-  public ParsePluginsReader() { }
-  
+  public ParsePluginsReader() {
+  }
+
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
    * {@link #ParsePluginList} defined by it.
-   *
+   * 
    * @return A {@link #ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
    * @throws Exception
-   *             If any parsing error occurs.
+   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
-    
+
     ParsePluginList pList = new ParsePluginList();
-    
+
     // open up the XML file
     DocumentBuilderFactory factory = null;
     DocumentBuilder parser = null;
     Document document = null;
     InputSource inputSource = null;
-    
+
     InputStream ppInputStream = null;
     if (fParsePluginsFile != null) {
       URL parsePluginUrl = null;
@@ -94,56 +94,55 @@
         ppInputStream = parsePluginUrl.openStream();
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Unable to load parse plugins file from URL " +
-                   "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+          LOG.warn("Unable to load parse plugins file from URL " + "["
+              + fParsePluginsFile + "]. Reason is [" + e + "]");
         }
         return pList;
       }
     } else {
-      ppInputStream = conf.getConfResourceAsInputStream(
-                          conf.get(PP_FILE_PROP));
+      ppInputStream = conf.getConfResourceAsInputStream(conf.get(PP_FILE_PROP));
     }
-    
+
     inputSource = new InputSource(ppInputStream);
-    
+
     try {
       factory = DocumentBuilderFactory.newInstance();
       parser = factory.newDocumentBuilder();
       document = parser.parse(inputSource);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
-                 "Reason is [" + e + "]");
+        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." + "Reason is ["
+            + e + "]");
       }
       return null;
     }
-    
+
     Element parsePlugins = document.getDocumentElement();
-    
+
     // build up the alias hash map
     Map<String, String> aliases = getAliases(parsePlugins);
     // And store it on the parse plugin list
     pList.setAliases(aliases);
-     
+
     // get all the mime type nodes
     NodeList mimeTypes = parsePlugins.getElementsByTagName("mimeType");
-    
+
     // iterate through the mime types
     for (int i = 0; i < mimeTypes.getLength(); i++) {
       Element mimeType = (Element) mimeTypes.item(i);
       String mimeTypeStr = mimeType.getAttribute("name");
-      
+
       // for each mimeType, get the plugin list
       NodeList pluginList = mimeType.getElementsByTagName("plugin");
-      
+
       // iterate through the plugins, add them in order read
       // OR if they have a special order="" attribute, then hold those in
       // a separate list, and then insert them into the final list at the
       // order specified
       if (pluginList != null && pluginList.getLength() > 0) {
         List<String> plugList = new ArrayList<String>(pluginList.getLength());
-        
-        for (int j = 0; j<pluginList.getLength(); j++) {
+
+        for (int j = 0; j < pluginList.getLength(); j++) {
           Element plugin = (Element) pluginList.item(j);
           String pluginId = plugin.getAttribute("id");
           String extId = aliases.get(pluginId);
@@ -163,110 +162,110 @@
             plugList.add(extId);
           }
         }
-        
+
         // now add the plugin list and map it to this mimeType
         pList.setPluginList(mimeTypeStr, plugList);
-        
+
       } else if (LOG.isWarnEnabled()) {
         LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
-                 + mimeTypeStr + ", continuing parse");
+            + mimeTypeStr + ", continuing parse");
       }
     }
     return pList;
   }
-  
+
   /**
    * Tests parsing of the parse-plugins.xml file. An alternative name for the
-   * file can be specified via the <code>--file</code> option, although the
-   * file must be located in the <code>$NUTCH_HOME/conf</code> directory.
-   *
+   * file can be specified via the <code>--file</code> option, although the file
+   * must be located in the <code>$NUTCH_HOME/conf</code> directory.
+   * 
    * @param args
-   *            Currently only the --file argument to specify an alternative
-   *            name for the parse-plugins.xml file is supported.
+   *          Currently only the --file argument to specify an alternative name
+   *          for the parse-plugins.xml file is supported.
    */
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
-    
-    if (( args.length != 0 && args.length != 2 )
+
+    if ((args.length != 0 && args.length != 2)
         || (args.length == 2 && !"--file".equals(args[0]))) {
       System.err.println(usage);
       System.exit(1);
     }
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {
         parsePluginFile = args[++i];
       }
     }
-    
+
     ParsePluginsReader reader = new ParsePluginsReader();
-    
+
     if (parsePluginFile != null) {
       reader.setFParsePluginsFile(parsePluginFile);
     }
-    
+
     ParsePluginList prefs = reader.parse(NutchConfiguration.create());
-    
+
     for (String mimeType : prefs.getSupportedMimeTypes()) {
-      
+
       System.out.println("MIMETYPE: " + mimeType);
       List<String> plugList = prefs.getPluginList(mimeType);
-      
+
       System.out.println("EXTENSION IDs:");
-      
+
       for (String j : plugList) {
         System.out.println(j);
       }
     }
-    
+
   }
-  
+
   /**
    * @return Returns the fParsePluginsFile.
    */
   public String getFParsePluginsFile() {
     return fParsePluginsFile;
   }
-  
+
   /**
    * @param parsePluginsFile
-   *            The fParsePluginsFile to set.
+   *          The fParsePluginsFile to set.
    */
   public void setFParsePluginsFile(String parsePluginsFile) {
     fParsePluginsFile = parsePluginsFile;
   }
-  
+
   private Map<String, String> getAliases(Element parsePluginsRoot) {
 
     Map<String, String> aliases = new HashMap<String, String>();
     NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
-	  
+
     if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("No aliases defined in parse-plugins.xml!");
       }
       return aliases;
     }
-	  
+
     if (aliasRoot.getLength() > 1) {
       // log a warning, but try and continue processing
       if (LOG.isWarnEnabled()) {
         LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
       }
     }
-	  
-    Element aliasRootElem = (Element)aliasRoot.item(0);
+
+    Element aliasRootElem = (Element) aliasRoot.item(0);
     NodeList aliasElements = aliasRootElem.getElementsByTagName("alias");
-	  
+
     if (aliasElements != null && aliasElements.getLength() > 0) {
-      for (int i=0; i<aliasElements.getLength(); i++) {
-        Element aliasElem = (Element)aliasElements.item(i);
-	String parsePluginId = aliasElem.getAttribute("name");
-	String extensionId = aliasElem.getAttribute("extension-id");
+      for (int i = 0; i < aliasElements.getLength(); i++) {
+        Element aliasElem = (Element) aliasElements.item(i);
+        String parsePluginId = aliasElem.getAttribute("name");
+        String extensionId = aliasElem.getAttribute("extension-id");
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Found alias: plugin-id: " + parsePluginId +
-                    ", extension-id: " + extensionId);
+          LOG.trace("Found alias: plugin-id: " + parsePluginId
+              + ", extension-id: " + extensionId);
         }
         if (parsePluginId != null && extensionId != null) {
           aliases.put(parsePluginId, extensionId);
@@ -275,5 +274,5 @@
     }
     return aliases;
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilters.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseFilters.java	(working copy)
@@ -31,7 +31,7 @@
 import org.apache.nutch.util.ObjectCache;
 import org.w3c.dom.DocumentFragment;
 
-/** Creates and caches {@link ParseFilter} implementing plugins.*/
+/** Creates and caches {@link ParseFilter} implementing plugins. */
 public class ParseFilters {
 
   private ParseFilter[] parseFilters;
@@ -41,7 +41,8 @@
   public ParseFilters(Configuration conf) {
     String order = conf.get(HTMLPARSEFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+    this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class
+        .getName());
     if (parseFilters == null) {
       /*
        * If ordered filters are required, prepare array of filters based on
@@ -51,21 +52,23 @@
       if (order != null && !order.trim().equals("")) {
         orderedFilters = order.split("\\s+");
       }
-      HashMap<String, ParseFilter> filterMap =
-        new HashMap<String, ParseFilter>();
+      HashMap<String, ParseFilter> filterMap = new HashMap<String, ParseFilter>();
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ParseFilter.X_POINT_ID);
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ParseFilter.X_POINT_ID);
         if (point == null)
           throw new RuntimeException(ParseFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ParseFilter parseFilter = (ParseFilter) extension.getExtensionInstance();
+          ParseFilter parseFilter = (ParseFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(parseFilter.getClass().getName())) {
             filterMap.put(parseFilter.getClass().getName(), parseFilter);
           }
         }
-        ParseFilter[] htmlParseFilters = filterMap.values().toArray(new ParseFilter[filterMap.size()]);
+        ParseFilter[] htmlParseFilters = filterMap.values().toArray(
+            new ParseFilter[filterMap.size()]);
         /*
          * If no ordered filters required, just get the filters in an
          * indeterminate order
@@ -77,19 +80,19 @@
         else {
           ArrayList<ParseFilter> filters = new ArrayList<ParseFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-            ParseFilter filter = filterMap
-            .get(orderedFilters[i]);
+            ParseFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(ParseFilter.class.getName(), filters
-              .toArray(new ParseFilter[filters.size()]));
+          objectCache.setObject(ParseFilter.class.getName(),
+              filters.toArray(new ParseFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.parseFilters = (ParseFilter[]) objectCache.getObject(ParseFilter.class.getName());
+      this.parseFilters = (ParseFilter[]) objectCache
+          .getObject(ParseFilter.class.getName());
     }
   }
 
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(working copy)
@@ -48,7 +48,7 @@
  * A Utility class containing methods to simply perform parsing utilities such
  * as iterating through a preferred list of {@link Parser}s to obtain
  * {@link Parse} objects.
- *
+ * 
  * @author mattmann
  * @author J&eacute;r&ocirc;me Charron
  * @author S&eacute;bastien Le Callonnec
@@ -68,8 +68,9 @@
   private ParserFactory parserFactory;
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int MAX_PARSE_TIME = 30;
+
   /**
-   *
+   * 
    * @param conf
    */
   public ParseUtil(Configuration conf) {
@@ -86,12 +87,13 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
     parserFactory = new ParserFactory(conf);
-    MAX_PARSE_TIME=conf.getInt("parser.timeout", 30);
+    MAX_PARSE_TIME = conf.getInt("parser.timeout", 30);
     sig = SignatureFactory.getSignature(conf);
     filters = new URLFilters(conf);
     normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_OUTLINK);
     int maxOutlinksPerPage = conf.getInt("db.max.outlinks.per.page", 100);
-    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE : maxOutlinksPerPage;
+    maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
+        : maxOutlinksPerPage;
     ignoreExternalLinks = conf.getBoolean("db.ignore.external.links", false);
   }
 
@@ -100,8 +102,9 @@
    * until a successful parse is performed and a {@link Parse} object is
    * returned. If the parse is unsuccessful, a message is logged to the
    * <code>WARNING</code> level, and an empty parse is returned.
-   *
-   * @throws ParseException If no suitable parser is found to perform the parse.
+   * 
+   * @throws ParseException
+   *           If no suitable parser is found to perform the parse.
    */
   public Parse parse(String url, WebPage page) throws ParseException {
     Parser[] parsers = null;
@@ -111,56 +114,58 @@
     try {
       parsers = this.parserFactory.getParsers(contentType, url);
     } catch (ParserNotFound e) {
-      LOG.warn("No suitable parser found when trying to parse content " + url +
-          " of type " + contentType);
+      LOG.warn("No suitable parser found when trying to parse content " + url
+          + " of type " + contentType);
       throw new ParseException(e.getMessage());
     }
 
-    for (int i=0; i<parsers.length; i++) {
+    for (int i = 0; i < parsers.length; i++) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Parsing [" + url + "] with [" + parsers[i] + "]");
       }
       Parse parse = null;
-      
-      if (MAX_PARSE_TIME!=-1)
-    	  parse = runParser(parsers[i], url, page);
-      else 
-    	  parse = parsers[i].getParse(url, page);
-      
-      if (parse!=null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
+
+      if (MAX_PARSE_TIME != -1)
+        parse = runParser(parsers[i], url, page);
+      else
+        parse = parsers[i].getParse(url, page);
+
+      if (parse != null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
         return parse;
       }
     }
 
-    LOG.warn("Unable to successfully parse content " + url +
-        " of type " + contentType);
-    return ParseStatusUtils.getEmptyParse(new ParseException("Unable to successfully parse content"), null);
+    LOG.warn("Unable to successfully parse content " + url + " of type "
+        + contentType);
+    return ParseStatusUtils.getEmptyParse(new ParseException(
+        "Unable to successfully parse content"), null);
   }
-  
+
   private Parse runParser(Parser p, String url, WebPage page) {
-	  ParseCallable pc = new ParseCallable(p, page, url);
-	  FutureTask<Parse> task = new FutureTask<Parse>(pc);
-	  Parse res = null;
-	  Thread t = new Thread(task);
-	  t.start();
-	  try {
-		  res = task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
-	  } catch (TimeoutException e) {
-		  LOG.warn("TIMEOUT parsing " + url + " with " + p);
-	  } catch (Exception e) {
-		  task.cancel(true);
-		  res = null;
-		  t.interrupt();
-	  } finally {
-		  t = null;
-		  pc = null;
-	  }
-	  return res;
+    ParseCallable pc = new ParseCallable(p, page, url);
+    FutureTask<Parse> task = new FutureTask<Parse>(pc);
+    Parse res = null;
+    Thread t = new Thread(task);
+    t.start();
+    try {
+      res = task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
+    } catch (TimeoutException e) {
+      LOG.warn("TIMEOUT parsing " + url + " with " + p);
+    } catch (Exception e) {
+      task.cancel(true);
+      res = null;
+      t.interrupt();
+    } finally {
+      t = null;
+      pc = null;
+    }
+    return res;
   }
 
   /**
-   * Parses given web page and stores parsed content within page. Returns
-   * a pair of <String, WebPage> if a meta-redirect is discovered
+   * Parses given web page and stores parsed content within page. Returns a pair
+   * of <String, WebPage> if a meta-redirect is discovered
+   * 
    * @param key
    * @param page
    * @return newly-discovered webpage (via a meta-redirect)
@@ -171,7 +176,8 @@
     byte status = (byte) page.getStatus();
     if (status != CrawlStatus.STATUS_FETCHED) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Skipping " + url + " as status is: " + CrawlStatus.getName(status));
+        LOG.debug("Skipping " + url + " as status is: "
+            + CrawlStatus.getName(status));
       }
       return redirectedPage;
     }
@@ -180,7 +186,8 @@
     try {
       parse = parse(url, page);
     } catch (final Exception e) {
-      LOG.warn("Error parsing: " + url + ": " + StringUtils.stringifyException(e));
+      LOG.warn("Error parsing: " + url + ": "
+          + StringUtils.stringifyException(e));
       return redirectedPage;
     }
 
@@ -242,8 +249,7 @@
             toUrl = filters.filter(toUrl);
           } catch (final URLFilterException e) {
             continue;
-          }
-          catch (MalformedURLException e2){
+          } catch (MalformedURLException e2) {
             continue;
           }
           if (toUrl == null) {
Index: src/java/org/apache/nutch/parse/ParserNotFound.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserNotFound.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParserNotFound.java	(working copy)
@@ -18,17 +18,17 @@
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -16,7 +16,6 @@
  ******************************************************************************/
 package org.apache.nutch.parse;
 
-
 public class Parse {
 
   private String text;
Index: src/java/org/apache/nutch/parse/ParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseFilter.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseFilter.java	(working copy)
@@ -22,18 +22,19 @@
 import org.apache.nutch.storage.WebPage;
 import org.w3c.dom.DocumentFragment;
 
-
-/** Extension point for DOM-based parsers.  Permits one to add additional
- * metadata to parses provided by the html or tika plugins.  All plugins found which implement this extension
- * point are run sequentially on the parse.
+/**
+ * Extension point for DOM-based parsers. Permits one to add additional metadata
+ * to parses provided by the html or tika plugins. All plugins found which
+ * implement this extension point are run sequentially on the parse.
  */
 public interface ParseFilter extends FieldPluggable, Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = ParseFilter.class.getName();
 
-  /** Adds metadata or otherwise modifies a parse, given
-   * the DOM tree of a page. */
-  Parse filter(String url, WebPage page, Parse parse,
-                    HTMLMetaTags metaTags, DocumentFragment doc);
+  /**
+   * Adds metadata or otherwise modifies a parse, given the DOM tree of a page.
+   */
+  Parse filter(String url, WebPage page, Parse parse, HTMLMetaTags metaTags,
+      DocumentFragment doc);
 
 }
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java	(working copy)
@@ -34,8 +34,8 @@
 import org.apache.oro.text.regex.Perl5Matcher;
 
 /**
- * Extractor to extract {@link org.apache.nutch.parse.Outlink}s 
- * / URLs from plain text using Regular Expressions.
+ * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from
+ * plain text using Regular Expressions.
  * 
  * @see <a
  *      href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison
@@ -48,24 +48,27 @@
  * @since 0.7
  */
 public class OutlinkExtractor {
-  private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(OutlinkExtractor.class);
 
   /**
    * Regex pattern to get URLs within a plain text.
    * 
    * @see <a
    *      href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html
+   *
    *      </a>
    */
-  private static final String URL_PATTERN = 
-    "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
+  private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
 
   /**
-   * Extracts <code>Outlink</code> from given plain text.
-   * Applying this method to non-plain-text can result in extremely lengthy
-   * runtimes for parasitic cases (postscript is a known example).
-   * @param plainText  the plain text from wich URLs should be extracted.
+   * Extracts <code>Outlink</code> from given plain text. Applying this method
+   * to non-plain-text can result in extremely lengthy runtimes for parasitic
+   * cases (postscript is a known example).
    * 
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
   public static Outlink[] getOutlinks(final String plainText, Configuration conf) {
@@ -73,15 +76,18 @@
   }
 
   /**
-   * Extracts <code>Outlink</code> from given plain text and adds anchor
-   * to the extracted <code>Outlink</code>s
+   * Extracts <code>Outlink</code> from given plain text and adds anchor to the
+   * extracted <code>Outlink</code>s
    * 
-   * @param plainText the plain text from wich URLs should be extracted.
-   * @param anchor    the anchor of the url
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * @param anchor
+   *          the anchor of the url
    * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
-  public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
+  public static Outlink[] getOutlinks(final String plainText, String anchor,
+      Configuration conf) {
     long start = System.currentTimeMillis();
     final List<Outlink> outlinks = new ArrayList<Outlink>();
 
@@ -97,11 +103,11 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         // if this is taking too long, stop matching
-        //   (SHOULD really check cpu time used so that heavily loaded systems
-        //   do not unnecessarily hit this limit.)
+        // (SHOULD really check cpu time used so that heavily loaded systems
+        // do not unnecessarily hit this limit.)
         if (System.currentTimeMillis() - start >= 60000L) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("Time limit exceeded for getOutLinks");
@@ -117,13 +123,16 @@
         }
       }
     } catch (Exception ex) {
-      // if the matcher fails (perhaps a malformed URL) we just log it and move on
-      if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
+      // if the matcher fails (perhaps a malformed URL) we just log it and move
+      // on
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getOutlinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -132,7 +141,6 @@
 
     return retval;
   }
-  
 
   /**
    * Extracts outlinks from a plain text. <br />
@@ -161,7 +169,7 @@
     // url = re.getParen(0);
     //
     // if (LOG.isTraceEnabled()) {
-    //   LOG.trace("Extracted url: " + url);
+    // LOG.trace("Extracted url: " + url);
     // }
     //
     // try {
@@ -191,9 +199,8 @@
   }
 
   /**
-   * Extracts outlinks from a plain text.
-   * </p>
-   * This Method takes the JDK5 Regexp API.
+   * Extracts outlinks from a plain text. <p> This method takes the JDK5 Regexp
+   * API.
    * 
    * @param plainText
    * 
@@ -241,5 +248,5 @@
     //
     // return retval;
   }
- 
+
 }
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -35,8 +35,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
-
-/** Creates and caches {@link Parser} plugins.*/
+/** Creates and caches {@link Parser} plugins. */
 public final class ParserFactory {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
@@ -45,8 +44,7 @@
   public static final String DEFAULT_PLUGIN = "*";
 
   /** Empty extension list for caching purposes. */
-  private final List<Extension> EMPTY_EXTENSION_LIST =
-    new ArrayList<Extension>();
+  private final List<Extension> EMPTY_EXTENSION_LIST = new ArrayList<Extension>();
 
   private final Configuration conf;
   private final ExtensionPoint extensionPoint;
@@ -57,10 +55,12 @@
     ObjectCache objectCache = ObjectCache.get(conf);
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
         Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -72,33 +72,34 @@
     }
   }
 
-
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of
+   *          {@link Parser}s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
+      throws ParserNotFound {
 
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
@@ -108,7 +109,7 @@
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -119,50 +120,51 @@
     }
 
     parsers = new ArrayList<Parser>(parserExts.size());
-    for (Extension ext : parserExts){
+    for (Extension ext : parserExts) {
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers");
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers");
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
 
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
-   *
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser
+   * itself, then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
+   * 
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., registered with the extension
+   *           point), or if there is a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -186,7 +188,7 @@
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -194,9 +196,9 @@
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Canno initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
@@ -214,7 +216,7 @@
           columns.addAll(pluginFields);
         }
       } catch (PluginRuntimeException e) {
-        LOG.error("PluginRuntimeException",e);
+        LOG.error("PluginRuntimeException", e);
       }
     }
     return columns;
@@ -222,10 +224,11 @@
 
   /**
    * Finds the best-suited parse plugin for a given contentType.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   @SuppressWarnings("unchecked")
   protected List<Extension> getExtensions(String contentType) {
@@ -248,8 +251,8 @@
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-      	// Put the empty extension list into cache
-      	// to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
@@ -258,22 +261,24 @@
 
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
-   *
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
+   * 
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
 
     Extension[] extensions = this.extensionPoint.getExtensions();
 
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
@@ -286,20 +291,23 @@
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
+      Extension[] extensions, String contentType) {
 
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
@@ -317,7 +325,7 @@
         // in either case, LOG the appropriate error message to WARN level
 
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
 
           if (LOG.isWarnEnabled()) {
@@ -325,17 +333,17 @@
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -355,12 +363,12 @@
       // any extensions where this is the case, throw a
       // NotMappedParserException
 
-      for (int i=0; i<extensions.length; i++) {
-      	if ("*".equals(extensions[i].getAttribute("contentType"))){
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-      	else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
@@ -369,21 +377,23 @@
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-        	  if (!isFirst) extensionsIDs.append(" - ");
-        	  else isFirst=false;
-        	  extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-    	  extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it  in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
 
@@ -391,23 +401,22 @@
   }
 
   private String escapeContentType(String contentType) {
-  	// Escapes contentType in order to use as a regex 
-  	// (and keep backwards compatibility).
-  	// This enables to accept multiple types for a single parser. 
-  	return contentType.replace("+", "\\+").replace(".", "\\.");
-	}
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
-
-	private boolean match(Extension extension, String id, String type) {
-    return (id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") ||
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN));
+  private boolean match(Extension extension, String id, String type) {
+    return (id.equals(extension.getId()))
+        && (extension.getAttribute("contentType").equals("*")
+            || type.matches(escapeContentType(extension
+                .getAttribute("contentType"))) || type.equals(DEFAULT_PLUGIN));
   }
 
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
@@ -416,7 +425,7 @@
   }
 
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
Index: src/java/org/apache/nutch/parse/ParsePluginList.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginList.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParsePluginList.java	(working copy)
@@ -22,25 +22,23 @@
 import java.util.List;
 import java.util.Map;
 
-
 /**
  * This class represents a natural ordering for which parsing plugin should get
  * called for a particular mimeType. It provides methods to store the
  * parse-plugins.xml data, and methods to retreive the name of the appropriate
  * parsing plugin for a contentType.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 public class ParsePluginList {
-  
+
   /* a map to link mimeType to an ordered list of parsing plugins */
   private Map<String, List<String>> fMimeTypeToPluginMap = null;
-  
+
   /* A list of aliases */
   private Map<String, String> aliases = null;
-  
-  
+
   /**
    * Constructs a new ParsePluginList
    */
@@ -48,7 +46,7 @@
     fMimeTypeToPluginMap = new HashMap<String, List<String>>();
     aliases = new HashMap<String, String>();
   }
-  
+
   public List<String> getPluginList(String mimeType) {
     return fMimeTypeToPluginMap.get(mimeType);
   }
@@ -56,18 +54,18 @@
   void setAliases(Map<String, String> aliases) {
     this.aliases = aliases;
   }
-  
+
   public Map<String, String> getAliases() {
     return aliases;
   }
-  
+
   void setPluginList(String mimeType, List<String> l) {
     fMimeTypeToPluginMap.put(mimeType, l);
   }
-  
+
   List<String> getSupportedMimeTypes() {
-    return Arrays.asList(fMimeTypeToPluginMap.keySet().toArray(
-            new String[] {}));
+    return Arrays
+        .asList(fMimeTypeToPluginMap.keySet().toArray(new String[] {}));
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/Outlink.java
===================================================================
--- src/java/org/apache/nutch/parse/Outlink.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/Outlink.java	(working copy)
@@ -28,11 +28,13 @@
   private String toUrl;
   private String anchor;
 
-  public Outlink() {}
+  public Outlink() {
+  }
 
   public Outlink(String toUrl, String anchor) throws MalformedURLException {
     this.toUrl = toUrl;
-    if (anchor == null) anchor = "";
+    if (anchor == null)
+      anchor = "";
     this.anchor = anchor;
   }
 
@@ -43,8 +45,8 @@
 
   /** Skips over one Outlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip toUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip toUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -58,21 +60,24 @@
     return outlink;
   }
 
-  public String getToUrl() { return toUrl; }
-  public String getAnchor() { return anchor; }
+  public String getToUrl() {
+    return toUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof Outlink))
       return false;
-    Outlink other = (Outlink)o;
-    return
-      this.toUrl.equals(other.toUrl) &&
-      this.anchor.equals(other.anchor);
+    Outlink other = (Outlink) o;
+    return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor);
   }
 
   public String toString() {
-    return "toUrl: " + toUrl + " anchor: " + anchor;  // removed "\n". toString, not printLine... WD.
+    return "toUrl: " + toUrl + " anchor: " + anchor; // removed "\n". toString,
+                                                     // not printLine... WD.
   }
 
 }
Index: src/java/org/apache/nutch/parse/ParserJob.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParserJob.java	(working copy)
@@ -66,9 +66,8 @@
     FIELDS.add(WebPage.Field.METADATA);
   }
 
-
-  public static class ParserMapper 
-      extends GoraMapper<String, WebPage, String, WebPage> {
+  public static class ParserMapper extends
+      GoraMapper<String, WebPage, String, WebPage> {
     private ParseUtil parseUtil;
 
     private boolean shouldResume;
@@ -76,14 +75,15 @@
     private boolean force;
 
     private Utf8 batchId;
-    
+
     @Override
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
       parseUtil = new ParseUtil(conf);
       shouldResume = conf.getBoolean(RESUME_KEY, false);
       force = conf.getBoolean(FORCE_KEY, false);
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
     }
 
     @Override
@@ -92,18 +92,21 @@
       Utf8 mark = Mark.FETCH_MARK.checkMark(page);
       if (!NutchJob.shouldProcess(mark, batchId)) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id");
+          LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+              + "; different batch id");
         }
         return;
       }
       if (shouldResume && Mark.PARSE_MARK.checkMark(page) != null) {
         if (force) {
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Forced parsing " + TableUtil.unreverseUrl(key) + "; already parsed");
+            LOG.debug("Forced parsing " + TableUtil.unreverseUrl(key)
+                + "; already parsed");
           }
         } else {
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; already parsed");
+            LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+                + "; already parsed");
           }
           return;
         }
@@ -118,12 +121,12 @@
 
       if (redirectedPage != null) {
         context.write(TableUtil.reverseUrl(redirectedPage.getUrl()),
-                      redirectedPage.getDatum());
+            redirectedPage.getDatum());
       }
       context.write(key, page);
-    }    
+    }
   }
-  
+
   public ParserJob() {
 
   }
@@ -139,8 +142,8 @@
     ParseFilters parseFilters = new ParseFilters(conf);
 
     Collection<WebPage.Field> parsePluginFields = parserFactory.getFields();
-    Collection<WebPage.Field> signaturePluginFields =
-      SignatureFactory.getFields(conf);
+    Collection<WebPage.Field> signaturePluginFields = SignatureFactory
+        .getFields(conf);
     Collection<WebPage.Field> htmlParsePluginFields = parseFilters.getFields();
 
     if (parsePluginFields != null) {
@@ -167,11 +170,11 @@
   }
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
-    Boolean shouldResume = (Boolean)args.get(Nutch.ARG_RESUME);
-    Boolean force = (Boolean)args.get(Nutch.ARG_FORCE);
-    
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
+    Boolean shouldResume = (Boolean) args.get(Nutch.ARG_RESUME);
+    Boolean force = (Boolean) args.get(Nutch.ARG_FORCE);
+
     if (batchId != null) {
       getConf().set(GeneratorJob.BATCH_ID, batchId);
     }
@@ -182,7 +185,7 @@
       getConf().setBoolean(FORCE_KEY, force);
     }
     currentJob = new NutchJob(getConf(), "parse");
-    
+
     Collection<WebPage.Field> fields = getFields(currentJob);
     StorageUtils.initMapperJob(currentJob, fields, String.class, WebPage.class,
         ParserMapper.class);
@@ -194,20 +197,20 @@
     return results;
   }
 
-  public int parse(String batchId, boolean shouldResume, boolean force) throws Exception {
+  public int parse(String batchId, boolean shouldResume, boolean force)
+      throws Exception {
     LOG.info("ParserJob: starting");
 
     LOG.info("ParserJob: resuming:\t" + getConf().getBoolean(RESUME_KEY, false));
-    LOG.info("ParserJob: forced reparse:\t" + getConf().getBoolean(FORCE_KEY, false));
+    LOG.info("ParserJob: forced reparse:\t"
+        + getConf().getBoolean(FORCE_KEY, false));
     if (batchId == null || batchId.equals(Nutch.ALL_BATCH_ID_STR)) {
       LOG.info("ParserJob: parsing all");
     } else {
       LOG.info("ParserJob: batchId:\t" + batchId);
     }
-    run(ToolUtil.toArgMap(
-        Nutch.ARG_BATCH, batchId,
-        Nutch.ARG_RESUME, shouldResume,
-        Nutch.ARG_FORCE, force));
+    run(ToolUtil.toArgMap(Nutch.ARG_BATCH, batchId, Nutch.ARG_RESUME,
+        shouldResume, Nutch.ARG_FORCE, force));
     LOG.info("ParserJob: success");
     return 0;
   }
@@ -218,12 +221,15 @@
     String batchId = null;
 
     if (args.length < 1) {
-      System.err.println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
+      System.err
+          .println("Usage: ParserJob (<batchId> | -all) [-crawlId <id>] [-resume] [-force]");
       System.err.println("\tbatchId\tsymbolic batch ID created by Generator");
-      System.err.println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err
+          .println("\t-crawlId <id>\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
       System.err.println("\t-all\tconsider pages from all crawl jobs");
       System.err.println("-resume\tresume a previous incomplete job");
-      System.err.println("-force\tforce re-parsing even if a page is already parsed");
+      System.err
+          .println("-force\tforce re-parsing even if a page is already parsed");
       return -1;
     }
     for (int i = 0; i < args.length; i++) {
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(working copy)
@@ -22,8 +22,8 @@
 import java.util.Properties;
 
 /**
- * This class holds the information about HTML "meta" tags extracted from 
- * a page. Some special tags have convenience methods for easy checking.
+ * This class holds the information about HTML "meta" tags extracted from a
+ * page. Some special tags have convenience methods for easy checking.
  */
 public class HTMLMetaTags {
   private boolean noIndex = false;
@@ -154,8 +154,8 @@
   }
 
   /**
-   * A convenience method. Returns the current value of <code>refreshTime</code>.
-   * The value may be invalid if {@link #getRefresh()}returns
+   * A convenience method. Returns the current value of <code>refreshTime</code>
+   * . The value may be invalid if {@link #getRefresh()}returns
    * <code>false</code>.
    */
   public int getRefreshTime() {
@@ -177,26 +177,22 @@
   public Properties getHttpEquivTags() {
     return httpEquivTags;
   }
-  
+
   public String toString() {
     StringBuffer sb = new StringBuffer();
-    sb.append("base=" + baseHref
-            + ", noCache=" + noCache
-            + ", noFollow=" + noFollow
-            + ", noIndex=" + noIndex
-            + ", refresh=" + refresh
-            + ", refreshHref=" + refreshHref + "\n"
-            );
+    sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow="
+        + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh
+        + ", refreshHref=" + refreshHref + "\n");
     sb.append(" * general tags:\n");
     Iterator<?> it = generalTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + generalTags.get(key) + "\n");
     }
     sb.append(" * http-equiv tags:\n");
     it = httpEquivTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + httpEquivTags.get(key) + "\n");
     }
     return sb.toString();
Index: src/java/org/apache/nutch/parse/ParseCallable.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseCallable.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseCallable.java	(working copy)
@@ -24,7 +24,7 @@
   private Parser p;
   private WebPage content;
   private String url;
-  
+
   public ParseCallable(Parser p, WebPage content, String url) {
     this.p = p;
     this.content = content;
@@ -34,5 +34,5 @@
   @Override
   public Parse call() throws Exception {
     return p.getParse(url, content);
-  }    
+  }
 }
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -22,9 +22,10 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-/** A parser for content generated by a {@link org.apache.nutch.protocol.Protocol}
- * implementation.  This interface is implemented by extensions.  Nutch's core
- * contains no page parsing code.
+/**
+ * A parser for content generated by a
+ * {@link org.apache.nutch.protocol.Protocol} implementation. This interface is
+ * implemented by extensions. Nutch's core contains no page parsing code.
  */
 public interface Parser extends FieldPluggable, Configurable {
   /** The name of the extension point. */
@@ -34,8 +35,9 @@
    * <p>
    * This method parses content in WebPage instance
    * </p>
-   *
-   * @param url Page's URL
+   * 
+   * @param url
+   *          Page's URL
    * @param page
    */
   Parse getParse(String url, WebPage page);
Index: src/java/org/apache/nutch/parse/ParseStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusCodes.java	(revision 1188268)
+++ src/java/org/apache/nutch/parse/ParseStatusCodes.java	(working copy)
@@ -22,44 +22,52 @@
   // Primary status codes:
 
   /** Parsing was not performed. */
-  public static final byte NOTPARSED       = 0;
+  public static final byte NOTPARSED = 0;
   /** Parsing succeeded. */
-  public static final byte SUCCESS         = 1;
+  public static final byte SUCCESS = 1;
   /** General failure. There may be a more specific error message in arguments. */
-  public static final byte FAILED          = 2;
+  public static final byte FAILED = 2;
 
-  public static final String[] majorCodes = {
-    "notparsed",
-    "success",
-    "failed"
-  };
+  public static final String[] majorCodes = { "notparsed", "success", "failed" };
 
   // Secondary success codes go here:
 
-  public static final short SUCCESS_OK                = 0;
+  public static final short SUCCESS_OK = 0;
 
-  /** Parsed content contains a directive to redirect to another URL.
-   * The target URL can be retrieved from the arguments.
+  /**
+   * Parsed content contains a directive to redirect to another URL. The target
+   * URL can be retrieved from the arguments.
    */
-  public static final short SUCCESS_REDIRECT          = 100;
+  public static final short SUCCESS_REDIRECT = 100;
 
   // Secondary failure codes go here:
 
-  /** Parsing failed. An Exception occured (which may be retrieved from the arguments). */
-  public static final short FAILED_EXCEPTION          = 200;
-  /** Parsing failed. Content was truncated, but the parser cannot handle incomplete content. */
-  public static final short FAILED_TRUNCATED          = 202;
-  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
-  public static final short FAILED_INVALID_FORMAT     = 203;
-  /** Parsing failed. Other related parts of the content are needed to complete
+  /**
+   * Parsing failed. An Exception occurred (which may be retrieved from the
+   * arguments).
+   */
+  public static final short FAILED_EXCEPTION = 200;
+  /**
+   * Parsing failed. Content was truncated, but the parser cannot handle
+   * incomplete content.
+   */
+  public static final short FAILED_TRUNCATED = 202;
+  /**
+   * Parsing failed. Invalid format - the content may be corrupted or of wrong
+   * type.
+   */
+  public static final short FAILED_INVALID_FORMAT = 203;
+  /**
+   * Parsing failed. Other related parts of the content are needed to complete
    * parsing. The list of URLs to missing parts may be provided in arguments.
    * The Fetcher may decide to fetch these parts at once, then put them into
    * Content.metadata, and supply them for re-parsing.
    */
-  public static final short FAILED_MISSING_PARTS      = 204;
-  /** Parsing failed. There was no content to be parsed - probably caused
-   * by errors at protocol stage.
+  public static final short FAILED_MISSING_PARTS = 204;
+  /**
+   * Parsing failed. There was no content to be parsed - probably caused by
+   * errors at protocol stage.
    */
-  public static final short FAILED_MISSING_CONTENT    = 205;
-  
+  public static final short FAILED_MISSING_CONTENT = 205;
+
 }
Index: src/java/org/apache/nutch/util/SuffixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/SuffixStringMatcher.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/SuffixStringMatcher.java	(working copy)
@@ -21,8 +21,8 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -43,20 +43,20 @@
    */
   public SuffixStringMatcher(Collection suffixes) {
     super();
-    Iterator iter= suffixes.iterator();
+    Iterator iter = suffixes.iterator();
     while (iter.hasNext())
-      addPatternBackward((String)iter.next());
+      addPatternBackward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} );
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/IdentityPageReducer.java
===================================================================
--- src/java/org/apache/nutch/util/IdentityPageReducer.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/IdentityPageReducer.java	(working copy)
@@ -21,12 +21,12 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.gora.mapreduce.GoraReducer;
 
-public class IdentityPageReducer
-extends GoraReducer<String, WebPage, String, WebPage> {
+public class IdentityPageReducer extends
+    GoraReducer<String, WebPage, String, WebPage> {
 
   @Override
-  protected void reduce(String key, Iterable<WebPage> values,
-      Context context) throws IOException, InterruptedException {
+  protected void reduce(String key, Iterable<WebPage> values, Context context)
+      throws IOException, InterruptedException {
     for (WebPage page : values) {
       context.write(key, page);
     }
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -27,125 +27,142 @@
 /** Utility class for URL analysis */
 public class URLUtil {
 
-  private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
+  private static Pattern IP_PATTERN = Pattern
+      .compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new URL(http://lucene.apache.org/))
    *  </code><br>
-   *  will return <br><code> apache.org</code>
-   *   */
+   * will return <br>
+   * <code> apache.org</code>
+   * */
   public static String getDomainName(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    //it seems that java returns hostnames ending with .
-    if(host.endsWith("."))
+    // it seems that java returns hostnames ending with .
+    if (host.endsWith("."))
       host = host.substring(0, host.length() - 1);
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return host;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1); 
-      if(tlds.isDomainSuffix(subCandidate)) {
-        return candidate; 
+      String subCandidate = candidate.substring(index + 1);
+      if (tlds.isDomainSuffix(subCandidate)) {
+        return candidate;
       }
       candidate = subCandidate;
     }
     return candidate;
   }
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new http://lucene.apache.org/)
    *  </code><br>
-   *  will return <br><code> apache.org</code>
+   * will return <br>
+   * <code> apache.org</code>
+   * 
    * @throws MalformedURLException
    */
   public static String getDomainName(String url) throws MalformedURLException {
     return getDomainName(new URL(url));
   }
 
-  /** Returns whether the given urls have the same domain name.
-   * As an example, <br>
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
    * <code> isSameDomain(new URL("http://lucene.apache.org")
    * , new URL("http://people.apache.org/"))
    * <br> will return true. </code>
-   *
+   * 
    * @return true if the domain names are equal
    */
   public static boolean isSameDomainName(URL url1, URL url2) {
     return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
   }
 
-  /**Returns whether the given urls have the same domain name.
-  * As an example, <br>
-  * <code> isSameDomain("http://lucene.apache.org"
-  * ,"http://people.apache.org/")
-  * <br> will return true. </code>
-  * @return true if the domain names are equal
-  * @throws MalformedURLException
-  */
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
+   * <code> isSameDomain("http://lucene.apache.org"
+   * ,"http://people.apache.org/")
+   * <br> will return true. </code>
+   * 
+   * @return true if the domain names are equal
+   * @throws MalformedURLException
+   */
   public static boolean isSameDomainName(String url1, String url2)
-    throws MalformedURLException {
+      throws MalformedURLException {
     return isSameDomainName(new URL(url1), new URL(url2));
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
   public static DomainSuffix getDomainSuffix(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return null;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1);
+      String subCandidate = candidate.substring(index + 1);
       DomainSuffix d = tlds.get(subCandidate);
-      if(d != null) {
-        return d; 
+      if (d != null) {
+        return d;
       }
       candidate = subCandidate;
     }
     return null;
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
-  public static DomainSuffix getDomainSuffix(String url) throws MalformedURLException {
+  public static DomainSuffix getDomainSuffix(String url)
+      throws MalformedURLException {
     return getDomainSuffix(new URL(url));
   }
 
-  /** Partitions of the hostname of the url by "."  */
+  /** Partitions of the hostname of the url by "." */
   public static String[] getHostSegments(URL url) {
     String host = url.getHost();
-    //return whole hostname, if it is an ipv4
-    //TODO : handle ipv6
-    if(IP_PATTERN.matcher(host).matches())
-      return new String[] {host};
+    // return whole hostname, if it is an ipv4
+    // TODO : handle ipv6
+    if (IP_PATTERN.matcher(host).matches())
+      return new String[] { host };
     return host.split("\\.");
   }
 
-  /** Partitions of the hostname of the url by "."
-   * @throws MalformedURLException */
-  public static String[] getHostSegments(String url) throws MalformedURLException {
-   return getHostSegments(new URL(url));
+  /**
+   * Partitions of the hostname of the url by "."
+   * 
+   * @throws MalformedURLException
+   */
+  public static String[] getHostSegments(String url)
+      throws MalformedURLException {
+    return getHostSegments(new URL(url));
   }
 
   /**
-   * <p>Given two urls, a src and a destination of a redirect, it returns the 
-   * representative url.<p>
+   * <p>
+   * Given two urls, a src and a destination of a redirect, it returns the
+   * representative url.
+   * <p>
    * 
-   * <p>This method implements an extended version of the algorithm used by the
+   * <p>
+   * This method implements an extended version of the algorithm used by the
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> How
@@ -153,46 +170,63 @@
    * <br>
    * <ol>
    * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the 
+   * <li>If different domains then keep the destination whether or not the
    * redirect is temp or perm</li>
-   * <ul><li>a.com -> b.com*</li></ul>
+   * <ul>
+   * <li>a.com -> b.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and the source is root, keep the source.</li>
-   * <ul><li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li></ul>
-   * <li>If the redirect is permanent and the source is not root and the 
+   * <ul>
+   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
+   * </ul>
+   * <li>If the redirect is permanent and the source is not root and the
    * destination is root, keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and neither the source nor the destination
    * is root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com/abc/page.html*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
+   * </ul>
    * <li>If the redirect is temporary and source is root and destination is not
    * root, then keep the source</li>
-   * <ul><li>*a.com -> a.com/xyz/index.html</li></ul>
+   * <ul>
+   * <li>*a.com -> a.com/xyz/index.html</li>
+   * </ul>
    * <li>If the redirect is temporary and source is not root and destination is
    * root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is temporary and neither the source or the destination
-   * is root, then keep the shortest url.  First check for the shortest host,
-   * and if both are equal then check by path.  Path is first by length then by
-   * the number of / path separators.</li>
+   * is root, then keep the shortest url. First check for the shortest host, and
+   * if both are equal then check by path. Path is first by length then by the
+   * number of / path separators.</li>
    * <ul>
    * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
    * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
    * </ul>
    * <li>If the redirect is temporary and both the source and the destination
    * are root, then keep the shortest sub-domain</li>
-   * <ul><li>*www.a.com -> www.news.a.com</li></ul>
+   * <ul>
+   * <li>*www.a.com -> www.news.a.com</li>
+   * </ul>
    * <br>
-   * While not in this logic there is a further piece of representative url 
-   * logic that occurs during indexing and after scoring.  During creation of 
-   * the basic fields before indexing, if a url has a representative url stored
-   * we check both the url and its representative url (which should never be 
-   * the same) against their linkrank scores and the highest scoring one is 
-   * kept as the url and the lower scoring one is held as the orig url inside 
-   * of the index.
+   * While not in this logic there is a further piece of representative url
+   * logic that occurs during indexing and after scoring. During creation of the
+   * basic fields before indexing, if a url has a representative url stored we
+   * check both the url and its representative url (which should never be the
+   * same) against their linkrank scores and the highest scoring one is kept as
+   * the url and the lower scoring one is held as the orig url inside of the
+   * index.
    * 
-   * @param src The source url.
-   * @param dst The destination url.
-   * @param temp Is the redirect a temporary redirect.
+   * @param src
+   *          The source url.
+   * @param dst
+   *          The destination url.
+   * @param temp
+   *          Is the redirect a temporary redirect.
    * 
    * @return String The representative url.
    */
@@ -204,8 +238,7 @@
     try {
       srcUrl = new URL(src);
       dstUrl = new URL(dst);
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return dst;
     }
 
@@ -223,27 +256,27 @@
 
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
-    //    
+    //
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
-    //      
+    //
     // 3) permanent and not root and dest root, keep dest
     // a.com/xyz/index.html -> a.com*
-    //      
+    //
     // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
-    //      
+    //
     // 5) temp and root and dest not root keep src
     // *a.com -> a.com/xyz/index.html
-    //  
+    //
     // 7) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
-    //  
+    //
     // 8) temp and neither root, keep shortest, if hosts equal by path else by
     // hosts. paths are first by length then by number of / separators
     // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
-    //  
+    //
     // 9) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
 
@@ -255,39 +288,33 @@
 
     // if it is a permanent redirect
     if (!temp) {
-      
+
       // if source is root return source, otherwise destination
       if (srcRoot) {
         return src;
-      }
-      else {
+      } else {
         return dst;
       }
-    }
-    else { // temporary redirect
+    } else { // temporary redirect
 
       // source root and destination not root
       if (srcRoot && !destRoot) {
         return src;
-      }
-      else if (!srcRoot && destRoot) { // destination root and source not
+      } else if (!srcRoot && destRoot) { // destination root and source not
         return dst;
-      }
-      else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
+      } else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
 
         // source and destination hosts are the same, check paths, host length
         int numSrcPaths = srcFile.split("/").length;
         int numDstPaths = dstFile.split("/").length;
         if (numSrcPaths != numDstPaths) {
           return (numDstPaths < numSrcPaths ? dst : src);
-        }
-        else {
+        } else {
           int srcPathLength = srcFile.length();
           int dstPathLength = dstFile.length();
           return (dstPathLength < srcPathLength ? dst : src);
         }
-      }
-      else {
+      } else {
 
         // different host names and both root take the shortest
         int numSrcSubs = srcHost.split("\\.").length;
@@ -301,24 +328,25 @@
    * Returns the lowercased hostname for the url or null if the url is not well
    * formed.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The hostname for the url.
    */
   public static String getHost(String url) {
     try {
       return new URL(url).getHost().toLowerCase();
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
 
   /**
-   * Returns the page for the url.  The page consists of the protocol, host,
-   * and path, but does not include the query string.  The host is lowercased
-   * but the path is not.
+   * Returns the page for the url. The page consists of the protocol, host, and
+   * path, but does not include the query string. The host is lowercased but the
+   * path is not.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The page for the url.
    */
   public static String getPage(String url) {
@@ -327,25 +355,23 @@
       url = url.toLowerCase();
       String queryStr = new URL(url).getQuery();
       return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
-  
+
   /** For testing */
-  public static void main(String[] args){
-    
-    if(args.length!=1) {
+  public static void main(String[] args) {
+
+    if (args.length != 1) {
       System.err.println("Usage : URLUtil <url>");
-      return ;
+      return;
     }
-    
+
     String url = args[0];
     try {
       System.out.println(URLUtil.getDomainName(new URL(url)));
-    }
-    catch (MalformedURLException ex) {
+    } catch (MalformedURLException ex) {
       ex.printStackTrace();
     }
   }
Index: src/java/org/apache/nutch/util/HadoopFSUtil.java
===================================================================
--- src/java/org/apache/nutch/util/HadoopFSUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/HadoopFSUtil.java	(working copy)
@@ -25,48 +25,48 @@
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-        };
+    };
+  }
+
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
     }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
+    }
+    return res;
+  }
 
 }
Index: src/java/org/apache/nutch/util/StringUtil.java
===================================================================
--- src/java/org/apache/nutch/util/StringUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/StringUtil.java	(working copy)
@@ -18,42 +18,42 @@
 package org.apache.nutch.util;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that its
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -63,37 +63,48 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(buf.length * 2);
     for (int i = 0; i < buf.length; i++) {
       int b = buf[i];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (i > 0 && (i % lineLen) == 0) res.append('\n');
-      else if (sep != null && i < lineLen - 1) res.append(sep); 
+      if (i > 0 && (i % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && i < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be prepended to the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -101,12 +112,13 @@
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -129,8 +141,8 @@
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }
Index: src/java/org/apache/nutch/util/WebPageWritable.java
===================================================================
--- src/java/org/apache/nutch/util/WebPageWritable.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/WebPageWritable.java	(working copy)
@@ -26,8 +26,7 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.gora.util.IOUtils;
 
-public class WebPageWritable extends Configured
-implements Writable {
+public class WebPageWritable extends Configured implements Writable {
 
   private WebPage webPage;
 
Index: src/java/org/apache/nutch/util/CommandRunner.java
===================================================================
--- src/java/org/apache/nutch/util/CommandRunner.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/CommandRunner.java	(working copy)
@@ -82,11 +82,11 @@
   }
 
   public void evaluate() throws IOException {
-      this.exec();
+    this.exec();
   }
 
   /**
-   *
+   * 
    * @return process exit value (return code) or -1 if timed out.
    * @throws IOException
    */
@@ -94,13 +94,11 @@
     Process proc = Runtime.getRuntime().exec(_command);
     _barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
 
-    PullerThread so =
-      new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+    PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
     so.setDaemon(true);
     so.start();
 
-    PullerThread se =
-      new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+    PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
     se.setDaemon(true);
     se.start();
 
@@ -145,11 +143,11 @@
             Thread.sleep(1000);
             _xit = proc.exitValue();
           } catch (InterruptedException ie) {
-              if (Thread.interrupted()) {
-                  break; // stop waiting on an interrupt for this thread
-              } else {
-                  continue;
-              }
+            if (Thread.interrupted()) {
+              break; // stop waiting on an interrupt for this thread
+            } else {
+              continue;
+            }
           } catch (IllegalThreadStateException iltse) {
             continue;
           }
@@ -181,11 +179,8 @@
 
     private boolean _closeInput;
 
-    protected PumperThread(
-      String name,
-      InputStream is,
-      OutputStream os,
-      boolean closeInput) {
+    protected PumperThread(String name, InputStream is, OutputStream os,
+        boolean closeInput) {
       super(name);
       _is = is;
       _os = os;
@@ -218,12 +213,12 @@
         }
       }
       try {
-         _barrier.await();
-       } catch (InterruptedException ie) {
-         /* IGNORE */
-       } catch (BrokenBarrierException bbe) {
-         /* IGNORE */
-       }
+        _barrier.await();
+      } catch (InterruptedException ie) {
+        /* IGNORE */
+      } catch (BrokenBarrierException bbe) {
+        /* IGNORE */
+      }
     }
   }
 
@@ -269,8 +264,8 @@
 
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-timeout")) {
-        timeout = Integer.parseInt(args[++i]);;
-      } else if (i != args.length-2) {
+        timeout = Integer.parseInt(args[++i]);
+      } else if (i != args.length - 2) {
         System.err.println(usage);
         System.exit(-1);
       } else {
@@ -290,6 +286,6 @@
 
     cr.evaluate();
 
-    System.err.println("output value: "+cr.getExitValue());
+    System.err.println("output value: " + cr.getExitValue());
   }
 }
Index: src/java/org/apache/nutch/util/NutchConfiguration.java
===================================================================
--- src/java/org/apache/nutch/util/NutchConfiguration.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/NutchConfiguration.java	(working copy)
@@ -23,37 +23,42 @@
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first <code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@
     return conf;
   }
 }
-
Index: src/java/org/apache/nutch/util/ToolUtil.java
===================================================================
--- src/java/org/apache/nutch/util/ToolUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/ToolUtil.java	(working copy)
@@ -28,14 +28,14 @@
 
 public class ToolUtil {
 
-  public static final Map<String,Object> toArgMap(Object... args) {
+  public static final Map<String, Object> toArgMap(Object... args) {
     if (args == null) {
       return null;
     }
     if (args.length % 2 != 0) {
       throw new RuntimeException("expected pairs of argName argValue");
     }
-    HashMap<String,Object> res = new HashMap<String,Object>();
+    HashMap<String, Object> res = new HashMap<String, Object>();
     for (int i = 0; i < args.length; i += 2) {
       if (args[i + 1] != null) {
         res.put(String.valueOf(args[i]), args[i + 1]);
@@ -43,19 +43,21 @@
     }
     return res;
   }
-  
-  public static final void recordJobStatus(String label, Job job, Map<String,Object> results) {
-    Map<String,Object> jobs = (Map<String,Object>)results.get(Nutch.STAT_JOBS);
+
+  public static final void recordJobStatus(String label, Job job,
+      Map<String, Object> results) {
+    Map<String, Object> jobs = (Map<String, Object>) results
+        .get(Nutch.STAT_JOBS);
     if (jobs == null) {
-      jobs = new LinkedHashMap<String,Object>();
+      jobs = new LinkedHashMap<String, Object>();
       results.put(Nutch.STAT_JOBS, jobs);
     }
-    Map<String,Object> stats = new HashMap<String,Object>();
-    Map<String,Object> countStats = new HashMap<String,Object>();
+    Map<String, Object> stats = new HashMap<String, Object>();
+    Map<String, Object> countStats = new HashMap<String, Object>();
     try {
       Counters counters = job.getCounters();
       for (CounterGroup cg : counters) {
-        Map<String,Object> cnts = new HashMap<String,Object>();
+        Map<String, Object> cnts = new HashMap<String, Object>();
         countStats.put(cg.getDisplayName(), cnts);
         for (Counter c : cg) {
           cnts.put(c.getName(), c.getValue());
Index: src/java/org/apache/nutch/util/NutchJob.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/NutchJob.java	(working copy)
@@ -24,7 +24,7 @@
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.nutch.metadata.Nutch;
 
-/** A {@link Job} for Nutch jobs.  */
+/** A {@link Job} for Nutch jobs. */
 public class NutchJob extends Job {
 
   public NutchJob(Configuration conf) throws IOException {
Index: src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DomUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/DomUtil.java	(working copy)
@@ -38,7 +38,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class DomUtil {
 
   private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,10 +60,10 @@
       input.setEncoding("UTF-8");
       parser.parse(input);
       int i = 0;
-      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
-       i++;
-      } 
-      element = (Element)parser.getDocument().getChildNodes().item(i);
+      while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element) parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
       e.printStackTrace(LogUtil.getWarnStream(LOG));
     } catch (SAXException e) {
Index: src/java/org/apache/nutch/util/LogUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LogUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/LogUtil.java	(working copy)
@@ -26,10 +26,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
  * Utility class for logging.
- *
+ * 
  * @author J&eacute;r&ocirc;me Charron
  */
 public class LogUtil {
@@ -38,27 +37,26 @@
 
   private static Method TRACE = null;
   private static Method DEBUG = null;
-  private static Method INFO  = null;
-  private static Method WARN  = null;
+  private static Method INFO = null;
+  private static Method WARN = null;
   private static Method ERROR = null;
   private static Method FATAL = null;
-  
+
   static {
     try {
       TRACE = Logger.class.getMethod("trace", new Class[] { String.class });
       DEBUG = Logger.class.getMethod("debug", new Class[] { String.class });
-      INFO  = Logger.class.getMethod("info",  new Class[] { String.class });
-      WARN  = Logger.class.getMethod("warn",  new Class[] { String.class });
+      INFO = Logger.class.getMethod("info", new Class[] { String.class });
+      WARN = Logger.class.getMethod("warn", new Class[] { String.class });
       ERROR = Logger.class.getMethod("error", new Class[] { String.class });
       FATAL = Logger.class.getMethod("error", new Class[] { String.class });
-    } catch(Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Cannot init log methods", e);
       }
     }
   }
-  
-  
+
   public static PrintStream getTraceStream(final Logger logger) {
     return getLogStream(logger, TRACE);
   }
@@ -70,7 +68,7 @@
   public static PrintStream getInfoStream(final Logger logger) {
     return getLogStream(logger, INFO);
   }
-  
+
   public static PrintStream getWarnStream(final Logger logger) {
     return getLogStream(logger, WARN);
   }
@@ -82,34 +80,35 @@
   public static PrintStream getFatalStream(final Logger logger) {
     return getLogStream(logger, FATAL);
   }
-  
+
   /** Returns a stream that, when written to, adds log lines. */
-  private static PrintStream getLogStream(final Logger logger, final Method method) {
+  private static PrintStream getLogStream(final Logger logger,
+      final Method method) {
     return new PrintStream(new ByteArrayOutputStream() {
-        private int scan = 0;
+      private int scan = 0;
 
-        private boolean hasNewline() {
-          for (; scan < count; scan++) {
-            if (buf[scan] == '\n')
-              return true;
-          }
-          return false;
+      private boolean hasNewline() {
+        for (; scan < count; scan++) {
+          if (buf[scan] == '\n')
+            return true;
         }
+        return false;
+      }
 
-        public void flush() throws IOException {
-          if (!hasNewline())
-            return;
-          try {
-            method.invoke(logger, new Object[] { toString().trim() });
-          } catch (Exception e) {
-            if (LOG.isErrorEnabled()) {
-              LOG.error("Cannot log with method [" + method + "]", e);
-            }
+      public void flush() throws IOException {
+        if (!hasNewline())
+          return;
+        try {
+          method.invoke(logger, new Object[] { toString().trim() });
+        } catch (Exception e) {
+          if (LOG.isErrorEnabled()) {
+            LOG.error("Cannot log with method [" + method + "]", e);
           }
-          reset();
-          scan = 0;
         }
-      }, true);
+        reset();
+        scan = 0;
+      }
+    }, true);
   }
 
 }
Index: src/java/org/apache/nutch/util/NutchTool.java
===================================================================
--- src/java/org/apache/nutch/util/NutchTool.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/NutchTool.java	(working copy)
@@ -26,19 +26,20 @@
 import org.apache.nutch.metadata.Nutch;
 
 public abstract class NutchTool extends Configured {
-  
-  protected HashMap<String,Object> results = new HashMap<String,Object>();
-  protected Map<String,Object> status =
-    Collections.synchronizedMap(new HashMap<String,Object>());
+
+  protected HashMap<String, Object> results = new HashMap<String, Object>();
+  protected Map<String, Object> status = Collections
+      .synchronizedMap(new HashMap<String, Object>());
   protected Job currentJob;
   protected int numJobs;
   protected int currentJobNum;
-  
-  /** Runs the tool, using a map of arguments.
-   * May return results, or null.
+
+  /**
+   * Runs the tool, using a map of arguments. May return results, or null.
    */
-  public abstract Map<String,Object> run(Map<String,Object> args) throws Exception;
-  
+  public abstract Map<String, Object> run(Map<String, Object> args)
+      throws Exception;
+
   /** Returns relative progress of the tool, a float in range [0,1]. */
   public float getProgress() {
     float res = 0;
@@ -55,29 +56,31 @@
     }
     // take into account multiple jobs
     if (numJobs > 1) {
-      res = (currentJobNum + res) / (float)numJobs;
+      res = (currentJobNum + res) / (float) numJobs;
     }
     status.put(Nutch.STAT_PROGRESS, res);
     return res;
   }
-  
-  
+
   /** Returns current status of the running tool. */
-  public Map<String,Object> getStatus() {
+  public Map<String, Object> getStatus() {
     return status;
   }
-  
-  /** Stop the job with the possibility to resume. Subclasses should
-   * override this, since by default it calls {@link #killJob()}.
+
+  /**
+   * Stop the job with the possibility to resume. Subclasses should override
+   * this, since by default it calls {@link #killJob()}.
+   * 
    * @return true if succeeded, false otherwise
    */
   public boolean stopJob() throws Exception {
     return killJob();
   }
-  
+
   /**
-   * Kill the job immediately. Clients should assume that any results
-   * that the job produced so far are in inconsistent state or missing.
+   * Kill the job immediately. Clients should assume that any results that the
+   * job produced so far are in inconsistent state or missing.
+   * 
    * @return true if succeeded, false otherwise.
    * @throws Exception
    */
Index: src/java/org/apache/nutch/util/EncodingDetector.java
===================================================================
--- src/java/org/apache/nutch/util/EncodingDetector.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/EncodingDetector.java	(working copy)
@@ -35,27 +35,26 @@
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@
 
     @Override
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence));
     }
   }
 
-  public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -168,7 +164,7 @@
   }
 
   private void autoDetectClues(ByteBuffer dataBuffer, Utf8 typeUtf8,
-                               String encoding, boolean filter) {
+      String encoding, boolean filter) {
     byte[] data = dataBuffer.array();
     String type = TableUtil.toString(typeUtf8);
 
@@ -216,12 +212,14 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param row URL's row
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param row
+   *          URL's row
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(WebPage page, String defaultValue) {
@@ -232,33 +230,33 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param baseUrl Base URL
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param baseUrl
+   *          Base URL
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   private String guessEncoding(String baseUrl, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
-
     if (LOG.isTraceEnabled()) {
       findDisagreements(baseUrl, clues);
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -270,8 +268,8 @@
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(baseUrl + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(baseUrl + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -291,10 +289,10 @@
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -316,9 +314,9 @@
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -333,7 +331,7 @@
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -341,14 +339,14 @@
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
-   * @param contentType a content type header
+   * Parse the character encoding from the specified content type header. If the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
+   * @param contentType
+   *          a content type header
    */
   public static String parseCharacterEncoding(Utf8 contentTypeUtf8) {
     if (contentTypeUtf8 == null)
@@ -363,51 +361,36 @@
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
   }
 
-  /*public static void main(String[] args) throws IOException {
-    if (args.length != 1) {
-      System.err.println("Usage: EncodingDetector <file>");
-      System.exit(1);
-    }
+  /*
+   * public static void main(String[] args) throws IOException { if (args.length
+   * != 1) { System.err.println("Usage: EncodingDetector <file>");
+   * System.exit(1); }
+   * 
+   * Configuration conf = NutchConfiguration.create(); EncodingDetector detector
+   * = new EncodingDetector(NutchConfiguration.create());
+   * 
+   * // do everything as bytes; don't want any conversion BufferedInputStream
+   * istr = new BufferedInputStream(new FileInputStream(args[0]));
+   * ByteArrayOutputStream ostr = new ByteArrayOutputStream(); byte[] bytes =
+   * new byte[1000]; boolean more = true; while (more) { int len =
+   * istr.read(bytes); if (len < bytes.length) { more = false; if (len > 0) {
+   * ostr.write(bytes, 0, len); } } else { ostr.write(bytes); } }
+   * 
+   * byte[] data = ostr.toByteArray(); MimeUtil mimeTypes = new MimeUtil(conf);
+   * 
+   * // make a fake Content Content content = new Content("", "", data,
+   * "text/html", new Metadata(), mimeTypes);
+   * 
+   * detector.autoDetectClues(content, true); String encoding =
+   * detector.guessEncoding(content,
+   * conf.get("parser.character.encoding.default"));
+   * System.out.println("Guessed encoding: " + encoding); }
+   */
 
-    Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
-
-    // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
-    ByteArrayOutputStream ostr = new ByteArrayOutputStream();
-    byte[] bytes = new byte[1000];
-    boolean more = true;
-    while (more) {
-      int len = istr.read(bytes);
-      if (len < bytes.length) {
-        more = false;
-        if (len > 0) {
-          ostr.write(bytes, 0, len);
-        }
-      } else {
-        ostr.write(bytes);
-      }
-    }
-
-    byte[] data = ostr.toByteArray();
-    MimeUtil mimeTypes = new MimeUtil(conf);
-
-    // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), mimeTypes);
-
-    detector.autoDetectClues(content, true);
-    String encoding = detector.guessEncoding(content,
-        conf.get("parser.character.encoding.default"));
-    System.out.println("Guessed encoding: " + encoding);
-  }*/
-
 }
Index: src/java/org/apache/nutch/util/NutchJobConf.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJobConf.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/NutchJobConf.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJobConf extends JobConf {
 
   public NutchJobConf(Configuration conf) {
@@ -28,4 +28,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -35,12 +35,12 @@
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -53,7 +53,8 @@
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     ObjectCache objectCache = ObjectCache.get(conf);
@@ -61,25 +62,26 @@
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -115,13 +117,13 @@
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
-   * found, then that mime type is used, otherwise {@link URL} resolution is
-   * used to try and determine the mime type. If that means is unsuccessful, and
-   * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
+   * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise {@link URL} resolution is used to try and
+   * determine the mime type. If that means is unsuccessful, and if
+   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration}, then
+   * mime type magic resolution is used to try and obtain a
    * better-than-the-default approximation of the {@link MimeType}.
    * 
    * @param typeName
@@ -138,8 +140,7 @@
 
     try {
       cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
-          .forName(MimeUtil.cleanMimeType(typeName)).getName()
-          : null;
+          .forName(MimeUtil.cleanMimeType(typeName)).getName() : null;
     } catch (MimeTypeException mte) {
       // Seems to be a malformed mime type name...
     }
@@ -167,9 +168,10 @@
     // returned by the magic
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
-      if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
-          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
-          && type != null && !type.getName().equals(magicType.getName())) {
+      if (magicType != null
+          && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT) && type != null
+          && !type.getName().equals(magicType.getName())) {
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
         type = magicType;
@@ -195,8 +197,8 @@
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given Document
+   *         url in string form.
    */
   public MimeType getMimeType(String url) {
     return this.mimeTypes.getMimeType(url);
@@ -208,8 +210,8 @@
    * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, or
+   *         null otherwise.
    */
   public MimeType forName(String name) {
     try {
Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
  */
 public class DeflateUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
   private static final int EXPECTED_COMPRESSION_RATIO = 5;
   private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an inflated copy of the input array.  If the deflated 
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an inflated copy of the input array. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in) {
     return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,38 +47,37 @@
 
   /**
    * Returns an inflated copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the deflated input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
     // "true" because HTTP does not provide zlib headers
     Inflater inflater = new Inflater(true);
-    InflaterInputStream inStream = 
-      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in), inflater);
 
     byte[] buf = new byte[BUF_SIZE];
     int written = 0;
     while (true) {
       try {
-	int size = inStream.read(buf);
-	if (size <= 0) 
-	  break;
-	if ((written + size) > sizeLimit) {
-	  outStream.write(buf, 0, sizeLimit - written);
-	  break;
-	}
-	outStream.write(buf, 0, size);
-	written+= size;
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        if ((written + size) > sizeLimit) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
       } catch (Exception e) {
-	LOG.info( "Caught Exception in inflateBestEffort" );
+        LOG.info("Caught Exception in inflateBestEffort");
         e.printStackTrace(LogUtil.getWarnStream(LOG));
-	break;
+        break;
       }
     }
     try {
@@ -90,23 +88,24 @@
     return outStream.toByteArray();
   }
 
-
   /**
-   * Returns an inflated copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an inflated copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] inflate(byte[] in) throws IOException {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    InflaterInputStream inStream = 
-      new InflaterInputStream ( new ByteArrayInputStream(in) );
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -119,9 +118,9 @@
    * Returns a deflated copy of the input array.
    */
   public static final byte[] deflate(byte[] in) {
-    // compress using DeflaterOutputStream 
-    ByteArrayOutputStream byteOut = 
-      new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+    // compress using DeflaterOutputStream
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+        / EXPECTED_COMPRESSION_RATIO);
 
     DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
 
Index: src/java/org/apache/nutch/util/TableUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TableUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/TableUtil.java	(working copy)
@@ -32,7 +32,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:8983:http/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -49,7 +49,7 @@
    * <p>
    * E.g. "http://bar.foo.com:8983/to/index.html?a=b" becomes
    * "com.foo.bar:http:8983/to/index.html?a=b".
-   *
+   * 
    * @param url
    *          url to be reversed
    * @return Reversed url
@@ -109,7 +109,7 @@
   /**
    * Given a reversed url, returns the reversed host E.g
    * "com.foo.bar:http:8983/to/index.html?a=b" -> "com.foo.bar"
-   *
+   * 
    * @param reversedUrl
    *          Reversed url
    * @return Reversed host
@@ -128,7 +128,7 @@
 
   /**
    * Convert given Utf8 instance to String
-   *
+   * 
    * @param utf8
    *          Utf8 object
    * @return string-ifed Utf8 object or null if Utf8 instance is null
Index: src/java/org/apache/nutch/util/TimingUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TimingUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/TimingUtil.java	(working copy)
@@ -21,35 +21,39 @@
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y
+   *         minutes and Z seconds or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
+    }
 
-        long[] elapsedTime = new long[TIME_FACTOR.length];
+    long[] elapsedTime = new long[TIME_FACTOR.length];
 
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
 
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
     }
+    return buf.toString();
+  }
 }
Index: src/java/org/apache/nutch/util/LockUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LockUtil.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/LockUtil.java	(working copy)
@@ -28,22 +28,29 @@
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }
Index: src/java/org/apache/nutch/util/Bytes.java
===================================================================
--- src/java/org/apache/nutch/util/Bytes.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/Bytes.java	(working copy)
@@ -42,1396 +42,1384 @@
  */
 public class Bytes {
 
-	private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
+  private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
 
-	/** When we encode strings, we always specify UTF8 encoding */
-	public static final String UTF8_ENCODING = "UTF-8";
+  /** When we encode strings, we always specify UTF8 encoding */
+  public static final String UTF8_ENCODING = "UTF-8";
 
-	/**
-	 * An empty instance.
-	 */
-	public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+  /**
+   * An empty instance.
+   */
+  public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
 
-	/**
-	 * Size of boolean in bytes
-	 */
-	public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
+  /**
+   * Size of boolean in bytes
+   */
+  public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of byte in bytes
-	 */
-	public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
+  /**
+   * Size of byte in bytes
+   */
+  public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
 
-	/**
-	 * Size of char in bytes
-	 */
-	public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
+  /**
+   * Size of char in bytes
+   */
+  public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of double in bytes
-	 */
-	public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
+  /**
+   * Size of double in bytes
+   */
+  public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of float in bytes
-	 */
-	public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
+  /**
+   * Size of float in bytes
+   */
+  public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of int in bytes
-	 */
-	public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
+  /**
+   * Size of int in bytes
+   */
+  public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of long in bytes
-	 */
-	public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
+  /**
+   * Size of long in bytes
+   */
+  public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
 
-	/**
-	 * Size of short in bytes
-	 */
-	public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
+  /**
+   * Size of short in bytes
+   */
+  public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
 
-	/**
-	 * Estimate of size cost to pay beyond payload in jvm for instance of byte
-	 * []. Estimate based on study of jhat and jprofiler numbers.
-	 */
-	// JHat says BU is 56 bytes.
-	// SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
-	public static final int ESTIMATED_HEAP_TAX = 16;
+  /**
+   * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
+   * Estimate based on study of jhat and jprofiler numbers.
+   */
+  // JHat says BU is 56 bytes.
+  // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
+  public static final int ESTIMATED_HEAP_TAX = 16;
 
-	/**
-	 * Byte array comparator class.
-	 */
-	public static class ByteArrayComparator implements RawComparator<byte[]> {
-		/**
-		 * Constructor
-		 */
-		public ByteArrayComparator() {
-			super();
-		}
+  /**
+   * Byte array comparator class.
+   */
+  public static class ByteArrayComparator implements RawComparator<byte[]> {
+    /**
+     * Constructor
+     */
+    public ByteArrayComparator() {
+      super();
+    }
 
-		public int compare(byte[] left, byte[] right) {
-			return compareTo(left, right);
-		}
+    public int compare(byte[] left, byte[] right) {
+      return compareTo(left, right);
+    }
 
-		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-			return compareTo(b1, s1, l1, b2, s2, l2);
-		}
-	}
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      return compareTo(b1, s1, l1, b2, s2, l2);
+    }
+  }
 
-	/**
-	 * Pass this to TreeMaps where byte [] are keys.
-	 */
-	public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
+  /**
+   * Pass this to TreeMaps where byte [] are keys.
+   */
+  public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Use comparing byte arrays, byte-by-byte
-	 */
-	public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
+  /**
+   * Use comparing byte arrays, byte-by-byte
+   */
+  public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 * @throws IOException
-	 *             e
-	 */
-	public static byte[] readByteArray(final DataInput in) throws IOException {
-		int len = WritableUtils.readVInt(in);
-		if (len < 0) {
-			throw new NegativeArraySizeException(Integer.toString(len));
-		}
-		byte[] result = new byte[len];
-		in.readFully(result, 0, len);
-		return result;
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   * @throws IOException
+   *           e
+   */
+  public static byte[] readByteArray(final DataInput in) throws IOException {
+    int len = WritableUtils.readVInt(in);
+    if (len < 0) {
+      throw new NegativeArraySizeException(Integer.toString(len));
+    }
+    byte[] result = new byte[len];
+    in.readFully(result, 0, len);
+    return result;
+  }
 
-	/**
-	 * Read byte-array written with a WritableableUtils.vint prefix. IOException
-	 * is converted to a RuntimeException.
-	 * 
-	 * @param in
-	 *            Input to read from.
-	 * @return byte array read off <code>in</code>
-	 */
-	public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
-		try {
-			return readByteArray(in);
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
-	}
+  /**
+   * Read byte-array written with a WritableableUtils.vint prefix. IOException
+   * is converted to a RuntimeException.
+   * 
+   * @param in
+   *          Input to read from.
+   * @return byte array read off <code>in</code>
+   */
+  public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
+    try {
+      return readByteArray(in);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
 
-	/**
-	 * Write byte-array with a WritableableUtils.vint prefix.
-	 * 
-	 * @param out
-	 *            output stream to be written to
-	 * @param b
-	 *            array to write
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b)
-			throws IOException {
-		if (b == null) {
-			WritableUtils.writeVInt(out, 0);
-		} else {
-			writeByteArray(out, b, 0, b.length);
-		}
-	}
+  /**
+   * Write byte-array with a WritableableUtils.vint prefix.
+   * 
+   * @param out
+   *          output stream to be written to
+   * @param b
+   *          array to write
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b)
+      throws IOException {
+    if (b == null) {
+      WritableUtils.writeVInt(out, 0);
+    } else {
+      writeByteArray(out, b, 0, b.length);
+    }
+  }
 
-	/**
-	 * Write byte-array to out with a vint length prefix.
-	 * 
-	 * @param out
-	 *            output stream
-	 * @param b
-	 *            array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length past offset
-	 * @throws IOException
-	 *             e
-	 */
-	public static void writeByteArray(final DataOutput out, final byte[] b,
-			final int offset, final int length) throws IOException {
-		WritableUtils.writeVInt(out, length);
-		out.write(b, offset, length);
-	}
+  /**
+   * Write byte-array to out with a vint length prefix.
+   * 
+   * @param out
+   *          output stream
+   * @param b
+   *          array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length past offset
+   * @throws IOException
+   *           e
+   */
+  public static void writeByteArray(final DataOutput out, final byte[] b,
+      final int offset, final int length) throws IOException {
+    WritableUtils.writeVInt(out, length);
+    out.write(b, offset, length);
+  }
 
-	/**
-	 * Write byte-array from src to tgt with a vint length prefix.
-	 * 
-	 * @param tgt
-	 *            target array
-	 * @param tgtOffset
-	 *            offset into target array
-	 * @param src
-	 *            source array
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return New offset in src array.
-	 */
-	public static int writeByteArray(final byte[] tgt, final int tgtOffset,
-			final byte[] src, final int srcOffset, final int srcLength) {
-		byte[] vint = vintToBytes(srcLength);
-		System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
-		int offset = tgtOffset + vint.length;
-		System.arraycopy(src, srcOffset, tgt, offset, srcLength);
-		return offset + srcLength;
-	}
+  /**
+   * Write byte-array from src to tgt with a vint length prefix.
+   * 
+   * @param tgt
+   *          target array
+   * @param tgtOffset
+   *          offset into target array
+   * @param src
+   *          source array
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return New offset in src array.
+   */
+  public static int writeByteArray(final byte[] tgt, final int tgtOffset,
+      final byte[] src, final int srcOffset, final int srcLength) {
+    byte[] vint = vintToBytes(srcLength);
+    System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
+    int offset = tgtOffset + vint.length;
+    System.arraycopy(src, srcOffset, tgt, offset, srcLength);
+    return offset + srcLength;
+  }
 
-	/**
-	 * Put bytes at the specified byte array position.
-	 * 
-	 * @param tgtBytes
-	 *            the byte array
-	 * @param tgtOffset
-	 *            position in the array
-	 * @param srcBytes
-	 *            array to write out
-	 * @param srcOffset
-	 *            source offset
-	 * @param srcLength
-	 *            source length
-	 * @return incremented offset
-	 */
-	public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
-			int srcOffset, int srcLength) {
-		System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
-		return tgtOffset + srcLength;
-	}
+  /**
+   * Put bytes at the specified byte array position.
+   * 
+   * @param tgtBytes
+   *          the byte array
+   * @param tgtOffset
+   *          position in the array
+   * @param srcBytes
+   *          array to write out
+   * @param srcOffset
+   *          source offset
+   * @param srcLength
+   *          source length
+   * @return incremented offset
+   */
+  public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
+      int srcOffset, int srcLength) {
+    System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
+    return tgtOffset + srcLength;
+  }
 
-	/**
-	 * Write a single byte out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param b
-	 *            byte to write out
-	 * @return incremented offset
-	 */
-	public static int putByte(byte[] bytes, int offset, byte b) {
-		bytes[offset] = b;
-		return offset + 1;
-	}
+  /**
+   * Write a single byte out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param b
+   *          byte to write out
+   * @return incremented offset
+   */
+  public static int putByte(byte[] bytes, int offset, byte b) {
+    bytes[offset] = b;
+    return offset + 1;
+  }
 
-	/**
-	 * Returns a new byte array, copied from the passed ByteBuffer.
-	 * 
-	 * @param bb
-	 *            A ByteBuffer
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(ByteBuffer bb) {
-		int length = bb.limit();
-		byte[] result = new byte[length];
-		System.arraycopy(bb.array(), bb.arrayOffset(), result, 0, length);
-		return result;
-	}
+  /**
+   * Returns a new byte array, copied from the passed ByteBuffer.
+   * 
+   * @param bb
+   *          A ByteBuffer
+   * @return the byte array
+   */
+  public static byte[] toBytes(ByteBuffer bb) {
+    int length = bb.limit();
+    byte[] result = new byte[length];
+    System.arraycopy(bb.array(), bb.arrayOffset(), result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @return String made from <code>b</code>
-	 */
-	public static String toString(final byte[] b) {
-		if (b == null) {
-			return null;
-		}
-		return toString(b, 0, b.length);
-	}
+  /**
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @return String made from <code>b</code>
+   */
+  public static String toString(final byte[] b) {
+    if (b == null) {
+      return null;
+    }
+    return toString(b, 0, b.length);
+  }
 
-	/**
-	 * Joins two byte arrays together using a separator.
-	 * 
-	 * @param b1
-	 *            The first byte array.
-	 * @param sep
-	 *            The separator to use.
-	 * @param b2
-	 *            The second byte array.
-	 */
-	public static String toString(final byte[] b1, String sep, final byte[] b2) {
-		return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
-	}
+  /**
+   * Joins two byte arrays together using a separator.
+   * 
+   * @param b1
+   *          The first byte array.
+   * @param sep
+   *          The separator to use.
+   * @param b2
+   *          The second byte array.
+   */
+  public static String toString(final byte[] b1, String sep, final byte[] b2) {
+    return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
+  }
 
-	/**
-	 * This method will convert utf8 encoded bytes into a string. If an
-	 * UnsupportedEncodingException occurs, this method will eat it and return
-	 * null instead.
-	 * 
-	 * @param b
-	 *            Presumed UTF-8 encoded byte array.
-	 * @param off
-	 *            offset into array
-	 * @param len
-	 *            length of utf-8 sequence
-	 * @return String made from <code>b</code> or null
-	 */
-	public static String toString(final byte[] b, int off, int len) {
-		if (b == null) {
-			return null;
-		}
-		if (len == 0) {
-			return "";
-		}
-		try {
-			return new String(b, off, len, UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * This method will convert utf8 encoded bytes into a string. If an
+   * UnsupportedEncodingException occurs, this method will eat it and return
+   * null instead.
+   * 
+   * @param b
+   *          Presumed UTF-8 encoded byte array.
+   * @param off
+   *          offset into array
+   * @param len
+   *          length of utf-8 sequence
+   * @return String made from <code>b</code> or null
+   */
+  public static String toString(final byte[] b, int off, int len) {
+    if (b == null) {
+      return null;
+    }
+    if (len == 0) {
+      return "";
+    }
+    try {
+      return new String(b, off, len, UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-	/**
-	 * Write a printable representation of a byte array.
-	 * 
-	 * @param b
-	 *            byte array
-	 * @return string
-	 * @see #toStringBinary(byte[], int, int)
-	 */
-	public static String toStringBinary(final byte[] b) {
-		return toStringBinary(b, 0, b.length);
-	}
+  /**
+   * Write a printable representation of a byte array.
+   * 
+   * @param b
+   *          byte array
+   * @return string
+   * @see #toStringBinary(byte[], int, int)
+   */
+  public static String toStringBinary(final byte[] b) {
+    return toStringBinary(b, 0, b.length);
+  }
 
-	/**
-	 * Write a printable representation of a byte array. Non-printable
-	 * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
-	 * 
-	 * @param b
-	 *            array to write out
-	 * @param off
-	 *            offset to start at
-	 * @param len
-	 *            length to write
-	 * @return string output
-	 */
-	public static String toStringBinary(final byte[] b, int off, int len) {
-		StringBuilder result = new StringBuilder();
-		try {
-			String first = new String(b, off, len, "ISO-8859-1");
-			for (int i = 0; i < first.length(); ++i) {
-				int ch = first.charAt(i) & 0xFF;
-				if ((ch >= '0' && ch <= '9')
-						|| (ch >= 'A' && ch <= 'Z')
-						|| (ch >= 'a' && ch <= 'z')
-						|| " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
-					result.append(first.charAt(i));
-				} else {
-					result.append(String.format("\\x%02X", ch));
-				}
-			}
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("ISO-8859-1 not supported?", e);
-		}
-		return result.toString();
-	}
+  /**
+   * Write a printable representation of a byte array. Non-printable characters
+   * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+   * 
+   * @param b
+   *          array to write out
+   * @param off
+   *          offset to start at
+   * @param len
+   *          length to write
+   * @return string output
+   */
+  public static String toStringBinary(final byte[] b, int off, int len) {
+    StringBuilder result = new StringBuilder();
+    try {
+      String first = new String(b, off, len, "ISO-8859-1");
+      for (int i = 0; i < first.length(); ++i) {
+        int ch = first.charAt(i) & 0xFF;
+        if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
+            || (ch >= 'a' && ch <= 'z')
+            || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
+          result.append(first.charAt(i));
+        } else {
+          result.append(String.format("\\x%02X", ch));
+        }
+      }
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("ISO-8859-1 not supported?", e);
+    }
+    return result.toString();
+  }
 
-	private static boolean isHexDigit(char c) {
-		return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
-	}
+  private static boolean isHexDigit(char c) {
+    return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
+  }
 
-	/**
-	 * Takes a ASCII digit in the range A-F0-9 and returns the corresponding
-	 * integer/ordinal value.
-	 * 
-	 * @param ch
-	 *            The hex digit.
-	 * @return The converted hex value as a byte.
-	 */
-	public static byte toBinaryFromHex(byte ch) {
-		if (ch >= 'A' && ch <= 'F')
-			return (byte) ((byte) 10 + (byte) (ch - 'A'));
-		// else
-		return (byte) (ch - '0');
-	}
+  /**
+   * Takes an ASCII digit in the range A-F0-9 and returns the corresponding
+   * integer/ordinal value.
+   * 
+   * @param ch
+   *          The hex digit.
+   * @return The converted hex value as a byte.
+   */
+  public static byte toBinaryFromHex(byte ch) {
+    if (ch >= 'A' && ch <= 'F')
+      return (byte) ((byte) 10 + (byte) (ch - 'A'));
+    // else
+    return (byte) (ch - '0');
+  }
 
-	public static byte[] toBytesBinary(String in) {
-		// this may be bigger than we need, but lets be safe.
-		byte[] b = new byte[in.length()];
-		int size = 0;
-		for (int i = 0; i < in.length(); ++i) {
-			char ch = in.charAt(i);
-			if (ch == '\\') {
-				// begin hex escape:
-				char next = in.charAt(i + 1);
-				if (next != 'x') {
-					// invalid escape sequence, ignore this one.
-					b[size++] = (byte) ch;
-					continue;
-				}
-				// ok, take next 2 hex digits.
-				char hd1 = in.charAt(i + 2);
-				char hd2 = in.charAt(i + 3);
+  public static byte[] toBytesBinary(String in) {
+    // this may be bigger than we need, but let's be safe.
+    byte[] b = new byte[in.length()];
+    int size = 0;
+    for (int i = 0; i < in.length(); ++i) {
+      char ch = in.charAt(i);
+      if (ch == '\\') {
+        // begin hex escape:
+        char next = in.charAt(i + 1);
+        if (next != 'x') {
+          // invalid escape sequence, ignore this one.
+          b[size++] = (byte) ch;
+          continue;
+        }
+        // ok, take next 2 hex digits.
+        char hd1 = in.charAt(i + 2);
+        char hd2 = in.charAt(i + 3);
 
-				// they need to be A-F0-9:
-				if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
-					// bogus escape code, ignore:
-					continue;
-				}
-				// turn hex ASCII digit -> number
-				byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
+        // they need to be A-F0-9:
+        if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
+          // bogus escape code, ignore:
+          continue;
+        }
+        // turn hex ASCII digit -> number
+        byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
 
-				b[size++] = d;
-				i += 3; // skip 3
-			} else {
-				b[size++] = (byte) ch;
-			}
-		}
-		// resize:
-		byte[] b2 = new byte[size];
-		System.arraycopy(b, 0, b2, 0, size);
-		return b2;
-	}
+        b[size++] = d;
+        i += 3; // skip 3
+      } else {
+        b[size++] = (byte) ch;
+      }
+    }
+    // resize:
+    byte[] b2 = new byte[size];
+    System.arraycopy(b, 0, b2, 0, size);
+    return b2;
+  }
 
-	/**
-	 * Converts a string to a UTF-8 byte array.
-	 * 
-	 * @param s
-	 *            string
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(String s) {
-		try {
-			return s.getBytes(UTF8_ENCODING);
-		} catch (UnsupportedEncodingException e) {
-			LOG.error("UTF-8 not supported?", e);
-			return null;
-		}
-	}
+  /**
+   * Converts a string to a UTF-8 byte array.
+   * 
+   * @param s
+   *          string
+   * @return the byte array
+   */
+  public static byte[] toBytes(String s) {
+    try {
+      return s.getBytes(UTF8_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("UTF-8 not supported?", e);
+      return null;
+    }
+  }
 
-	/**
-	 * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
-	 * 
-	 * @param b
-	 *            value
-	 * @return <code>b</code> encoded in a byte array.
-	 */
-	public static byte[] toBytes(final boolean b) {
-		return new byte[] { b ? (byte) -1 : (byte) 0 };
-	}
+  /**
+   * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
+   * 
+   * @param b
+   *          value
+   * @return <code>b</code> encoded in a byte array.
+   */
+  public static byte[] toBytes(final boolean b) {
+    return new byte[] { b ? (byte) -1 : (byte) 0 };
+  }
 
-	/**
-	 * Reverses {@link #toBytes(boolean)}
-	 * 
-	 * @param b
-	 *            array
-	 * @return True or false.
-	 */
-	public static boolean toBoolean(final byte[] b) {
-		if (b.length != 1) {
-			throw new IllegalArgumentException("Array has wrong size: "
-					+ b.length);
-		}
-		return b[0] != (byte) 0;
-	}
+  /**
+   * Reverses {@link #toBytes(boolean)}
+   * 
+   * @param b
+   *          array
+   * @return True or false.
+   */
+  public static boolean toBoolean(final byte[] b) {
+    if (b.length != 1) {
+      throw new IllegalArgumentException("Array has wrong size: " + b.length);
+    }
+    return b[0] != (byte) 0;
+  }
 
-	/**
-	 * Convert a long value to a byte array using big-endian.
-	 * 
-	 * @param val
-	 *            value to convert
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(long val) {
-		byte[] b = new byte[8];
-		for (int i = 7; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Convert a long value to a byte array using big-endian.
+   * 
+   * @param val
+   *          value to convert
+   * @return the byte array
+   */
+  public static byte[] toBytes(long val) {
+    byte[] b = new byte[8];
+    for (int i = 7; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
-	 * 
-	 * @param bytes
-	 *            array
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes) {
-		return toLong(bytes, 0, SIZEOF_LONG);
-	}
+  /**
+   * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
+   * 
+   * @param bytes
+   *          array
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes) {
+    return toLong(bytes, 0, SIZEOF_LONG);
+  }
 
-	/**
-	 * Converts a byte array to a long value. Assumes there will be
-	 * {@link #SIZEOF_LONG} bytes available.
-	 * 
-	 * @param bytes
-	 *            bytes
-	 * @param offset
-	 *            offset
-	 * @return the long value
-	 */
-	public static long toLong(byte[] bytes, int offset) {
-		return toLong(bytes, offset, SIZEOF_LONG);
-	}
+  /**
+   * Converts a byte array to a long value. Assumes there will be
+   * {@link #SIZEOF_LONG} bytes available.
+   * 
+   * @param bytes
+   *          bytes
+   * @param offset
+   *          offset
+   * @return the long value
+   */
+  public static long toLong(byte[] bytes, int offset) {
+    return toLong(bytes, offset, SIZEOF_LONG);
+  }
 
-	/**
-	 * Converts a byte array to a long value.
-	 * 
-	 * @param bytes
-	 *            array of bytes
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of data (must be {@link #SIZEOF_LONG})
-	 * @return the long value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_LONG} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static long toLong(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_LONG || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
-		}
-		long l = 0;
-		for (int i = offset; i < offset + length; i++) {
-			l <<= 8;
-			l ^= bytes[i] & 0xFF;
-		}
-		return l;
-	}
+  /**
+   * Converts a byte array to a long value.
+   * 
+   * @param bytes
+   *          array of bytes
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of data (must be {@link #SIZEOF_LONG})
+   * @return the long value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_LONG} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static long toLong(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_LONG || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
+    }
+    long l = 0;
+    for (int i = offset; i < offset + length; i++) {
+      l <<= 8;
+      l ^= bytes[i] & 0xFF;
+    }
+    return l;
+  }
 
-	private static IllegalArgumentException explainWrongLengthOrOffset(
-			final byte[] bytes, final int offset, final int length,
-			final int expectedLength) {
-		String reason;
-		if (length != expectedLength) {
-			reason = "Wrong length: " + length + ", expected " + expectedLength;
-		} else {
-			reason = "offset (" + offset + ") + length (" + length
-					+ ") exceed the" + " capacity of the array: "
-					+ bytes.length;
-		}
-		return new IllegalArgumentException(reason);
-	}
+  private static IllegalArgumentException explainWrongLengthOrOffset(
+      final byte[] bytes, final int offset, final int length,
+      final int expectedLength) {
+    String reason;
+    if (length != expectedLength) {
+      reason = "Wrong length: " + length + ", expected " + expectedLength;
+    } else {
+      reason = "offset (" + offset + ") + length (" + length + ") exceed the"
+          + " capacity of the array: " + bytes.length;
+    }
+    return new IllegalArgumentException(reason);
+  }
 
-	/**
-	 * Put a long value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            long to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putLong(byte[] bytes, int offset, long val) {
-		if (bytes.length - offset < SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a long at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 7; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_LONG;
-	}
+  /**
+   * Put a long value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          long to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putLong(byte[] bytes, int offset, long val) {
+    if (bytes.length - offset < SIZEOF_LONG) {
+      throw new IllegalArgumentException("Not enough room to put a long at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 7; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_LONG;
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes) {
-		return toFloat(bytes, 0);
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          byte array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes) {
+    return toFloat(bytes, 0);
+  }
 
-	/**
-	 * Presumes float encoded as IEEE 754 floating-point "single format"
-	 * 
-	 * @param bytes
-	 *            array to convert
-	 * @param offset
-	 *            offset into array
-	 * @return Float made from passed byte array.
-	 */
-	public static float toFloat(byte[] bytes, int offset) {
-		return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
-	}
+  /**
+   * Presumes float encoded as IEEE 754 floating-point "single format"
+   * 
+   * @param bytes
+   *          array to convert
+   * @param offset
+   *          offset into array
+   * @return Float made from passed byte array.
+   */
+  public static float toFloat(byte[] bytes, int offset) {
+    return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param f
-	 *            float value
-	 * @return New offset in <code>bytes</code>
-	 */
-	public static int putFloat(byte[] bytes, int offset, float f) {
-		return putInt(bytes, offset, Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param f
+   *          float value
+   * @return New offset in <code>bytes</code>
+   */
+  public static int putFloat(byte[] bytes, int offset, float f) {
+    return putInt(bytes, offset, Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * @param f
-	 *            float value
-	 * @return the float represented as byte []
-	 */
-	public static byte[] toBytes(final float f) {
-		// Encode it as int
-		return Bytes.toBytes(Float.floatToRawIntBits(f));
-	}
+  /**
+   * @param f
+   *          float value
+   * @return the float represented as byte []
+   */
+  public static byte[] toBytes(final float f) {
+    // Encode it as int
+    return Bytes.toBytes(Float.floatToRawIntBits(f));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes) {
-		return toDouble(bytes, 0);
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes) {
+    return toDouble(bytes, 0);
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset where double is
-	 * @return Return double made from passed bytes.
-	 */
-	public static double toDouble(final byte[] bytes, final int offset) {
-		return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset where double is
+   * @return Return double made from passed bytes.
+   */
+  public static double toDouble(final byte[] bytes, final int offset) {
+    return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
+  }
 
-	/**
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset to write to
-	 * @param d
-	 *            value
-	 * @return New offset into array <code>bytes</code>
-	 */
-	public static int putDouble(byte[] bytes, int offset, double d) {
-		return putLong(bytes, offset, Double.doubleToLongBits(d));
-	}
+  /**
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset to write to
+   * @param d
+   *          value
+   * @return New offset into array <code>bytes</code>
+   */
+  public static int putDouble(byte[] bytes, int offset, double d) {
+    return putLong(bytes, offset, Double.doubleToLongBits(d));
+  }
 
-	/**
-	 * Serialize a double as the IEEE 754 double format output. The resultant
-	 * array will be 8 bytes long.
-	 * 
-	 * @param d
-	 *            value
-	 * @return the double represented as byte []
-	 */
-	public static byte[] toBytes(final double d) {
-		// Encode it as a long
-		return Bytes.toBytes(Double.doubleToRawLongBits(d));
-	}
+  /**
+   * Serialize a double as the IEEE 754 double format output. The resultant
+   * array will be 8 bytes long.
+   * 
+   * @param d
+   *          value
+   * @return the double represented as byte []
+   */
+  public static byte[] toBytes(final double d) {
+    // Encode it as a long
+    return Bytes.toBytes(Double.doubleToRawLongBits(d));
+  }
 
-	/**
-	 * Convert an int value to a byte array
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(int val) {
-		byte[] b = new byte[4];
-		for (int i = 3; i > 0; i--) {
-			b[i] = (byte) val;
-			val >>>= 8;
-		}
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Convert an int value to a byte array
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(int val) {
+    byte[] b = new byte[4];
+    for (int i = 3; i > 0; i--) {
+      b[i] = (byte) val;
+      val >>>= 8;
+    }
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes) {
-		return toInt(bytes, 0, SIZEOF_INT);
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes) {
+    return toInt(bytes, 0, SIZEOF_INT);
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the int value
-	 */
-	public static int toInt(byte[] bytes, int offset) {
-		return toInt(bytes, offset, SIZEOF_INT);
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the int value
+   */
+  public static int toInt(byte[] bytes, int offset) {
+    return toInt(bytes, offset, SIZEOF_INT);
+  }
 
-	/**
-	 * Converts a byte array to an int value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length of int (has to be {@link #SIZEOF_INT})
-	 * @return the int value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_INT} or if there's not enough
-	 *             room in the array at the offset indicated.
-	 */
-	public static int toInt(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_INT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
-		}
-		int n = 0;
-		for (int i = offset; i < (offset + length); i++) {
-			n <<= 8;
-			n ^= bytes[i] & 0xFF;
-		}
-		return n;
-	}
+  /**
+   * Converts a byte array to an int value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length of int (has to be {@link #SIZEOF_INT})
+   * @return the int value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_INT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static int toInt(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_INT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
+    }
+    int n = 0;
+    for (int i = offset; i < (offset + length); i++) {
+      n <<= 8;
+      n ^= bytes[i] & 0xFF;
+    }
+    return n;
+  }
 
-	/**
-	 * Put an int value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            int to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putInt(byte[] bytes, int offset, int val) {
-		if (bytes.length - offset < SIZEOF_INT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put an int at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		for (int i = offset + 3; i > offset; i--) {
-			bytes[i] = (byte) val;
-			val >>>= 8;
-		}
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_INT;
-	}
+  /**
+   * Put an int value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          int to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putInt(byte[] bytes, int offset, int val) {
+    if (bytes.length - offset < SIZEOF_INT) {
+      throw new IllegalArgumentException("Not enough room to put an int at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    for (int i = offset + 3; i > offset; i--) {
+      bytes[i] = (byte) val;
+      val >>>= 8;
+    }
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_INT;
+  }
 
-	/**
-	 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes
-	 * long.
-	 * 
-	 * @param val
-	 *            value
-	 * @return the byte array
-	 */
-	public static byte[] toBytes(short val) {
-		byte[] b = new byte[SIZEOF_SHORT];
-		b[1] = (byte) val;
-		val >>= 8;
-		b[0] = (byte) val;
-		return b;
-	}
+  /**
+   * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
+   * 
+   * @param val
+   *          value
+   * @return the byte array
+   */
+  public static byte[] toBytes(short val) {
+    byte[] b = new byte[SIZEOF_SHORT];
+    b[1] = (byte) val;
+    val >>= 8;
+    b[0] = (byte) val;
+    return b;
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes) {
-		return toShort(bytes, 0, SIZEOF_SHORT);
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes) {
+    return toShort(bytes, 0, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @return the short value
-	 */
-	public static short toShort(byte[] bytes, int offset) {
-		return toShort(bytes, offset, SIZEOF_SHORT);
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @return the short value
+   */
+  public static short toShort(byte[] bytes, int offset) {
+    return toShort(bytes, offset, SIZEOF_SHORT);
+  }
 
-	/**
-	 * Converts a byte array to a short value
-	 * 
-	 * @param bytes
-	 *            byte array
-	 * @param offset
-	 *            offset into array
-	 * @param length
-	 *            length, has to be {@link #SIZEOF_SHORT}
-	 * @return the short value
-	 * @throws IllegalArgumentException
-	 *             if length is not {@link #SIZEOF_SHORT} or if there's not
-	 *             enough room in the array at the offset indicated.
-	 */
-	public static short toShort(byte[] bytes, int offset, final int length) {
-		if (length != SIZEOF_SHORT || offset + length > bytes.length) {
-			throw explainWrongLengthOrOffset(bytes, offset, length,
-					SIZEOF_SHORT);
-		}
-		short n = 0;
-		n ^= bytes[offset] & 0xFF;
-		n <<= 8;
-		n ^= bytes[offset + 1] & 0xFF;
-		return n;
-	}
+  /**
+   * Converts a byte array to a short value
+   * 
+   * @param bytes
+   *          byte array
+   * @param offset
+   *          offset into array
+   * @param length
+   *          length, has to be {@link #SIZEOF_SHORT}
+   * @return the short value
+   * @throws IllegalArgumentException
+   *           if length is not {@link #SIZEOF_SHORT} or if there's not enough
+   *           room in the array at the offset indicated.
+   */
+  public static short toShort(byte[] bytes, int offset, final int length) {
+    if (length != SIZEOF_SHORT || offset + length > bytes.length) {
+      throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT);
+    }
+    short n = 0;
+    n ^= bytes[offset] & 0xFF;
+    n <<= 8;
+    n ^= bytes[offset + 1] & 0xFF;
+    return n;
+  }
 
-	/**
-	 * Put a short value out to the specified byte array position.
-	 * 
-	 * @param bytes
-	 *            the byte array
-	 * @param offset
-	 *            position in the array
-	 * @param val
-	 *            short to write out
-	 * @return incremented offset
-	 * @throws IllegalArgumentException
-	 *             if the byte array given doesn't have enough room at the
-	 *             offset specified.
-	 */
-	public static int putShort(byte[] bytes, int offset, short val) {
-		if (bytes.length - offset < SIZEOF_SHORT) {
-			throw new IllegalArgumentException(
-					"Not enough room to put a short at" + " offset " + offset
-							+ " in a " + bytes.length + " byte array");
-		}
-		bytes[offset + 1] = (byte) val;
-		val >>= 8;
-		bytes[offset] = (byte) val;
-		return offset + SIZEOF_SHORT;
-	}
+  /**
+   * Put a short value out to the specified byte array position.
+   * 
+   * @param bytes
+   *          the byte array
+   * @param offset
+   *          position in the array
+   * @param val
+   *          short to write out
+   * @return incremented offset
+   * @throws IllegalArgumentException
+   *           if the byte array given doesn't have enough room at the offset
+   *           specified.
+   */
+  public static int putShort(byte[] bytes, int offset, short val) {
+    if (bytes.length - offset < SIZEOF_SHORT) {
+      throw new IllegalArgumentException("Not enough room to put a short at"
+          + " offset " + offset + " in a " + bytes.length + " byte array");
+    }
+    bytes[offset + 1] = (byte) val;
+    val >>= 8;
+    bytes[offset] = (byte) val;
+    return offset + SIZEOF_SHORT;
+  }
 
-	/**
-	 * @param vint
-	 *            Integer to make a vint of.
-	 * @return Vint as bytes array.
-	 */
-	public static byte[] vintToBytes(final long vint) {
-		long i = vint;
-		int size = WritableUtils.getVIntSize(i);
-		byte[] result = new byte[size];
-		int offset = 0;
-		if (i >= -112 && i <= 127) {
-			result[offset] = (byte) i;
-			return result;
-		}
+  /**
+   * @param vint
+   *          Integer to make a vint of.
+   * @return Vint as bytes array.
+   */
+  public static byte[] vintToBytes(final long vint) {
+    long i = vint;
+    int size = WritableUtils.getVIntSize(i);
+    byte[] result = new byte[size];
+    int offset = 0;
+    if (i >= -112 && i <= 127) {
+      result[offset] = (byte) i;
+      return result;
+    }
 
-		int len = -112;
-		if (i < 0) {
-			i ^= -1L; // take one's complement'
-			len = -120;
-		}
+    int len = -112;
+    if (i < 0) {
+      i ^= -1L; // take one's complement
+      len = -120;
+    }
 
-		long tmp = i;
-		while (tmp != 0) {
-			tmp = tmp >> 8;
-			len--;
-		}
+    long tmp = i;
+    while (tmp != 0) {
+      tmp = tmp >> 8;
+      len--;
+    }
 
-		result[offset++] = (byte) len;
+    result[offset++] = (byte) len;
 
-		len = (len < -120) ? -(len + 120) : -(len + 112);
+    len = (len < -120) ? -(len + 120) : -(len + 112);
 
-		for (int idx = len; idx != 0; idx--) {
-			int shiftbits = (idx - 1) * 8;
-			long mask = 0xFFL << shiftbits;
-			result[offset++] = (byte) ((i & mask) >> shiftbits);
-		}
-		return result;
-	}
+    for (int idx = len; idx != 0; idx--) {
+      int shiftbits = (idx - 1) * 8;
+      long mask = 0xFFL << shiftbits;
+      result[offset++] = (byte) ((i & mask) >> shiftbits);
+    }
+    return result;
+  }
 
-	/**
-	 * @param buffer
-	 *            buffer to convert
-	 * @return vint bytes as an integer.
-	 */
-	public static long bytesToVint(final byte[] buffer) {
-		int offset = 0;
-		byte firstByte = buffer[offset++];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset++];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * @param buffer
+   *          buffer to convert
+   * @return vint bytes as an integer.
+   */
+  public static long bytesToVint(final byte[] buffer) {
+    int offset = 0;
+    byte firstByte = buffer[offset++];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset++];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-	/**
-	 * Reads a zero-compressed encoded long from input stream and returns it.
-	 * 
-	 * @param buffer
-	 *            Binary array
-	 * @param offset
-	 *            Offset into array at which vint begins.
-	 * @throws java.io.IOException
-	 *             e
-	 * @return deserialized long from stream.
-	 */
-	public static long readVLong(final byte[] buffer, final int offset)
-			throws IOException {
-		byte firstByte = buffer[offset];
-		int len = WritableUtils.decodeVIntSize(firstByte);
-		if (len == 1) {
-			return firstByte;
-		}
-		long i = 0;
-		for (int idx = 0; idx < len - 1; idx++) {
-			byte b = buffer[offset + 1 + idx];
-			i = i << 8;
-			i = i | (b & 0xFF);
-		}
-		return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
-	}
+  /**
+   * Reads a zero-compressed encoded long from input stream and returns it.
+   * 
+   * @param buffer
+   *          Binary array
+   * @param offset
+   *          Offset into array at which vint begins.
+   * @throws java.io.IOException
+   *           e
+   * @return deserialized long from stream.
+   */
+  public static long readVLong(final byte[] buffer, final int offset)
+      throws IOException {
+    byte firstByte = buffer[offset];
+    int len = WritableUtils.decodeVIntSize(firstByte);
+    if (len == 1) {
+      return firstByte;
+    }
+    long i = 0;
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = buffer[offset + 1 + idx];
+      i = i << 8;
+      i = i | (b & 0xFF);
+    }
+    return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(final byte[] left, final byte[] right) {
-		return compareTo(left, 0, left.length, right, 0, right.length);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(final byte[] left, final byte[] right) {
+    return compareTo(left, 0, left.length, right, 0, right.length);
+  }
 
-	/**
-	 * Lexographically compare two arrays.
-	 * 
-	 * @param buffer1
-	 *            left operand
-	 * @param buffer2
-	 *            right operand
-	 * @param offset1
-	 *            Where to start comparing in the left buffer
-	 * @param offset2
-	 *            Where to start comparing in the right buffer
-	 * @param length1
-	 *            How much to compare from the left buffer
-	 * @param length2
-	 *            How much to compare from the right buffer
-	 * @return 0 if equal, < 0 if left is less than right, etc.
-	 */
-	public static int compareTo(byte[] buffer1, int offset1, int length1,
-			byte[] buffer2, int offset2, int length2) {
-		// Bring WritableComparator code local
-		int end1 = offset1 + length1;
-		int end2 = offset2 + length2;
-		for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
-			int a = (buffer1[i] & 0xff);
-			int b = (buffer2[j] & 0xff);
-			if (a != b) {
-				return a - b;
-			}
-		}
-		return length1 - length2;
-	}
+  /**
+   * Lexicographically compare two arrays.
+   * 
+   * @param buffer1
+   *          left operand
+   * @param buffer2
+   *          right operand
+   * @param offset1
+   *          Where to start comparing in the left buffer
+   * @param offset2
+   *          Where to start comparing in the right buffer
+   * @param length1
+   *          How much to compare from the left buffer
+   * @param length2
+   *          How much to compare from the right buffer
+   * @return 0 if equal, < 0 if left is less than right, etc.
+   */
+  public static int compareTo(byte[] buffer1, int offset1, int length1,
+      byte[] buffer2, int offset2, int length2) {
+    // Bring WritableComparator code local
+    int end1 = offset1 + length1;
+    int end2 = offset2 + length2;
+    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+      int a = (buffer1[i] & 0xff);
+      int b = (buffer2[j] & 0xff);
+      if (a != b) {
+        return a - b;
+      }
+    }
+    return length1 - length2;
+  }
 
-	/**
-	 * @param left
-	 *            left operand
-	 * @param right
-	 *            right operand
-	 * @return True if equal
-	 */
-	public static boolean equals(final byte[] left, final byte[] right) {
-		// Could use Arrays.equals?
-		// noinspection SimplifiableConditionalExpression
-		if (left == null && right == null) {
-			return true;
-		}
-		return (left == null || right == null || (left.length != right.length) ? false
-				: compareTo(left, right) == 0);
-	}
+  /**
+   * @param left
+   *          left operand
+   * @param right
+   *          right operand
+   * @return True if equal
+   */
+  public static boolean equals(final byte[] left, final byte[] right) {
+    // Could use Arrays.equals?
+    // noinspection SimplifiableConditionalExpression
+    if (left == null && right == null) {
+      return true;
+    }
+    return (left == null || right == null || (left.length != right.length) ? false
+        : compareTo(left, right) == 0);
+  }
 
-	/**
-	 * Return true if the byte array on the right is a prefix of the byte array
-	 * on the left.
-	 */
-	public static boolean startsWith(byte[] bytes, byte[] prefix) {
-		return bytes != null
-				&& prefix != null
-				&& bytes.length >= prefix.length
-				&& compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
-	}
+  /**
+   * Return true if the byte array on the right is a prefix of the byte array on
+   * the left.
+   */
+  public static boolean startsWith(byte[] bytes, byte[] prefix) {
+    return bytes != null && prefix != null && bytes.length >= prefix.length
+        && compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b) {
-		return hashCode(b, b.length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use when calculating hash code.
+   */
+  public static int hashCode(final byte[] b) {
+    return hashCode(b, b.length);
+  }
 
-	/**
-	 * @param b
-	 *            value
-	 * @param length
-	 *            length of the value
-	 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
-	 *         passed in array. This method is what
-	 *         {@link org.apache.hadoop.io.Text} and
-	 *         {@link ImmutableBytesWritable} use calculating hash code.
-	 */
-	public static int hashCode(final byte[] b, final int length) {
-		return WritableComparator.hashBytes(b, length);
-	}
+  /**
+   * @param b
+   *          value
+   * @param length
+   *          length of the value
+   * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+   *         passed in array. This method is what
+   *         {@link org.apache.hadoop.io.Text} and
+   *         {@link ImmutableBytesWritable} use when calculating hash code.
+   */
+  public static int hashCode(final byte[] b, final int length) {
+    return WritableComparator.hashBytes(b, length);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b) {
-		return hashCode(b);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b) {
+    return hashCode(b);
+  }
 
-	/**
-	 * @param b
-	 *            bytes to hash
-	 * @param length
-	 *            length to hash
-	 * @return A hash of <code>b</code> as an Integer that can be used as key in
-	 *         Maps.
-	 */
-	public static Integer mapKey(final byte[] b, final int length) {
-		return hashCode(b, length);
-	}
+  /**
+   * @param b
+   *          bytes to hash
+   * @param length
+   *          length to hash
+   * @return A hash of <code>b</code> as an Integer that can be used as key in
+   *         Maps.
+   */
+  public static Integer mapKey(final byte[] b, final int length) {
+    return hashCode(b, length);
+  }
 
-	/**
-	 * @param a
-	 *            lower half
-	 * @param b
-	 *            upper half
-	 * @return New array that has a in lower half and b in upper half.
-	 */
-	public static byte[] add(final byte[] a, final byte[] b) {
-		return add(a, b, EMPTY_BYTE_ARRAY);
-	}
+  /**
+   * @param a
+   *          lower half
+   * @param b
+   *          upper half
+   * @return New array that has a in lower half and b in upper half.
+   */
+  public static byte[] add(final byte[] a, final byte[] b) {
+    return add(a, b, EMPTY_BYTE_ARRAY);
+  }
 
-	/**
-	 * @param a
-	 *            first third
-	 * @param b
-	 *            second third
-	 * @param c
-	 *            third third
-	 * @return New array made from a, b and c
-	 */
-	public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
-		byte[] result = new byte[a.length + b.length + c.length];
-		System.arraycopy(a, 0, result, 0, a.length);
-		System.arraycopy(b, 0, result, a.length, b.length);
-		System.arraycopy(c, 0, result, a.length + b.length, c.length);
-		return result;
-	}
+  /**
+   * @param a
+   *          first third
+   * @param b
+   *          second third
+   * @param c
+   *          third third
+   * @return New array made from a, b and c
+   */
+  public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
+    byte[] result = new byte[a.length + b.length + c.length];
+    System.arraycopy(a, 0, result, 0, a.length);
+    System.arraycopy(b, 0, result, a.length, b.length);
+    System.arraycopy(c, 0, result, a.length + b.length, c.length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to grab
-	 * @return First <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] head(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, 0, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to grab
+   * @return First <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] head(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, 0, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            amount of bytes to snarf
-	 * @return Last <code>length</code> bytes from <code>a</code>
-	 */
-	public static byte[] tail(final byte[] a, final int length) {
-		if (a.length < length) {
-			return null;
-		}
-		byte[] result = new byte[length];
-		System.arraycopy(a, a.length - length, result, 0, length);
-		return result;
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          amount of bytes to snarf
+   * @return Last <code>length</code> bytes from <code>a</code>
+   */
+  public static byte[] tail(final byte[] a, final int length) {
+    if (a.length < length) {
+      return null;
+    }
+    byte[] result = new byte[length];
+    System.arraycopy(a, a.length - length, result, 0, length);
+    return result;
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> prepended 0
-	 *         bytes
-	 */
-	public static byte[] padHead(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(padding, a);
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+   */
+  public static byte[] padHead(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(padding, a);
+  }
 
-	/**
-	 * @param a
-	 *            array
-	 * @param length
-	 *            new array size
-	 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
-	 */
-	public static byte[] padTail(final byte[] a, final int length) {
-		byte[] padding = new byte[length];
-		for (int i = 0; i < length; i++) {
-			padding[i] = 0;
-		}
-		return add(a, padding);
-	}
+  /**
+   * @param a
+   *          array
+   * @param length
+   *          new array size
+   * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+   */
+  public static byte[] padTail(final byte[] a, final int length) {
+    byte[] padding = new byte[length];
+    for (int i = 0; i < length; i++) {
+      padding[i] = 0;
+    }
+    return add(a, padding);
+  }
 
-	/**
-	 * Split passed range. Expensive operation relatively. Uses BigInteger math.
-	 * Useful splitting ranges for MapReduce jobs.
-	 * 
-	 * @param a
-	 *            Beginning of range
-	 * @param b
-	 *            End of range
-	 * @param num
-	 *            Number of times to split range. Pass 1 if you want to split
-	 *            the range in two; i.e. one split.
-	 * @return Array of dividing values
-	 */
-	public static byte[][] split(final byte[] a, final byte[] b, final int num) {
-		byte[][] ret = new byte[num + 2][];
-		int i = 0;
-		Iterable<byte[]> iter = iterateOnSplits(a, b, num);
-		if (iter == null)
-			return null;
-		for (byte[] elem : iter) {
-			ret[i++] = elem;
-		}
-		return ret;
-	}
+  /**
+   * Split passed range. Expensive operation relatively. Uses BigInteger math.
+   * Useful for splitting ranges for MapReduce jobs.
+   * 
+   * @param a
+   *          Beginning of range
+   * @param b
+   *          End of range
+   * @param num
+   *          Number of times to split range. Pass 1 if you want to split the
+   *          range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte[][] split(final byte[] a, final byte[] b, final int num) {
+    byte[][] ret = new byte[num + 2][];
+    int i = 0;
+    Iterable<byte[]> iter = iterateOnSplits(a, b, num);
+    if (iter == null)
+      return null;
+    for (byte[] elem : iter) {
+      ret[i++] = elem;
+    }
+    return ret;
+  }
 
-	/**
-	 * Iterate over keys within the passed inclusive range.
-	 */
-	public static Iterable<byte[]> iterateOnSplits(final byte[] a,
-			final byte[] b, final int num) {
-		byte[] aPadded;
-		byte[] bPadded;
-		if (a.length < b.length) {
-			aPadded = padTail(a, b.length - a.length);
-			bPadded = b;
-		} else if (b.length < a.length) {
-			aPadded = a;
-			bPadded = padTail(b, a.length - b.length);
-		} else {
-			aPadded = a;
-			bPadded = b;
-		}
-		if (compareTo(aPadded, bPadded) >= 0) {
-			throw new IllegalArgumentException("b <= a");
-		}
-		if (num <= 0) {
-			throw new IllegalArgumentException("num cannot be < 0");
-		}
-		byte[] prependHeader = { 1, 0 };
-		final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
-		final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
-		final BigInteger diffBI = stopBI.subtract(startBI);
-		final BigInteger splitsBI = BigInteger.valueOf(num + 1);
-		if (diffBI.compareTo(splitsBI) < 0) {
-			return null;
-		}
-		final BigInteger intervalBI;
-		try {
-			intervalBI = diffBI.divide(splitsBI);
-		} catch (Exception e) {
-			LOG.error("Exception caught during division", e);
-			return null;
-		}
+  /**
+   * Iterate over keys within the passed inclusive range.
+   */
+  public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+      final byte[] b, final int num) {
+    byte[] aPadded;
+    byte[] bPadded;
+    if (a.length < b.length) {
+      aPadded = padTail(a, b.length - a.length);
+      bPadded = b;
+    } else if (b.length < a.length) {
+      aPadded = a;
+      bPadded = padTail(b, a.length - b.length);
+    } else {
+      aPadded = a;
+      bPadded = b;
+    }
+    if (compareTo(aPadded, bPadded) >= 0) {
+      throw new IllegalArgumentException("b <= a");
+    }
+    if (num <= 0) {
+      throw new IllegalArgumentException("num cannot be < 0");
+    }
+    byte[] prependHeader = { 1, 0 };
+    final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+    final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+    final BigInteger diffBI = stopBI.subtract(startBI);
+    final BigInteger splitsBI = BigInteger.valueOf(num + 1);
+    if (diffBI.compareTo(splitsBI) < 0) {
+      return null;
+    }
+    final BigInteger intervalBI;
+    try {
+      intervalBI = diffBI.divide(splitsBI);
+    } catch (Exception e) {
+      LOG.error("Exception caught during division", e);
+      return null;
+    }
 
-		final Iterator<byte[]> iterator = new Iterator<byte[]>() {
-			private int i = -1;
+    final Iterator<byte[]> iterator = new Iterator<byte[]>() {
+      private int i = -1;
 
-			@Override
-			public boolean hasNext() {
-				return i < num + 1;
-			}
+      @Override
+      public boolean hasNext() {
+        return i < num + 1;
+      }
 
-			@Override
-			public byte[] next() {
-				i++;
-				if (i == 0)
-					return a;
-				if (i == num + 1)
-					return b;
+      @Override
+      public byte[] next() {
+        i++;
+        if (i == 0)
+          return a;
+        if (i == num + 1)
+          return b;
 
-				BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
-						.valueOf(i)));
-				byte[] padded = curBI.toByteArray();
-				if (padded[1] == 0)
-					padded = tail(padded, padded.length - 2);
-				else
-					padded = tail(padded, padded.length - 1);
-				return padded;
-			}
+        BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
+            .valueOf(i)));
+        byte[] padded = curBI.toByteArray();
+        if (padded[1] == 0)
+          padded = tail(padded, padded.length - 2);
+        else
+          padded = tail(padded, padded.length - 1);
+        return padded;
+      }
 
-			@Override
-			public void remove() {
-				throw new UnsupportedOperationException();
-			}
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
 
-		};
+    };
 
-		return new Iterable<byte[]>() {
-			@Override
-			public Iterator<byte[]> iterator() {
-				return iterator;
-			}
-		};
-	}
+    return new Iterable<byte[]>() {
+      @Override
+      public Iterator<byte[]> iterator() {
+        return iterator;
+      }
+    };
+  }
 
-	/**
-	 * @param t
-	 *            operands
-	 * @return Array of byte arrays made from passed array of Text
-	 */
-	public static byte[][] toByteArrays(final String[] t) {
-		byte[][] result = new byte[t.length][];
-		for (int i = 0; i < t.length; i++) {
-			result[i] = Bytes.toBytes(t[i]);
-		}
-		return result;
-	}
+  /**
+   * @param t
+   *          operands
+   * @return Array of byte arrays made from passed array of Text
+   */
+  public static byte[][] toByteArrays(final String[] t) {
+    byte[][] result = new byte[t.length][];
+    for (int i = 0; i < t.length; i++) {
+      result[i] = Bytes.toBytes(t[i]);
+    }
+    return result;
+  }
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final String column) {
-		return toByteArrays(toBytes(column));
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final String column) {
+    return toByteArrays(toBytes(column));
+  }
 
-	/**
-	 * @param column
-	 *            operand
-	 * @return A byte array of a byte array where first and only entry is
-	 *         <code>column</code>
-	 */
-	public static byte[][] toByteArrays(final byte[] column) {
-		byte[][] result = new byte[1][];
-		result[0] = column;
-		return result;
-	}
+  /**
+   * @param column
+   *          operand
+   * @return A byte array of a byte array where first and only entry is
+   *         <code>column</code>
+   */
+  public static byte[][] toByteArrays(final byte[] column) {
+    byte[][] result = new byte[1][];
+    result[0] = column;
+    return result;
+  }
 
-	/**
-	 * Binary search for keys in indexes.
-	 * 
-	 * @param arr
-	 *            array of byte arrays to search for
-	 * @param key
-	 *            the key you want to find
-	 * @param offset
-	 *            the offset in the key you want to find
-	 * @param length
-	 *            the length of the key
-	 * @param comparator
-	 *            a comparator to compare.
-	 * @return index of key
-	 */
-	public static int binarySearch(byte[][] arr, byte[] key, int offset,
-			int length, RawComparator<byte[]> comparator) {
-		int low = 0;
-		int high = arr.length - 1;
+  /**
+   * Binary search for keys in indexes.
+   * 
+   * @param arr
+   *          array of byte arrays to search for
+   * @param key
+   *          the key you want to find
+   * @param offset
+   *          the offset in the key you want to find
+   * @param length
+   *          the length of the key
+   * @param comparator
+   *          a comparator to compare.
+   * @return index of key
+   */
+  public static int binarySearch(byte[][] arr, byte[] key, int offset,
+      int length, RawComparator<byte[]> comparator) {
+    int low = 0;
+    int high = arr.length - 1;
 
-		while (low <= high) {
-			int mid = (low + high) >>> 1;
-			// we have to compare in this order, because the comparator order
-			// has special logic when the 'left side' is a special key.
-			int cmp = comparator.compare(key, offset, length, arr[mid], 0,
-					arr[mid].length);
-			// key lives above the midpoint
-			if (cmp > 0)
-				low = mid + 1;
-			// key lives below the midpoint
-			else if (cmp < 0)
-				high = mid - 1;
-			// BAM. how often does this really happen?
-			else
-				return mid;
-		}
-		return -(low + 1);
-	}
+    while (low <= high) {
+      int mid = (low + high) >>> 1;
+      // we have to compare in this order, because the comparator order
+      // has special logic when the 'left side' is a special key.
+      int cmp = comparator.compare(key, offset, length, arr[mid], 0,
+          arr[mid].length);
+      // key lives above the midpoint
+      if (cmp > 0)
+        low = mid + 1;
+      // key lives below the midpoint
+      else if (cmp < 0)
+        high = mid - 1;
+      // BAM. how often does this really happen?
+      else
+        return mid;
+    }
+    return -(low + 1);
+  }
 
-	/**
-	 * Bytewise binary increment/deincrement of long contained in byte array on
-	 * given amount.
-	 * 
-	 * @param value
-	 *            - array of bytes containing long (length <= SIZEOF_LONG)
-	 * @param amount
-	 *            value will be incremented on (deincremented if negative)
-	 * @return array of bytes containing incremented long (length ==
-	 *         SIZEOF_LONG)
-	 * @throws IOException
-	 *             - if value.length > SIZEOF_LONG
-	 */
-	public static byte[] incrementBytes(byte[] value, long amount)
-			throws IOException {
-		byte[] val = value;
-		if (val.length < SIZEOF_LONG) {
-			// Hopefully this doesn't happen too often.
-			byte[] newvalue;
-			if (val[0] < 0) {
-				newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
-			} else {
-				newvalue = new byte[SIZEOF_LONG];
-			}
-			System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
-					val.length);
-			val = newvalue;
-		} else if (val.length > SIZEOF_LONG) {
-			throw new IllegalArgumentException(
-					"Increment Bytes - value too big: " + val.length);
-		}
-		if (amount == 0)
-			return val;
-		if (val[0] < 0) {
-			return binaryIncrementNeg(val, amount);
-		}
-		return binaryIncrementPos(val, amount);
-	}
+  /**
+   * Bytewise binary increment/decrement of a long contained in a byte array
+   * by the given amount.
+   * 
+   * @param value
+   *          - array of bytes containing long (length <= SIZEOF_LONG)
+   * @param amount
+   *          value will be incremented by (decremented if negative)
+   * @return array of bytes containing incremented long (length == SIZEOF_LONG)
+   * @throws IOException
+   *           - if value.length > SIZEOF_LONG
+   */
+  public static byte[] incrementBytes(byte[] value, long amount)
+      throws IOException {
+    byte[] val = value;
+    if (val.length < SIZEOF_LONG) {
+      // Hopefully this doesn't happen too often.
+      byte[] newvalue;
+      if (val[0] < 0) {
+        newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
+      } else {
+        newvalue = new byte[SIZEOF_LONG];
+      }
+      System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
+          val.length);
+      val = newvalue;
+    } else if (val.length > SIZEOF_LONG) {
+      throw new IllegalArgumentException("Increment Bytes - value too big: "
+          + val.length);
+    }
+    if (amount == 0)
+      return val;
+    if (val[0] < 0) {
+      return binaryIncrementNeg(val, amount);
+    }
+    return binaryIncrementPos(val, amount);
+  }
 
-	/* increment/deincrement for positive value */
-	private static byte[] binaryIncrementPos(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = value[value.length - i - 1] & 0x0ff;
-			int total = val + cur;
-			if (total > 255) {
-				amo += sign;
-				total %= 256;
-			} else if (total < 0) {
-				amo -= sign;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
+  /* increment/decrement for positive value */
+  private static byte[] binaryIncrementPos(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = value[value.length - i - 1] & 0x0ff;
+      int total = val + cur;
+      if (total > 255) {
+        amo += sign;
+        total %= 256;
+      } else if (total < 0) {
+        amo -= sign;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
-	/* increment/deincrement for negative value */
-	private static byte[] binaryIncrementNeg(byte[] value, long amount) {
-		long amo = amount;
-		int sign = 1;
-		if (amount < 0) {
-			amo = -amount;
-			sign = -1;
-		}
-		for (int i = 0; i < value.length; i++) {
-			int cur = ((int) amo % 256) * sign;
-			amo = (amo >> 8);
-			int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
-			int total = cur - val;
-			if (total >= 0) {
-				amo += sign;
-			} else if (total < -256) {
-				amo -= sign;
-				total %= 256;
-			}
-			value[value.length - i - 1] = (byte) total;
-			if (amo == 0)
-				return value;
-		}
-		return value;
-	}
+  /* increment/decrement for negative value */
+  private static byte[] binaryIncrementNeg(byte[] value, long amount) {
+    long amo = amount;
+    int sign = 1;
+    if (amount < 0) {
+      amo = -amount;
+      sign = -1;
+    }
+    for (int i = 0; i < value.length; i++) {
+      int cur = ((int) amo % 256) * sign;
+      amo = (amo >> 8);
+      int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
+      int total = cur - val;
+      if (total >= 0) {
+        amo += sign;
+      } else if (total < -256) {
+        amo -= sign;
+        total %= 256;
+      }
+      value[value.length - i - 1] = (byte) total;
+      if (amo == 0)
+        return value;
+    }
+    return value;
+  }
 
 }
Index: src/java/org/apache/nutch/util/domain/DomainStatistics.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainStatistics.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/domain/DomainStatistics.java	(working copy)
@@ -59,183 +59,183 @@
  */
 public class DomainStatistics extends Configured implements Tool {
 
-	private static final Logger LOG = LoggerFactory.getLogger(DomainStatistics.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainStatistics.class);
 
-	private static final Text FETCHED_TEXT = new Text("FETCHED");
-	private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
+  private static final Text FETCHED_TEXT = new Text("FETCHED");
+  private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
 
-	public static enum MyCounter {
-		FETCHED, NOT_FETCHED, EMPTY_RESULT
-	};
+  public static enum MyCounter {
+    FETCHED, NOT_FETCHED, EMPTY_RESULT
+  };
 
-	private static final int MODE_HOST = 1;
-	private static final int MODE_DOMAIN = 2;
-	private static final int MODE_SUFFIX = 3;
+  private static final int MODE_HOST = 1;
+  private static final int MODE_DOMAIN = 2;
+  private static final int MODE_SUFFIX = 3;
 
-	private Configuration conf;
+  private Configuration conf;
 
-	public int run(String[] args) throws IOException, ClassNotFoundException,
-			InterruptedException {
-		if (args.length < 3) {
-			System.out
-					.println("usage: DomainStatistics outDir host|domain|suffix [numOfReducer]");
-			return 1;
-		}
-		String outputDir = args[0];
-		int numOfReducers = 1;
+  public int run(String[] args) throws IOException, ClassNotFoundException,
+      InterruptedException {
+    if (args.length < 3) {
+      System.out
+          .println("usage: DomainStatistics outDir host|domain|suffix [numOfReducer]");
+      return 1;
+    }
+    String outputDir = args[0];
+    int numOfReducers = 1;
 
-		if (args.length > 2) {
-			numOfReducers = Integer.parseInt(args[2]);
-		}
+    if (args.length > 2) {
+      numOfReducers = Integer.parseInt(args[2]);
+    }
 
-		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-		long start = System.currentTimeMillis();
-		LOG.info("DomainStatistics: starting at " + sdf.format(start));
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("DomainStatistics: starting at " + sdf.format(start));
 
-		Job job = new NutchJob(getConf(), "Domain statistics");
+    Job job = new NutchJob(getConf(), "Domain statistics");
 
-		int mode = 0;
-		if (args[1].equals("host"))
-			mode = MODE_HOST;
-		else if (args[1].equals("domain"))
-			mode = MODE_DOMAIN;
-		else if (args[1].equals("suffix"))
-			mode = MODE_SUFFIX;
-		job.getConfiguration().setInt("domain.statistics.mode", mode);
+    int mode = 0;
+    if (args[1].equals("host"))
+      mode = MODE_HOST;
+    else if (args[1].equals("domain"))
+      mode = MODE_DOMAIN;
+    else if (args[1].equals("suffix"))
+      mode = MODE_SUFFIX;
+    job.getConfiguration().setInt("domain.statistics.mode", mode);
 
-		DataStore<String, WebPage> store = StorageUtils.createDataStore(
-				job.getConfiguration(), String.class, WebPage.class);
+    DataStore<String, WebPage> store = StorageUtils.createDataStore(
+        job.getConfiguration(), String.class, WebPage.class);
 
-		Query<String, WebPage> query = store.newQuery();
-		query.setFields(WebPage._ALL_FIELDS);
+    Query<String, WebPage> query = store.newQuery();
+    query.setFields(WebPage._ALL_FIELDS);
 
-		GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
-				DomainStatisticsMapper.class, null, true);
+    GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
+        DomainStatisticsMapper.class, null, true);
 
-		FileOutputFormat.setOutputPath(job, new Path(outputDir));
+    FileOutputFormat.setOutputPath(job, new Path(outputDir));
 
-		job.setOutputFormatClass(TextOutputFormat.class);
-		job.setMapOutputKeyClass(Text.class);
-		job.setMapOutputValueClass(LongWritable.class);
-		job.setOutputKeyClass(Text.class);
-		job.setOutputValueClass(LongWritable.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(LongWritable.class);
 
-		job.setReducerClass(DomainStatisticsReducer.class);
-		job.setCombinerClass(DomainStatisticsCombiner.class);
-		job.setNumReduceTasks(numOfReducers);
+    job.setReducerClass(DomainStatisticsReducer.class);
+    job.setCombinerClass(DomainStatisticsCombiner.class);
+    job.setNumReduceTasks(numOfReducers);
 
-		boolean success = job.waitForCompletion(true);
+    boolean success = job.waitForCompletion(true);
 
-		long end = System.currentTimeMillis();
-		LOG.info("DomainStatistics: finished at " + sdf.format(end)
-				+ ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    long end = System.currentTimeMillis();
+    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
 
-		if (!success)
-			return -1;
-		return 0;
-	}
+    if (!success)
+      return -1;
+    return 0;
+  }
 
-	public Configuration getConf() {
-		return conf;
-	}
+  public Configuration getConf() {
+    return conf;
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public static class DomainStatisticsCombiner extends
-			Reducer<Text, LongWritable, Text, LongWritable> {
+  public static class DomainStatisticsCombiner extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
 
-		@Override
-		public void reduce(Text key, Iterable<LongWritable> values,
-				Context context) throws IOException, InterruptedException {
+    @Override
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
 
-			long total = 0;
+      long total = 0;
 
-			for (LongWritable val : values)
-				total += val.get();
+      for (LongWritable val : values)
+        total += val.get();
 
-			context.write(key, new LongWritable(total));
-		}
+      context.write(key, new LongWritable(total));
+    }
 
-	}
+  }
 
-	public static class DomainStatisticsReducer extends
-			Reducer<Text, LongWritable, LongWritable, Text> {
+  public static class DomainStatisticsReducer extends
+      Reducer<Text, LongWritable, LongWritable, Text> {
 
-		@Override
-		public void reduce(Text key, Iterable<LongWritable> values,
-				Context context) throws IOException, InterruptedException {
+    @Override
+    public void reduce(Text key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
 
-			long total = 0;
+      long total = 0;
 
-			for (LongWritable val : values)
-				total += val.get();
+      for (LongWritable val : values)
+        total += val.get();
 
-			// invert output
-			context.write(new LongWritable(total), key);
-		}
-	}
+      // invert output
+      context.write(new LongWritable(total), key);
+    }
+  }
 
-	public static class DomainStatisticsMapper extends
-			GoraMapper<String, WebPage, Text, LongWritable> {
-		LongWritable COUNT_1 = new LongWritable(1);
+  public static class DomainStatisticsMapper extends
+      GoraMapper<String, WebPage, Text, LongWritable> {
+    LongWritable COUNT_1 = new LongWritable(1);
 
-		private int mode = 0;
+    private int mode = 0;
 
-		public DomainStatisticsMapper() {
-		}
+    public DomainStatisticsMapper() {
+    }
 
-		public void setup(Context context) {
-			mode = context.getConfiguration().getInt("domain.statistics.mode",
-					MODE_DOMAIN);
-		}
+    public void setup(Context context) {
+      mode = context.getConfiguration().getInt("domain.statistics.mode",
+          MODE_DOMAIN);
+    }
 
-		public void close() {
-		}
+    public void close() {
+    }
 
-		@Override
-		protected void map(
-				String key,
-				WebPage value,
-				org.apache.hadoop.mapreduce.Mapper<String, WebPage, Text, LongWritable>.Context context)
-				throws IOException, InterruptedException {
-			if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
-				try {
-					URL url = new URL(key.toString());
-					String out = null;
-					switch (mode) {
-					case MODE_HOST:
-						out = url.getHost();
-						break;
-					case MODE_DOMAIN:
-						out = URLUtil.getDomainName(url);
-						break;
-					case MODE_SUFFIX:
-						out = URLUtil.getDomainSuffix(url).getDomain();
-						break;
-					}
-					if (out.trim().equals("")) {
-						LOG.info("url : " + url);
-						context.getCounter(MyCounter.EMPTY_RESULT).increment(1);
-					}
+    @Override
+    protected void map(
+        String key,
+        WebPage value,
+        org.apache.hadoop.mapreduce.Mapper<String, WebPage, Text, LongWritable>.Context context)
+        throws IOException, InterruptedException {
+      if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
+        try {
+          URL url = new URL(key.toString());
+          String out = null;
+          switch (mode) {
+          case MODE_HOST:
+            out = url.getHost();
+            break;
+          case MODE_DOMAIN:
+            out = URLUtil.getDomainName(url);
+            break;
+          case MODE_SUFFIX:
+            out = URLUtil.getDomainSuffix(url).getDomain();
+            break;
+          }
+          if (out.trim().equals("")) {
+            LOG.info("url : " + url);
+            context.getCounter(MyCounter.EMPTY_RESULT).increment(1);
+          }
 
-					context.write(new Text(out), COUNT_1);
-				} catch (Exception ex) {
-				}
-				context.getCounter(MyCounter.FETCHED).increment(1);
-				context.write(FETCHED_TEXT, COUNT_1);
-			} else {
-				context.getCounter(MyCounter.FETCHED).increment(1);
-				context.write(NOT_FETCHED_TEXT, COUNT_1);
-			}
+          context.write(new Text(out), COUNT_1);
+        } catch (Exception ex) {
+        }
+        context.getCounter(MyCounter.FETCHED).increment(1);
+        context.write(FETCHED_TEXT, COUNT_1);
+      } else {
+        context.getCounter(MyCounter.FETCHED).increment(1);
+        context.write(NOT_FETCHED_TEXT, COUNT_1);
+      }
 
-		}
-	}
+    }
+  }
 
-	public static void main(String[] args) throws Exception {
-		ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(),
-				args);
-	}
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(), args);
+  }
 
 }
Index: src/java/org/apache/nutch/util/domain/TopLevelDomain.java
===================================================================
--- src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(working copy)
@@ -18,41 +18,47 @@
 package org.apache.nutch.util.domain;
 
 /**
- * (From wikipedia) A top-level domain (TLD) is the last part of an 
- * Internet domain name; that is, the letters which follow the final 
- * dot of any domain name. For example, in the domain name 
- * <code>www.website.com</code>, the top-level domain is <code>com</code>.
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see http://www.iana.org/
  * @see http://en.wikipedia.org/wiki/Top-level_domain
  */
 public class TopLevelDomain extends DomainSuffix {
 
-  public enum Type { INFRASTRUCTURE, GENERIC, COUNTRY };
-  
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  };
+
   private Type type;
   private String countryName = null;
-  
-  public TopLevelDomain(String domain, Type type, Status status, float boost){
+
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
     super(domain, status, boost);
     this.type = type;
   }
 
-  public TopLevelDomain(String domain, Status status, float boost, String countryName){
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
     super(domain, status, boost);
     this.type = Type.COUNTRY;
     this.countryName = countryName;
   }
-  
+
   public Type getType() {
     return type;
   }
 
-  /** Returns the country name if TLD is Country Code TLD
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   * 
    * @return country name or null
-   */ 
-  public String getCountryName(){
+   */
+  public String getCountryName() {
     return countryName;
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixes.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(working copy)
@@ -25,57 +25,62 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * Storage class for <code>DomainSuffix</code> objects 
- * Note: this class is singleton
+ * Storage class for <code>DomainSuffix</code> objects. Note: this class is
+ * singleton
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class DomainSuffixes {
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixes.class);
-  
-  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>(); 
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
   private static DomainSuffixes instance;
-  
+
   /** private ctor */
   private DomainSuffixes() {
     String file = "domain-suffixes.xml";
-    InputStream input = this.getClass().getClassLoader().getResourceAsStream(file);
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
     try {
       new DomainSuffixesReader().read(this, input);
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       LOG.warn(StringUtils.stringifyException(ex));
     }
   }
-  
+
   /**
    * Singleton instance, lazy instantination
+   * 
    * @return
    */
   public static DomainSuffixes getInstance() {
-    if(instance == null) {
+    if (instance == null) {
       instance = new DomainSuffixes();
     }
     return instance;
   }
-  
+
   void addDomainSuffix(DomainSuffix tld) {
     domains.put(tld.getDomain(), tld);
   }
 
   /** return whether the extension is a registered domain entry */
   public boolean isDomainSuffix(String extension) {
-    return domains.containsKey(extension); 
+    return domains.containsKey(extension);
   }
-    
+
   /**
-   * Return the {@link DomainSuffix} object for the extension, if 
-   * extension is a top level domain returned object will be an 
-   * instance of {@link TopLevelDomain}
-   * @param extension of the domain
+   * Return the {@link DomainSuffix} object for the extension, if extension is a
+   * top level domain returned object will be an instance of
+   * {@link TopLevelDomain}
+   * 
+   * @param extension
+   *          of the domain
    */
   public DomainSuffix get(String extension) {
     return domains.get(extension);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(working copy)
@@ -36,16 +36,17 @@
 import org.xml.sax.SAXException;
 
 /**
- * For parsing xml files containing domain suffix definitions.
- * Parsed xml files should validate against 
- * <code>domain-suffixes.xsd</code>  
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 class DomainSuffixesReader {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
 
-  void read(DomainSuffixes tldEntries, InputStream input) throws IOException{
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
     try {
 
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@@ -54,28 +55,29 @@
       Document document = builder.parse(new InputSource(input));
 
       Element root = document.getDocumentElement();
-      
-      if(root != null && root.getTagName().equals("domains")) {
-        
-        Element tlds = (Element)root.getElementsByTagName("tlds").item(0);
-        Element suffixes = (Element)root.getElementsByTagName("suffixes").item(0);
-        
-        //read tlds
-        readITLDs(tldEntries, (Element)tlds.getElementsByTagName("itlds").item(0));
-        readGTLDs(tldEntries, (Element)tlds.getElementsByTagName("gtlds").item(0));
-        readCCTLDs(tldEntries, (Element)tlds.getElementsByTagName("cctlds").item(0));
-        
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = (Element) root.getElementsByTagName("tlds").item(0);
+        Element suffixes = (Element) root.getElementsByTagName("suffixes")
+            .item(0);
+
+        // read tlds
+        readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds")
+            .item(0));
+        readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds")
+            .item(0));
+        readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds")
+            .item(0));
+
         readSuffixes(tldEntries, suffixes);
-      }
-      else {
+      } else {
         throw new IOException("xml file is not valid");
       }
-    }
-    catch (ParserConfigurationException ex) {
+    } catch (ParserConfigurationException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
-    }
-    catch (SAXException ex) {
+    } catch (SAXException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
     }
@@ -83,22 +85,24 @@
 
   void readITLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.INFRASTRUCTURE));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
     }
   }
-    
+
   void readGTLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.GENERIC));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
     }
   }
 
   void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readCCTLD((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
     }
   }
 
@@ -113,39 +117,40 @@
     String domain = el.getAttribute("domain");
     Status status = readStatus(el);
     float boost = readBoost(el);
-    String countryName = readCountryName(el); 
-    return new TopLevelDomain(domain, status, boost, countryName);  
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
   }
-  
+
   /** read optional field status */
   Status readStatus(Element el) {
     NodeList list = el.getElementsByTagName("status");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_STATUS;
     return Status.valueOf(list.item(0).getFirstChild().getNodeValue());
   }
-  
+
   /** read optional field boost */
   float readBoost(Element el) {
     NodeList list = el.getElementsByTagName("boost");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_BOOST;
     return Float.parseFloat(list.item(0).getFirstChild().getNodeValue());
   }
-  
-  /** read field countryname 
-    */
+
+  /**
+   * read field countryname
+   */
   String readCountryName(Element el) throws IOException {
     NodeList list = el.getElementsByTagName("country");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       throw new IOException("Country name should be given");
     return list.item(0).getNodeValue();
   }
-  
+
   void readSuffixes(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("suffix");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readSuffix((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
     }
   }
 
@@ -155,5 +160,5 @@
     float boost = readBoost(el);
     return new DomainSuffix(domain, status, boost);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffix.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffix.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/domain/DomainSuffix.java	(working copy)
@@ -18,17 +18,18 @@
 package org.apache.nutch.util.domain;
 
 /**
- * This class represents the last part of the host name, 
- * which is operated by authoritives, not individuals. This information 
- * is needed to find the domain name of a host. The domain name of a host
- * is defined to be the last part before the domain suffix, w/o subdomain 
- * names.  As an example the domain name of <br><code> http://lucene.apache.org/ 
- * </code><br> is <code> apache.org</code>   
- * <br>
- * This class holds three fields,  
- * <strong>domain</strong> field represents the suffix (such as "co.uk")
- * <strong>boost</strong> is a float for boosting score of url's with this suffix
- * <strong>status</strong> field represents domain's status
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields: the <strong>domain</strong> field represents
+ * the suffix (such as "co.uk"); <strong>boost</strong> is a float for boosting
+ * the score of URLs with this suffix; the <strong>status</strong> field
+ * represents the domain's status
  * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see TopLevelDomain
@@ -37,10 +38,10 @@
 public class DomainSuffix {
 
   /**
-   * Enumeration of the status of the tld. Please see domain-suffixes.xml. 
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
    */
-  public enum Status { INFRASTRUCTURE, SPONSORED, UNSPONSORED
-    , STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
   };
 
   private String domain;
@@ -49,7 +50,7 @@
 
   public static final float DEFAULT_BOOST = 1.0f;
   public static final Status DEFAULT_STATUS = Status.IN_USE;
-  
+
   public DomainSuffix(String domain, Status status, float boost) {
     this.domain = domain;
     this.status = status;
@@ -59,7 +60,7 @@
   public DomainSuffix(String domain) {
     this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
   }
-  
+
   public String getDomain() {
     return domain;
   }
@@ -71,7 +72,7 @@
   public float getBoost() {
     return boost;
   }
-  
+
   @Override
   public String toString() {
     return domain;
Index: src/java/org/apache/nutch/util/GenericWritableConfigurable.java
===================================================================
--- src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(working copy)
@@ -24,12 +24,15 @@
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/PrefixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/PrefixStringMatcher.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/PrefixStringMatcher.java	(working copy)
@@ -21,46 +21,47 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection prefixes) {
     super();
-    Iterator iter= prefixes.iterator();
+    Iterator iter = prefixes.iterator();
     while (iter.hasNext())
-      addPatternForward((String)iter.next());
+      addPatternForward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/FSUtils.java
===================================================================
--- src/java/org/apache/nutch/util/FSUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/FSUtils.java	(working copy)
@@ -33,16 +33,20 @@
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException {
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
Index: src/java/org/apache/nutch/util/GZIPUtils.java
===================================================================
--- src/java/org/apache/nutch/util/GZIPUtils.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/GZIPUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns a gunzipped copy of the input array. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,33 +47,32 @@
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@
     }
   }
 
-
   /**
-   * Returns an gunzipped copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns a gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -146,5 +144,5 @@
       return null;
     }
   }
-    
+
 }
Index: src/java/org/apache/nutch/util/ObjectCache.java
===================================================================
--- src/java/org/apache/nutch/util/ObjectCache.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/ObjectCache.java	(working copy)
@@ -24,35 +24,33 @@
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
 
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>();
+
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-
Index: src/java/org/apache/nutch/util/NodeWalker.java
===================================================================
--- src/java/org/apache/nutch/util/NodeWalker.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/NodeWalker.java	(working copy)
@@ -22,13 +22,17 @@
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,62 +51,68 @@
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
    * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
       }
     }
   }
-  
+
   /**
    * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {
Index: src/java/org/apache/nutch/util/TrieStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/TrieStringMatcher.java	(revision 1188268)
+++ src/java/org/apache/nutch/util/TrieStringMatcher.java	(working copy)
@@ -17,21 +17,19 @@
 
 package org.apache.nutch.util;
 
-
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
 /**
- * TrieStringMatcher is a base class for simple tree-based string
- * matching.
- *
+ * TrieStringMatcher is a base class for simple tree-based string matching.
+ * 
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
 
   protected TrieStringMatcher() {
-    this.root= new TrieNode('\000', false);
+    this.root = new TrieNode('\000', false);
   }
 
   /**
@@ -44,20 +42,19 @@
     protected boolean terminal;
 
     /**
-     * Creates a new TrieNode, which contains the given
-     * <code>nodeChar</code>.  If <code>isTerminal</code> is
-     * <code>true</code>, the new node is a <em>terminal</em> node in
-     * the trie.
-     */  
+     * Creates a new TrieNode, which contains the given <code>nodeChar</code>.
+     * If <code>isTerminal</code> is <code>true</code>, the new node is a
+     * <em>terminal</em> node in the trie.
+     */
     TrieNode(char nodeChar, boolean isTerminal) {
-      this.nodeChar= nodeChar;
-      this.terminal= isTerminal;
-      this.childrenList= new LinkedList<TrieNode>();
+      this.nodeChar = nodeChar;
+      this.terminal = isTerminal;
+      this.childrenList = new LinkedList<TrieNode>();
     }
 
     /**
-     * Returns <code>true</code> if this node is a <em>terminal</em>
-     * node in the trie.
+     * Returns <code>true</code> if this node is a <em>terminal</em> node in the
+     * trie.
      */
     boolean isTerminal() {
       return terminal;
@@ -65,67 +62,68 @@
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists, one will be is
-     * added.  If <em>isTerminal</em> is <code>true</code>, the node 
-     * will be a terminal node in the trie.
+     * <code>nextChar</code>. If no such node exists, one will be added. If
+     * <em>isTerminal</em> is <code>true</code>, the node will be a terminal
+     * node in the trie.
      */
     TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
       if (childrenList == null) {
-        childrenList= new LinkedList<TrieNode>();
+        childrenList = new LinkedList<TrieNode>();
         childrenList.addAll(Arrays.asList(children));
-        children= null;
+        children = null;
       }
 
       if (childrenList.size() == 0) {
-        TrieNode newNode= new TrieNode(nextChar, isTerminal);
+        TrieNode newNode = new TrieNode(nextChar, isTerminal);
         childrenList.add(newNode);
         return newNode;
       }
 
-      ListIterator<TrieNode> iter= childrenList.listIterator();
-      TrieNode node= iter.next();
-      while ( (node.nodeChar < nextChar) && iter.hasNext() ) 
-        node= iter.next();
-                        
+      ListIterator<TrieNode> iter = childrenList.listIterator();
+      TrieNode node = iter.next();
+      while ((node.nodeChar < nextChar) && iter.hasNext())
+        node = iter.next();
+
       if (node.nodeChar == nextChar) {
-        node.terminal= node.terminal | isTerminal;
+        node.terminal = node.terminal | isTerminal;
         return node;
       }
 
-      if (node.nodeChar > nextChar) 
+      if (node.nodeChar > nextChar)
         iter.previous();
 
-      TrieNode newNode= new TrieNode(nextChar, isTerminal);
+      TrieNode newNode = new TrieNode(nextChar, isTerminal);
       iter.add(newNode);
-      return newNode;                   
+      return newNode;
     }
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists,
-     * <code>null</code> is returned.
+     * <code>nextChar</code>. If no such node exists, <code>null</code> is
+     * returned.
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children= childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList= null;
+        children = childrenList.toArray(new TrieNode[childrenList.size()]);
+        childrenList = null;
         Arrays.sort(children);
       }
 
-      int min= 0;
-      int max= children.length - 1;
-      int mid= 0;
+      int min = 0;
+      int max = children.length - 1;
+      int mid = 0;
       while (min < max) {
-        mid= (min + max) / 2;
-        if (children[mid].nodeChar == nextChar) 
+        mid = (min + max) / 2;
+        if (children[mid].nodeChar == nextChar)
           return children[mid];
         if (children[mid].nodeChar < nextChar)
-          min= mid + 1;
-        else // if (children[mid].nodeChar > nextChar)
-          max= mid - 1;
+          min = mid + 1;
+        else
+          // if (children[mid].nodeChar > nextChar)
+          max = mid - 1;
       }
 
-      if (min == max) 
+      if (min == max)
         if (children[min].nodeChar == nextChar)
           return children[min];
 
@@ -133,59 +131,57 @@
     }
 
     public int compareTo(TrieNode other) {
-      if (this.nodeChar < other.nodeChar) 
+      if (this.nodeChar < other.nodeChar)
         return -1;
-      if (this.nodeChar == other.nodeChar) 
+      if (this.nodeChar == other.nodeChar)
         return 0;
-//    if (this.nodeChar > other.nodeChar) 
+      // if (this.nodeChar > other.nodeChar)
       return 1;
     }
   }
 
   /**
    * Returns the next {@link TrieNode} visited, given that you are at
-   * <code>node</code>, and the the next character in the input is 
-   * the <code>idx</code>'th character of <code>s</code>.
+   * <code>node</code>, and the next character in the input is the
+   * <code>idx</code>'th character of <code>s</code>.
    */
   protected final TrieNode matchChar(TrieNode node, String s, int idx) {
     return node.getChild(s.charAt(idx));
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded and the last character is
-   * represented by a terminal node.  Zero-length <code>Strings</code>
-   * are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded and the last character is represented by a terminal node.
+   * Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternForward(String s) {
-    TrieNode node= root;
-    int stop= s.length() - 1;
+    TrieNode node = root;
+    int stop = s.length() - 1;
     int i;
     if (s.length() > 0) {
-      for (i= 0; i < stop; i++)
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(i), true);
+      for (i = 0; i < stop; i++)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(i), true);
     }
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded <em>in reverse</em> and the
-   * first character is represented by a terminal node.  Zero-length
-   * <code>Strings</code> are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded <em>in reverse</em> and the first character is represented
+   * by a terminal node. Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternBackward(String s) {
-    TrieNode node= root;
+    TrieNode node = root;
     if (s.length() > 0) {
-      for (int i= s.length()-1; i > 0; i--) 
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(0), true);
+      for (int i = s.length() - 1; i > 0; i--)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(0), true);
     }
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * pattern in the trie
+   * Returns true if the given <code>String</code> is matched by a pattern in
+   * the trie
    */
   public abstract boolean matches(String input);
 
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -51,42 +51,44 @@
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 
-/** 
+/**
  * Utility class for deleting duplicate documents from a solr index.
- *
+ * 
  * The algorithm goes like follows:
  * 
  * Preparation:
  * <ol>
  * <li>Query the solr server for the number of documents (say, N)</li>
- * <li>Partition N among M map tasks. For example, if we have two map tasks
- * the first map task will deal with solr documents from 0 - (N / 2 - 1) and
- * the second will deal with documents from (N / 2) to (N - 1).</li>
+ * <li>Partition N among M map tasks. For example, if we have two map tasks the
+ * first map task will deal with solr documents from 0 - (N / 2 - 1) and the
+ * second will deal with documents from (N / 2) to (N - 1).</li>
  * </ol>
  * 
  * MapReduce:
  * <ul>
- * <li>Map: Identity map where keys are digests and values are {@link SolrRecord}
- * instances(which contain id, boost and timestamp)</li>
+ * <li>Map: Identity map where keys are digests and values are
+ * {@link SolrRecord} instances (which contain id, boost and timestamp)</li>
  * <li>Reduce: After map, {@link SolrRecord}s with the same digest will be
- * grouped together. Now, of these documents with the same digests, delete
- * all of them except the one with the highest score (boost field). If two
- * (or more) documents have the same score, then the document with the latest
- * timestamp is kept. Again, every other is deleted from solr index.
- * </li>
+ * grouped together. Now, of these documents with the same digests, delete all
+ * of them except the one with the highest score (boost field). If two (or more)
+ * documents have the same score, then the document with the latest timestamp is
+ * kept. Again, every other is deleted from solr index.</li>
  * </ul>
  * 
- * Note that we assume that two documents in
- * a solr index will never have the same URL. So this class only deals with
- * documents with <b>different</b> URLs but the same digest. 
+ * Note that we assume that two documents in a solr index will never have the
+ * same URL. So this class only deals with documents with <b>different</b> URLs
+ * but the same digest.
  */
 public class SolrDeleteDuplicates
-extends Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
-implements Tool {
+    extends
+    Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>
+    implements Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(SolrDeleteDuplicates.class);
 
-  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD + ":[* TO *]";
+  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD
+      + ":[* TO *]";
 
   private static final int NUM_MAX_DELETE_REQUEST = 1000;
 
@@ -96,7 +98,8 @@
     private long tstamp;
     private String id;
 
-    public SolrRecord() { }
+    public SolrRecord() {
+    }
 
     public SolrRecord(String id, float boost, long tstamp) {
       this.id = id;
@@ -117,10 +120,10 @@
     }
 
     public void readSolrDocument(SolrDocument doc) {
-      id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
-      boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
+      id = (String) doc.getFieldValue(SolrConstants.ID_FIELD);
+      boost = (Float) doc.getFieldValue(SolrConstants.BOOST_FIELD);
 
-      Date buffer = (Date)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+      Date buffer = (Date) doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
       tstamp = buffer.getTime();
     }
 
@@ -136,7 +139,7 @@
       Text.writeString(out, id);
       out.writeFloat(boost);
       out.writeLong(tstamp);
-    } 
+    }
   }
 
   public static class SolrInputSplit extends InputSplit {
@@ -144,7 +147,8 @@
     private int docBegin;
     private int numDocs;
 
-    public SolrInputSplit() { }
+    public SolrInputSplit() {
+    }
 
     public SolrInputSplit(int docBegin, int numDocs) {
       this.docBegin = docBegin;
@@ -162,10 +166,10 @@
 
     @Override
     public String[] getLocations() throws IOException {
-      return new String[] {} ;
+      return new String[] {};
     }
   }
-  
+
   public static class SolrRecordReader extends RecordReader<Text, SolrRecord> {
 
     private int currentDoc = 0;
@@ -173,21 +177,22 @@
     private Text text;
     private SolrRecord record;
     private SolrDocumentList solrDocs;
-    
+
     public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
       this.solrDocs = solrDocs;
       this.numDocs = numDocs;
     }
-    
+
     @Override
     public void initialize(InputSplit split, TaskAttemptContext context)
         throws IOException, InterruptedException {
       text = new Text();
-      record = new SolrRecord();   
+      record = new SolrRecord();
     }
 
     @Override
-    public void close() throws IOException { }
+    public void close() throws IOException {
+    }
 
     @Override
     public float getProgress() throws IOException {
@@ -219,17 +224,18 @@
       currentDoc++;
       return true;
     }
-   
+
   };
 
   public static class SolrInputFormat extends InputFormat<Text, SolrRecord> {
-    
+
     @Override
-    public List<InputSplit> getSplits(JobContext context)
-    throws IOException, InterruptedException {
+    public List<InputSplit> getSplits(JobContext context) throws IOException,
+        InterruptedException {
       Configuration conf = context.getConfiguration();
       int numSplits = context.getNumReduceTasks();
-      SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+      SolrServer solr = new CommonsHttpSolrServer(
+          conf.get(SolrConstants.SERVER_URL));
 
       final SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD);
@@ -242,8 +248,8 @@
         throw new IOException(e);
       }
 
-      int numResults = (int)response.getResults().getNumFound();
-      int numDocsPerSplit = (numResults / numSplits); 
+      int numResults = (int) response.getResults().getNumFound();
+      int numDocsPerSplit = (numResults / numSplits);
       int currentDoc = 0;
       List<InputSplit> splits = new ArrayList<InputSplit>();
       for (int i = 0; i < numSplits - 1; i++) {
@@ -259,14 +265,14 @@
     public RecordReader<Text, SolrRecord> createRecordReader(InputSplit split,
         TaskAttemptContext context) throws IOException, InterruptedException {
       Configuration conf = context.getConfiguration();
-      SolrServer solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
+      SolrServer solr = new CommonsHttpSolrServer(
+          conf.get(SolrConstants.SERVER_URL));
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = (int) solrSplit.getLength();
-      
+
       SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD, SolrConstants.BOOST_FIELD,
-                          SolrConstants.TIMESTAMP_FIELD,
-                          SolrConstants.DIGEST_FIELD);
+          SolrConstants.TIMESTAMP_FIELD, SolrConstants.DIGEST_FIELD);
       solrQuery.setStart(solrSplit.getDocBegin());
       solrQuery.setRows(numDocs);
 
@@ -310,7 +316,6 @@
     }
   }
 
-
   @Override
   public void cleanup(Context context) throws IOException {
     try {
@@ -326,14 +331,14 @@
 
   @Override
   public void reduce(Text key, Iterable<SolrRecord> values, Context context)
-  throws IOException {
+      throws IOException {
     Iterator<SolrRecord> iterator = values.iterator();
     SolrRecord recordToKeep = iterator.next();
     while (iterator.hasNext()) {
       SolrRecord solrRecord = iterator.next();
-      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
-          (solrRecord.getBoost() == recordToKeep.getBoost() && 
-              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
+      if (solrRecord.getBoost() > recordToKeep.getBoost()
+          || (solrRecord.getBoost() == recordToKeep.getBoost() && solrRecord
+              .getTstamp() > recordToKeep.getTstamp())) {
         updateRequest.deleteById(recordToKeep.id);
         recordToKeep = solrRecord;
       } else {
@@ -352,13 +357,13 @@
     }
   }
 
-  public boolean dedup(String solrUrl)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public boolean dedup(String solrUrl) throws IOException,
+      InterruptedException, ClassNotFoundException {
     LOG.info("SolrDeleteDuplicates: starting...");
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
-    
+
     getConf().set(SolrConstants.SERVER_URL, solrUrl);
-    
+
     Job job = new Job(getConf(), "solrdedup");
 
     job.setInputFormatClass(SolrInputFormat.class);
@@ -368,11 +373,11 @@
     job.setMapperClass(Mapper.class);
     job.setReducerClass(SolrDeleteDuplicates.class);
 
-    return job.waitForCompletion(true);    
+    return job.waitForCompletion(true);
   }
 
-  public int run(String[] args)
-  throws IOException, InterruptedException, ClassNotFoundException {
+  public int run(String[] args) throws IOException, InterruptedException,
+      ClassNotFoundException {
     if (args.length != 1) {
       System.err.println("Usage: SolrDeleteDuplicates <solr url>");
       return 1;
Index: src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/solr/SolrIndexerJob.java	(working copy)
@@ -44,15 +44,15 @@
   public static Logger LOG = LoggerFactory.getLogger(SolrIndexerJob.class);
 
   @Override
-  public Map<String,Object> run(Map<String,Object> args) throws Exception {
-    String solrUrl = (String)args.get(Nutch.ARG_SOLR);
-    String batchId = (String)args.get(Nutch.ARG_BATCH);
+  public Map<String, Object> run(Map<String, Object> args) throws Exception {
+    String solrUrl = (String) args.get(Nutch.ARG_SOLR);
+    String batchId = (String) args.get(Nutch.ARG_BATCH);
     NutchIndexWriterFactory.addClassToConf(getConf(), SolrWriter.class);
     getConf().set(SolrConstants.SERVER_URL, solrUrl);
 
     currentJob = createIndexJob(getConf(), "solr-index", batchId);
     Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-"
-                + new Random().nextInt());
+        + new Random().nextInt());
 
     FileOutputFormat.setOutputPath(currentJob, tmp);
     currentJob.waitForCompletion(true);
@@ -64,9 +64,7 @@
     LOG.info("SolrIndexerJob: starting");
 
     try {
-      run(ToolUtil.toArgMap(
-          Nutch.ARG_SOLR, solrUrl,
-          Nutch.ARG_BATCH, batchId));
+      run(ToolUtil.toArgMap(Nutch.ARG_SOLR, solrUrl, Nutch.ARG_BATCH, batchId));
       // do the commits once and for all the reducers in one go
       SolrServer solr = new CommonsHttpSolrServer(solrUrl);
       solr.commit();
@@ -79,7 +77,8 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: SolrIndexerJob <solr url> (<batchId> | -all | -reindex) [-crawlId <id>]");
+      System.err
+          .println("Usage: SolrIndexerJob <solr url> (<batchId> | -all | -reindex) [-crawlId <id>]");
       return -1;
     }
 
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -24,15 +24,15 @@
   public static final String COMMIT_SIZE = SOLR_PREFIX + "commit.size";
 
   public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
-  
+
   public static final String ID_FIELD = "id";
-  
+
   public static final String URL_FIELD = "url";
-  
+
   public static final String BOOST_FIELD = "boost";
-  
+
   public static final String TIMESTAMP_FIELD = "tstamp";
-  
+
   public static final String DIGEST_FIELD = "digest";
 
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(working copy)
@@ -35,14 +35,12 @@
   private SolrServer solr;
   private SolrMappingReader solrMapping;
 
-  private final List<SolrInputDocument> inputDocs =
-    new ArrayList<SolrInputDocument>();
+  private final List<SolrInputDocument> inputDocs = new ArrayList<SolrInputDocument>();
 
   private int commitSize;
 
   @Override
-  public void open(TaskAttemptContext job, String name)
-  throws IOException {
+  public void open(TaskAttemptContext job, String name) throws IOException {
     Configuration conf = job.getConfiguration();
     solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
     commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
@@ -52,12 +50,12 @@
   @Override
   public void write(NutchDocument doc) throws IOException {
     final SolrInputDocument inputDoc = new SolrInputDocument();
-    for(final Entry<String, List<String>> e : doc) {
+    for (final Entry<String, List<String>> e : doc) {
       for (final String val : e.getValue()) {
         inputDoc.addField(solrMapping.mapKey(e.getKey()), val);
         String sCopy = solrMapping.mapCopyKey(e.getKey());
         if (sCopy != e.getKey()) {
-        	inputDoc.addField(sCopy, val);
+          inputDoc.addField(sCopy, val);
         }
       }
     }
Index: src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(working copy)
@@ -38,16 +38,17 @@
 
 public class SolrMappingReader {
   public static Logger LOG = LoggerFactory.getLogger(SolrMappingReader.class);
-  
+
   private Configuration conf;
-  
+
   private Map<String, String> keyMap = new HashMap<String, String>();
   private Map<String, String> copyMap = new HashMap<String, String>();
   private String uniqueKey = "id";
-  
+
   public static synchronized SolrMappingReader getInstance(Configuration conf) {
     ObjectCache cache = ObjectCache.get(conf);
-    SolrMappingReader instance = (SolrMappingReader)cache.getObject(SolrMappingReader.class.getName());
+    SolrMappingReader instance = (SolrMappingReader) cache
+        .getObject(SolrMappingReader.class.getName());
     if (instance == null) {
       instance = new SolrMappingReader(conf);
       cache.setObject(SolrMappingReader.class.getName(), instance);
@@ -60,9 +61,10 @@
     parseMapping();
   }
 
-  private void parseMapping() {    
+  private void parseMapping() {
     InputStream ssInputStream = null;
-    ssInputStream = conf.getConfResourceAsInputStream(conf.get(SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
+    ssInputStream = conf.getConfResourceAsInputStream(conf.get(
+        SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
 
     InputSource inputSource = new InputSource(ssInputStream);
     try {
@@ -74,48 +76,50 @@
       if (fieldList.getLength() > 0) {
         for (int i = 0; i < fieldList.getLength(); i++) {
           Element element = (Element) fieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          keyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          keyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList copyFieldList = rootElement.getElementsByTagName("copyField");
       if (copyFieldList.getLength() > 0) {
         for (int i = 0; i < copyFieldList.getLength(); i++) {
           Element element = (Element) copyFieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          copyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          copyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList uniqueKeyItem = rootElement.getElementsByTagName("uniqueKey");
       if (uniqueKeyItem.getLength() > 1) {
         LOG.warn("More than one unique key definitions found in solr index mapping, using default 'id'");
         uniqueKey = "id";
-      }
-      else if (uniqueKeyItem.getLength() == 0) {
+      } else if (uniqueKeyItem.getLength() == 0) {
         LOG.warn("No unique key definition found in solr index mapping using, default 'id'");
+      } else {
+        uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
       }
-      else{
-    	  uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
-      }
     } catch (MalformedURLException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (SAXException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (IOException e) {
-    	LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (ParserConfigurationException e) {
-    	LOG.warn(e.toString());
-    } 
+      LOG.warn(e.toString());
+    }
   }
-	  
+
   public Map<String, String> getKeyMap() {
     return keyMap;
   }
-	  
+
   public Map<String, String> getCopyMap() {
     return copyMap;
   }
-	  
+
   public String getUniqueKey() {
     return uniqueKey;
   }
@@ -128,14 +132,14 @@
   }
 
   public String mapKey(String key) throws IOException {
-    if(keyMap.containsKey(key)) {
+    if (keyMap.containsKey(key)) {
       key = (String) keyMap.get(key);
     }
     return key;
   }
 
   public String mapCopyKey(String key) throws IOException {
-    if(copyMap.containsKey(key)) {
+    if (copyMap.containsKey(key)) {
       key = (String) copyMap.get(key);
     }
     return key;
Index: src/java/org/apache/nutch/indexer/NutchDocument.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchDocument.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/NutchDocument.java	(working copy)
@@ -33,9 +33,9 @@
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.nutch.metadata.Metadata;
 
-/** A {@link NutchDocument} is the unit of indexing.*/
-public class NutchDocument
-implements Writable, Iterable<Entry<String, List<String>>> {
+/** A {@link NutchDocument} is the unit of indexing. */
+public class NutchDocument implements Writable,
+    Iterable<Entry<String, List<String>>> {
 
   public static final byte VERSION = 1;
 
Index: src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(working copy)
@@ -22,15 +22,15 @@
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 
-public class IndexerOutputFormat
-extends FileOutputFormat<String, NutchDocument> {
+public class IndexerOutputFormat extends
+    FileOutputFormat<String, NutchDocument> {
 
   @Override
   public RecordWriter<String, NutchDocument> getRecordWriter(
       TaskAttemptContext job) throws IOException, InterruptedException {
 
-    final NutchIndexWriter[] writers =
-      NutchIndexWriterFactory.getNutchIndexWriters(job.getConfiguration());
+    final NutchIndexWriter[] writers = NutchIndexWriterFactory
+        .getNutchIndexWriters(job.getConfiguration());
 
     for (final NutchIndexWriter writer : writers) {
       writer.open(job, FileOutputFormat.getUniqueFile(job, "part", ""));
@@ -47,7 +47,7 @@
 
       @Override
       public void close(TaskAttemptContext context) throws IOException,
-      InterruptedException {
+          InterruptedException {
         for (final NutchIndexWriter writer : writers) {
           writer.close();
         }
Index: src/java/org/apache/nutch/indexer/IndexerReducer.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerReducer.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/IndexerReducer.java	(working copy)
@@ -31,8 +31,8 @@
 import org.apache.nutch.util.TableUtil;
 import org.apache.gora.store.DataStore;
 
-public class IndexerReducer
-extends Reducer<String, WebPage, String, NutchDocument> {
+public class IndexerReducer extends
+    Reducer<String, WebPage, String, NutchDocument> {
 
   public static final Logger LOG = IndexerJob.LOG;
 
@@ -55,8 +55,8 @@
   }
 
   @Override
-  protected void reduce(String key, Iterable<WebPage> values,
-      Context context) throws IOException, InterruptedException {
+  protected void reduce(String key, Iterable<WebPage> values, Context context)
+      throws IOException, InterruptedException {
     WebPage page = values.iterator().next();
     NutchDocument doc = new NutchDocument();
 
@@ -72,12 +72,13 @@
     try {
       doc = filters.filter(doc, url, page);
     } catch (IndexingException e) {
-      LOG.warn("Error indexing "+key+": "+e);
+      LOG.warn("Error indexing " + key + ": " + e);
       return;
     }
 
     // skip documents discarded by indexing filters
-    if (doc == null) return;
+    if (doc == null)
+      return;
 
     float boost = 1.0f;
     // run scoring filters
Index: src/java/org/apache/nutch/indexer/IndexerJob.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerJob.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/IndexerJob.java	(working copy)
@@ -56,20 +56,21 @@
     FIELDS.add(WebPage.Field.SCORE);
     FIELDS.add(WebPage.Field.MARKERS);
   }
-  
-  public static class IndexerMapper
-      extends GoraMapper<String, WebPage, String, WebPage> {
+
+  public static class IndexerMapper extends
+      GoraMapper<String, WebPage, String, WebPage> {
     protected Utf8 batchId;
 
     @Override
     public void setup(Context context) throws IOException {
       Configuration conf = context.getConfiguration();
-      batchId = new Utf8(conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
+      batchId = new Utf8(
+          conf.get(GeneratorJob.BATCH_ID, Nutch.ALL_BATCH_ID_STR));
     }
 
     @Override
     public void map(String key, WebPage page, Context context)
-    throws IOException, InterruptedException {
+        throws IOException, InterruptedException {
       ParseStatus pstatus = page.getParseStatus();
       if (pstatus == null || !ParseStatusUtils.isSuccess(pstatus)
           || pstatus.getMinorCode() == ParseStatusCodes.SUCCESS_REDIRECT) {
@@ -80,17 +81,17 @@
       if (!batchId.equals(REINDEX)) {
         if (!NutchJob.shouldProcess(mark, batchId)) {
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Skipping " + TableUtil.unreverseUrl(key) + "; different batch id");
+            LOG.debug("Skipping " + TableUtil.unreverseUrl(key)
+                + "; different batch id");
           }
           return;
         }
       }
 
       context.write(key, page);
-    }    
+    }
   }
 
-
   private static Collection<WebPage.Field> getFields(Job job) {
     Configuration conf = job.getConfiguration();
     Collection<WebPage.Field> columns = new HashSet<WebPage.Field>(FIELDS);
@@ -101,8 +102,8 @@
     return columns;
   }
 
-  protected Job createIndexJob(Configuration conf, String jobName, String batchId)
-  throws IOException, ClassNotFoundException {
+  protected Job createIndexJob(Configuration conf, String jobName,
+      String batchId) throws IOException, ClassNotFoundException {
     conf.set(GeneratorJob.BATCH_ID, batchId);
     Job job = new NutchJob(conf, jobName);
     // TODO: Figure out why this needs to be here
Index: src/java/org/apache/nutch/indexer/IndexingFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilter.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/IndexingFilter.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.plugin.FieldPluggable;
 import org.apache.nutch.storage.WebPage;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexingFilter extends FieldPluggable, Configurable {
@@ -33,15 +33,18 @@
 
   /**
    * Adds fields or otherwise modifies the document that will be indexed for a
-   * parse. Unwanted documents can be removed from indexing by returning a null value.
-   *
-   * @param doc document instance for collecting fields
-   * @param url page url
+   * parse. Unwanted documents can be removed from indexing by returning a null
+   * value.
+   * 
+   * @param doc
+   *          document instance for collecting fields
+   * @param url
+   *          page url
    * @param page
-   * @return modified (or a new) document instance, or null (meaning the document
-   * should be discarded)
+   * @return modified (or a new) document instance, or null (meaning the
+   *         document should be discarded)
    * @throws IndexingException
    */
   NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException;
+      throws IndexingException;
 }
Index: src/java/org/apache/nutch/indexer/IndexingFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilters.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/IndexingFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.ObjectCache;
 
-/** Creates and caches {@link IndexingFilter} implementing plugins.*/
+/** Creates and caches {@link IndexingFilter} implementing plugins. */
 public class IndexingFilters {
 
   public static final String INDEXINGFILTER_ORDER = "indexingfilterhbase.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexingFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexingFilters.class);
 
   private IndexingFilter[] indexingFilters;
 
@@ -62,8 +63,7 @@
         if (point == null)
           throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexingFilter> filterMap =
-          new HashMap<String, IndexingFilter>();
+        HashMap<String, IndexingFilter> filterMap = new HashMap<String, IndexingFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexingFilter filter = (IndexingFilter) extension
@@ -78,9 +78,8 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexingFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexingFilter[0]));
+          objectCache.setObject(IndexingFilter.class.getName(), filterMap
+              .values().toArray(new IndexingFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexingFilter> filters = new ArrayList<IndexingFilter>();
@@ -90,8 +89,8 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexingFilter.class.getName(), filters
-              .toArray(new IndexingFilter[filters.size()]));
+          objectCache.setObject(IndexingFilter.class.getName(),
+              filters.toArray(new IndexingFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,13 +99,15 @@
           .getObject(IndexingFilter.class.getName());
     }
   }
+
   /** Run all defined filters. */
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-  throws IndexingException {
+      throws IndexingException {
     for (IndexingFilter indexingFilter : indexingFilters) {
       doc = indexingFilter.filter(doc, url, page);
       // break the loop if an indexing filter discards the doc
-      if (doc == null) return null;
+      if (doc == null)
+        return null;
     }
 
     return doc;
Index: src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(revision 1188268)
+++ src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(working copy)
@@ -26,8 +26,8 @@
     for (int i = 0; i < classes.length; i++) {
       final String clazz = classes[i];
       try {
-        final Class<NutchIndexWriter> implClass =
-          (Class<NutchIndexWriter>) Class.forName(clazz);
+        final Class<NutchIndexWriter> implClass = (Class<NutchIndexWriter>) Class
+            .forName(clazz);
         writers[i] = implClass.newInstance();
       } catch (final Exception e) {
         throw new RuntimeException("Couldn't create " + clazz, e);
@@ -37,7 +37,7 @@
   }
 
   public static void addClassToConf(Configuration conf,
-                                    Class<? extends NutchIndexWriter> clazz) {
+      Class<? extends NutchIndexWriter> clazz) {
     final String classes = conf.get("indexer.writer.classes");
     final String newClass = clazz.getName();
 
Index: src/java/org/apache/nutch/plugin/PluginRuntimeException.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.
Index: src/java/org/apache/nutch/plugin/CircularDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/CircularDependencyException.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/CircularDependencyException.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.
Index: src/java/org/apache/nutch/plugin/Pluggable.java
===================================================================
--- src/java/org/apache/nutch/plugin/Pluggable.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/Pluggable.java	(working copy)
@@ -17,15 +17,14 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
 }
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -39,8 +39,8 @@
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parses the manifest file in
+ * all plugin directories.
  * 
  * @author joa23
  */
@@ -185,7 +185,7 @@
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + "class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -292,8 +292,8 @@
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);
Index: src/java/org/apache/nutch/plugin/ExtensionPoint.java
===================================================================
--- src/java/org/apache/nutch/plugin/ExtensionPoint.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/ExtensionPoint.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;
Index: src/java/org/apache/nutch/plugin/MissingDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/MissingDependencyException.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/MissingDependencyException.java	(working copy)
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/Extension.java	(working copy)
@@ -98,8 +98,10 @@
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -109,7 +111,8 @@
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extensionClasname to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -119,7 +122,8 @@
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -30,12 +30,11 @@
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provides access to all meta information of a
+ * nutch-plugin, as well as to the internationalizable resources and the plugin's
+ * own classloader. There is meta information about <code>Plugin</code>,
+ * <code>ExtensionPoint</code> and <code>Extension</code>. Providing access to
+ * the meta data of a plugin via a descriptor allows a lazy loading mechanism.
  * 
  * @author joa23
  */
@@ -53,7 +52,8 @@
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -206,7 +206,8 @@
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
@@ -285,8 +286,8 @@
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -308,7 +309,7 @@
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);
Index: src/java/org/apache/nutch/plugin/Plugin.java
===================================================================
--- src/java/org/apache/nutch/plugin/Plugin.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/Plugin.java	(working copy)
@@ -33,8 +33,8 @@
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible usecase of the <code>Plugin</code> implementation is to create or
+ * close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java	(revision 1188268)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java	(working copy)
@@ -55,7 +55,8 @@
 
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -80,7 +81,7 @@
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -107,8 +108,8 @@
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -123,7 +124,7 @@
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -151,7 +152,7 @@
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
Index: src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(revision 1188268)
+++ src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(working copy)
@@ -36,7 +36,8 @@
  */
 public class AnchorIndexingFilter implements IndexingFilter {
 
-  public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(AnchorIndexingFilter.class);
   private Configuration conf;
   private boolean deduplicate = false;
 
@@ -65,7 +66,7 @@
       throws IndexingException {
 
     // https://issues.apache.org/jira/browse/NUTCH-1037
-    WeakHashMap<String,Integer> map = new WeakHashMap<String,Integer>();
+    WeakHashMap<String, Integer> map = new WeakHashMap<String, Integer>();
 
     for (Entry<Utf8, Utf8> e : page.getInlinks().entrySet()) {
       String anchor = TableUtil.toString(e.getValue());
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(revision 1188268)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(working copy)
@@ -39,41 +39,41 @@
  */
 public class RelTagIndexingFilter implements IndexingFilter {
 
-	private Configuration conf;
+  private Configuration conf;
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	@Override
-	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-			throws IndexingException {
-		// Check if some Rel-Tags found, possibly put there by RelTagParser
-		ByteBuffer bb = page.getFromMetadata(new Utf8(RelTagParser.REL_TAG));
-		
-		if (bb != null) {
-			String[] tags = new String(bb.array()).split("\t");
-			for (int i = 0; i < tags.length; i++) {
-				doc.add("tag", tags[i]);
-			}
-		}
+  @Override
+  public NutchDocument filter(NutchDocument doc, String url, WebPage page)
+      throws IndexingException {
+    // Check if some Rel-Tags found, possibly put there by RelTagParser
+    ByteBuffer bb = page.getFromMetadata(new Utf8(RelTagParser.REL_TAG));
 
-		return doc;
-	}
+    if (bb != null) {
+      String[] tags = new String(bb.array()).split("\t");
+      for (int i = 0; i < tags.length; i++) {
+        doc.add("tag", tags[i]);
+      }
+    }
+
+    return doc;
+  }
 }
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(revision 1188268)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(working copy)
@@ -50,110 +50,110 @@
  */
 public class RelTagParser implements ParseFilter {
 
-	public final static Logger LOG = LoggerFactory.getLogger(RelTagParser.class);
+  public final static Logger LOG = LoggerFactory.getLogger(RelTagParser.class);
 
-	public final static String REL_TAG = "Rel-Tag";
+  public final static String REL_TAG = "Rel-Tag";
 
-	private Configuration conf = null;
+  private Configuration conf = null;
 
-	private static class Parser {
+  private static class Parser {
 
-		Set<String> tags = null;
+    Set<String> tags = null;
 
-		Parser(Node node) {
-			tags = new TreeSet<String>();
-			parse(node);
-		}
+    Parser(Node node) {
+      tags = new TreeSet<String>();
+      parse(node);
+    }
 
-		Set<String> getRelTags() {
-			return tags;
-		}
+    Set<String> getRelTags() {
+      return tags;
+    }
 
-		void parse(Node node) {
+    void parse(Node node) {
 
-			if (node.getNodeType() == Node.ELEMENT_NODE) {
-				// Look for <a> tag
-				if ("a".equalsIgnoreCase(node.getNodeName())) {
-					NamedNodeMap attrs = node.getAttributes();
-					Node hrefNode = attrs.getNamedItem("href");
-					// Checks that it contains a href attribute
-					if (hrefNode != null) {
-						Node relNode = attrs.getNamedItem("rel");
-						// Checks that it contains a rel attribute too
-						if (relNode != null) {
-							// Finaly checks that rel=tag
-							if ("tag".equalsIgnoreCase(relNode.getNodeValue())) {
-								String tag = parseTag(hrefNode.getNodeValue());
-								if (!StringUtil.isEmpty(tag)) {
-									tags.add(tag);
-								}
-							}
-						}
-					}
-				}
-			}
+      if (node.getNodeType() == Node.ELEMENT_NODE) {
+        // Look for <a> tag
+        if ("a".equalsIgnoreCase(node.getNodeName())) {
+          NamedNodeMap attrs = node.getAttributes();
+          Node hrefNode = attrs.getNamedItem("href");
+          // Checks that it contains a href attribute
+          if (hrefNode != null) {
+            Node relNode = attrs.getNamedItem("rel");
+            // Checks that it contains a rel attribute too
+            if (relNode != null) {
+              // Finally checks that rel=tag
+              if ("tag".equalsIgnoreCase(relNode.getNodeValue())) {
+                String tag = parseTag(hrefNode.getNodeValue());
+                if (!StringUtil.isEmpty(tag)) {
+                  tags.add(tag);
+                }
+              }
+            }
+          }
+        }
+      }
 
-			// Recurse
-			NodeList children = node.getChildNodes();
-			for (int i = 0; children != null && i < children.getLength(); i++) {
-				parse(children.item(i));
-			}
-		}
+      // Recurse
+      NodeList children = node.getChildNodes();
+      for (int i = 0; children != null && i < children.getLength(); i++) {
+        parse(children.item(i));
+      }
+    }
 
-		private final static String parseTag(String url) {
-			String tag = null;
-			try {
-				URL u = new URL(url);
-				String path = u.getPath();
-				tag = URLDecoder.decode(
-						path.substring(path.lastIndexOf('/') + 1), "UTF-8");
-			} catch (Exception e) {
-				// Malformed tag...
-				tag = null;
-			}
-			return tag;
-		}
+    private final static String parseTag(String url) {
+      String tag = null;
+      try {
+        URL u = new URL(url);
+        String path = u.getPath();
+        tag = URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1),
+            "UTF-8");
+      } catch (Exception e) {
+        // Malformed tag...
+        tag = null;
+      }
+      return tag;
+    }
 
-	}
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
-	
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	@Override
-	/**
-	 * Scan the HTML document looking at possible rel-tags
-	 */
-	public Parse filter(String url, WebPage page, Parse parse,
-			HTMLMetaTags metaTags, DocumentFragment doc) {
-		// Trying to find the document's rel-tags
-		Parser parser = new Parser(doc);
-		Set<String> tags = parser.getRelTags();
-		// can't store multiple values in page metadata -> separate by tabs
-		StringBuffer sb = new StringBuffer();
-		Iterator<String> iter = tags.iterator();
-		while (iter.hasNext()) {
-			sb.append(iter.next());
-			sb.append("\t");
-		}
-		ByteBuffer bb = ByteBuffer.wrap(sb.toString().getBytes());
-		page.putToMetadata(new Utf8(REL_TAG), bb);
-		return parse;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
+
+  @Override
+  /**
+   * Scan the HTML document looking at possible rel-tags
+   */
+  public Parse filter(String url, WebPage page, Parse parse,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+    // Trying to find the document's rel-tags
+    Parser parser = new Parser(doc);
+    Set<String> tags = parser.getRelTags();
+    // can't store multiple values in page metadata -> separate by tabs
+    StringBuffer sb = new StringBuffer();
+    Iterator<String> iter = tags.iterator();
+    while (iter.hasNext()) {
+      sb.append(iter.next());
+      sb.append("\t");
+    }
+    ByteBuffer bb = ByteBuffer.wrap(sb.toString().getBytes());
+    page.putToMetadata(new Utf8(REL_TAG), bb);
+    return parse;
+  }
 }
Index: src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(revision 1188268)
+++ src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(working copy)
@@ -38,70 +38,70 @@
  */
 public class TLDScoringFilter implements ScoringFilter {
 
-	private Configuration conf;
-	private DomainSuffixes tldEntries;
+  private Configuration conf;
+  private DomainSuffixes tldEntries;
 
-	private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	public TLDScoringFilter() {
-		tldEntries = DomainSuffixes.getInstance();
-	}
+  public TLDScoringFilter() {
+    tldEntries = DomainSuffixes.getInstance();
+  }
 
-	public Configuration getConf() {
-		return conf;
-	}
+  public Configuration getConf() {
+    return conf;
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	@Override
-	public Collection<WebPage.Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<WebPage.Field> getFields() {
+    return FIELDS;
+  }
 
-	@Override
-	public void injectedScore(String url, WebPage page)
-			throws ScoringFilterException {
-	}
+  @Override
+  public void injectedScore(String url, WebPage page)
+      throws ScoringFilterException {
+  }
 
-	@Override
-	public void initialScore(String url, WebPage page)
-			throws ScoringFilterException {
+  @Override
+  public void initialScore(String url, WebPage page)
+      throws ScoringFilterException {
 
-	}
+  }
 
-	@Override
-	public float generatorSortValue(String url, WebPage page, float initSort)
-			throws ScoringFilterException {
-		return initSort;
-	}
+  @Override
+  public float generatorSortValue(String url, WebPage page, float initSort)
+      throws ScoringFilterException {
+    return initSort;
+  }
 
-	@Override
-	public void distributeScoreToOutlinks(String fromUrl, WebPage page,
-			Collection<ScoreDatum> scoreData, int allCount)
-			throws ScoringFilterException {
-	}
+  @Override
+  public void distributeScoreToOutlinks(String fromUrl, WebPage page,
+      Collection<ScoreDatum> scoreData, int allCount)
+      throws ScoringFilterException {
+  }
 
-	@Override
-	public void updateScore(String url, WebPage page,
-			List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
-	}
+  @Override
+  public void updateScore(String url, WebPage page,
+      List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
+  }
 
-	@Override
-	public float indexerScore(String url, NutchDocument doc, WebPage page,
-			float initScore) throws ScoringFilterException {
-		List<String> tlds = doc.getFieldValues("tld");
-		float boost = 1.0f;
+  @Override
+  public float indexerScore(String url, NutchDocument doc, WebPage page,
+      float initScore) throws ScoringFilterException {
+    List<String> tlds = doc.getFieldValues("tld");
+    float boost = 1.0f;
 
-		if (tlds != null) {
-			for (String tld : tlds) {
-				DomainSuffix entry = tldEntries.get(tld);
-				if (entry != null)
-					boost *= entry.getBoost();
-			}
-		}
-		return initScore * boost;
-	}
+    if (tlds != null) {
+      for (String tld : tlds) {
+        DomainSuffix entry = tldEntries.get(tld);
+        if (entry != null)
+          boost *= entry.getBoost();
+      }
+    }
+    return initScore * boost;
+  }
 
 }
Index: src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(revision 1188268)
+++ src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(working copy)
@@ -38,12 +38,13 @@
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class TLDIndexingFilter implements IndexingFilter {
-  public static final Logger LOG = LoggerFactory.getLogger(TLDIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(TLDIndexingFilter.class);
 
   private Configuration conf;
 
   private static final Collection<Field> fields = new ArrayList<Field>();
-  
+
   @Override
   public NutchDocument filter(NutchDocument doc, String url, WebPage page)
       throws IndexingException {
@@ -52,7 +53,7 @@
       DomainSuffix d = URLUtil.getDomainSuffix(_url);
       doc.add("tld", d.getDomain());
     } catch (Exception ex) {
-      LOG.warn("Exception in TLDIndexingFilter",ex);
+      LOG.warn("Exception in TLDIndexingFilter", ex);
     }
 
     return doc;
Index: src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
===================================================================
--- src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(revision 1188268)
+++ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(working copy)
@@ -55,11 +55,10 @@
 import org.w3c.dom.NodeList;
 
 /**
- * This class is a heuristic link extractor for JavaScript files and
- * code snippets. The general idea of a two-pass regex matching comes from
- * Heritrix. Parts of the code come from OutlinkExtractor.java
- * by Stephan Strittmatter.
- *
+ * This class is a heuristic link extractor for JavaScript files and code
+ * snippets. The general idea of a two-pass regex matching comes from Heritrix.
+ * Parts of the code come from OutlinkExtractor.java by Stephan Strittmatter.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class JSParseFilter implements ParseFilter, Parser {
@@ -88,27 +87,33 @@
     return parse;
   }
 
-  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base, List<Outlink> outlinks) {
+  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base,
+      List<Outlink> outlinks) {
     if (n instanceof Element) {
       String name = n.getNodeName();
       if (name.equalsIgnoreCase("script")) {
         @SuppressWarnings("unused")
         String lang = null;
         Node lNode = n.getAttributes().getNamedItem("language");
-        if (lNode == null) lang = "javascript";
-        else lang = lNode.getNodeValue();
+        if (lNode == null)
+          lang = "javascript";
+        else
+          lang = lNode.getNodeValue();
         StringBuffer script = new StringBuffer();
         NodeList nn = n.getChildNodes();
         if (nn.getLength() > 0) {
           for (int i = 0; i < nn.getLength(); i++) {
-            if (i > 0) script.append('\n');
+            if (i > 0)
+              script.append('\n');
             script.append(nn.item(i).getNodeValue());
           }
           // if (LOG.isInfoEnabled()) {
-          //   LOG.info("script: language=" + lang + ", text: " + script.toString());
+          // LOG.info("script: language=" + lang + ", text: " +
+          // script.toString());
           // }
           Outlink[] links = getJSLinks(script.toString(), "", base);
-          if (links != null && links.length > 0) outlinks.addAll(Arrays.asList(links));
+          if (links != null && links.length > 0)
+            outlinks.addAll(Arrays.asList(links));
           // no other children of interest here, go one level up.
           return;
         }
@@ -120,7 +125,8 @@
           // Window: onload,onunload
           // Form: onchange,onsubmit,onreset,onselect,onblur,onfocus
           // Keyboard: onkeydown,onkeypress,onkeyup
-          // Mouse: onclick,ondbclick,onmousedown,onmouseout,onmousover,onmouseup
+          // Mouse:
+          // onclick,ondblclick,onmousedown,onmouseout,onmouseover,onmouseup
           Node anode = attrs.item(i);
           Outlink[] links = null;
           if (anode.getNodeName().startsWith("on")) {
@@ -131,7 +137,8 @@
               links = getJSLinks(val, "", base);
             }
           }
-          if (links != null && links.length > 0) outlinks.addAll(Arrays.asList(links));
+          if (links != null && links.length > 0)
+            outlinks.addAll(Arrays.asList(links));
         }
       }
     }
@@ -144,35 +151,41 @@
   @Override
   public Parse getParse(String url, WebPage page) {
     String type = TableUtil.toString(page.getContentType());
-    if (type != null && !type.trim().equals("") && !type.toLowerCase().startsWith("application/x-javascript"))
-      return ParseStatusUtils.getEmptyParse(ParseStatusCodes.FAILED_INVALID_FORMAT,
-          "Content not JavaScript: '" + type + "'", getConf());
+    if (type != null && !type.trim().equals("")
+        && !type.toLowerCase().startsWith("application/x-javascript"))
+      return ParseStatusUtils.getEmptyParse(
+          ParseStatusCodes.FAILED_INVALID_FORMAT, "Content not JavaScript: '"
+              + type + "'", getConf());
     String script = new String(page.getContent().array());
     Outlink[] outlinks = getJSLinks(script, "", url);
-    if (outlinks == null) outlinks = new Outlink[0];
+    if (outlinks == null)
+      outlinks = new Outlink[0];
     // Title? use the first line of the script...
     String title;
     int idx = script.indexOf('\n');
     if (idx != -1) {
-      if (idx > MAX_TITLE_LEN) idx = MAX_TITLE_LEN;
+      if (idx > MAX_TITLE_LEN)
+        idx = MAX_TITLE_LEN;
       title = script.substring(0, idx);
     } else {
       idx = Math.min(MAX_TITLE_LEN, script.length());
       title = script.substring(0, idx);
     }
-    Parse parse =
-      new Parse(script, title, outlinks, ParseStatusUtils.STATUS_SUCCESS);
+    Parse parse = new Parse(script, title, outlinks,
+        ParseStatusUtils.STATUS_SUCCESS);
     return parse;
   }
 
   private static final String STRING_PATTERN = "(\\\\*(?:\"|\'))([^\\s\"\']+?)(?:\\1)";
   // A simple pattern. This allows also invalid URL characters.
   private static final String URI_PATTERN = "(^|\\s*?)/?\\S+?[/\\.]\\S+($|\\s*)";
+
   // Alternative pattern, which limits valid url characters.
-  //private static final String URI_PATTERN = "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
+  // private static final String URI_PATTERN =
+  // "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
 
   /**
-   *  This method extracts URLs from literals embedded in JavaScript.
+   * This method extracts URLs from literals embedded in JavaScript.
    */
   private Outlink[] getJSLinks(String plainText, String anchor, String base) {
 
@@ -182,17 +195,19 @@
     try {
       baseURL = new URL(base);
     } catch (Exception e) {
-      if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", e); }
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getJSLinks", e);
+      }
     }
 
     try {
       final PatternCompiler cp = new Perl5Compiler();
       final Pattern pattern = cp.compile(STRING_PATTERN,
           Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
-          | Perl5Compiler.MULTILINE_MASK);
+              | Perl5Compiler.MULTILINE_MASK);
       final Pattern pattern1 = cp.compile(URI_PATTERN,
           Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
-          | Perl5Compiler.MULTILINE_MASK);
+              | Perl5Compiler.MULTILINE_MASK);
       final PatternMatcher matcher = new Perl5Matcher();
 
       final PatternMatcher matcher1 = new Perl5Matcher();
@@ -201,26 +216,27 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         result = matcher.getMatch();
         url = result.group(2);
         PatternMatcherInput input1 = new PatternMatcherInput(url);
         if (!matcher1.matches(input1, pattern1)) {
-          //if (LOG.isTraceEnabled()) { LOG.trace(" - invalid '" + url + "'"); }
+          // if (LOG.isTraceEnabled()) { LOG.trace(" - invalid '" + url + "'");
+          // }
           continue;
         }
         if (url.startsWith("www.")) {
           url = "http://" + url;
         } else {
-          // See if candidate URL is parseable.  If not, pass and move on to
+          // See if candidate URL is parseable. If not, pass and move on to
           // the next match.
           try {
             url = new URL(baseURL, url).toString();
           } catch (MalformedURLException ex) {
             if (LOG.isTraceEnabled()) {
-              LOG.trace(" - failed URL parse '" + url + "' and baseURL '" +
-                  baseURL + "'", ex);
+              LOG.trace(" - failed URL parse '" + url + "' and baseURL '"
+                  + baseURL + "'", ex);
             }
             continue;
           }
@@ -234,12 +250,14 @@
     } catch (Exception ex) {
       // if it is a malformed URL we just throw it away and continue with
       // extraction.
-      if (LOG.isErrorEnabled()) { LOG.error("getJSLinks", ex); }
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getJSLinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -258,7 +276,8 @@
     BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
     StringBuffer sb = new StringBuffer();
     String line = null;
-    while ((line = br.readLine()) != null) sb.append(line + "\n");
+    while ((line = br.readLine()) != null)
+      sb.append(line + "\n");
     JSParseFilter parseFilter = new JSParseFilter();
     parseFilter.setConf(NutchConfiguration.create());
     Outlink[] links = parseFilter.getJSLinks(sb.toString(), "", args[1]);
Index: src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java	(revision 1188268)
+++ src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java	(working copy)
@@ -27,7 +27,7 @@
 public class TestBasicURLNormalizer extends TestCase {
   private BasicURLNormalizer normalizer;
   private Configuration conf;
-  
+
   public TestBasicURLNormalizer(String name) {
     super(name);
     normalizer = new BasicURLNormalizer();
@@ -56,64 +56,51 @@
     // check that references are removed
     normalizeTest("http://foo.com/foo.html#ref", "http://foo.com/foo.html");
 
-    //     // check that encoding is normalized
-    //     normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
+    // // check that encoding is normalized
+    // normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
 
     // check that unnecessary "../" are removed
 
-    normalizeTest("http://foo.com/aa/./foo.html",
-                  "http://foo.com/aa/foo.html" );
-    normalizeTest("http://foo.com/aa/../",
-                  "http://foo.com/" );
-    normalizeTest("http://foo.com/aa/bb/../",
-                  "http://foo.com/aa/");
-    normalizeTest("http://foo.com/aa/..",
-                  "http://foo.com/aa/..");
+    normalizeTest("http://foo.com/aa/./foo.html", "http://foo.com/aa/foo.html");
+    normalizeTest("http://foo.com/aa/../", "http://foo.com/");
+    normalizeTest("http://foo.com/aa/bb/../", "http://foo.com/aa/");
+    normalizeTest("http://foo.com/aa/..", "http://foo.com/aa/..");
     normalizeTest("http://foo.com/aa/bb/cc/../../foo.html",
-                  "http://foo.com/aa/foo.html");
+        "http://foo.com/aa/foo.html");
     normalizeTest("http://foo.com/aa/bb/../cc/dd/../ee/foo.html",
-                  "http://foo.com/aa/cc/ee/foo.html");
-    normalizeTest("http://foo.com/../foo.html",
-                  "http://foo.com/foo.html" );
-    normalizeTest("http://foo.com/../../foo.html",
-                  "http://foo.com/foo.html" );
-    normalizeTest("http://foo.com/../aa/../foo.html",
-                  "http://foo.com/foo.html" );
-    normalizeTest("http://foo.com/aa/../../foo.html",
-                  "http://foo.com/foo.html" );
+        "http://foo.com/aa/cc/ee/foo.html");
+    normalizeTest("http://foo.com/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/../../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/aa/../../foo.html", "http://foo.com/foo.html");
     normalizeTest("http://foo.com/aa/../bb/../foo.html/../../",
-                  "http://foo.com/" );
-    normalizeTest("http://foo.com/../aa/foo.html",
-                  "http://foo.com/aa/foo.html" );
-    normalizeTest("http://foo.com/../aa/../foo.html",
-                  "http://foo.com/foo.html" );
+        "http://foo.com/");
+    normalizeTest("http://foo.com/../aa/foo.html", "http://foo.com/aa/foo.html");
+    normalizeTest("http://foo.com/../aa/../foo.html", "http://foo.com/foo.html");
     normalizeTest("http://foo.com/a..a/foo.html",
-                  "http://foo.com/a..a/foo.html" );
-    normalizeTest("http://foo.com/a..a/../foo.html",
-                  "http://foo.com/foo.html" );
+        "http://foo.com/a..a/foo.html");
+    normalizeTest("http://foo.com/a..a/../foo.html", "http://foo.com/foo.html");
     normalizeTest("http://foo.com/foo.foo/../foo.html",
-                  "http://foo.com/foo.html" );
+        "http://foo.com/foo.html");
     normalizeTest("http://foo.com//aa/bb/foo.html",
-                  "http://foo.com/aa/bb/foo.html" );
+        "http://foo.com/aa/bb/foo.html");
     normalizeTest("http://foo.com/aa//bb/foo.html",
-                  "http://foo.com/aa/bb/foo.html" );
+        "http://foo.com/aa/bb/foo.html");
     normalizeTest("http://foo.com/aa/bb//foo.html",
-                  "http://foo.com/aa/bb/foo.html" );
+        "http://foo.com/aa/bb/foo.html");
     normalizeTest("http://foo.com//aa//bb//foo.html",
-                  "http://foo.com/aa/bb/foo.html" );
+        "http://foo.com/aa/bb/foo.html");
     normalizeTest("http://foo.com////aa////bb////foo.html",
-                  "http://foo.com/aa/bb/foo.html" );
+        "http://foo.com/aa/bb/foo.html");
   }
 
   private void normalizeTest(String weird, String normal) throws Exception {
-    assertEquals(normal, normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
+    assertEquals(normal,
+        normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
   }
 
   public static void main(String[] args) throws Exception {
     new TestBasicURLNormalizer("test").testNormalizer();
   }
 
-
-
-
 }
\ No newline at end of file
Index: src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java	(revision 1188268)
+++ src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java	(working copy)
@@ -33,181 +33,174 @@
 
 /** Converts URLs to a normal form . */
 public class BasicURLNormalizer implements URLNormalizer {
-    public static final Logger LOG = LoggerFactory.getLogger(BasicURLNormalizer.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(BasicURLNormalizer.class);
 
-    private Perl5Compiler compiler = new Perl5Compiler();
-    private ThreadLocal matchers = new ThreadLocal() {
-        protected synchronized Object initialValue() {
-          return new Perl5Matcher();
-        }
-      };
-    private Rule relativePathRule = null;
-    private Rule leadingRelativePathRule = null;
-    private Rule currentPathRule = null;
-    private Rule adjacentSlashRule = null;
+  private Perl5Compiler compiler = new Perl5Compiler();
+  private ThreadLocal matchers = new ThreadLocal() {
+    protected synchronized Object initialValue() {
+      return new Perl5Matcher();
+    }
+  };
+  private Rule relativePathRule = null;
+  private Rule leadingRelativePathRule = null;
+  private Rule currentPathRule = null;
+  private Rule adjacentSlashRule = null;
 
-    private Configuration conf;
+  private Configuration conf;
 
-    public BasicURLNormalizer() {
-      try {
-        // this pattern tries to find spots like "/xx/../" in the url, which
-        // could be replaced by "/" xx consists of chars, different then "/"
-        // (slash) and needs to have at least one char different from "."
-        relativePathRule = new Rule();
-        relativePathRule.pattern = (Perl5Pattern)
-          compiler.compile("(/[^/]*[^/.]{1}[^/]*/\\.\\./)",
-                           Perl5Compiler.READ_ONLY_MASK);
-        relativePathRule.substitution = new Perl5Substitution("/");
+  public BasicURLNormalizer() {
+    try {
+      // this pattern tries to find spots like "/xx/../" in the url, which
+      // could be replaced by "/". xx consists of chars other than "/"
+      // (slash) and needs to have at least one char different from "."
+      relativePathRule = new Rule();
+      relativePathRule.pattern = (Perl5Pattern) compiler.compile(
+          "(/[^/]*[^/.]{1}[^/]*/\\.\\./)", Perl5Compiler.READ_ONLY_MASK);
+      relativePathRule.substitution = new Perl5Substitution("/");
 
-        // this pattern tries to find spots like leading "/../" in the url,
-        // which could be replaced by "/"
-        leadingRelativePathRule = new Rule();
-        leadingRelativePathRule.pattern = (Perl5Pattern)
-          compiler.compile("^(/\\.\\./)+", Perl5Compiler.READ_ONLY_MASK);
-        leadingRelativePathRule.substitution = new Perl5Substitution("/");
+      // this pattern tries to find spots like leading "/../" in the url,
+      // which could be replaced by "/"
+      leadingRelativePathRule = new Rule();
+      leadingRelativePathRule.pattern = (Perl5Pattern) compiler.compile(
+          "^(/\\.\\./)+", Perl5Compiler.READ_ONLY_MASK);
+      leadingRelativePathRule.substitution = new Perl5Substitution("/");
 
-        // this pattern tries to find spots like "/./" in the url,
-        // which could be replaced by "/"
-        currentPathRule = new Rule();
-        currentPathRule.pattern = (Perl5Pattern)
-          compiler.compile("(/\\./)", Perl5Compiler.READ_ONLY_MASK);
-        currentPathRule.substitution = new Perl5Substitution("/");
+      // this pattern tries to find spots like "/./" in the url,
+      // which could be replaced by "/"
+      currentPathRule = new Rule();
+      currentPathRule.pattern = (Perl5Pattern) compiler.compile("(/\\./)",
+          Perl5Compiler.READ_ONLY_MASK);
+      currentPathRule.substitution = new Perl5Substitution("/");
 
-        // this pattern tries to find spots like "xx//yy" in the url,
-        // which could be replaced by a "/"
-        adjacentSlashRule = new Rule();
-        adjacentSlashRule.pattern = (Perl5Pattern)      
-          compiler.compile("/{2,}", Perl5Compiler.READ_ONLY_MASK);     
-        adjacentSlashRule.substitution = new Perl5Substitution("/");
-        
-      } catch (MalformedPatternException e) {
-        e.printStackTrace(LogUtil.getWarnStream(LOG));
-        throw new RuntimeException(e);
-      }
+      // this pattern tries to find spots like "xx//yy" in the url,
+      // which could be replaced by a "/"
+      adjacentSlashRule = new Rule();
+      adjacentSlashRule.pattern = (Perl5Pattern) compiler.compile("/{2,}",
+          Perl5Compiler.READ_ONLY_MASK);
+      adjacentSlashRule.substitution = new Perl5Substitution("/");
+
+    } catch (MalformedPatternException e) {
+      e.printStackTrace(LogUtil.getWarnStream(LOG));
+      throw new RuntimeException(e);
     }
+  }
 
-    public String normalize(String urlString, String scope)
-            throws MalformedURLException {
-        if ("".equals(urlString))                     // permit empty
-            return urlString;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException {
+    if ("".equals(urlString)) // permit empty
+      return urlString;
 
-        urlString = urlString.trim();                 // remove extra spaces
+    urlString = urlString.trim(); // remove extra spaces
 
-        URL url = new URL(urlString);
+    URL url = new URL(urlString);
 
-        String protocol = url.getProtocol();
-        String host = url.getHost();
-        int port = url.getPort();
-        String file = url.getFile();
+    String protocol = url.getProtocol();
+    String host = url.getHost();
+    int port = url.getPort();
+    String file = url.getFile();
 
-        boolean changed = false;
+    boolean changed = false;
 
-        if (!urlString.startsWith(protocol))        // protocol was lowercased
-            changed = true;
+    if (!urlString.startsWith(protocol)) // protocol was lowercased
+      changed = true;
 
-        if ("http".equals(protocol) || "ftp".equals(protocol)) {
+    if ("http".equals(protocol) || "ftp".equals(protocol)) {
 
-            if (host != null) {
-                String newHost = host.toLowerCase();    // lowercase host
-                if (!host.equals(newHost)) {
-                    host = newHost;
-                    changed = true;
-                }
-            }
+      if (host != null) {
+        String newHost = host.toLowerCase(); // lowercase host
+        if (!host.equals(newHost)) {
+          host = newHost;
+          changed = true;
+        }
+      }
 
-            if (port == url.getDefaultPort()) {       // uses default port
-                port = -1;                              // so don't specify it
-                changed = true;
-            }
+      if (port == url.getDefaultPort()) { // uses default port
+        port = -1; // so don't specify it
+        changed = true;
+      }
 
-            if (file == null || "".equals(file)) {    // add a slash
-                file = "/";
-                changed = true;
-            }
+      if (file == null || "".equals(file)) { // add a slash
+        file = "/";
+        changed = true;
+      }
 
-            if (url.getRef() != null) {                 // remove the ref
-                changed = true;
-            }
+      if (url.getRef() != null) { // remove the ref
+        changed = true;
+      }
 
-            // check for unnecessary use of "/../"
-            String file2 = substituteUnnecessaryRelativePaths(file);
+      // check for unnecessary use of "/../"
+      String file2 = substituteUnnecessaryRelativePaths(file);
 
-            if (!file.equals(file2)) {
-                changed = true;
-                file = file2;
-            }
+      if (!file.equals(file2)) {
+        changed = true;
+        file = file2;
+      }
 
-        }
-
-        if (changed)
-            urlString = new URL(protocol, host, port, file).toString();
-
-        return urlString;
     }
 
-    private String substituteUnnecessaryRelativePaths(String file) {
-        String fileWorkCopy = file;
-        int oldLen = file.length();
-        int newLen = oldLen - 1;
+    if (changed)
+      urlString = new URL(protocol, host, port, file).toString();
 
-        // All substitutions will be done step by step, to ensure that certain
-        // constellations will be normalized, too
-        //
-        // For example: "/aa/bb/../../cc/../foo.html will be normalized in the
-        // following manner:
-        //   "/aa/bb/../../cc/../foo.html"
-        //   "/aa/../cc/../foo.html"
-        //   "/cc/../foo.html"
-        //   "/foo.html"
-        //
-        // The normalization also takes care of leading "/../", which will be
-        // replaced by "/", because this is a rather a sign of bad webserver
-        // configuration than of a wanted link.  For example, urls like
-        // "http://www.foo.com/../" should return a http 404 error instead of
-        // redirecting to "http://www.foo.com".
-        //
-        Perl5Matcher matcher = (Perl5Matcher)matchers.get();
+    return urlString;
+  }
 
-        while (oldLen != newLen) {
-            // substitue first occurence of "/xx/../" by "/"
-            oldLen = fileWorkCopy.length();
-            fileWorkCopy = Util.substitute
-              (matcher, relativePathRule.pattern,
-               relativePathRule.substitution, fileWorkCopy, 1);
+  private String substituteUnnecessaryRelativePaths(String file) {
+    String fileWorkCopy = file;
+    int oldLen = file.length();
+    int newLen = oldLen - 1;
 
-            // remove leading "/../"
-            fileWorkCopy = Util.substitute
-              (matcher, leadingRelativePathRule.pattern,
-               leadingRelativePathRule.substitution, fileWorkCopy, 1);
+    // All substitutions will be done step by step, to ensure that certain
+    // constellations will be normalized, too
+    //
+    // For example: "/aa/bb/../../cc/../foo.html will be normalized in the
+    // following manner:
+    // "/aa/bb/../../cc/../foo.html"
+    // "/aa/../cc/../foo.html"
+    // "/cc/../foo.html"
+    // "/foo.html"
+    //
+    // The normalization also takes care of leading "/../", which will be
+    // replaced by "/", because this is rather a sign of bad webserver
+    // configuration than of a wanted link. For example, urls like
+    // "http://www.foo.com/../" should return a http 404 error instead of
+    // redirecting to "http://www.foo.com".
+    //
+    Perl5Matcher matcher = (Perl5Matcher) matchers.get();
 
-            // remove unnecessary "/./"
-            fileWorkCopy = Util.substitute
-            (matcher, currentPathRule.pattern,
-            		currentPathRule.substitution, fileWorkCopy, 1);
-            
-            
-            // collapse adjacent slashes with "/"
-            fileWorkCopy = Util.substitute
-            (matcher, adjacentSlashRule.pattern,
-              adjacentSlashRule.substitution, fileWorkCopy, 1);
-            
-            newLen = fileWorkCopy.length();
-        }
+    while (oldLen != newLen) {
+      // substitute first occurrence of "/xx/../" by "/"
+      oldLen = fileWorkCopy.length();
+      fileWorkCopy = Util.substitute(matcher, relativePathRule.pattern,
+          relativePathRule.substitution, fileWorkCopy, 1);
 
-        return fileWorkCopy;
-    }
+      // remove leading "/../"
+      fileWorkCopy = Util.substitute(matcher, leadingRelativePathRule.pattern,
+          leadingRelativePathRule.substitution, fileWorkCopy, 1);
 
+      // remove unnecessary "/./"
+      fileWorkCopy = Util.substitute(matcher, currentPathRule.pattern,
+          currentPathRule.substitution, fileWorkCopy, 1);
 
-    /**
-     * Class which holds a compiled pattern and its corresponding substition
-     * string.
-     */
-    private static class Rule {
-        public Perl5Pattern pattern;
-        public Perl5Substitution substitution;
+      // collapse adjacent slashes with "/"
+      fileWorkCopy = Util.substitute(matcher, adjacentSlashRule.pattern,
+          adjacentSlashRule.substitution, fileWorkCopy, 1);
+
+      newLen = fileWorkCopy.length();
     }
 
+    return fileWorkCopy;
+  }
 
+  /**
+   * Class which holds a compiled pattern and its corresponding substitution
+   * string.
+   */
+  private static class Rule {
+    public Perl5Pattern pattern;
+    public Perl5Substitution substitution;
+  }
+
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
@@ -217,4 +210,3 @@
   }
 
 }
-
Index: src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
===================================================================
--- src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java	(revision 1188268)
+++ src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java	(working copy)
@@ -38,7 +38,8 @@
 
 /** Adds basic searchable fields to a document. */
 public class BasicIndexingFilter implements IndexingFilter {
-  public static final Logger LOG = LoggerFactory.getLogger(BasicIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(BasicIndexingFilter.class);
 
   private int MAX_TITLE_LENGTH;
   private Configuration conf;
@@ -109,7 +110,8 @@
     }
 
     // add timestamp when fetched, for deduplication
-    String tstamp = DateUtil.getThreadLocalDateFormat().format(new Date(page.getFetchTime()));
+    String tstamp = DateUtil.getThreadLocalDateFormat().format(
+        new Date(page.getFetchTime()));
     doc.add("tstamp", tstamp);
 
     return doc;
Index: src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java
===================================================================
--- src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java	(revision 1188268)
+++ src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java	(working copy)
@@ -23,12 +23,16 @@
 import org.apache.nutch.net.URLFilter;
 
 /**
- * <p>Validates URLs.</p>
- *
- * <p>Originally based in on php script by Debbie Dyer, validation.php v1.2b,
- * Date: 03/07/02,
- * http://javascript.internet.com. However, this validation now bears little
- * resemblance to the php original.</p>
+ * <p>
+ * Validates URLs.
+ * </p>
+ * 
+ * <p>
+ * Originally based on a php script by Debbie Dyer, validation.php v1.2b, Date:
+ * 03/07/02, http://javascript.internet.com. However, this validation now bears
+ * little resemblance to the php original.
+ * </p>
+ * 
  * <pre>
  *   Example of usage:
  *    UrlValidator urlValidator = UrlValidator.get();
@@ -37,17 +41,17 @@
  *    } else {
  *       System.out.println("url is invalid");
  *    }
- *
+ * 
  *   prints out "url is valid"
- *  </pre>
- *
- * <p>Based on UrlValidator code from Apache commons-validator.</p>
- *
- * @see
- * <a href='http://www.ietf.org/rfc/rfc2396.txt' >
- *  Uniform Resource Identifiers (URI): Generic Syntax
- * </a>
- *
+ * </pre>
+ * 
+ * <p>
+ * Based on UrlValidator code from Apache commons-validator.
+ * </p>
+ * 
+ * @see <a href='http://www.ietf.org/rfc/rfc2396.txt' > Uniform Resource
+ *      Identifiers (URI): Generic Syntax </a>
+ * 
  */
 public class UrlValidator implements URLFilter {
 
@@ -61,7 +65,7 @@
 
   private static final String SCHEME_CHARS = ALPHA_CHARS;
 
-  // Drop numeric, and  "+-." for now
+  // Drop numeric, and "+-." for now
   private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
 
   private static final String ATOM = VALID_CHARS + '+';
@@ -69,9 +73,9 @@
   /**
    * This expression derived/taken from the BNF for URI (RFC2396).
    */
-  private static final Pattern URL_PATTERN =
-    Pattern.compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)" +
-                    "(\\?([^#]*))?(#(.*))?");
+  private static final Pattern URL_PATTERN = Pattern
+      .compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)"
+          + "(\\?([^#]*))?(#(.*))?");
 
   /**
    * Schema/Protocol (ie. http:, ftp:, file:, etc).
@@ -90,11 +94,11 @@
   /**
    * Protocol (ie. http:, ftp:,https:).
    */
-  private static final Pattern SCHEME_PATTERN =
-    Pattern.compile("^[" + SCHEME_CHARS + "]+");
+  private static final Pattern SCHEME_PATTERN = Pattern.compile("^["
+      + SCHEME_CHARS + "]+");
 
-  private static final Pattern AUTHORITY_PATTERN =
-    Pattern.compile("^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
+  private static final Pattern AUTHORITY_PATTERN = Pattern.compile("^(["
+      + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
 
   private static final int PARSE_AUTHORITY_HOST_IP = 1;
 
@@ -105,28 +109,26 @@
    */
   private static final int PARSE_AUTHORITY_EXTRA = 3;
 
-  private static final Pattern PATH_PATTERN =
-    Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
+  private static final Pattern PATH_PATTERN = Pattern
+      .compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
 
   private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
 
-  private static final Pattern LEGAL_ASCII_PATTERN =
-    Pattern.compile("^[\\x21-\\x7E]+$");
+  private static final Pattern LEGAL_ASCII_PATTERN = Pattern
+      .compile("^[\\x21-\\x7E]+$");
 
-  private static final Pattern IP_V4_DOMAIN_PATTERN =
-    Pattern.compile("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$");
+  private static final Pattern IP_V4_DOMAIN_PATTERN = Pattern
+      .compile("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$");
 
-  private static final Pattern DOMAIN_PATTERN =
-    Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
+  private static final Pattern DOMAIN_PATTERN = Pattern.compile("^" + ATOM
+      + "(\\." + ATOM + ")*$");
 
-  private static final Pattern PORT_PATTERN =
-    Pattern.compile("^:(\\d{1,5})$");
+  private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$");
 
-  private static final Pattern ATOM_PATTERN =
-    Pattern.compile("(" + ATOM + ")");
+  private static final Pattern ATOM_PATTERN = Pattern.compile("(" + ATOM + ")");
 
-  private static final Pattern ALPHA_PATTERN =
-    Pattern.compile("^[" + ALPHA_CHARS + "]");
+  private static final Pattern ALPHA_PATTERN = Pattern.compile("^["
+      + ALPHA_CHARS + "]");
 
   private Configuration conf;
 
@@ -143,10 +145,13 @@
   }
 
   /**
-   * <p>Checks if a field has a valid url address.</p>
-   *
-   * @param value The value validation is being performed on.
-   * A <code>null</code> value is considered invalid.
+   * <p>
+   * Checks if a field has a valid url address.
+   * </p>
+   * 
+   * @param value
+   *          The value validation is being performed on. A <code>null</code>
+   *          value is considered invalid.
    * @return true if the url is valid.
    */
   private boolean isValid(String value) {
@@ -184,11 +189,13 @@
   }
 
   /**
-   * Validate scheme. If schemes[] was initialized to a non null,
-   * then only those scheme's are allowed.  Note this is slightly different
-   * than for the constructor.
-   * @param scheme The scheme to validate.  A <code>null</code> value is
-   * considered invalid.
+   * Validate scheme. If schemes[] was initialized to a non null, then only
+   * those scheme's are allowed. Note this is slightly different than for the
+   * constructor.
+   * 
+   * @param scheme
+   *          The scheme to validate. A <code>null</code> value is considered
+   *          invalid.
    * @return true if valid.
    */
   private boolean isValidScheme(String scheme) {
@@ -200,10 +207,12 @@
   }
 
   /**
-   * Returns true if the authority is properly formatted.  An authority is
-   * the combination of hostname and port.  A <code>null</code> authority
-   * value is considered invalid.
-   * @param authority Authority value to validate.
+   * Returns true if the authority is properly formatted. An authority is the
+   * combination of hostname and port. A <code>null</code> authority value is
+   * considered invalid.
+   * 
+   * @param authority
+   *          Authority value to validate.
    * @return true if authority (hostname and port) is valid.
    */
   private boolean isValidAuthority(String authority) {
@@ -235,7 +244,7 @@
           if (Integer.parseInt(ipSegment) > 255) {
             return false;
           }
-        } catch(NumberFormatException e) {
+        } catch (NumberFormatException e) {
           return false;
         }
 
@@ -251,8 +260,8 @@
       // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203
       char[] chars = hostIP.toCharArray();
       int size = 1;
-      for(int i=0; i<chars.length; i++) {
-        if(chars[i] == '.') {
+      for (int i = 0; i < chars.length; i++) {
+        if (chars[i] == '.') {
           size++;
         }
       }
@@ -264,8 +273,7 @@
       while (atomMatcher.find()) {
         domainSegment[segCount] = atomMatcher.group();
         segLen = domainSegment[segCount].length() + 1;
-        hostIP = (segLen >= hostIP.length()) ? ""
-                                             : hostIP.substring(segLen);
+        hostIP = (segLen >= hostIP.length()) ? "" : hostIP.substring(segLen);
         segCount++;
       }
       String topLevel = domainSegment[segCount - 1];
@@ -300,10 +308,13 @@
   }
 
   /**
-   * <p>Checks if the field isn't null and length of the field is greater
-   * than zero not including whitespace.</p>
-   *
-   * @param value The value validation is being performed on.
+   * <p>
+   * Checks if the field isn't null and length of the field is greater than zero
+   * not including whitespace.
+   * </p>
+   * 
+   * @param value
+   *          The value validation is being performed on.
    * @return true if blank or null.
    */
   private boolean isBlankOrNull(String value) {
@@ -311,9 +322,11 @@
   }
 
   /**
-   * Returns true if the path is valid.  A <code>null</code> value is
-   * considered invalid.
-   * @param path Path value to validate.
+   * Returns true if the path is valid. A <code>null</code> value is considered
+   * invalid.
+   * 
+   * @param path
+   *          Path value to validate.
    * @return true if path is valid.
    */
   private boolean isValidPath(String path) {
@@ -335,7 +348,9 @@
   /**
    * Returns true if the query is null or it's a properly formatted query
    * string.
-   * @param query Query value to validate.
+   * 
+   * @param query
+   *          Query value to validate.
    * @return true if query is valid.
    */
   private boolean isValidQuery(String query) {
@@ -348,8 +363,11 @@
 
   /**
    * Returns the number of times the token appears in the target.
-   * @param token Token value to be counted.
-   * @param target Target value to count tokens in.
+   * 
+   * @param token
+   *          Token value to be counted.
+   * @param target
+   *          Target value to count tokens in.
    * @return the number of tokens.
    */
   private int countToken(String token, String target) {
Index: src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
===================================================================
--- src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java	(revision 1188268)
+++ src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java	(working copy)
@@ -39,17 +39,17 @@
 
 /**
  * This plugin implements a variant of an Online Page Importance Computation
- * (OPIC) score, described in this paper:
- * <a href="http://www2003.org/cdrom/papers/refereed/p007/p7-abiteboul.html"/>
- * Abiteboul, Serge and Preda, Mihai and Cobena, Gregory (2003),
- * Adaptive On-Line Page Importance Computation
- * </a>.
- *
+ * (OPIC) score, described in this paper: <a
+ * href="http://www2003.org/cdrom/papers/refereed/p007/p7-abiteboul.html">
+ * Abiteboul, Serge and Preda, Mihai and Cobena, Gregory (2003), Adaptive
+ * On-Line Page Importance Computation </a>.
+ * 
  * @author Andrzej Bialecki
  */
 public class OPICScoringFilter implements ScoringFilter {
 
-  private final static Logger LOG = LoggerFactory.getLogger(OPICScoringFilter.class);
+  private final static Logger LOG = LoggerFactory
+      .getLogger(OPICScoringFilter.class);
 
   private final static Utf8 CASH_KEY = new Utf8("_csh_");
 
@@ -81,28 +81,33 @@
 
   @Override
   public void injectedScore(String url, WebPage row)
-  throws ScoringFilterException {
+      throws ScoringFilterException {
     float score = row.getScore();
     row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(score)));
   }
 
-  /** Set to 0.0f (unknown value) - inlink contributions will bring it to
-   * a correct level. Newly discovered pages have at least one inlink. */
+  /**
+   * Set to 0.0f (unknown value) - inlink contributions will bring it to a
+   * correct level. Newly discovered pages have at least one inlink.
+   */
   @Override
-  public void initialScore(String url, WebPage row) throws ScoringFilterException {
+  public void initialScore(String url, WebPage row)
+      throws ScoringFilterException {
     row.setScore(0.0f);
     row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
   }
 
   /** Use {@link WebPage#getScore()}. */
   @Override
-  public float generatorSortValue(String url, WebPage row, float initSort) throws ScoringFilterException {
+  public float generatorSortValue(String url, WebPage row, float initSort)
+      throws ScoringFilterException {
     return row.getScore() * initSort;
   }
 
   /** Increase the score by a sum of inlinked scores. */
   @Override
-  public void updateScore(String url, WebPage row, List<ScoreDatum> inlinkedScoreData) {
+  public void updateScore(String url, WebPage row,
+      List<ScoreDatum> inlinkedScoreData) {
     float adjust = 0.0f;
     for (ScoreDatum scoreDatum : inlinkedScoreData) {
       adjust += scoreDatum.getScore();
@@ -119,9 +124,8 @@
 
   /** Get cash on hand, divide it by the number of outlinks and apply. */
   @Override
-  public void distributeScoreToOutlinks(String fromUrl,
-      WebPage row, Collection<ScoreDatum> scoreData,
-      int allCount) {
+  public void distributeScoreToOutlinks(String fromUrl, WebPage row,
+      Collection<ScoreDatum> scoreData, int allCount) {
     ByteBuffer cashRaw = row.getFromMetadata(CASH_KEY);
     if (cashRaw == null) {
       return;
@@ -139,7 +143,7 @@
       try {
         String toHost = new URL(scoreDatum.getUrl()).getHost();
         String fromHost = new URL(fromUrl.toString()).getHost();
-        if(toHost.equalsIgnoreCase(fromHost)){
+        if (toHost.equalsIgnoreCase(fromHost)) {
           scoreDatum.setScore(internalScore);
         } else {
           scoreDatum.setScore(externalScore);
@@ -153,9 +157,10 @@
     row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
   }
 
-  /** Dampen the boost value by scorePower.*/
-  public float indexerScore(String url, NutchDocument doc, WebPage row, float initScore) {
-    return (float)Math.pow(row.getScore(), scorePower) * initScore;
+  /** Dampen the boost value by scorePower. */
+  public float indexerScore(String url, NutchDocument doc, WebPage row,
+      float initScore) {
+    return (float) Math.pow(row.getScore(), scorePower) * initScore;
   }
 
   @Override
Index: src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java
===================================================================
--- src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java	(revision 1188268)
+++ src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java	(working copy)
@@ -30,65 +30,65 @@
 
 public class LinkAnalysisScoringFilter implements ScoringFilter {
 
-	private Configuration conf;
-	private float normalizedScore = 1.00f;
+  private Configuration conf;
+  private float normalizedScore = 1.00f;
 
-	private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.METADATA);
-		FIELDS.add(WebPage.Field.SCORE);
-	}
+  static {
+    FIELDS.add(WebPage.Field.METADATA);
+    FIELDS.add(WebPage.Field.SCORE);
+  }
 
-	public LinkAnalysisScoringFilter() {
-	}
+  public LinkAnalysisScoringFilter() {
+  }
 
-	public Configuration getConf() {
-		return conf;
-	}
+  public Configuration getConf() {
+    return conf;
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-		normalizedScore = conf.getFloat("link.analyze.normalize.score", 1.00f);
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    normalizedScore = conf.getFloat("link.analyze.normalize.score", 1.00f);
+  }
 
-	@Override
-	public Collection<WebPage.Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<WebPage.Field> getFields() {
+    return FIELDS;
+  }
 
-	@Override
-	public void injectedScore(String url, WebPage page)
-			throws ScoringFilterException {
-	}
+  @Override
+  public void injectedScore(String url, WebPage page)
+      throws ScoringFilterException {
+  }
 
-	@Override
-	public void initialScore(String url, WebPage page)
-			throws ScoringFilterException {
-		page.setScore(0.0f);
-	}
+  @Override
+  public void initialScore(String url, WebPage page)
+      throws ScoringFilterException {
+    page.setScore(0.0f);
+  }
 
-	@Override
-	public float generatorSortValue(String url, WebPage page, float initSort)
-			throws ScoringFilterException {
-		return page.getScore() * initSort;
-	}
+  @Override
+  public float generatorSortValue(String url, WebPage page, float initSort)
+      throws ScoringFilterException {
+    return page.getScore() * initSort;
+  }
 
-	@Override
-	public void distributeScoreToOutlinks(String fromUrl, WebPage page,
-			Collection<ScoreDatum> scoreData, int allCount)
-			throws ScoringFilterException {
-	}
+  @Override
+  public void distributeScoreToOutlinks(String fromUrl, WebPage page,
+      Collection<ScoreDatum> scoreData, int allCount)
+      throws ScoringFilterException {
+  }
 
-	@Override
-	public void updateScore(String url, WebPage page,
-			List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
-	}
+  @Override
+  public void updateScore(String url, WebPage page,
+      List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
+  }
 
-	@Override
-	public float indexerScore(String url, NutchDocument doc, WebPage page,
-			float initScore) throws ScoringFilterException {
-		return (normalizedScore * page.getScore());
-	}
+  @Override
+  public float indexerScore(String url, NutchDocument doc, WebPage page,
+      float initScore) throws ScoringFilterException {
+    return (normalizedScore * page.getScore());
+  }
 
 }
Index: src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
===================================================================
--- src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java	(working copy)
@@ -42,16 +42,19 @@
 import java.util.ArrayList;
 
 /**
- * Filters URLs based on a file of URL prefixes. The file is named by
- * (1) property "urlfilter.prefix.file" in ./conf/nutch-default.xml, and
- * (2) attribute "file" in plugin.xml of this plugin
- * Attribute "file" has higher precedence if defined.
- *
- * <p>The format of this file is one URL prefix per line.</p>
+ * Filters URLs based on a file of URL prefixes. The file is named by (1)
+ * property "urlfilter.prefix.file" in ./conf/nutch-default.xml, and (2)
+ * attribute "file" in plugin.xml of this plugin. Attribute "file" has higher
+ * precedence if defined.
+ * 
+ * <p>
+ * The format of this file is one URL prefix per line.
+ * </p>
  */
 public class PrefixURLFilter implements URLFilter {
 
-  private static final Logger LOG = LoggerFactory.getLogger(PrefixURLFilter.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(PrefixURLFilter.class);
 
   // read in attribute "file" of this plugin.
   private static String attributeFile = null;
@@ -61,7 +64,7 @@
   private Configuration conf;
 
   public PrefixURLFilter() throws IOException {
-   
+
   }
 
   public PrefixURLFilter(String stringRules) throws IOException {
@@ -75,43 +78,43 @@
       return url;
   }
 
-  private TrieStringMatcher readConfiguration(Reader reader)
-    throws IOException {
-    
-    BufferedReader in=new BufferedReader(reader);
+  private TrieStringMatcher readConfiguration(Reader reader) throws IOException {
+
+    BufferedReader in = new BufferedReader(reader);
     List urlprefixes = new ArrayList();
     String line;
 
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       if (line.length() == 0)
         continue;
 
-      char first=line.charAt(0);
+      char first = line.charAt(0);
       switch (first) {
-      case ' ' : case '\n' : case '#' :           // skip blank & comment lines
+      case ' ':
+      case '\n':
+      case '#': // skip blank & comment lines
         continue;
-      default :
-	urlprefixes.add(line);
+      default:
+        urlprefixes.add(line);
       }
     }
 
     return new PrefixStringMatcher(urlprefixes);
   }
 
-  public static void main(String args[])
-    throws IOException {
-    
+  public static void main(String args[]) throws IOException {
+
     PrefixURLFilter filter;
     if (args.length >= 1)
       filter = new PrefixURLFilter(args[0]);
     else
       filter = new PrefixURLFilter();
-    
-    BufferedReader in=new BufferedReader(new InputStreamReader(System.in));
+
+    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.println(out);
       }
     }
@@ -121,8 +124,8 @@
     this.conf = conf;
 
     String pluginName = "urlfilter-prefix";
-    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
-        URLFilter.class.getName()).getExtensions();
+    Extension[] extensions = PluginRepository.get(conf)
+        .getExtensionPoint(URLFilter.class.getName()).getExtensions();
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
       if (extension.getDescriptor().getPluginId().equals(pluginName)) {
@@ -139,8 +142,8 @@
       }
     } else {
       // if (LOG.isWarnEnabled()) {
-      //   LOG.warn("Attribute \"file\" is not defined in plugin.xml for
-      //   plugin "+pluginName);
+      // LOG.warn("Attribute \"file\" is not defined in plugin.xml for
+      // plugin "+pluginName);
       // }
     }
 
@@ -162,7 +165,9 @@
       try {
         trie = readConfiguration(reader);
       } catch (IOException e) {
-        if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
+        if (LOG.isErrorEnabled()) {
+          LOG.error(e.getMessage());
+        }
         // TODO mb@media-style.com: throw Exception? Because broken api.
         throw new RuntimeException(e.getMessage(), e);
       }
@@ -172,5 +177,5 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
+
 }
Index: src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
===================================================================
--- src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java	(revision 1188268)
+++ src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java	(working copy)
@@ -35,45 +35,50 @@
 
 import junit.framework.TestCase;
 
-/** 
+/**
  * Unit tests for SWFParser.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class TestSWFParser extends TestCase {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
-  private String sampleDir = System.getProperty("test.data",".");
-  
-  private String[] sampleFiles = new String[]{"test1.swf", "test2.swf", "test3.swf"};
-  private String[] sampleTexts = new String[]{"test1.txt", "test2.txt", "test3.txt"};
+  private String sampleDir = System.getProperty("test.data", ".");
+
+  private String[] sampleFiles = new String[] { "test1.swf", "test2.swf",
+      "test3.swf" };
+  private String[] sampleTexts = new String[] { "test1.txt", "test2.txt",
+      "test3.txt" };
   private String[] texts = new String[sampleTexts.length];
 
-  public TestSWFParser(String name) { 
+  public TestSWFParser(String name) {
     super(name);
     for (int i = 0; i < sampleFiles.length; i++) {
-    try {
-      // read the test string
-      FileInputStream fis = new FileInputStream(sampleDir + fileSeparator + sampleTexts[i]);
-      StringBuffer sb = new StringBuffer();
-      int len = 0;
-      InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
-      char[] buf = new char[1024];
-      while ((len = isr.read(buf)) > 0) {
-        sb.append(buf, 0, len);
+      try {
+        // read the test string
+        FileInputStream fis = new FileInputStream(sampleDir + fileSeparator
+            + sampleTexts[i]);
+        StringBuffer sb = new StringBuffer();
+        int len = 0;
+        InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
+        char[] buf = new char[1024];
+        while ((len = isr.read(buf)) > 0) {
+          sb.append(buf, 0, len);
+        }
+        isr.close();
+        sampleTexts[i] = sb.toString().replaceAll("[ \t\r\n]+", " ").trim();
+      } catch (Exception e) {
+        e.printStackTrace();
       }
-      isr.close();
-      sampleTexts[i] = sb.toString().replaceAll("[ \t\r\n]+", " ").trim();
-    } catch (Exception e) {
-      e.printStackTrace();
     }
-    }
   }
 
-  protected void setUp() {}
+  protected void setUp() {
+  }
 
-  protected void tearDown() {}
+  protected void tearDown() {
+  }
 
   public void testIt() throws ProtocolException, ParseException {
     String urlString;
@@ -86,7 +91,8 @@
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
       protocol = new ProtocolFactory(conf).getProtocol(urlString);
-      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
+      content = protocol.getProtocolOutput(new Text(urlString),
+          new CrawlDatum()).getContent();
 
       parse = new ParseUtil(conf).parse(content).get(content.getUrl());
 
Index: src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
===================================================================
--- src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java	(revision 1188268)
+++ src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java	(working copy)
@@ -47,11 +47,13 @@
  * @author Andrzej Bialecki
  */
 public class SWFParser implements Parser {
-  public static final Logger LOG = LoggerFactory.getLogger("org.apache.nutch.parse.swf");
+  public static final Logger LOG = LoggerFactory
+      .getLogger("org.apache.nutch.parse.swf");
 
   private Configuration conf = null;
 
-  public SWFParser() {}
+  public SWFParser() {
+  }
 
   public void setConf(Configuration conf) {
     this.conf = conf;
@@ -71,10 +73,12 @@
       byte[] raw = content.getContent();
 
       String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
-      if (contentLength != null && raw.length != Integer.parseInt(contentLength)) {
-        return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
-                               "Content truncated at " + raw.length +
-                               " bytes. Parser can't handle incomplete files.").getEmptyParseResult(content.getUrl(), getConf());
+      if (contentLength != null
+          && raw.length != Integer.parseInt(contentLength)) {
+        return new ParseStatus(ParseStatus.FAILED,
+            ParseStatus.FAILED_TRUNCATED, "Content truncated at " + raw.length
+                + " bytes. Parser can't handle incomplete files.")
+            .getEmptyParseResult(content.getUrl(), getConf());
       }
       ExtractText extractor = new ExtractText();
 
@@ -90,7 +94,8 @@
       reader.readFile();
       text = extractor.getText();
       String atext = extractor.getActionText();
-      if (atext != null && atext.length() > 0) text += "\n--------\n" + atext;
+      if (atext != null && atext.length() > 0)
+        text += "\n--------\n" + atext;
       // harvest potential outlinks
       String[] links = extractor.getUrls();
       for (int i = 0; i < links.length; i++) {
@@ -98,19 +103,25 @@
         outlinks.add(out);
       }
       Outlink[] olinks = OutlinkExtractor.getOutlinks(text, conf);
-      if (olinks != null) for (int i = 0; i < olinks.length; i++) {
-        outlinks.add(olinks[i]);
-      }
+      if (olinks != null)
+        for (int i = 0; i < olinks.length; i++) {
+          outlinks.add(olinks[i]);
+        }
     } catch (Exception e) { // run time exception
       e.printStackTrace(LogUtil.getErrorStream(LOG));
-      return new ParseStatus(ParseStatus.FAILED, "Can't be handled as SWF document. " + e).getEmptyParseResult(content.getUrl(), getConf());
-    } 
-    if (text == null) text = "";
+      return new ParseStatus(ParseStatus.FAILED,
+          "Can't be handled as SWF document. " + e).getEmptyParseResult(
+          content.getUrl(), getConf());
+    }
+    if (text == null)
+      text = "";
 
-    Outlink[] links = (Outlink[]) outlinks.toArray(new Outlink[outlinks.size()]);
+    Outlink[] links = (Outlink[]) outlinks
+        .toArray(new Outlink[outlinks.size()]);
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links,
-                                        content.getMetadata());
-    return ParseResult.createParseResult(content.getUrl(), new ParseImpl(text, parseData));
+        content.getMetadata());
+    return ParseResult.createParseResult(content.getUrl(), new ParseImpl(text,
+        parseData));
   }
 
   /**
@@ -122,10 +133,9 @@
     byte[] buf = new byte[in.available()];
     in.read(buf);
     SWFParser parser = new SWFParser();
-    ParseResult parseResult = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
-                                          buf, "application/x-shockwave-flash",
-                                          new Metadata(),
-                                          NutchConfiguration.create()));
+    ParseResult parseResult = parser.getParse(new Content("file:" + args[0],
+        "file:" + args[0], buf, "application/x-shockwave-flash",
+        new Metadata(), NutchConfiguration.create()));
     Parse p = parseResult.get("file:" + args[0]);
     System.out.println("Parse Text:");
     System.out.println(p.getText());
@@ -170,7 +180,8 @@
     StringBuffer res = new StringBuffer();
     Iterator it = strings.iterator();
     while (it.hasNext()) {
-      if (res.length() > 0) res.append(' ');
+      if (res.length() > 0)
+        res.append(' ');
       res.append(it.next());
     }
     return res.toString();
@@ -178,10 +189,12 @@
 
   public String getActionText() {
     StringBuffer res = new StringBuffer();
-    String[] strings = (String[])actionStrings.toArray(new String[actionStrings.size()]);
+    String[] strings = (String[]) actionStrings
+        .toArray(new String[actionStrings.size()]);
     Arrays.sort(strings);
     for (int i = 0; i < strings.length; i++) {
-      if (i > 0) res.append('\n');
+      if (i > 0)
+        res.append('\n');
       res.append(strings[i]);
     }
     return res.toString();
@@ -198,14 +211,16 @@
     return res;
   }
 
-  public void tagDefineFontInfo2(int arg0, String arg1, int arg2, int[] arg3, int arg4) throws IOException {
+  public void tagDefineFontInfo2(int arg0, String arg1, int arg2, int[] arg3,
+      int arg4) throws IOException {
     tagDefineFontInfo(arg0, arg1, arg2, arg3);
   }
 
   /**
    * SWFTagTypes interface Save the Text Font character code info
    */
-  public void tagDefineFontInfo(int fontId, String fontName, int flags, int[] codes) throws IOException {
+  public void tagDefineFontInfo(int fontId, String fontName, int flags,
+      int[] codes) throws IOException {
     // System.out.println("-defineFontInfo id=" + fontId + ", name=" +
     // fontName);
     fontCodes.put(new Integer(fontId), codes);
@@ -215,16 +230,16 @@
   // XXX codes anyway, so we just give up.
   /*
    * public SWFVectors tagDefineFont(int arg0, int arg1) throws IOException {
-   *    return null;
-   * }
+   * return null; }
    */
 
   /**
    * SWFTagTypes interface. Save the character code info.
    */
-  public SWFVectors tagDefineFont2(int id, int flags, String name, int numGlyphs, int ascent, int descent, int leading,
-          int[] codes, int[] advances, Rect[] bounds, int[] kernCodes1, int[] kernCodes2, int[] kernAdjustments)
-          throws IOException {
+  public SWFVectors tagDefineFont2(int id, int flags, String name,
+      int numGlyphs, int ascent, int descent, int leading, int[] codes,
+      int[] advances, Rect[] bounds, int[] kernCodes1, int[] kernCodes2,
+      int[] kernAdjustments) throws IOException {
     // System.out.println("-defineFontInfo id=" + id + ", name=" + name);
     fontCodes.put(new Integer(id), (codes != null) ? codes : new int[0]);
 
@@ -234,9 +249,10 @@
   /**
    * SWFTagTypes interface. Dump any initial text in the field.
    */
-  public void tagDefineTextField(int fieldId, String fieldName, String initialText, Rect boundary, int flags,
-          AlphaColor textColor, int alignment, int fontId, int fontSize, int charLimit, int leftMargin,
-          int rightMargin, int indentation, int lineSpacing) throws IOException {
+  public void tagDefineTextField(int fieldId, String fieldName,
+      String initialText, Rect boundary, int flags, AlphaColor textColor,
+      int alignment, int fontId, int fontSize, int charLimit, int leftMargin,
+      int rightMargin, int indentation, int lineSpacing) throws IOException {
     if (initialText != null) {
       strings.add(initialText);
     }
@@ -245,7 +261,8 @@
   /**
    * SWFTagTypes interface
    */
-  public SWFText tagDefineText(int id, Rect bounds, Matrix matrix) throws IOException {
+  public SWFText tagDefineText(int id, Rect bounds, Matrix matrix)
+      throws IOException {
     lastBounds = curBounds;
     curBounds = bounds;
     return new TextDumper();
@@ -257,7 +274,8 @@
   /**
    * SWFTagTypes interface
    */
-  public SWFText tagDefineText2(int id, Rect bounds, Matrix matrix) throws IOException {
+  public SWFText tagDefineText2(int id, Rect bounds, Matrix matrix)
+      throws IOException {
     lastBounds = curBounds;
     curBounds = bounds;
     return new TextDumper();
@@ -275,15 +293,16 @@
     public void setY(int y) {
       if (firstY)
         firstY = false;
-      else strings.add("\n"); // Change in Y - dump a new line
+      else
+        strings.add("\n"); // Change in Y - dump a new line
     }
 
     /*
      * There are some issues with this method: sometimes SWF files define their
-     * own font, so short of OCR we cannot guess what is the glyph code -> character
-     * mapping. Additionally, some files don't use literal space character, instead
-     * they adjust glyphAdvances. We don't handle it at all - in such cases the text
-     * will be all glued together.
+     * own font, so short of OCR we cannot guess what is the glyph code ->
+     * character mapping. Additionally, some files don't use literal space
+     * character, instead they adjust glyphAdvances. We don't handle it at all -
+     * in such cases the text will be all glued together.
      */
     public void text(int[] glyphIndices, int[] glyphAdvances) {
       // System.out.println("-text id=" + fontId);
@@ -312,9 +331,11 @@
       strings.add(new String(chars));
     }
 
-    public void color(Color color) {}
+    public void color(Color color) {
+    }
 
-    public void setX(int x) {}
+    public void setX(int x) {
+    }
 
     public void done() {
       strings.add("\n");
@@ -372,7 +393,8 @@
   public void lookupTable(String[] values) throws IOException {
     // System.out.println("-lookupTable: " + values.length);
     for (int i = 0; i < values.length; i++) {
-      if (!strings.contains(values[i])) strings.add(values[i]);
+      if (!strings.contains(values[i]))
+        strings.add(values[i]);
     }
     super.lookupTable(values);
     dict = values;
@@ -385,7 +407,7 @@
   }
 
   public void getURL(int vars, int mode) {
-  // System.out.println("-getURL: vars=" + vars + ", mode=" + mode);
+    // System.out.println("-getURL: vars=" + vars + ", mode=" + mode);
   }
 
   public void getURL(String url, String target) throws IOException {
@@ -460,7 +482,8 @@
     super.setTarget(var);
   }
 
-  public SWFActionBlock startFunction(String var, String[] params) throws IOException {
+  public SWFActionBlock startFunction(String var, String[] params)
+      throws IOException {
     // System.out.println("-startFunction1: var=" + var);
     stack.push(var);
     strings.remove(var);
@@ -472,7 +495,8 @@
     return this;
   }
 
-  public SWFActionBlock startFunction2(String var, int arg1, int arg2, String[] params, int[] arg3) throws IOException {
+  public SWFActionBlock startFunction2(String var, int arg1, int arg2,
+      String[] params, int[] arg3) throws IOException {
     // System.out.println("-startFunction2: var=" + var);
     stack.push(var);
     strings.remove(var);
@@ -703,6 +727,7 @@
     // tolerate underruns
     if (this.size() == 0)
       return null;
-    else return super.pop();
+    else
+      return super.pop();
   }
 }
Index: src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
===================================================================
--- src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java	(working copy)
@@ -24,115 +24,58 @@
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 
-
 /**
  * JUnit test for <code>SuffixURLFilter</code>.
- *
+ * 
  * @author Andrzej Bialecki
  */
 public class TestSuffixURLFilter extends TestCase {
-  private static final String suffixes =
-    "# this is a comment\n" +
-    "\n" +
-    ".gif\n" +
-    ".jpg\n" +
-    ".js\n";
-  
+  private static final String suffixes = "# this is a comment\n" + "\n"
+      + ".gif\n" + ".jpg\n" + ".js\n";
+
   private static final String[] urls = new String[] {
-    "http://www.example.com/test.gif",
-    "http://www.example.com/TEST.GIF",
-    "http://www.example.com/test.jpg",
-    "http://www.example.com/test.JPG",
-    "http://www.example.com/test.html",
-    "http://www.example.com/test.HTML",
-    "http://www.example.com/test.html?q=abc.js",
-    "http://www.example.com/test.js?foo=bar&baz=bar#12333",
-  };
-  
-  private static String[] urlsModeAccept = new String[] {
-    null,
-    urls[1],
-    null,
-    urls[3],
-    urls[4],
-    urls[5],
-    null,
-    urls[7]
-  };
-  
-  private static String[] urlsModeReject = new String[] {
-    urls[0],
-    null,
-    urls[2],
-    null,
-    null,
-    null,
-    urls[6],
-    null
-  };
-  
-  private static String[] urlsModeAcceptIgnoreCase = new String[] {
-    null,
-    null,
-    null,
-    null,
-    urls[4],
-    urls[5],
-    null,
-    urls[7]
-  };
- 
-  private static String[] urlsModeRejectIgnoreCase = new String[] {
-    urls[0],
-    urls[1],
-    urls[2],
-    urls[3],
-    null,
-    null,
-    urls[6],
-    null
-  };
-  
-  private static String[] urlsModeAcceptAndPathFilter = new String[] {
-    null,
-    urls[1],
-    null,
-    urls[3],
-    urls[4],
-    urls[5],
-    urls[6],
-    null
-  };
-  
-  private static String[] urlsModeAcceptAndNonPathFilter = new String[] {
-    null,
-    urls[1],
-    null,
-    urls[3],
-    urls[4],
-    urls[5],
-    null,
-    urls[7]
-  };
-  
+      "http://www.example.com/test.gif", "http://www.example.com/TEST.GIF",
+      "http://www.example.com/test.jpg", "http://www.example.com/test.JPG",
+      "http://www.example.com/test.html", "http://www.example.com/test.HTML",
+      "http://www.example.com/test.html?q=abc.js",
+      "http://www.example.com/test.js?foo=bar&baz=bar#12333", };
+
+  private static String[] urlsModeAccept = new String[] { null, urls[1], null,
+      urls[3], urls[4], urls[5], null, urls[7] };
+
+  private static String[] urlsModeReject = new String[] { urls[0], null,
+      urls[2], null, null, null, urls[6], null };
+
+  private static String[] urlsModeAcceptIgnoreCase = new String[] { null, null,
+      null, null, urls[4], urls[5], null, urls[7] };
+
+  private static String[] urlsModeRejectIgnoreCase = new String[] { urls[0],
+      urls[1], urls[2], urls[3], null, null, urls[6], null };
+
+  private static String[] urlsModeAcceptAndPathFilter = new String[] { null,
+      urls[1], null, urls[3], urls[4], urls[5], urls[6], null };
+
+  private static String[] urlsModeAcceptAndNonPathFilter = new String[] { null,
+      urls[1], null, urls[3], urls[4], urls[5], null, urls[7] };
+
   private SuffixURLFilter filter = null;
-  
+
   public TestSuffixURLFilter(String testName) {
     super(testName);
   }
-  
+
   public static Test suite() {
     return new TestSuite(TestSuffixURLFilter.class);
   }
-  
+
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
-  
+
   public void setUp() throws IOException {
     filter = new SuffixURLFilter(new StringReader(suffixes));
   }
-  
+
   public void testModeAccept() {
     filter.setIgnoreCase(false);
     filter.setModeAccept(true);
@@ -164,7 +107,7 @@
       assertTrue(urlsModeRejectIgnoreCase[i] == filter.filter(urls[i]));
     }
   }
-  
+
   public void testModeAcceptAndNonPathFilter() {
     filter.setModeAccept(true);
     filter.setFilterFromPath(false);
@@ -172,7 +115,7 @@
       assertTrue(urlsModeAcceptAndNonPathFilter[i] == filter.filter(urls[i]));
     }
   }
-  
+
   public void testModeAcceptAndPathFilter() {
     filter.setModeAccept(true);
     filter.setFilterFromPath(true);
Index: src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
===================================================================
--- src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java	(working copy)
@@ -51,14 +51,15 @@
  * Attribute "file" has higher precedence if defined. If the config file is
  * missing, all URLs will be rejected.
  * 
- * <p>This filter can be configured to work in one of two modes:
+ * <p>
+ * This filter can be configured to work in one of two modes:
  * <ul>
- * <li><b>default to reject</b> ('-'): in this mode, only URLs that match suffixes
- * specified in the config file will be accepted, all other URLs will be
- * rejected.</li>
- * <li><b>default to accept</b> ('+'): in this mode, only URLs that match suffixes
- * specified in the config file will be rejected, all other URLs will be
- * accepted.</li>
+ * <li><b>default to reject</b> ('-'): in this mode, only URLs that match
+ * suffixes specified in the config file will be accepted, all other URLs will
+ * be rejected.</li>
+ * <li><b>default to accept</b> ('+'): in this mode, only URLs that match
+ * suffixes specified in the config file will be rejected, all other URLs will
+ * be accepted.</li>
  * </ul>
  * <p>
  * The format of this config file is one URL suffix per line, with no preceding
@@ -67,10 +68,10 @@
  * </p>
  * <p>
  * A single '+' or '-' sign not followed by any suffix must be used once, to
- * signify the mode this plugin operates in. An optional single 'I' can be appended,
- * to signify that suffix matches should be case-insensitive. The default, if 
- * not specified, is to use case-sensitive matches, i.e. suffix '.JPG'
- * does not match '.jpg'.
+ * signify the mode this plugin operates in. An optional single 'I' can be
+ * appended, to signify that suffix matches should be case-insensitive. The
+ * default, if not specified, is to use case-sensitive matches, i.e. suffix
+ * '.JPG' does not match '.jpg'.
  * </p>
  * <p>
  * NOTE: the format of this file is different from urlfilter-prefix, because
@@ -82,8 +83,8 @@
  * <h4>Example 1</h4>
  * <p>
  * The configuration shown below will accept all URLs with '.html' or '.htm'
- * suffixes (case-sensitive - '.HTML' or '.HTM' will be rejected),
- * and prohibit all other suffixes.
+ * suffixes (case-sensitive - '.HTML' or '.HTM' will be rejected), and prohibit
+ * all other suffixes.
  * <p>
  * 
  * <pre>
@@ -91,7 +92,7 @@
  *  
  *  # prohibit all unknown, case-sensitive matching
  *  -
- *
+ * 
  *  # collect only HTML files.
  *  .html
  *  .htm
@@ -119,11 +120,13 @@
  * </pre>
  * 
  * </p>
+ * 
  * @author Andrzej Bialecki
  */
 public class SuffixURLFilter implements URLFilter {
 
-  private static final Logger LOG = LoggerFactory.getLogger(SuffixURLFilter.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SuffixURLFilter.class);
 
   // read in attribute "file" of this plugin.
   private String attributeFile = null;
@@ -144,11 +147,13 @@
   }
 
   public String filter(String url) {
-    if (url == null) return null;
+    if (url == null)
+      return null;
     String _url;
     if (ignoreCase)
       _url = url.toLowerCase();
-    else _url = url;
+    else
+      _url = url;
     if (filterFromPath) {
       try {
         URL pUrl = new URL(_url);
@@ -160,11 +165,15 @@
 
     String a = suffixes.shortestMatch(_url);
     if (a == null) {
-      if (modeAccept) return url;
-      else return null;
+      if (modeAccept)
+        return url;
+      else
+        return null;
     } else {
-      if (modeAccept) return null;
-      else return url;
+      if (modeAccept)
+        return null;
+      else
+        return url;
     }
   }
 
@@ -187,30 +196,31 @@
     String line;
 
     while ((line = in.readLine()) != null) {
-      if (line.length() == 0) continue;
+      if (line.length() == 0)
+        continue;
 
       char first = line.charAt(0);
       switch (first) {
-        case ' ':
-        case '\n':
-        case '#': // skip blank & comment lines
-          break;
-        case '-':
-          allow = false;
-          if(line.contains("P"))
-            filterFromPath = true;
-          if(line.contains("I"))
-            ignore = true;
-          break;
-        case '+':
-          allow = true;
-          if(line.contains("P"))
-            filterFromPath = true;
-          if(line.contains("I"))
-            ignore = true;
-          break;
-        default:
-          aSuffixes.add(line);
+      case ' ':
+      case '\n':
+      case '#': // skip blank & comment lines
+        break;
+      case '-':
+        allow = false;
+        if (line.contains("P"))
+          filterFromPath = true;
+        if (line.contains("I"))
+          ignore = true;
+        break;
+      case '+':
+        allow = true;
+        if (line.contains("P"))
+          filterFromPath = true;
+        if (line.contains("I"))
+          ignore = true;
+        break;
+      default:
+        aSuffixes.add(line);
       }
     }
     if (ignore) {
@@ -249,7 +259,8 @@
     this.conf = conf;
 
     String pluginName = "urlfilter-suffix";
-    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(URLFilter.class.getName()).getExtensions();
+    Extension[] extensions = PluginRepository.get(conf)
+        .getExtensionPoint(URLFilter.class.getName()).getExtensions();
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
       if (extension.getDescriptor().getPluginId().equals(pluginName)) {
@@ -257,22 +268,25 @@
         break;
       }
     }
-    if (attributeFile != null && attributeFile.trim().equals("")) attributeFile = null;
+    if (attributeFile != null && attributeFile.trim().equals(""))
+      attributeFile = null;
     if (attributeFile != null) {
       if (LOG.isInfoEnabled()) {
-        LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + attributeFile);
+        LOG.info("Attribute \"file\" is defined for plugin " + pluginName
+            + " as " + attributeFile);
       }
     } else {
       // if (LOG.isWarnEnabled()) {
-      //   LOG.warn("Attribute \"file\" is not defined in plugin.xml for
-      //   plugin "+pluginName);
+      // LOG.warn("Attribute \"file\" is not defined in plugin.xml for
+      // plugin "+pluginName);
       // }
     }
 
     String file = conf.get("urlfilter.suffix.file");
     String stringRules = conf.get("urlfilter.suffix.rules");
     // attribute "file" takes precedence if defined
-    if (attributeFile != null) file = attributeFile;
+    if (attributeFile != null)
+      file = attributeFile;
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
@@ -283,7 +297,9 @@
     try {
       readConfiguration(reader);
     } catch (IOException e) {
-      if (LOG.isErrorEnabled()) { LOG.error(e.getMessage()); }
+      if (LOG.isErrorEnabled()) {
+        LOG.error(e.getMessage());
+      }
       throw new RuntimeException(e.getMessage(), e);
     }
   }
Index: src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
===================================================================
--- src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java	(revision 1188268)
+++ src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java	(working copy)
@@ -34,30 +34,32 @@
 
 import junit.framework.TestCase;
 
-/** 
+/**
  * Based on Unit tests for MSWordParser by John Xing
- *
+ * 
  * @author Rohit Kulkarni & Ashish Vaidya
  */
 public class TestZipParser extends TestCase {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
-  private String sampleDir = System.getProperty("test.data",".");
-  
+  private String sampleDir = System.getProperty("test.data", ".");
+
   // Make sure sample files are copied to "test.data"
-  
-  private String[] sampleFiles = {"test.zip"};
 
+  private String[] sampleFiles = { "test.zip" };
+
   private String expectedText = "textfile.txt This is text file number 1 ";
 
-  public TestZipParser(String name) { 
-    super(name); 
+  public TestZipParser(String name) {
+    super(name);
   }
 
-  protected void setUp() {}
+  protected void setUp() {
+  }
 
-  protected void tearDown() {}
+  protected void tearDown() {
+  }
 
   public void testIt() throws ProtocolException, ParseException {
     String urlString;
@@ -70,8 +72,10 @@
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
       protocol = new ProtocolFactory(conf).getProtocol(urlString);
-      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
-      parse = new ParseUtil(conf).parseByExtensionId("parse-zip",content).get(content.getUrl());
+      content = protocol.getProtocolOutput(new Text(urlString),
+          new CrawlDatum()).getContent();
+      parse = new ParseUtil(conf).parseByExtensionId("parse-zip", content).get(
+          content.getUrl());
       assertTrue(parse.getText().equals(expectedText));
     }
   }
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(revision 1188268)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(working copy)
@@ -61,9 +61,12 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
+      final String contentLen = content.getMetadata().get(
+          Response.CONTENT_LENGTH);
       final int len = Integer.parseInt(contentLen);
-      if (LOG.isDebugEnabled()) { LOG.debug("ziplen: " + len); }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("ziplen: " + len);
+      }
       final byte[] contentInBytes = content.getContent();
       final ByteArrayInputStream bainput = new ByteArrayInputStream(
           contentInBytes);
@@ -85,7 +88,8 @@
 
     } catch (Exception e) {
       return new ParseStatus(ParseStatus.FAILED,
-          "Can't be handled as Zip document. " + e).getEmptyParseResult(content.getUrl(), getConf());
+          "Can't be handled as Zip document. " + e).getEmptyParseResult(
+          content.getUrl(), getConf());
     }
 
     if (resultText == null) {
@@ -98,11 +102,13 @@
 
     outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
     final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-                                              resultTitle, outlinks,
-                                              content.getMetadata());
+        resultTitle, outlinks, content.getMetadata());
 
-    if (LOG.isTraceEnabled()) { LOG.trace("Zip file parsed sucessfully !!"); }
-    return ParseResult.createParseResult(content.getUrl(), new ParseImpl(resultText, parseData));
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Zip file parsed sucessfully !!");
+    }
+    return ParseResult.createParseResult(content.getUrl(), new ParseImpl(
+        resultText, parseData));
   }
 
   public void setConf(Configuration conf) {
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(revision 1188268)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(working copy)
@@ -44,45 +44,44 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.MimeUtil;
 
-
-
 /**
- *
+ * 
  * @author Rohit Kulkarni & Ashish Vaidya
  */
 public class ZipTextExtractor {
-  
+
   /** Get the MimeTypes resolver instance. */
   private MimeUtil MIME;
-  
-  public static final Logger LOG = LoggerFactory.getLogger(ZipTextExtractor.class);
 
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ZipTextExtractor.class);
+
   private Configuration conf;
-  
-  
+
   /** Creates a new instance of ZipTextExtractor */
   public ZipTextExtractor(Configuration conf) {
     this.conf = conf;
     this.MIME = new MimeUtil(conf);
   }
-  
-  public String extractText(InputStream input, String url, List outLinksList) throws IOException {
+
+  public String extractText(InputStream input, String url, List outLinksList)
+      throws IOException {
     String resultText = "";
     byte temp;
-    
+
     ZipInputStream zin = new ZipInputStream(input);
-    
+
     ZipEntry entry;
-    
+
     while ((entry = zin.getNextEntry()) != null) {
-      
+
       if (!entry.isDirectory()) {
         int size = (int) entry.getSize();
         byte[] b = new byte[size];
-        for(int x = 0; x < size; x++) {
+        for (int x = 0; x < size; x++) {
           int err = zin.read();
-          if(err != -1) {
-            b[x] = (byte)err;
+          if (err != -1) {
+            b[x] = (byte) err;
           }
         }
         String newurl = url + "/";
@@ -96,29 +95,33 @@
           String contentType = MIME.getMimeType(fname).getName();
           try {
             Metadata metadata = new Metadata();
-            metadata.set(Response.CONTENT_LENGTH, Long.toString(entry.getSize()));
+            metadata.set(Response.CONTENT_LENGTH,
+                Long.toString(entry.getSize()));
             metadata.set(Response.CONTENT_TYPE, contentType);
-            Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
-            Parse parse = new ParseUtil(this.conf).parse(content).get(content.getUrl());
+            Content content = new Content(newurl, base, b, contentType,
+                metadata, this.conf);
+            Parse parse = new ParseUtil(this.conf).parse(content).get(
+                content.getUrl());
             ParseData theParseData = parse.getData();
             Outlink[] theOutlinks = theParseData.getOutlinks();
-            
-            for(int count = 0; count < theOutlinks.length; count++) {
-              outLinksList.add(new Outlink(theOutlinks[count].getToUrl(), theOutlinks[count].getAnchor()));
+
+            for (int count = 0; count < theOutlinks.length; count++) {
+              outLinksList.add(new Outlink(theOutlinks[count].getToUrl(),
+                  theOutlinks[count].getAnchor()));
             }
-            
+
             resultText += entry.getName() + " " + parse.getText() + " ";
           } catch (ParseException e) {
-            if (LOG.isInfoEnabled()) { 
-              LOG.info("fetch okay, but can't parse " + fname + ", reason: " + e.getMessage());
+            if (LOG.isInfoEnabled()) {
+              LOG.info("fetch okay, but can't parse " + fname + ", reason: "
+                  + e.getMessage());
             }
           }
         }
       }
     }
-    
+
     return resultText;
   }
-  
+
 }
-
Index: src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java
===================================================================
--- src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java	(revision 1188268)
+++ src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java	(working copy)
@@ -159,12 +159,14 @@
       bytes = new byte[size];
       iStream.read(bytes);
     } catch (SftpException e) {
-      logger.error("SftpException in getFileProtocolOutput(), file: "
-          + url.getFile(), e);
+      logger
+          .error(
+              "SftpException in getFileProtocolOutput(), file: "
+                  + url.getFile(), e);
       throw e;
     } catch (IOException e) {
-      logger.error("IOException in getFileProtocolOutput(), file: "
-          + url.getFile(), e);
+      logger.error(
+          "IOException in getFileProtocolOutput(), file: " + url.getFile(), e);
       throw e;
     } finally {
       if (iStream != null) {
@@ -208,8 +210,8 @@
 
       Metadata metadata = new Metadata();
       metadata.set(Response.CONTENT_TYPE, "text/html");
-      metadata.set(Response.CONTENT_LENGTH, String.valueOf(directoryList
-          .length()));
+      metadata.set(Response.CONTENT_LENGTH,
+          String.valueOf(directoryList.length()));
       metadata.set(Response.LAST_MODIFIED, channelSftp.lstat(url.getFile())
           .getMtimeString());
       metadata.set(Response.LOCATION, url.toExternalForm());
@@ -239,7 +241,7 @@
     if (server == null) {
       return;
     }
-    
+
     if (channelSftpByHostMap.containsKey(server)) {
       return;
     }
Index: src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
===================================================================
--- src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java	(working copy)
@@ -29,22 +29,21 @@
 // Nutch imports
 import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
 
-
 /**
  * JUnit based test of class <code>AutomatonURLFilter</code>.
- *
+ * 
  * @author J&eacute;r&ocirc;me Charron
  */
 public class TestAutomatonURLFilter extends RegexURLFilterBaseTest {
-  
+
   public TestAutomatonURLFilter(String testName) {
     super(testName);
   }
-  
+
   public static Test suite() {
     return new TestSuite(TestAutomatonURLFilter.class);
   }
-  
+
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
@@ -57,7 +56,7 @@
       return null;
     }
   }
-  
+
   public void test() {
     test("WholeWebCrawling");
     test("IntranetCrawling");
Index: src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java
===================================================================
--- src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-automaton/src/java/org/apache/nutch/urlfilter/automaton/AutomatonURLFilter.java	(working copy)
@@ -32,12 +32,11 @@
 import org.apache.nutch.urlfilter.api.RegexRule;
 import org.apache.nutch.urlfilter.api.RegexURLFilterBase;
 
-
 /**
- * RegexURLFilterBase implementation based on the
- * <a href="http://www.brics.dk/automaton/">dk.brics.automaton</a>
- * Finite-State Automata for Java<sup>TM</sup>.
- *
+ * RegexURLFilterBase implementation based on the <a
+ * href="http://www.brics.dk/automaton/">dk.brics.automaton</a> Finite-State
+ * Automata for Java<sup>TM</sup>.
+ * 
  * @author J&eacute;r&ocirc;me Charron
  * @see <a href="http://www.brics.dk/automaton/">dk.brics.automaton</a>
  */
@@ -49,24 +48,24 @@
     super();
   }
 
-  public AutomatonURLFilter(String filename)
-    throws IOException, PatternSyntaxException {
+  public AutomatonURLFilter(String filename) throws IOException,
+      PatternSyntaxException {
     super(filename);
   }
 
-  AutomatonURLFilter(Reader reader)
-    throws IOException, IllegalArgumentException {
+  AutomatonURLFilter(Reader reader) throws IOException,
+      IllegalArgumentException {
     super(reader);
   }
 
-  
-  /* ----------------------------------- *
-   * <implementation:RegexURLFilterBase> *
-   * ----------------------------------- */
-  
+  /*
+   * ----------------------------------- * <implementation:RegexURLFilterBase> *
+   * -----------------------------------
+   */
+
   /**
-   * Rules specified as a config property will override rules specified
-   * as a config file.
+   * Rules specified as a config property will override rules specified as a
+   * config file.
    */
   protected Reader getRulesReader(Configuration conf) throws IOException {
     String stringRules = conf.get(URLFILTER_AUTOMATON_RULES);
@@ -81,21 +80,20 @@
   protected RegexRule createRule(boolean sign, String regex) {
     return new Rule(sign, regex);
   }
-  
-  /* ------------------------------------ *
-   * </implementation:RegexURLFilterBase> *
-   * ------------------------------------ */
 
-  
+  /*
+   * ------------------------------------ * </implementation:RegexURLFilterBase>
+   * * ------------------------------------
+   */
+
   public static void main(String args[]) throws IOException {
     main(new AutomatonURLFilter(), args);
   }
 
-
   private class Rule extends RegexRule {
-    
+
     private RunAutomaton automaton;
-    
+
     Rule(boolean sign, String regex) {
       super(sign, regex);
       automaton = new RunAutomaton(new RegExp(regex, RegExp.ALL).toAutomaton());
@@ -105,5 +103,5 @@
       return automaton.run(url);
     }
   }
-  
+
 }
Index: src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java	(revision 1188268)
+++ src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java	(working copy)
@@ -40,17 +40,19 @@
 
 /** Unit tests for RegexUrlNormalizer. */
 public class TestRegexURLNormalizer extends TestCase {
-  private static final Logger LOG = LoggerFactory.getLogger(TestRegexURLNormalizer.class);
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(TestRegexURLNormalizer.class);
+
   private RegexURLNormalizer normalizer;
   private Configuration conf;
   private HashMap testData = new HashMap();
-  
+
   // This system property is defined in ./src/plugin/build-plugin.xml
   private String sampleDir = System.getProperty("test.data", ".");
+
   // Make sure sample files are copied to "test.data" as specified in
   // ./src/plugin/urlnormalizer-regex/build.xml during plugin compilation.
-  
+
   public TestRegexURLNormalizer(String name) throws IOException {
     super(name);
     normalizer = new RegexURLNormalizer();
@@ -58,7 +60,8 @@
     normalizer.setConf(conf);
     File[] configs = new File(sampleDir).listFiles(new FileFilter() {
       public boolean accept(File f) {
-        if (f.getName().endsWith(".xml") && f.getName().startsWith("regex-normalize-"))
+        if (f.getName().endsWith(".xml")
+            && f.getName().startsWith("regex-normalize-"))
           return true;
         return false;
       }
@@ -78,41 +81,44 @@
   }
 
   public void testNormalizerDefault() throws Exception {
-    normalizeTest((NormalizedURL[])testData.get(URLNormalizers.SCOPE_DEFAULT),
-            URLNormalizers.SCOPE_DEFAULT);
+    normalizeTest((NormalizedURL[]) testData.get(URLNormalizers.SCOPE_DEFAULT),
+        URLNormalizers.SCOPE_DEFAULT);
   }
 
   public void testNormalizerScope() throws Exception {
     Iterator it = testData.keySet().iterator();
     while (it.hasNext()) {
-      String scope = (String)it.next();
-      normalizeTest((NormalizedURL[])testData.get(scope), scope);
+      String scope = (String) it.next();
+      normalizeTest((NormalizedURL[]) testData.get(scope), scope);
     }
   }
 
-  private void normalizeTest(NormalizedURL[] urls, String scope) throws Exception {
+  private void normalizeTest(NormalizedURL[] urls, String scope)
+      throws Exception {
     for (int i = 0; i < urls.length; i++) {
       String url = urls[i].url;
       String normalized = normalizer.normalize(urls[i].url, scope);
       String expected = urls[i].expectedURL;
-      LOG.info("scope: " + scope + " url: " + url + " | normalized: " + normalized + " | expected: " + expected);
+      LOG.info("scope: " + scope + " url: " + url + " | normalized: "
+          + normalized + " | expected: " + expected);
       assertEquals(urls[i].expectedURL, normalized);
     }
   }
-	
+
   private void bench(int loops, String scope) {
     long start = System.currentTimeMillis();
     try {
-      NormalizedURL[] expected = (NormalizedURL[])testData.get(scope);
-      if (expected == null) return;
+      NormalizedURL[] expected = (NormalizedURL[]) testData.get(scope);
+      if (expected == null)
+        return;
       for (int i = 0; i < loops; i++) {
         normalizeTest(expected, scope);
       }
     } catch (Exception e) {
       fail(e.toString());
     }
-    LOG.info("bench time (" + loops + ") " +
-             (System.currentTimeMillis() - start) + "ms");
+    LOG.info("bench time (" + loops + ") "
+        + (System.currentTimeMillis() - start) + "ms");
   }
 
   private static class NormalizedURL {
@@ -128,17 +134,18 @@
 
   private NormalizedURL[] readTestFile(String scope) throws IOException {
     File f = new File(sampleDir, "regex-normalize-" + scope + ".test");
-    BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
+    BufferedReader in = new BufferedReader(new InputStreamReader(
+        new FileInputStream(f), "UTF-8"));
     List list = new ArrayList();
     String line;
-    while((line = in.readLine()) != null) {
-      if (  line.trim().length() == 0 ||
-            line.startsWith("#") ||
-            line.startsWith(" ")) continue;
+    while ((line = in.readLine()) != null) {
+      if (line.trim().length() == 0 || line.startsWith("#")
+          || line.startsWith(" "))
+        continue;
       list.add(new NormalizedURL(line));
     }
     return (NormalizedURL[]) list.toArray(new NormalizedURL[list.size()]);
-  }  
+  }
 
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
@@ -152,7 +159,8 @@
       if (args[i].equals("-bench")) {
         bench = true;
         iter = Integer.parseInt(args[++i]);
-      } else scope = args[i];
+      } else
+        scope = args[i];
     }
     if (scope == null) {
       System.err.println("Missing required scope name.");
@@ -163,11 +171,12 @@
       System.exit(-1);
     }
     TestRegexURLNormalizer test = new TestRegexURLNormalizer("test");
-    NormalizedURL[] urls = (NormalizedURL[])test.testData.get(scope);
+    NormalizedURL[] urls = (NormalizedURL[]) test.testData.get(scope);
     if (urls == null) {
-      LOG.warn("Missing test data for scope '" + scope + "', using default scope.");
+      LOG.warn("Missing test data for scope '" + scope
+          + "', using default scope.");
       scope = URLNormalizers.SCOPE_DEFAULT;
-      urls = (NormalizedURL[])test.testData.get(scope);
+      urls = (NormalizedURL[]) test.testData.get(scope);
     }
     if (bench) {
       test.bench(iter, scope);
@@ -176,6 +185,4 @@
     }
   }
 
-
-
 }
Index: src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java	(revision 1188268)
+++ src/plugin/urlnormalizer-regex/src/java/org/apache/nutch/net/urlnormalizer/regex/RegexURLNormalizer.java	(working copy)
@@ -51,19 +51,23 @@
  * Allows users to do regex substitutions on all/any URLs that are encountered,
  * which is useful for stripping session IDs from URLs.
  * 
- * <p>This class uses the <tt>urlnormalizer.regex.file</tt> property.
- * It should be set to the file name of an xml file which should contain the
- * patterns and substitutions to be done on encountered URLs.
+ * <p>
+ * This class uses the <tt>urlnormalizer.regex.file</tt> property. It should be
+ * set to the file name of an xml file which should contain the patterns and
+ * substitutions to be done on encountered URLs.
  * </p>
- * <p>This class also supports different rules depending on the scope. Please see
- * the javadoc in {@link org.apache.nutch.net.URLNormalizers} for more details.</p>
+ * <p>
+ * This class also supports different rules depending on the scope. Please see
+ * the javadoc in {@link org.apache.nutch.net.URLNormalizers} for more details.
+ * </p>
  * 
  * @author Luke Baker
  * @author Andrzej Bialecki
  */
 public class RegexURLNormalizer extends Configured implements URLNormalizer {
 
-  private static final Logger LOG = LoggerFactory.getLogger(RegexURLNormalizer.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(RegexURLNormalizer.class);
 
   /**
    * Class which holds a compiled pattern and its corresponding substition
@@ -76,7 +80,7 @@
   }
 
   private HashMap scopedRules;
-  
+
   private static final List EMPTY_RULES = Collections.EMPTY_LIST;
 
   /**
@@ -96,7 +100,7 @@
    * configuration files for it.
    */
   public RegexURLNormalizer(Configuration conf, String filename)
-          throws IOException, PatternSyntaxException {
+      throws IOException, PatternSyntaxException {
     super(conf);
     List rules = readConfigurationFile(filename);
     if (rules != null)
@@ -105,7 +109,8 @@
 
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     // the default constructor was called
     if (this.scopedRules == null) {
       String filename = getConf().get("urlnormalizer.regex.file");
@@ -137,15 +142,16 @@
   void setConfiguration(Reader reader, String scope) {
     List rules = readConfiguration(reader);
     scopedRules.put(scope, rules);
-    LOG.debug("Set config for scope '" + scope + "': " + rules.size() + " rules.");
+    LOG.debug("Set config for scope '" + scope + "': " + rules.size()
+        + " rules.");
   }
-  
+
   /**
    * This function does the replacements by iterating through all the regex
    * patterns. It accepts a string url as input and returns the altered string.
    */
   public synchronized String regexNormalize(String urlString, String scope) {
-    List curRules = (List)scopedRules.get(scope);
+    List curRules = (List) scopedRules.get(scope);
     if (curRules == null) {
       // try to populate
       String configFile = getConf().get("urlnormalizer.regex.file." + scope);
@@ -170,7 +176,7 @@
     }
     if (curRules == EMPTY_RULES || curRules == null) {
       // use global rules
-      curRules = (List)scopedRules.get(URLNormalizers.SCOPE_DEFAULT);
+      curRules = (List) scopedRules.get(URLNormalizers.SCOPE_DEFAULT);
     }
     Iterator i = curRules.iterator();
     while (i.hasNext()) {
@@ -184,7 +190,7 @@
   }
 
   public synchronized String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     return regexNormalize(urlString, scope);
   }
 
@@ -201,17 +207,17 @@
       return EMPTY_RULES;
     }
   }
-  
+
   private List readConfiguration(Reader reader) {
     List rules = new ArrayList();
     try {
 
       // borrowed heavily from code in Configuration.java
       Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
-              .parse(new InputSource(reader));
+          .parse(new InputSource(reader));
       Element root = doc.getDocumentElement();
       if ((!"regex-normalize".equals(root.getTagName()))
-              && (LOG.isErrorEnabled())) {
+          && (LOG.isErrorEnabled())) {
         LOG.error("bad conf file: top-level element not <regex-normalize>");
       }
       NodeList regexes = root.getChildNodes();
@@ -234,7 +240,7 @@
           if ("pattern".equals(field.getTagName()) && field.hasChildNodes())
             patternValue = ((Text) field.getFirstChild()).getData();
           if ("substitution".equals(field.getTagName())
-                  && field.hasChildNodes())
+              && field.hasChildNodes())
             subValue = ((Text) field.getFirstChild()).getData();
           if (!field.hasChildNodes())
             subValue = "";
@@ -252,16 +258,18 @@
       }
       return EMPTY_RULES;
     }
-    if (rules.size() == 0) return EMPTY_RULES;
+    if (rules.size() == 0)
+      return EMPTY_RULES;
     return rules;
   }
 
   /** Spits out patterns and substitutions that are in the configuration file. */
   public static void main(String args[]) throws PatternSyntaxException,
-          IOException {
+      IOException {
     RegexURLNormalizer normalizer = new RegexURLNormalizer();
     normalizer.setConf(NutchConfiguration.create());
-    Iterator i = ((List)normalizer.scopedRules.get(URLNormalizers.SCOPE_DEFAULT)).iterator();
+    Iterator i = ((List) normalizer.scopedRules
+        .get(URLNormalizers.SCOPE_DEFAULT)).iterator();
     System.out.println("* Rules for 'DEFAULT' scope:");
     while (i.hasNext()) {
       Rule r = (Rule) i.next();
@@ -275,10 +283,11 @@
     if (normalizer.scopedRules.size() > 1) {
       Iterator it = normalizer.scopedRules.keySet().iterator();
       while (it.hasNext()) {
-        String scope = (String)it.next();
-        if (URLNormalizers.SCOPE_DEFAULT.equals(scope)) continue;
+        String scope = (String) it.next();
+        if (URLNormalizers.SCOPE_DEFAULT.equals(scope))
+          continue;
         System.out.println("* Rules for '" + scope + "' scope:");
-        i = ((List)normalizer.scopedRules.get(scope)).iterator();
+        i = ((List) normalizer.scopedRules.get(scope)).iterator();
         while (i.hasNext()) {
           Rule r = (Rule) i.next();
           System.out.print("  " + r.pattern.pattern() + " -> ");
@@ -289,10 +298,12 @@
     if (args.length > 0) {
       System.out.println("\n---------- Normalizer test -----------");
       String scope = URLNormalizers.SCOPE_DEFAULT;
-      if (args.length > 1) scope = args[1];
+      if (args.length > 1)
+        scope = args[1];
       System.out.println("Scope: " + scope);
       System.out.println("Input url:  '" + args[0] + "'");
-      System.out.println("Output url: '" + normalizer.normalize(args[0], scope) + "'");
+      System.out.println("Output url: '" + normalizer.normalize(args[0], scope)
+          + "'");
     }
     System.exit(0);
   }
Index: src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
===================================================================
--- src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java	(revision 1188268)
+++ src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java	(working copy)
@@ -36,9 +36,10 @@
  * @author mattmann
  * @version $Revision$
  * 
- * <p>
- * Unit tests for the {@link File}Protocol.
- * </p>.
+ *          <p>
+ *          Unit tests for the {@link File}Protocol.
+ *          </p>
+ *          .
  */
 public class TestProtocolFile extends TestCase {
 
@@ -46,12 +47,13 @@
   private String sampleDir = System.getProperty("test.data", ".");
 
   private static final String[] testTextFiles = new String[] {
-      "testprotocolfile.txt", "testprotocolfile_(encoded).txt", "testprotocolfile_%28encoded%29.txt" };
+      "testprotocolfile.txt", "testprotocolfile_(encoded).txt",
+      "testprotocolfile_%28encoded%29.txt" };
 
   private static final String expectedMimeType = "text/plain";
-  
+
   private Configuration conf;
-  
+
   protected void setUp() {
     conf = NutchConfiguration.create();
   }
@@ -61,12 +63,12 @@
       setContentType(testTextFile);
     }
   }
-  
+
   /**
-   * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata
-   * field.
-   * @throws ProtocolNotFound 
+   * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata field.
    * 
+   * @throws ProtocolNotFound
+   * 
    * @since NUTCH-384
    * 
    */
@@ -75,19 +77,19 @@
     assertNotNull(urlString);
     WebPage datum = new WebPage();
     Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString);
-    ProtocolOutput output = protocol.getProtocolOutput(urlString,datum);
+    ProtocolOutput output = protocol.getProtocolOutput(urlString, datum);
     assertNotNull(output);
 
     assertEquals("Status code: [" + output.getStatus().getCode()
         + "], not equal to: [" + ProtocolStatusCodes.SUCCESS + "]: args: ["
-        + output.getStatus().getArgs() + "]", ProtocolStatusCodes.SUCCESS, output
-        .getStatus().getCode());
+        + output.getStatus().getArgs() + "]", ProtocolStatusCodes.SUCCESS,
+        output.getStatus().getCode());
     assertNotNull(output.getContent());
     assertNotNull(output.getContent().getContentType());
     assertEquals(expectedMimeType, output.getContent().getContentType());
     assertNotNull(output.getContent().getMetadata());
-    assertEquals(expectedMimeType, output.getContent().getMetadata().get(
-        Response.CONTENT_TYPE));
+    assertEquals(expectedMimeType,
+        output.getContent().getMetadata().get(Response.CONTENT_TYPE));
 
   }
 
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(revision 1188268)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(working copy)
@@ -59,7 +59,7 @@
   static final int MAX_REDIRECTS = 5;
 
   int maxContentLength;
-  
+
   boolean crawlParents;
 
   private Configuration conf;
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java	(revision 1188268)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java	(working copy)
@@ -17,14 +17,17 @@
 
 package org.apache.nutch.protocol.file;
 
-/** Thrown for File error codes.
+/**
+ * Thrown for File error codes.
  */
 public class FileError extends FileException {
 
   private int code;
-  
-  public int getCode(int code) { return code; }
 
+  public int getCode(int code) {
+    return code;
+  }
+
   public FileError(int code) {
     super("File Error: " + code);
     this.code = code;
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(revision 1188268)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(working copy)
@@ -31,31 +31,27 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.tika.mime.MimeType;
 
-
 /************************************
- * FileResponse.java mimics file replies as http response.
- * It tries its best to follow http's way for headers, response codes
- * as well as exceptions.
- *
- * Comments:
- * (1) java.net.URL and java.net.URLConnection can handle file: scheme.
- * However they are not flexible enough, so not used in this implementation.
- *
- * (2) java.io.File is used for its abstractness across platforms.
- * Warning:
- * java.io.File API (1.4.2) does not elaborate on how special files,
- * such as /dev/* in unix and /proc/* on linux, are treated. Tests show
- *  (a) java.io.File.isFile() return false for /dev/*
- *  (b) java.io.File.isFile() return true for /proc/*
- *  (c) java.io.File.length() return 0 for /proc/*
- * We are probably oaky for now. Could be buggy here.
- * How about special files on windows?
- *
- * (3) java.io.File API (1.4.2) does not seem to know unix hard link files.
- * They are just treated as individual files.
- *
+ * FileResponse.java mimics file replies as http response. It tries its best to
+ * follow http's way for headers, response codes as well as exceptions.
+ * 
+ * Comments: (1) java.net.URL and java.net.URLConnection can handle file:
+ * scheme. However they are not flexible enough, so not used in this
+ * implementation.
+ * 
+ * (2) java.io.File is used for its abstractness across platforms. Warning:
+ * java.io.File API (1.4.2) does not elaborate on how special files, such as
+ * /dev/* in unix and /proc/* on linux, are treated. Tests show (a)
+ * java.io.File.isFile() return false for /dev/* (b) java.io.File.isFile()
+ * return true for /proc/* (c) java.io.File.length() return 0 for /proc/* We are
+ * probably okay for now. Could be buggy here. How about special files on
+ * windows?
+ * 
+ * (3) java.io.File API (1.4.2) does not seem to know unix hard link files. They
+ * are just treated as individual files.
+ * 
  * (4) No funcy POSIX file attributes yet. May never need?
- *
+ * 
  * @author John Xing
  ***********************************/
 public class FileResponse {
@@ -69,33 +65,36 @@
 
   private final File file;
   private Configuration conf;
-  
+
   private MimeUtil MIME;
 
   /** Returns the response code. */
-  public int getCode() { return code; }
+  public int getCode() {
+    return code;
+  }
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
     return headers.get(name);
   }
 
-  public byte[] getContent() { return content; }
+  public byte[] getContent() {
+    return content;
+  }
 
   public Content toContent() {
     return new Content(orig, base, (content != null ? content : EMPTY_CONTENT),
-                       getHeader(Response.CONTENT_TYPE),
-                       headers, this.conf);
+        getHeader(Response.CONTENT_TYPE), headers, this.conf);
   }
-  
+
   public FileResponse(URL url, WebPage page, File file, Configuration conf)
-    throws FileException, IOException {
+      throws FileException, IOException {
 
     this.orig = url.toString();
     this.base = url.toString();
     this.file = file;
     this.conf = conf;
-    
+
     MIME = new MimeUtil(conf);
 
     if (!"file".equals(url.getProtocol()))
@@ -118,22 +117,22 @@
       path = java.net.URLDecoder.decode(path, "UTF-8");
     } catch (UnsupportedEncodingException ex) {
     }
-    
+
     try {
 
       this.content = null;
 
       // url.toURI() is only in j2se 1.5.0
-      //java.io.File f = new java.io.File(url.toURI());
+      // java.io.File f = new java.io.File(url.toURI());
       java.io.File f = new java.io.File(path);
 
       if (!f.exists()) {
-        this.code = 404;  // http Not Found
+        this.code = 404; // http Not Found
         return;
       }
 
       if (!f.canRead()) {
-        this.code = 401;  // http Unauthorized
+        this.code = 401; // http Unauthorized
         return;
       }
 
@@ -142,15 +141,16 @@
       // where case is insensitive
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
-        //hdrs.put("Location", f.getCanonicalFile().toURI());
+        // hdrs.put("Location", f.getCanonicalFile().toURI());
         headers.set(Response.LOCATION, f.getCanonicalFile().toURL().toString());
 
-        this.code = 300;  // http redirect
+        this.code = 300; // http redirect
         return;
       }
       if (f.lastModified() <= page.getModifiedTime()) {
         this.code = 304;
-        this.headers.set("Last-Modified", HttpDateFormat.toString(f.lastModified()));
+        this.headers.set("Last-Modified",
+            HttpDateFormat.toString(f.lastModified()));
         return;
       }
 
@@ -170,45 +170,46 @@
   }
 
   // get file as http response
-  private void getFileAsHttpResponse(java.io.File f)
-    throws FileException, IOException {
+  private void getFileAsHttpResponse(java.io.File f) throws FileException,
+      IOException {
 
     // ignore file of size larger than
     // Integer.MAX_VALUE = 2^31-1 = 2147483647
     long size = f.length();
     if (size > Integer.MAX_VALUE) {
-      throw new FileException("file is too large, size: "+size);
+      throw new FileException("file is too large, size: " + size);
       // or we can do this?
-      // this.code = 400;  // http Bad request
+      // this.code = 400; // http Bad request
       // return;
     }
 
     // capture content
     int len = (int) size;
-    
+
     if (this.file.maxContentLength >= 0 && len > this.file.maxContentLength)
       len = this.file.maxContentLength;
 
     this.content = new byte[len];
 
     java.io.InputStream is = new java.io.FileInputStream(f);
-    int offset = 0; int n = 0;
+    int offset = 0;
+    int n = 0;
     while (offset < len
-      && (n = is.read(this.content, offset, len-offset)) >= 0) {
+        && (n = is.read(this.content, offset, len - offset)) >= 0) {
       offset += n;
     }
     if (offset < len) { // keep whatever already have, but issue a warning
       if (File.LOG.isWarnEnabled()) {
-        File.LOG.warn("not enough bytes read from file: "+f.getPath());
+        File.LOG.warn("not enough bytes read from file: " + f.getPath());
       }
     }
-    is.close(); 
+    is.close();
 
     // set headers
     headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
-    headers.set(Response.LAST_MODIFIED, HttpDateFormat.toString(f
-        .lastModified()));
-    
+    headers.set(Response.LAST_MODIFIED,
+        HttpDateFormat.toString(f.lastModified()));
+
     MimeType mimeType = MIME.getMimeType(f);
     String mimeTypeString = mimeType != null ? mimeType.getName() : "";
     headers.set(Response.CONTENT_TYPE, mimeTypeString);
@@ -218,33 +219,33 @@
   }
 
   // get dir list as http response
-  private void getDirAsHttpResponse(java.io.File f)
-    throws IOException {
+  private void getDirAsHttpResponse(java.io.File f) throws IOException {
 
     String path = f.toString();
     if (this.file.crawlParents)
-        this.content = list2html(f.listFiles(), path, "/".equals(path) ? false : true);
+      this.content = list2html(f.listFiles(), path, "/".equals(path) ? false
+          : true);
     else
-        this.content = list2html(f.listFiles(), path, false);
+      this.content = list2html(f.listFiles(), path, false);
 
     // set headers
     headers.set(Response.CONTENT_LENGTH,
-      new Integer(this.content.length).toString());
+        new Integer(this.content.length).toString());
     headers.set(Response.CONTENT_TYPE, "text/html");
     headers.set(Response.LAST_MODIFIED,
-      HttpDateFormat.toString(f.lastModified()));
+        HttpDateFormat.toString(f.lastModified()));
 
     // response code
     this.code = 200; // http OK
   }
 
   // generate html page from dir list
-  private byte[] list2html(java.io.File[] list,
-    String path, boolean includeDotDot) {
+  private byte[] list2html(java.io.File[] list, String path,
+      boolean includeDotDot) {
 
     StringBuffer x = new StringBuffer("<html><head>");
-    x.append("<title>Index of "+path+"</title></head>\n");
-    x.append("<body><h1>Index of "+path+"</h1><pre>\n");
+    x.append("<title>Index of " + path + "</title></head>\n");
+    x.append("<body><h1>Index of " + path + "</h1><pre>\n");
 
     if (includeDotDot) {
       x.append("<a href='../'>../</a>\t-\t-\t-\n");
@@ -253,20 +254,20 @@
     // fix me: we might want to sort list here! but not now.
 
     java.io.File f;
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       f = list[i];
       String name = f.getName();
       String time = HttpDateFormat.toString(f.lastModified());
       if (f.isDirectory()) {
         // java 1.4.2 api says dir itself and parent dir are not listed
         // so the following is not needed.
-        //if (name.equals(".") || name.equals(".."))
-        //  continue;
-        x.append("<a href='"+name+"/"+"'>"+name+"/</a>\t");
-        x.append(time+"\t-\n");
+        // if (name.equals(".") || name.equals(".."))
+        // continue;
+        x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t");
+        x.append(time + "\t-\n");
       } else if (f.isFile()) {
-        x.append("<a href='"+name+    "'>"+name+"</a>\t");
-        x.append(time+"\t"+f.length()+"\n");
+        x.append("<a href='" + name + "'>" + name + "</a>\t");
+        x.append(time + "\t" + f.length() + "\n");
       } else {
         // ignore any other
       }
Index: src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
===================================================================
--- src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java	(working copy)
@@ -29,22 +29,21 @@
 // Nutch imports
 import org.apache.nutch.urlfilter.api.RegexURLFilterBaseTest;
 
-
 /**
  * JUnit based test of class <code>RegexURLFilter</code>.
- *
+ * 
  * @author J&eacute;r&ocirc;me Charron
  */
 public class TestRegexURLFilter extends RegexURLFilterBaseTest {
-  
+
   public TestRegexURLFilter(String testName) {
     super(testName);
   }
-  
+
   public static Test suite() {
     return new TestSuite(TestRegexURLFilter.class);
   }
-  
+
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
@@ -57,7 +56,7 @@
       return null;
     }
   }
-  
+
   public void test() {
     test("WholeWebCrawling");
     test("IntranetCrawling");
Index: src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
===================================================================
--- src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java	(revision 1188268)
+++ src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java	(working copy)
@@ -30,13 +30,12 @@
 import org.apache.nutch.urlfilter.api.RegexURLFilterBase;
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * Filters URLs based on a file of regular expressions using the
  * {@link java.util.regex Java Regex implementation}.
  */
 public class RegexURLFilter extends RegexURLFilterBase {
-  
+
   public static final String URLFILTER_REGEX_FILE = "urlfilter.regex.file";
   public static final String URLFILTER_REGEX_RULES = "urlfilter.regex.rules";
 
@@ -44,24 +43,23 @@
     super();
   }
 
-  public RegexURLFilter(String filename)
-    throws IOException, PatternSyntaxException {
+  public RegexURLFilter(String filename) throws IOException,
+      PatternSyntaxException {
     super(filename);
   }
 
-  RegexURLFilter(Reader reader)
-    throws IOException, IllegalArgumentException {
+  RegexURLFilter(Reader reader) throws IOException, IllegalArgumentException {
     super(reader);
   }
 
-  
-  /* ----------------------------------- *
-   * <implementation:RegexURLFilterBase> *
-   * ----------------------------------- */
-  
+  /*
+   * ----------------------------------- * <implementation:RegexURLFilterBase> *
+   * -----------------------------------
+   */
+
   /**
-   * Rules specified as a config property will override rules specified
-   * as a config file.
+   * Rules specified as a config property will override rules specified as a
+   * config file.
    */
   protected Reader getRulesReader(Configuration conf) throws IOException {
     String stringRules = conf.get(URLFILTER_REGEX_RULES);
@@ -76,23 +74,22 @@
   protected RegexRule createRule(boolean sign, String regex) {
     return new Rule(sign, regex);
   }
-  
-  /* ------------------------------------ *
-   * </implementation:RegexURLFilterBase> *
-   * ------------------------------------ */
 
-  
+  /*
+   * ------------------------------------ * </implementation:RegexURLFilterBase>
+   * * ------------------------------------
+   */
+
   public static void main(String args[]) throws IOException {
     RegexURLFilter filter = new RegexURLFilter();
     filter.setConf(NutchConfiguration.create());
     main(filter, args);
   }
 
-
   private class Rule extends RegexRule {
-    
+
     private Pattern pattern;
-    
+
     Rule(boolean sign, String regex) {
       super(sign, regex);
       pattern = Pattern.compile(regex);
@@ -102,5 +99,5 @@
       return pattern.matcher(url).find();
     }
   }
-  
+
 }
Index: src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
===================================================================
--- src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java	(revision 1188268)
+++ src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java	(working copy)
@@ -22,287 +22,231 @@
 import junit.framework.TestCase;
 
 public class TestRobotRulesParser extends TestCase {
-  private static final String LF= "\n";
-  private static final String CR= "\r";
-  private static final String CRLF= "\r\n";
-  
-  private static final boolean[] ACCEPT_ALL = {
-    true,   // "/a",	      
-    true,   // "/a/",	      
-    true,   // "/a/bloh/foo.html"
-    true,   // "/b",	      
-    true,   // "/b/a",	      
-    true,   // "/b/a/index.html",
-    true,   // "/b/b/foo.html",  
-    true,   // "/c",	      
-    true,   // "/c/a",	      
-    true,   // "/c/a/index.html",
-    true,   // "/c/b/foo.html",  
-    true,   // "/d",	      
-    true,   // "/d/a",	      
-    true,   // "/e/a/index.html",
-    true,   // "/e/d",	      
-    true,   // "/e/d/foo.html",  
-    true,   // "/e/doh.html",    
-    true,   // "/f/index.html",  
-    true,   // "/foo/bar.html",  
-    true,   // "/f/",
-  };
-  
-  private static final String[] ROBOTS_STRINGS= new String[] {
-    "User-Agent: Agent1 #foo" + CR 
-    + "Disallow: /a" + CR 
-    + "Disallow: /b/a" + CR 
-    + "#Disallow: /c" + CR 
-    + "" + CR 
-    + "" + CR 
-    + "User-Agent: Agent2 Agent3#foo" + CR 
-    + "User-Agent: Agent4" + CR 
-    + "Disallow: /d" + CR 
-    + "Disallow: /e/d/" + CR
-    + "" + CR 
-    + "User-Agent: *" + CR 
-    + "Disallow: /foo/bar/" + CR,
-    null  // Used to test EMPTY_RULES
-  };
+  private static final String LF = "\n";
+  private static final String CR = "\r";
+  private static final String CRLF = "\r\n";
 
-  private static final String[] AGENT_STRINGS= new String[] {
-    "Agent1",
-    "Agent2",
-    "Agent3",
-    "Agent4",
-    "Agent5",
+  private static final boolean[] ACCEPT_ALL = { true, // "/a",
+      true, // "/a/",
+      true, // "/a/bloh/foo.html"
+      true, // "/b",
+      true, // "/b/a",
+      true, // "/b/a/index.html",
+      true, // "/b/b/foo.html",
+      true, // "/c",
+      true, // "/c/a",
+      true, // "/c/a/index.html",
+      true, // "/c/b/foo.html",
+      true, // "/d",
+      true, // "/d/a",
+      true, // "/e/a/index.html",
+      true, // "/e/d",
+      true, // "/e/d/foo.html",
+      true, // "/e/doh.html",
+      true, // "/f/index.html",
+      true, // "/foo/bar.html",
+      true, // "/f/",
   };
 
-  private static final boolean[][] NOT_IN_ROBOTS_STRING= new boolean[][] {
-    { 
-      false, 
-      false,
-      false,
-      false,
-      true,
-    },
-    { 
-      false, 
-      false,
-      false,
-      false,
-      true,
-    }    
+  private static final String[] ROBOTS_STRINGS = new String[] {
+      "User-Agent: Agent1 #foo" + CR + "Disallow: /a" + CR + "Disallow: /b/a"
+          + CR + "#Disallow: /c" + CR + "" + CR + "" + CR
+          + "User-Agent: Agent2 Agent3#foo" + CR + "User-Agent: Agent4" + CR
+          + "Disallow: /d" + CR + "Disallow: /e/d/" + CR + "" + CR
+          + "User-Agent: *" + CR + "Disallow: /foo/bar/" + CR, null // Used to
+                                                                    // test
+                                                                    // EMPTY_RULES
   };
 
-  private static final String[] TEST_PATHS= new String[] {
-    "/a",
-    "/a/",
-    "/a/bloh/foo.html",
-    "/b",
-    "/b/a",
-    "/b/a/index.html",
-    "/b/b/foo.html",
-    "/c",
-    "/c/a",
-    "/c/a/index.html",
-    "/c/b/foo.html",
-    "/d",
-    "/d/a",
-    "/e/a/index.html",
-    "/e/d",
-    "/e/d/foo.html",
-    "/e/doh.html",
-    "/f/index.html",
-    "/foo/bar/baz.html",  
-    "/f/",
-  };
+  private static final String[] AGENT_STRINGS = new String[] { "Agent1",
+      "Agent2", "Agent3", "Agent4", "Agent5", };
 
-  private static final boolean[][][] ALLOWED= new boolean[][][] {
-    { // ROBOTS_STRINGS[0]
+  private static final boolean[][] NOT_IN_ROBOTS_STRING = new boolean[][] {
+      { false, false, false, false, true, },
+      { false, false, false, false, true, } };
+
+  private static final String[] TEST_PATHS = new String[] { "/a", "/a/",
+      "/a/bloh/foo.html", "/b", "/b/a", "/b/a/index.html", "/b/b/foo.html",
+      "/c", "/c/a", "/c/a/index.html", "/c/b/foo.html", "/d", "/d/a",
+      "/e/a/index.html", "/e/d", "/e/d/foo.html", "/e/doh.html",
+      "/f/index.html", "/foo/bar/baz.html", "/f/", };
+
+  private static final boolean[][][] ALLOWED = new boolean[][][] { { // ROBOTS_STRINGS[0]
       { // Agent1
-	false,  // "/a",	      
-	false,  // "/a/",	      
-	false,  // "/a/bloh/foo.html"
-	true,   // "/b",	      
-	false,  // "/b/a",	      
-	false,  // "/b/a/index.html",
-	true,   // "/b/b/foo.html",  
-	true,   // "/c",	      
-	true,   // "/c/a",	      
-	true,   // "/c/a/index.html",
-	true,   // "/c/b/foo.html",  
-	true,   // "/d",	      
-	true,   // "/d/a",	      
-	true,   // "/e/a/index.html",
-	true,   // "/e/d",	      
-	true,   // "/e/d/foo.html",  
-	true,   // "/e/doh.html",    
-	true,   // "/f/index.html",  
-	true,   // "/foo/bar.html",  
-	true,   // "/f/",  
-      }, 
-      { // Agent2
-	true,   // "/a",	      
-	true,   // "/a/",	      
-	true,   // "/a/bloh/foo.html"
-	true,   // "/b",	      
-	true,   // "/b/a",	      
-	true,   // "/b/a/index.html",
-	true,   // "/b/b/foo.html",  
-	true,   // "/c",	      
-	true,   // "/c/a",	      
-	true,   // "/c/a/index.html",
-	true,   // "/c/b/foo.html",  
-	false,  // "/d",	      
-	false,  // "/d/a",	      
-	true,   // "/e/a/index.html",
-	true,   // "/e/d",	      
-	false,  // "/e/d/foo.html",  
-	true,   // "/e/doh.html",    
-	true,   // "/f/index.html",  
-	true,   // "/foo/bar.html",  
-	true,   // "/f/",  
-      },
-      { // Agent3
-	true,   // "/a",	      
-	true,   // "/a/",	      
-	true,   // "/a/bloh/foo.html"
-	true,   // "/b",	      
-	true,   // "/b/a",	      
-	true,   // "/b/a/index.html",
-	true,   // "/b/b/foo.html",  
-	true,   // "/c",	      
-	true,   // "/c/a",	      
-	true,   // "/c/a/index.html",
-	true,   // "/c/b/foo.html",  
-	false,  // "/d",	      
-	false,  // "/d/a",	      
-	true,   // "/e/a/index.html",
-	true,   // "/e/d",	      
-	false,  // "/e/d/foo.html",  
-	true,   // "/e/doh.html",    
-	true,   // "/f/index.html",  
-	true,   // "/foo/bar.html",  
-	true,   // "/f/",  
-      },
-      { // Agent4
-	true,   // "/a",	      
-	true,   // "/a/",	      
-	true,   // "/a/bloh/foo.html"
-	true,   // "/b",	      
-	true,   // "/b/a",	      
-	true,   // "/b/a/index.html",
-	true,   // "/b/b/foo.html",  
-	true,   // "/c",	      
-	true,   // "/c/a",	      
-	true,   // "/c/a/index.html",
-	true,   // "/c/b/foo.html",  
-	false,  // "/d",	      
-	false,  // "/d/a",	      
-	true,   // "/e/a/index.html",
-	true,   // "/e/d",	      
-	false,  // "/e/d/foo.html",  
-	true,   // "/e/doh.html",    
-	true,   // "/f/index.html",  
-	true,   // "/foo/bar.html",  
-	true,   // "/f/",  
-      },
-      { // Agent5/"*"
-	true,   // "/a",	      
-	true,   // "/a/",	      
-	true,   // "/a/bloh/foo.html"
-	true,   // "/b",	      
-	true,   // "/b/a",	      
-	true,   // "/b/a/index.html",
-	true,   // "/b/b/foo.html",  
-	true,   // "/c",	      
-	true,   // "/c/a",	      
-	true,   // "/c/a/index.html",
-	true,   // "/c/b/foo.html",  
-	true,   // "/d",	      
-	true,   // "/d/a",	      
-	true,   // "/e/a/index.html",
-	true,   // "/e/d",	      
-	true,   // "/e/d/foo.html",  
-	true,   // "/e/doh.html",    
-	true,   // "/f/index.html",  
-	false,  // "/foo/bar.html",  
-	true,   // "/f/",  
-      }
-    },
-    { // ROBOTS_STRINGS[1]
+          false, // "/a",
+              false, // "/a/",
+              false, // "/a/bloh/foo.html"
+              true, // "/b",
+              false, // "/b/a",
+              false, // "/b/a/index.html",
+              true, // "/b/b/foo.html",
+              true, // "/c",
+              true, // "/c/a",
+              true, // "/c/a/index.html",
+              true, // "/c/b/foo.html",
+              true, // "/d",
+              true, // "/d/a",
+              true, // "/e/a/index.html",
+              true, // "/e/d",
+              true, // "/e/d/foo.html",
+              true, // "/e/doh.html",
+              true, // "/f/index.html",
+              true, // "/foo/bar.html",
+              true, // "/f/",
+          }, { // Agent2
+          true, // "/a",
+              true, // "/a/",
+              true, // "/a/bloh/foo.html"
+              true, // "/b",
+              true, // "/b/a",
+              true, // "/b/a/index.html",
+              true, // "/b/b/foo.html",
+              true, // "/c",
+              true, // "/c/a",
+              true, // "/c/a/index.html",
+              true, // "/c/b/foo.html",
+              false, // "/d",
+              false, // "/d/a",
+              true, // "/e/a/index.html",
+              true, // "/e/d",
+              false, // "/e/d/foo.html",
+              true, // "/e/doh.html",
+              true, // "/f/index.html",
+              true, // "/foo/bar.html",
+              true, // "/f/",
+          }, { // Agent3
+          true, // "/a",
+              true, // "/a/",
+              true, // "/a/bloh/foo.html"
+              true, // "/b",
+              true, // "/b/a",
+              true, // "/b/a/index.html",
+              true, // "/b/b/foo.html",
+              true, // "/c",
+              true, // "/c/a",
+              true, // "/c/a/index.html",
+              true, // "/c/b/foo.html",
+              false, // "/d",
+              false, // "/d/a",
+              true, // "/e/a/index.html",
+              true, // "/e/d",
+              false, // "/e/d/foo.html",
+              true, // "/e/doh.html",
+              true, // "/f/index.html",
+              true, // "/foo/bar.html",
+              true, // "/f/",
+          }, { // Agent4
+          true, // "/a",
+              true, // "/a/",
+              true, // "/a/bloh/foo.html"
+              true, // "/b",
+              true, // "/b/a",
+              true, // "/b/a/index.html",
+              true, // "/b/b/foo.html",
+              true, // "/c",
+              true, // "/c/a",
+              true, // "/c/a/index.html",
+              true, // "/c/b/foo.html",
+              false, // "/d",
+              false, // "/d/a",
+              true, // "/e/a/index.html",
+              true, // "/e/d",
+              false, // "/e/d/foo.html",
+              true, // "/e/doh.html",
+              true, // "/f/index.html",
+              true, // "/foo/bar.html",
+              true, // "/f/",
+          }, { // Agent5/"*"
+          true, // "/a",
+              true, // "/a/",
+              true, // "/a/bloh/foo.html"
+              true, // "/b",
+              true, // "/b/a",
+              true, // "/b/a/index.html",
+              true, // "/b/b/foo.html",
+              true, // "/c",
+              true, // "/c/a",
+              true, // "/c/a/index.html",
+              true, // "/c/b/foo.html",
+              true, // "/d",
+              true, // "/d/a",
+              true, // "/e/a/index.html",
+              true, // "/e/d",
+              true, // "/e/d/foo.html",
+              true, // "/e/doh.html",
+              true, // "/f/index.html",
+              false, // "/foo/bar.html",
+              true, // "/f/",
+          } }, { // ROBOTS_STRINGS[1]
       ACCEPT_ALL, // Agent 1
-      ACCEPT_ALL, // Agent 2
-      ACCEPT_ALL, // Agent 3
-      ACCEPT_ALL, // Agent 4
-      ACCEPT_ALL, // Agent 5
-    }
-  };
- 
+          ACCEPT_ALL, // Agent 2
+          ACCEPT_ALL, // Agent 3
+          ACCEPT_ALL, // Agent 4
+          ACCEPT_ALL, // Agent 5
+      } };
+
   public TestRobotRulesParser(String name) {
     super(name);
   }
 
   public void testRobotsOneAgent() {
-    for (int i= 0; i < ROBOTS_STRINGS.length; i++) {
-      for (int j= 0; j < AGENT_STRINGS.length; j++) {
-	testRobots(i, new String[] { AGENT_STRINGS[j] },
-		   TEST_PATHS, ALLOWED[i][j]);
+    for (int i = 0; i < ROBOTS_STRINGS.length; i++) {
+      for (int j = 0; j < AGENT_STRINGS.length; j++) {
+        testRobots(i, new String[] { AGENT_STRINGS[j] }, TEST_PATHS,
+            ALLOWED[i][j]);
       }
     }
   }
 
   public void testRobotsTwoAgents() {
-    for (int i= 0; i < ROBOTS_STRINGS.length; i++) {
-      for (int j= 0; j < AGENT_STRINGS.length; j++) {
-	for (int k= 0; k < AGENT_STRINGS.length; k++) {
-	  int key= j;
-	  if (NOT_IN_ROBOTS_STRING[i][j])
-	    key= k;
-	  testRobots(i, new String[] { AGENT_STRINGS[j], AGENT_STRINGS[k] },
-		     TEST_PATHS, ALLOWED[i][key]);
-	}
+    for (int i = 0; i < ROBOTS_STRINGS.length; i++) {
+      for (int j = 0; j < AGENT_STRINGS.length; j++) {
+        for (int k = 0; k < AGENT_STRINGS.length; k++) {
+          int key = j;
+          if (NOT_IN_ROBOTS_STRING[i][j])
+            key = k;
+          testRobots(i, new String[] { AGENT_STRINGS[j], AGENT_STRINGS[k] },
+              TEST_PATHS, ALLOWED[i][key]);
+        }
       }
     }
   }
-  
+
   public void testCrawlDelay() {
     RobotRulesParser p = new RobotRulesParser(new String[] { "nutchbot" });
-    String delayRule1 = "User-agent: nutchbot" + CR +
-                        "Crawl-delay: 10" + CR +
-                        "User-agent: foobot" + CR +
-                        "Crawl-delay: 20" + CR +
-                        "User-agent: *" + CR + 
-                        "Disallow:/baz" + CR;
-    String delayRule2 = "User-agent: foobot" + CR +
-                        "Crawl-delay: 20" + CR +
-                        "User-agent: *" + CR + 
-                        "Disallow:/baz" + CR;
+    String delayRule1 = "User-agent: nutchbot" + CR + "Crawl-delay: 10" + CR
+        + "User-agent: foobot" + CR + "Crawl-delay: 20" + CR + "User-agent: *"
+        + CR + "Disallow:/baz" + CR;
+    String delayRule2 = "User-agent: foobot" + CR + "Crawl-delay: 20" + CR
+        + "User-agent: *" + CR + "Disallow:/baz" + CR;
     RobotRuleSet rules = p.parseRules(delayRule1.getBytes());
     long crawlDelay = rules.getCrawlDelay();
-    assertTrue("testing crawl delay for agent nutchbot - rule 1", (crawlDelay == 10000));
+    assertTrue("testing crawl delay for agent nutchbot - rule 1",
+        (crawlDelay == 10000));
     rules = p.parseRules(delayRule2.getBytes());
     crawlDelay = rules.getCrawlDelay();
-    assertTrue("testing crawl delay for agent nutchbot - rule 2", (crawlDelay == -1));
+    assertTrue("testing crawl delay for agent nutchbot - rule 2",
+        (crawlDelay == -1));
   }
 
   // helper
 
-  public void testRobots(int robotsString, String[] agents, String[] paths, 
-			 boolean[] allowed) {
-    String agentsString= agents[0];
-    for (int i= 1; i < agents.length; i++)
-      agentsString= agentsString + "," + agents[i];
-    RobotRulesParser p= new RobotRulesParser(agents);
-    RobotRuleSet rules= p.parseRules(ROBOTS_STRINGS[robotsString] != null
-                                     ? ROBOTS_STRINGS[robotsString].getBytes()
-                                     : null);
-    for (int i= 0; i < paths.length; i++) {
-      assertTrue("testing robots file "+robotsString+", on agents ("
-		 + agentsString + "), and path " + TEST_PATHS[i] + "; got " 
-		 + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF
-				   + rules,
-		 rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
+  public void testRobots(int robotsString, String[] agents, String[] paths,
+      boolean[] allowed) {
+    String agentsString = agents[0];
+    for (int i = 1; i < agents.length; i++)
+      agentsString = agentsString + "," + agents[i];
+    RobotRulesParser p = new RobotRulesParser(agents);
+    RobotRuleSet rules = p
+        .parseRules(ROBOTS_STRINGS[robotsString] != null ? ROBOTS_STRINGS[robotsString]
+            .getBytes() : null);
+    for (int i = 0; i < paths.length; i++) {
+      assertTrue(
+          "testing robots file " + robotsString + ", on agents ("
+              + agentsString + "), and path " + TEST_PATHS[i] + "; got "
+              + rules.isAllowed(TEST_PATHS[i]) + ", rules are: " + LF + rules,
+          rules.isAllowed(TEST_PATHS[i]) == allowed[i]);
     }
   }
 
-
-  
 }
Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/BlockedException.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/BlockedException.java	(revision 1188268)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/BlockedException.java	(working copy)
@@ -19,7 +19,7 @@
 
 @SuppressWarnings("serial")
 public class BlockedException extends HttpException {
-  
+
   public BlockedException(String msg) {
     super(msg);
   }
Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(revision 1188268)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(working copy)
@@ -44,7 +44,6 @@
  */
 public abstract class HttpBase implements Protocol {
 
-
   public static final int BUFFER_SIZE = 8 * 1024;
 
   private static final byte[] EMPTY_CONTENT = new byte[0];
@@ -67,12 +66,10 @@
   protected int maxContent = 64 * 1024;
 
   /** The Nutch 'User-Agent' request header */
-  protected String userAgent = getAgentString(
-      "NutchCVS", null, "Nutch",
+  protected String userAgent = getAgentString("NutchCVS", null, "Nutch",
       "http://lucene.apache.org/nutch/bot.html",
-  "nutch-agent@lucene.apache.org");
+      "nutch-agent@lucene.apache.org");
 
-
   /** The "Accept-Language" request header value. */
   protected String acceptLanguage = "en-us,en-gb,en;q=0.7,*;q=0.3";
 
@@ -111,8 +108,9 @@
     this.useProxy = (proxyHost != null && proxyHost.length() > 0);
     this.timeout = conf.getInt("http.timeout", 10000);
     this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
-    this.userAgent = getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf
-        .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
+    this.userAgent = getAgentString(conf.get("http.agent.name"),
+        conf.get("http.agent.version"), conf.get("http.agent.description"),
+        conf.get("http.agent.url"), conf.get("http.agent.email"));
     this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
     this.mimeTypes = new MimeUtil(conf);
     this.useHttp11 = conf.getBoolean("http.useHttp11", false);
@@ -125,8 +123,6 @@
     return this.conf;
   }
 
-
-
   public ProtocolOutput getProtocolOutput(String url, WebPage page) {
 
     try {
@@ -137,74 +133,81 @@
       byte[] content = response.getContent();
       Content c = new Content(u.toString(), u.toString(),
           (content == null ? EMPTY_CONTENT : content),
-          response.getHeader("Content-Type"),
-          response.getHeaders(), mimeTypes);
+          response.getHeader("Content-Type"), response.getHeaders(), mimeTypes);
 
       if (code == 200) { // got a good response
         return new ProtocolOutput(c); // return it
 
       } else if (code == 410) { // page is gone
-        return new ProtocolOutput(c,
-            ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.GONE, "Http: " + code + " url=" + url));
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.GONE, "Http: " + code + " url=" + url));
       } else if (code >= 300 && code < 400) { // handle redirect
         String location = response.getHeader("Location");
         // some broken servers, such as MS IIS, use lowercase header name...
-        if (location == null) location = response.getHeader("location");
-        if (location == null) location = "";
+        if (location == null)
+          location = response.getHeader("location");
+        if (location == null)
+          location = "";
         u = new URL(u, location);
         int protocolStatusCode;
         switch (code) {
-        case 300:   // multiple choices, preferred value in Location
+        case 300: // multiple choices, preferred value in Location
           protocolStatusCode = ProtocolStatusCodes.MOVED;
           break;
-        case 301:   // moved permanently
-        case 305:   // use proxy (Location is URL of proxy)
+        case 301: // moved permanently
+        case 305: // use proxy (Location is URL of proxy)
           protocolStatusCode = ProtocolStatusCodes.MOVED;
           break;
-        case 302:   // found (temporarily moved)
-        case 303:   // see other (redirect after POST)
-        case 307:   // temporary redirect
+        case 302: // found (temporarily moved)
+        case 303: // see other (redirect after POST)
+        case 307: // temporary redirect
           protocolStatusCode = ProtocolStatusUtils.TEMP_MOVED;
           break;
-        case 304:   // not modified
+        case 304: // not modified
           protocolStatusCode = ProtocolStatusUtils.NOTMODIFIED;
           break;
         default:
           protocolStatusCode = ProtocolStatusUtils.MOVED;
         }
         // handle this in the higher layer.
-        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(protocolStatusCode, u));
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            protocolStatusCode, u));
       } else if (code == 400) { // bad request, mark as GONE
-        if (logger.isTraceEnabled()) { logger.trace("400 Bad request: " + u); }
-        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.GONE, u));
-      } else if (code == 401) { // requires authorization, but no valid auth provided.
-        if (logger.isTraceEnabled()) { logger.trace("401 Authentication Required"); }
-        return new ProtocolOutput(c,
-            ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.ACCESS_DENIED,
-                "Authentication required: "+ url));
+        if (logger.isTraceEnabled()) {
+          logger.trace("400 Bad request: " + u);
+        }
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.GONE, u));
+      } else if (code == 401) { // requires authorization, but no valid auth
+                                // provided.
+        if (logger.isTraceEnabled()) {
+          logger.trace("401 Authentication Required");
+        }
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.ACCESS_DENIED, "Authentication required: "
+                + url));
       } else if (code == 404) {
-        return new ProtocolOutput(c,
-            ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.NOTFOUND, u));
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.NOTFOUND, u));
       } else if (code == 410) { // permanently GONE
-        return new ProtocolOutput(c,
-            ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.GONE, u));
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.GONE, u));
       } else {
-        return new ProtocolOutput(c,
-            ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.EXCEPTION, "Http code=" + code + ", url="
-                + u));
+        return new ProtocolOutput(c, ProtocolStatusUtils.makeStatus(
+            ProtocolStatusCodes.EXCEPTION, "Http code=" + code + ", url=" + u));
       }
     } catch (Throwable e) {
       e.printStackTrace(LogUtil.getErrorStream(logger));
-      return new ProtocolOutput(null,
-          ProtocolStatusUtils.makeStatus(ProtocolStatusCodes.EXCEPTION, e.toString()));
+      return new ProtocolOutput(null, ProtocolStatusUtils.makeStatus(
+          ProtocolStatusCodes.EXCEPTION, e.toString()));
     }
   }
 
-  /* -------------------------- *
-   * </implementation:Protocol> *
-   * -------------------------- */
+  /*
+   * -------------------------- * </implementation:Protocol> *
+   * --------------------------
+   */
 
-
   public String getProxyHost() {
     return proxyHost;
   }
@@ -228,46 +231,45 @@
   public String getUserAgent() {
     return userAgent;
   }
-  
-  /** Value of "Accept-Language" request header sent by Nutch.
+
+  /**
+   * Value of "Accept-Language" request header sent by Nutch.
+   * 
    * @return The value of the header "Accept-Language" header.
    */
   public String getAcceptLanguage() {
-         return acceptLanguage;
+    return acceptLanguage;
   }
 
   public boolean getUseHttp11() {
     return useHttp11;
   }
 
-  private static String getAgentString(String agentName,
-      String agentVersion,
-      String agentDesc,
-      String agentURL,
-      String agentEmail) {
+  private static String getAgentString(String agentName, String agentVersion,
+      String agentDesc, String agentURL, String agentEmail) {
 
-    if ( (agentName == null) || (agentName.trim().length() == 0) ) {
+    if ((agentName == null) || (agentName.trim().length() == 0)) {
       // TODO : NUTCH-258
       if (LOGGER.isErrorEnabled()) {
         LOGGER.error("No User-Agent string set (http.agent.name)!");
       }
     }
 
-    StringBuffer buf= new StringBuffer();
+    StringBuffer buf = new StringBuffer();
 
     buf.append(agentName);
     if (agentVersion != null) {
       buf.append("/");
       buf.append(agentVersion);
     }
-    if ( ((agentDesc != null) && (agentDesc.length() != 0))
+    if (((agentDesc != null) && (agentDesc.length() != 0))
         || ((agentEmail != null) && (agentEmail.length() != 0))
-        || ((agentURL != null) && (agentURL.length() != 0)) ) {
+        || ((agentURL != null) && (agentURL.length() != 0))) {
       buf.append(" (");
 
       if ((agentDesc != null) && (agentDesc.length() != 0)) {
         buf.append(agentDesc);
-        if ( (agentURL != null) || (agentEmail != null) )
+        if ((agentURL != null) || (agentEmail != null))
           buf.append("; ");
       }
 
@@ -296,9 +298,12 @@
     }
   }
 
-  public byte[] processGzipEncoded(byte[] compressed, URL url) throws IOException {
+  public byte[] processGzipEncoded(byte[] compressed, URL url)
+      throws IOException {
 
-    if (LOGGER.isTraceEnabled()) { LOGGER.trace("uncompressing...."); }
+    if (LOGGER.isTraceEnabled()) {
+      LOGGER.trace("uncompressing....");
+    }
 
     byte[] content;
     if (getMaxContent() >= 0) {
@@ -312,25 +317,29 @@
 
     if (LOGGER.isTraceEnabled()) {
       LOGGER.trace("fetched " + compressed.length
-          + " bytes of compressed content (expanded to "
-          + content.length + " bytes) from " + url);
+          + " bytes of compressed content (expanded to " + content.length
+          + " bytes) from " + url);
     }
     return content;
   }
 
-  public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException {
+  public byte[] processDeflateEncoded(byte[] compressed, URL url)
+      throws IOException {
 
-    if (LOGGER.isTraceEnabled()) { LOGGER.trace("inflating...."); }
+    if (LOGGER.isTraceEnabled()) {
+      LOGGER.trace("inflating....");
+    }
 
-    byte[] content = DeflateUtils.inflateBestEffort(compressed, getMaxContent());
+    byte[] content = DeflateUtils
+        .inflateBestEffort(compressed, getMaxContent());
 
     if (content == null)
       throw new IOException("inflateBestEffort returned null");
 
     if (LOGGER.isTraceEnabled()) {
       LOGGER.trace("fetched " + compressed.length
-                 + " bytes of compressed content (expanded to "
-                 + content.length + " bytes) from " + url);
+          + " bytes of compressed content (expanded to " + content.length
+          + " bytes) from " + url);
     }
     return content;
   }
@@ -355,13 +364,14 @@
       } else if (i != args.length - 1) {
         System.err.println(usage);
         System.exit(-1);
-      } else // root is required parameter
+      } else
+        // root is required parameter
         url = args[i];
     }
 
-    //    if (verbose) {
-    //      LOGGER.setLevel(Level.FINE);
-    //    }
+    // if (verbose) {
+    // LOGGER.setLevel(Level.FINE);
+    // }
 
     ProtocolOutput out = http.getProtocolOutput(url, new WebPage());
     Content content = out.getContent();
@@ -369,8 +379,8 @@
     System.out.println("Status: " + out.getStatus());
     if (content != null) {
       System.out.println("Content Type: " + content.getContentType());
-      System.out.println("Content Length: " +
-          content.getMetadata().get(Response.CONTENT_LENGTH));
+      System.out.println("Content Length: "
+          + content.getMetadata().get(Response.CONTENT_LENGTH));
       System.out.println("Content:");
       String text = new String(content.getContent());
       System.out.println(text);
@@ -378,11 +388,9 @@
 
   }
 
+  protected abstract Response getResponse(URL url, WebPage page,
+      boolean followRedirects) throws ProtocolException, IOException;
 
-  protected abstract Response getResponse(URL url,
-      WebPage page, boolean followRedirects)
-  throws ProtocolException, IOException;
-
   @Override
   public RobotRules getRobotRules(String url, WebPage page) {
     return robots.getRobotRulesSet(this, url);
Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java	(revision 1188268)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java	(working copy)
@@ -37,29 +37,28 @@
 import org.apache.nutch.protocol.RobotRules;
 import org.apache.nutch.storage.WebPage;
 
-
 /**
- * This class handles the parsing of <code>robots.txt</code> files.
- * It emits RobotRules objects, which describe the download permissions
- * as described in RobotRulesParser.
- *
+ * This class handles the parsing of <code>robots.txt</code> files. It emits
+ * RobotRules objects, which describe the download permissions as described in
+ * RobotRulesParser.
+ * 
  * @author Tom Pierce
  * @author Mike Cafarella
  * @author Doug Cutting
  */
 public class RobotRulesParser implements Configurable {
 
-  public static final Logger LOG = LoggerFactory.getLogger(RobotRulesParser.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(RobotRulesParser.class);
 
   private boolean allowForbidden = false;
 
-  private static final Hashtable<String, RobotRules> CACHE =
-    new Hashtable<String, RobotRules>();
+  private static final Hashtable<String, RobotRules> CACHE = new Hashtable<String, RobotRules>();
 
-  private static final String CHARACTER_ENCODING= "UTF-8";
-  private static final int NO_PRECEDENCE= Integer.MAX_VALUE;
+  private static final String CHARACTER_ENCODING = "UTF-8";
+  private static final int NO_PRECEDENCE = Integer.MAX_VALUE;
 
-  private static final RobotRuleSet EMPTY_RULES= new RobotRuleSet();
+  private static final RobotRuleSet EMPTY_RULES = new RobotRuleSet();
 
   private static RobotRuleSet FORBID_ALL_RULES = getForbidAllRules();
 
@@ -67,8 +66,8 @@
   private HashMap<String, Integer> robotNames;
 
   /**
-   * This class holds the rules which were parsed from a robots.txt
-   * file, and can test paths against those rules.
+   * This class holds the rules which were parsed from a robots.txt file, and
+   * can test paths against those rules.
    */
   public static class RobotRuleSet implements RobotRules {
     ArrayList<RobotsEntry> tmpEntries = new ArrayList<RobotsEntry>();
@@ -108,7 +107,7 @@
     private void clearPrefixes() {
       if (tmpEntries == null) {
         tmpEntries = new ArrayList<RobotsEntry>();
-        entries= null;
+        entries = null;
       } else {
         tmpEntries.clear();
       }
@@ -143,39 +142,39 @@
     }
 
     /**
-     *  Returns <code>false</code> if the <code>robots.txt</code> file
-     *  prohibits us from accessing the given <code>url</code>, or
-     *  <code>true</code> otherwise.
+     * Returns <code>false</code> if the <code>robots.txt</code> file prohibits
+     * us from accessing the given <code>url</code>, or <code>true</code>
+     * otherwise.
      */
     public boolean isAllowed(URL url) {
-      String path = url.getPath();                  // check rules
+      String path = url.getPath(); // check rules
       if ((path == null) || "".equals(path)) {
-        path= "/";
+        path = "/";
       }
       return isAllowed(path);
     }
 
     /**
-     *  Returns <code>false</code> if the <code>robots.txt</code> file
-     *  prohibits us from accessing the given <code>path</code>, or
-     *  <code>true</code> otherwise.
+     * Returns <code>false</code> if the <code>robots.txt</code> file prohibits
+     * us from accessing the given <code>path</code>, or <code>true</code>
+     * otherwise.
      */
     public boolean isAllowed(String path) {
       try {
-        path= URLDecoder.decode(path, CHARACTER_ENCODING);
+        path = URLDecoder.decode(path, CHARACTER_ENCODING);
       } catch (Exception e) {
         // just ignore it- we can still try to match
         // path prefixes
       }
 
       if (entries == null) {
-        entries= new RobotsEntry[tmpEntries.size()];
-        entries= tmpEntries.toArray(entries);
-        tmpEntries= null;
+        entries = new RobotsEntry[tmpEntries.size()];
+        entries = tmpEntries.toArray(entries);
+        tmpEntries = null;
       }
 
-      int pos= 0;
-      int end= entries.length;
+      int pos = 0;
+      int end = entries.length;
       while (pos < end) {
         if (path.startsWith(entries[pos].prefix))
           return entries[pos].allowed;
@@ -189,31 +188,31 @@
      */
     @Override
     public String toString() {
-      isAllowed("x");  // force String[] representation
-      StringBuffer buf= new StringBuffer();
-      for (int i= 0; i < entries.length; i++)
+      isAllowed("x"); // force String[] representation
+      StringBuffer buf = new StringBuffer();
+      for (int i = 0; i < entries.length; i++)
         if (entries[i].allowed)
           buf.append("Allow: " + entries[i].prefix
-                     + System.getProperty("line.separator"));
+              + System.getProperty("line.separator"));
         else
           buf.append("Disallow: " + entries[i].prefix
-                     + System.getProperty("line.separator"));
+              + System.getProperty("line.separator"));
       return buf.toString();
     }
   }
 
+  RobotRulesParser() {
+  }
 
-  RobotRulesParser() { }
-
   public RobotRulesParser(Configuration conf) {
     setConf(conf);
   }
 
+  /*
+   * ---------------------------------- * <implementation:Configurable> *
+   * ----------------------------------
+   */
 
-  /* ---------------------------------- *
-   * <implementation:Configurable> *
-   * ---------------------------------- */
-
   public void setConf(Configuration conf) {
     this.conf = conf;
     allowForbidden = conf.getBoolean("http.robots.403.allow", false);
@@ -233,7 +232,7 @@
 
     //
     // If there are no agents for robots-parsing, use our
-    // default agent-string.  If both are present, our agent-string
+    // default agent-string. If both are present, our agent-string
     // should be the first one we advertise to robots-parsing.
     //
     if (agents.size() == 0) {
@@ -245,7 +244,7 @@
       agents.add(0, agentName);
       if (LOG.isErrorEnabled()) {
         LOG.error("Agent we advertise (" + agentName
-                + ") not listed first in 'http.robots.agents' property!");
+            + ") not listed first in 'http.robots.agents' property!");
       }
     }
     setRobotNames(agents.toArray(new String[agents.size()]));
@@ -255,13 +254,14 @@
     return conf;
   }
 
-  /* ---------------------------------- *
-   * <implementation:Configurable> *
-   * ---------------------------------- */
+  /*
+   * ---------------------------------- * <implementation:Configurable> *
+   * ----------------------------------
+   */
 
   private void setRobotNames(String[] robotNames) {
-    this.robotNames= new HashMap<String, Integer>();
-    for (int i= 0; i < robotNames.length; i++) {
+    this.robotNames = new HashMap<String, Integer>();
+    for (int i = 0; i < robotNames.length; i++) {
       this.robotNames.put(robotNames[i].toLowerCase(), new Integer(i));
     }
     // always make sure "*" is included
@@ -270,91 +270,90 @@
   }
 
   /**
-   *  Creates a new <code>RobotRulesParser</code> which will use the
-   *  supplied <code>robotNames</code> when choosing which stanza to
-   *  follow in <code>robots.txt</code> files.  Any name in the array
-   *  may be matched.  The order of the <code>robotNames</code>
-   *  determines the precedence- if many names are matched, only the
-   *  rules associated with the robot name having the smallest index
-   *  will be used.
+   * Creates a new <code>RobotRulesParser</code> which will use the supplied
+   * <code>robotNames</code> when choosing which stanza to follow in
+   * <code>robots.txt</code> files. Any name in the array may be matched. The
+   * order of the <code>robotNames</code> determines the precedence- if many
+   * names are matched, only the rules associated with the robot name having the
+   * smallest index will be used.
    */
   RobotRulesParser(String[] robotNames) {
     setRobotNames(robotNames);
   }
 
   /**
-   * Returns a {@link RobotRuleSet} object which encapsulates the
-   * rules parsed from the supplied <code>robotContent</code>.
+   * Returns a {@link RobotRuleSet} object which encapsulates the rules parsed
+   * from the supplied <code>robotContent</code>.
    */
   RobotRuleSet parseRules(byte[] robotContent) {
     if (robotContent == null)
       return EMPTY_RULES;
 
-    String content= new String (robotContent);
+    String content = new String(robotContent);
 
-    StringTokenizer lineParser= new StringTokenizer(content, "\n\r");
+    StringTokenizer lineParser = new StringTokenizer(content, "\n\r");
 
-    RobotRuleSet bestRulesSoFar= null;
-    int bestPrecedenceSoFar= NO_PRECEDENCE;
+    RobotRuleSet bestRulesSoFar = null;
+    int bestPrecedenceSoFar = NO_PRECEDENCE;
 
-    RobotRuleSet currentRules= new RobotRuleSet();
-    int currentPrecedence= NO_PRECEDENCE;
+    RobotRuleSet currentRules = new RobotRuleSet();
+    int currentPrecedence = NO_PRECEDENCE;
 
-    boolean addRules= false;    // in stanza for our robot
-    boolean doneAgents= false;  // detect multiple agent lines
+    boolean addRules = false; // in stanza for our robot
+    boolean doneAgents = false; // detect multiple agent lines
 
     while (lineParser.hasMoreTokens()) {
-      String line= lineParser.nextToken();
+      String line = lineParser.nextToken();
 
       // trim out comments and whitespace
-      int hashPos= line.indexOf("#");
+      int hashPos = line.indexOf("#");
       if (hashPos >= 0)
-        line= line.substring(0, hashPos);
-      line= line.trim();
+        line = line.substring(0, hashPos);
+      line = line.trim();
 
-      if ( (line.length() >= 11)
-           && (line.substring(0, 11).equalsIgnoreCase("User-agent:")) ) {
+      if ((line.length() >= 11)
+          && (line.substring(0, 11).equalsIgnoreCase("User-agent:"))) {
 
         if (doneAgents) {
           if (currentPrecedence < bestPrecedenceSoFar) {
-            bestPrecedenceSoFar= currentPrecedence;
-            bestRulesSoFar= currentRules;
-            currentPrecedence= NO_PRECEDENCE;
-            currentRules= new RobotRuleSet();
+            bestPrecedenceSoFar = currentPrecedence;
+            bestRulesSoFar = currentRules;
+            currentPrecedence = NO_PRECEDENCE;
+            currentRules = new RobotRuleSet();
           }
-          addRules= false;
+          addRules = false;
         }
-        doneAgents= false;
+        doneAgents = false;
 
-        String agentNames= line.substring(line.indexOf(":") + 1);
-        agentNames= agentNames.trim();
-        StringTokenizer agentTokenizer= new StringTokenizer(agentNames);
+        String agentNames = line.substring(line.indexOf(":") + 1);
+        agentNames = agentNames.trim();
+        StringTokenizer agentTokenizer = new StringTokenizer(agentNames);
 
         while (agentTokenizer.hasMoreTokens()) {
           // for each agent listed, see if it's us:
-          String agentName= agentTokenizer.nextToken().toLowerCase();
+          String agentName = agentTokenizer.nextToken().toLowerCase();
 
-          Integer precedenceInt= robotNames.get(agentName);
+          Integer precedenceInt = robotNames.get(agentName);
 
           if (precedenceInt != null) {
-            int precedence= precedenceInt.intValue();
-            if ( (precedence < currentPrecedence)
-                 && (precedence < bestPrecedenceSoFar) )
-              currentPrecedence= precedence;
+            int precedence = precedenceInt.intValue();
+            if ((precedence < currentPrecedence)
+                && (precedence < bestPrecedenceSoFar))
+              currentPrecedence = precedence;
           }
         }
 
         if (currentPrecedence < bestPrecedenceSoFar)
-          addRules= true;
+          addRules = true;
 
-      } else if ( (line.length() >= 9)
-                  && (line.substring(0, 9).equalsIgnoreCase("Disallow:")) ) {
+      } else if ((line.length() >= 9)
+          && (line.substring(0, 9).equalsIgnoreCase("Disallow:"))) {
 
-        doneAgents= true;
-        String path= line.substring(line.indexOf(":") + 1);
-        path= path.trim();
+        doneAgents = true;
+        String path = line.substring(line.indexOf(":") + 1);
+        path = path.trim();
         try {
-          path= URLDecoder.decode(path, CHARACTER_ENCODING);
+          path = URLDecoder.decode(path, CHARACTER_ENCODING);
         } catch (Exception e) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("error parsing robots rules- can't decode path: " + path);
@@ -364,32 +363,33 @@
         if (path.length() == 0) { // "empty rule"
           if (addRules)
             currentRules.clearPrefixes();
-        } else {  // rule with path
+        } else { // rule with path
           if (addRules)
             currentRules.addPrefix(path, false);
         }
 
-      } else if ( (line.length() >= 6)
-                  && (line.substring(0, 6).equalsIgnoreCase("Allow:")) ) {
+      } else if ((line.length() >= 6)
+          && (line.substring(0, 6).equalsIgnoreCase("Allow:"))) {
 
-        doneAgents= true;
-        String path= line.substring(line.indexOf(":") + 1);
-        path= path.trim();
+        doneAgents = true;
+        String path = line.substring(line.indexOf(":") + 1);
+        path = path.trim();
 
         if (path.length() == 0) {
           // "empty rule"- treat same as empty disallow
           if (addRules)
             currentRules.clearPrefixes();
-        } else {  // rule with path
+        } else { // rule with path
           if (addRules)
             currentRules.addPrefix(path, true);
         }
-      } else if ( (line.length() >= 12)
-                  && (line.substring(0, 12).equalsIgnoreCase("Crawl-Delay:"))) {
+      } else if ((line.length() >= 12)
+          && (line.substring(0, 12).equalsIgnoreCase("Crawl-Delay:"))) {
         doneAgents = true;
         if (addRules) {
           long crawlDelay = -1;
-          String delay = line.substring("Crawl-Delay:".length(), line.length()).trim();
+          String delay = line.substring("Crawl-Delay:".length(), line.length())
+              .trim();
           if (delay.length() > 0) {
             try {
               crawlDelay = Long.parseLong(delay) * 1000; // sec to millisec
@@ -403,8 +403,8 @@
     }
 
     if (currentPrecedence < bestPrecedenceSoFar) {
-      bestPrecedenceSoFar= currentPrecedence;
-      bestRulesSoFar= currentRules;
+      bestPrecedenceSoFar = currentPrecedence;
+      bestRulesSoFar = currentRules;
     }
 
     if (bestPrecedenceSoFar == NO_PRECEDENCE)
@@ -413,22 +413,20 @@
   }
 
   /**
-   *  Returns a <code>RobotRuleSet</code> object appropriate for use
-   *  when the <code>robots.txt</code> file is empty or missing; all
-   *  requests are allowed.
+   * Returns a <code>RobotRuleSet</code> object appropriate for use when the
+   * <code>robots.txt</code> file is empty or missing; all requests are allowed.
    */
   static RobotRuleSet getEmptyRules() {
     return EMPTY_RULES;
   }
 
   /**
-   *  Returns a <code>RobotRuleSet</code> object appropriate for use
-   *  when the <code>robots.txt</code> file is not fetched due to a
-   *  <code>403/Forbidden</code> response; all requests are
-   *  disallowed.
+   * Returns a <code>RobotRuleSet</code> object appropriate for use when the
+   * <code>robots.txt</code> file is not fetched due to a
+   * <code>403/Forbidden</code> response; all requests are disallowed.
    */
   static RobotRuleSet getForbidAllRules() {
-    RobotRuleSet rules= new RobotRuleSet();
+    RobotRuleSet rules = new RobotRuleSet();
     rules.addPrefix("", false);
     return rules;
   }
@@ -447,16 +445,18 @@
 
     String host = url.getHost().toLowerCase(); // normalize to lower case
 
-    RobotRuleSet robotRules = (RobotRuleSet)CACHE.get(host);
+    RobotRuleSet robotRules = (RobotRuleSet) CACHE.get(host);
 
     boolean cacheRule = true;
 
-    if (robotRules == null) {                     // cache miss
+    if (robotRules == null) { // cache miss
       URL redir = null;
-      if (LOG.isTraceEnabled()) { LOG.trace("cache miss " + url); }
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("cache miss " + url);
+      }
       try {
         Response response = http.getResponse(new URL(url, "/robots.txt"),
-                                             new WebPage(), true);
+            new WebPage(), true);
         // try one level of redirection ?
         if (response.getCode() == 301 || response.getCode() == 302) {
           String redirection = response.getHeader("Location");
@@ -476,15 +476,15 @@
           }
         }
 
-        if (response.getCode() == 200)               // found rules: parse them
+        if (response.getCode() == 200) // found rules: parse them
           robotRules = parseRules(response.getContent());
-        else if ( (response.getCode() == 403) && (!allowForbidden) )
-          robotRules = FORBID_ALL_RULES;            // use forbid all
+        else if ((response.getCode() == 403) && (!allowForbidden))
+          robotRules = FORBID_ALL_RULES; // use forbid all
         else if (response.getCode() >= 500) {
           cacheRule = false;
           robotRules = EMPTY_RULES;
-        }else
-          robotRules = EMPTY_RULES;                 // use default rules
+        } else
+          robotRules = EMPTY_RULES; // use default rules
       } catch (Throwable t) {
         if (LOG.isInfoEnabled()) {
           LOG.info("Couldn't get robots.txt for " + url + ": " + t.toString());
@@ -494,7 +494,7 @@
       }
 
       if (cacheRule) {
-        CACHE.put(host, robotRules);  // cache rules for host
+        CACHE.put(host, robotRules); // cache rules for host
         if (redir != null && !redir.getHost().equals(host)) {
           // cache also for the redirected host
           CACHE.put(redir.getHost(), robotRules);
@@ -504,22 +504,22 @@
     return robotRules;
   }
 
-  public boolean isAllowed(HttpBase http, URL url)
-      throws ProtocolException, IOException {
-    String path = url.getPath();                  // check rules
+  public boolean isAllowed(HttpBase http, URL url) throws ProtocolException,
+      IOException {
+    String path = url.getPath(); // check rules
     if ((path == null) || "".equals(path)) {
-      path= "/";
+      path = "/";
     }
 
     return getRobotRulesSet(http, url).isAllowed(path);
   }
 
-  public long getCrawlDelay(HttpBase http, URL url)
-      throws ProtocolException, IOException {
+  public long getCrawlDelay(HttpBase http, URL url) throws ProtocolException,
+      IOException {
     return getRobotRulesSet(http, url).getCrawlDelay();
   }
 
-  private final static int BUFSIZE= 2048;
+  private final static int BUFSIZE = 2048;
 
   /** command-line main for testing */
   public static void main(String[] argv) {
@@ -527,9 +527,12 @@
       System.out.println("Usage:");
       System.out.println("   java <robots-file> <url-file> <agent-name>+");
       System.out.println("");
-      System.out.println("The <robots-file> will be parsed as a robots.txt file,");
-      System.out.println("using the given <agent-name> to select rules.  URLs ");
-      System.out.println("will be read (one per line) from <url-file>, and tested");
+      System.out
+          .println("The <robots-file> will be parsed as a robots.txt file,");
+      System.out
+          .println("using the given <agent-name> to select rules.  URLs ");
+      System.out
+          .println("will be read (one per line) from <url-file>, and tested");
       System.out.println("against the rules.");
       System.exit(-1);
     }
@@ -538,49 +541,47 @@
       LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
       String[] robotNames = new String[argv.length - 2];
 
-      for (int i= 0; i < argv.length - 2; i++)
-        robotNames[i] = argv[i+2];
+      for (int i = 0; i < argv.length - 2; i++)
+        robotNames[i] = argv[i + 2];
 
       ArrayList<byte[]> bufs = new ArrayList<byte[]>();
       byte[] buf = new byte[BUFSIZE];
       int totBytes = 0;
 
-      int rsize= robotsIn.read(buf);
+      int rsize = robotsIn.read(buf);
       while (rsize >= 0) {
-        totBytes+= rsize;
+        totBytes += rsize;
         if (rsize != BUFSIZE) {
-          byte[] tmp= new byte[rsize];
+          byte[] tmp = new byte[rsize];
           System.arraycopy(buf, 0, tmp, 0, rsize);
           bufs.add(tmp);
         } else {
           bufs.add(buf);
-          buf= new byte[BUFSIZE];
+          buf = new byte[BUFSIZE];
         }
-        rsize= robotsIn.read(buf);
+        rsize = robotsIn.read(buf);
       }
 
-      byte[] robotsBytes= new byte[totBytes];
+      byte[] robotsBytes = new byte[totBytes];
       int pos = 0;
 
       for (int i = 0; i < bufs.size(); i++) {
         byte[] currBuf = bufs.get(i);
         int currBufLen = currBuf.length;
         System.arraycopy(currBuf, 0, robotsBytes, pos, currBufLen);
-        pos+= currBufLen;
+        pos += currBufLen;
       }
 
-      RobotRulesParser parser =
-        new RobotRulesParser(robotNames);
-      RobotRuleSet rules= parser.parseRules(robotsBytes);
+      RobotRulesParser parser = new RobotRulesParser(robotNames);
+      RobotRuleSet rules = parser.parseRules(robotsBytes);
       System.out.println("Rules:");
       System.out.println(rules);
       System.out.println();
 
       String testPath = testsIn.readLine().trim();
       while (testPath != null) {
-        System.out.println( (rules.isAllowed(testPath) ?
-                             "allowed" : "not allowed")
-                            + ":\t" + testPath);
+        System.out.println((rules.isAllowed(testPath) ? "allowed"
+            : "not allowed") + ":\t" + testPath);
         testPath = testsIn.readLine();
       }
 
Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpException.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpException.java	(revision 1188268)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpException.java	(working copy)
@@ -19,7 +19,6 @@
 // Nutch imports
 import org.apache.nutch.protocol.ProtocolException;
 
-
 public class HttpException extends ProtocolException {
 
   public HttpException() {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java	(working copy)
@@ -19,7 +19,7 @@
 
 /**
  * Exception indicating bad reply of SYST command.
- *
+ * 
  * @author John Xing
  */
 public class FtpExceptionBadSystResponse extends FtpException {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java	(working copy)
@@ -18,9 +18,9 @@
 package org.apache.nutch.protocol.ftp;
 
 /**
- * Exception indicating control channel is closed by server end, due to
- * forced closure of data channel at client (our) end.
- *
+ * Exception indicating control channel is closed by server end, due to forced
+ * closure of data channel at client (our) end.
+ * 
  * @author John Xing
  */
 public class FtpExceptionControlClosedByForcedDataClose extends FtpException {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java	(working copy)
@@ -28,45 +28,44 @@
 
 /***
  * This is a support class for logging all ftp command/reply traffic.
- *
+ * 
  * @author John Xing
  ***/
-public class PrintCommandListener implements ProtocolCommandListener
-{
-    private Logger __logger;
+public class PrintCommandListener implements ProtocolCommandListener {
+  private Logger __logger;
 
-    public PrintCommandListener(Logger logger)
-    {
-        __logger = logger;
-    }
+  public PrintCommandListener(Logger logger) {
+    __logger = logger;
+  }
 
-    public void protocolCommandSent(ProtocolCommandEvent event) {
-      try {
-        __logIt(event);
-      } catch (IOException e) {
-        if (__logger.isInfoEnabled()) {
-          __logger.info("PrintCommandListener.protocolCommandSent(): "+e);
-        }
+  public void protocolCommandSent(ProtocolCommandEvent event) {
+    try {
+      __logIt(event);
+    } catch (IOException e) {
+      if (__logger.isInfoEnabled()) {
+        __logger.info("PrintCommandListener.protocolCommandSent(): " + e);
       }
     }
+  }
 
-    public void protocolReplyReceived(ProtocolCommandEvent event) {
-      try {
-        __logIt(event);
-      } catch (IOException e) {
-        if (__logger.isInfoEnabled()) {
-          __logger.info("PrintCommandListener.protocolReplyReceived(): "+e);
-        }
+  public void protocolReplyReceived(ProtocolCommandEvent event) {
+    try {
+      __logIt(event);
+    } catch (IOException e) {
+      if (__logger.isInfoEnabled()) {
+        __logger.info("PrintCommandListener.protocolReplyReceived(): " + e);
       }
     }
+  }
 
-    private void __logIt(ProtocolCommandEvent event) throws IOException {
-      if (!__logger.isInfoEnabled()) { return; }
-      BufferedReader br =
-        new BufferedReader(new StringReader(event.getMessage()));
-      String line;
-      while ((line = br.readLine()) != null) {
-        __logger.info("ftp> "+line);
-      }
+  private void __logIt(ProtocolCommandEvent event) throws IOException {
+    if (!__logger.isInfoEnabled()) {
+      return;
     }
+    BufferedReader br = new BufferedReader(new StringReader(event.getMessage()));
+    String line;
+    while ((line = br.readLine()) != null) {
+      __logger.info("ftp> " + line);
+    }
+  }
 }
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java	(working copy)
@@ -17,14 +17,17 @@
 
 package org.apache.nutch.protocol.ftp;
 
-/** Thrown for Ftp error codes.
+/**
+ * Thrown for Ftp error codes.
  */
 public class FtpError extends FtpException {
 
   private int code;
-  
-  public int getCode(int code) { return code; }
 
+  public int getCode(int code) {
+    return code;
+  }
+
   public FtpError(int code) {
     super("Ftp Error: " + code);
     this.code = code;
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java	(working copy)
@@ -20,9 +20,9 @@
 import org.apache.nutch.protocol.ProtocolException;
 
 /***
- * Superclass for important exceptions thrown during FTP talk,
- * that must be handled with care.
- *
+ * Superclass for important exceptions thrown during FTP talk, that must be
+ * handled with care.
+ * 
  * @author John Xing
  */
 public class FtpException extends ProtocolException {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(working copy)
@@ -17,7 +17,6 @@
 
 package org.apache.nutch.protocol.ftp;
 
-
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.net.InetAddress;
@@ -38,18 +37,14 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.LogUtil;
 
-
 /************************************
- * FtpResponse.java mimics ftp replies as http response.
- * It tries its best to follow http's way for headers, response codes
- * as well as exceptions.
- *
- * Comments:
- * In this class, all FtpException*.java thrown by Client.java
- * and some important commons-net exceptions passed by Client.java
- * must have been properly dealt with. They'd better not be leaked
- * to the caller of this class.
- *
+ * FtpResponse.java mimics ftp replies as http response. It tries its best to
+ * follow http's way for headers, response codes as well as exceptions.
+ * 
+ * Comments: In this class, all FtpException*.java thrown by Client.java and
+ * some important commons-net exceptions passed by Client.java must have been
+ * properly dealt with. They'd better not be leaked to the caller of this class.
+ * 
  * @author John Xing
  ***********************************/
 public class FtpResponse {
@@ -65,23 +60,26 @@
   private Configuration conf;
 
   /** Returns the response code. */
-  public int getCode() { return code; }
+  public int getCode() {
+    return code;
+  }
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
     return headers.get(name);
   }
 
-  public byte[] getContent() { return content; }
+  public byte[] getContent() {
+    return content;
+  }
 
   public Content toContent() {
     return new Content(orig, base, (content != null ? content : EMPTY_CONTENT),
-                       getHeader(Response.CONTENT_TYPE),
-                       headers, this.conf);
+        getHeader(Response.CONTENT_TYPE), headers, this.conf);
   }
 
   public FtpResponse(URL url, WebPage page, Ftp ftp, Configuration conf)
-    throws FtpException, IOException {
+      throws FtpException, IOException {
 
     this.orig = url.toString();
     this.base = url.toString();
@@ -103,11 +101,11 @@
 
       if (ftp.followTalk) {
         if (Ftp.LOG.isInfoEnabled()) {
-          Ftp.LOG.info("fetching "+url);
+          Ftp.LOG.info("fetching " + url);
         }
       } else {
         if (Ftp.LOG.isTraceEnabled()) {
-          Ftp.LOG.trace("fetching "+url);
+          Ftp.LOG.trace("fetching " + url);
         }
       }
 
@@ -117,7 +115,7 @@
       // should start anew.
       if (ftp.client != null && ftp.keepConnection
           && ftp.renewalTime < System.currentTimeMillis()) {
-        if (Ftp.LOG.isInfoEnabled()) { 
+        if (Ftp.LOG.isInfoEnabled()) {
           Ftp.LOG.info("delete client because idled too long");
         }
         ftp.client = null;
@@ -131,8 +129,9 @@
         // the real client
         ftp.client = new Client();
         // when to renew, take the lesser
-        //ftp.renewalTime = System.currentTimeMillis()
-        //  + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+        // ftp.renewalTime = System.currentTimeMillis()
+        // + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout :
+        // ftp.serverTimeout);
 
         // timeout for control connection
         ftp.client.setDefaultTimeout(ftp.timeout);
@@ -141,8 +140,8 @@
 
         // follow ftp talk?
         if (ftp.followTalk)
-          ftp.client.addProtocolCommandListener(
-            new PrintCommandListener(ftp.LOG));
+          ftp.client.addProtocolCommandListener(new PrintCommandListener(
+              ftp.LOG));
       }
 
       // quit from previous site if at a different site now
@@ -150,8 +149,8 @@
         InetAddress remoteAddress = ftp.client.getRemoteAddress();
         if (!addr.equals(remoteAddress)) {
           if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-            Ftp.LOG.info("disconnect from "+remoteAddress
-            +" before connect to "+addr);
+            Ftp.LOG.info("disconnect from " + remoteAddress
+                + " before connect to " + addr);
           }
           // quit from current site
           ftp.client.logout();
@@ -163,22 +162,22 @@
       if (!ftp.client.isConnected()) {
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("connect to "+addr);
+          Ftp.LOG.info("connect to " + addr);
         }
 
         ftp.client.connect(addr);
         if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.connect() failed: "
-              + addr + " " + ftp.client.getReplyString());
+            Ftp.LOG.warn("ftp.client.connect() failed: " + addr + " "
+                + ftp.client.getReplyString());
           }
           this.code = 500; // http Internal Server Error
           return;
         }
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("log into "+addr);
+          Ftp.LOG.info("log into " + addr);
         }
 
         if (!ftp.client.login(ftp.userName, ftp.passWord)) {
@@ -189,9 +188,9 @@
           // (not dealt with here at all) .
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.login() failed: "+addr);
+            Ftp.LOG.warn("ftp.client.login() failed: " + addr);
           }
-          this.code = 401;  // http Unauthorized
+          this.code = 401; // http Unauthorized
           return;
         }
 
@@ -200,14 +199,14 @@
           ftp.client.logout();
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
+            Ftp.LOG.warn("ftp.client.setFileType() failed: " + addr);
           }
           this.code = 500; // http Internal Server Error
           return;
         }
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("set parser for "+addr);
+          Ftp.LOG.info("set parser for " + addr);
         }
 
         // SYST is valid only after login
@@ -218,17 +217,18 @@
           if (parserKey.startsWith("UNKNOWN Type: L8"))
             parserKey = "UNIX Type: L8";
           ftp.parser = (new DefaultFTPFileEntryParserFactory())
-            .createFileEntryParser(parserKey);
+              .createFileEntryParser(parserKey);
         } catch (FtpExceptionBadSystResponse e) {
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
+            Ftp.LOG
+                .warn("ftp.client.getSystemName() failed: " + addr + " " + e);
           }
           ftp.parser = null;
         } catch (ParserInitializationException e) {
           // ParserInitializationException is RuntimeException defined in
           // org.apache.commons.net.ftp.parser.ParserInitializationException
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
+            Ftp.LOG.warn("createFileEntryParser() failed. " + addr + " " + e);
           }
           ftp.parser = null;
         } finally {
@@ -236,7 +236,7 @@
             // do not log as severe, otherwise
             // FetcherThread/RequestScheduler will abort
             if (Ftp.LOG.isWarnEnabled()) {
-              Ftp.LOG.warn("ftp.parser is null: "+addr);
+              Ftp.LOG.warn("ftp.parser is null: " + addr);
             }
             ftp.client.logout();
             ftp.client.disconnect();
@@ -262,10 +262,11 @@
       // reset next renewalTime, take the lesser
       if (ftp.client != null && ftp.keepConnection) {
         ftp.renewalTime = System.currentTimeMillis()
-          + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+            + ((ftp.timeout < ftp.serverTimeout) ? ftp.timeout
+                : ftp.serverTimeout);
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
           Ftp.LOG.info("reset renewalTime to "
-            + HttpDateFormat.toString(ftp.renewalTime));
+              + HttpDateFormat.toString(ftp.renewalTime));
         }
       }
 
@@ -273,15 +274,15 @@
       // may have deleted ftp.client
       if (ftp.client != null && !ftp.keepConnection) {
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("disconnect from "+addr);
+          Ftp.LOG.info("disconnect from " + addr);
         }
         ftp.client.logout();
         ftp.client.disconnect();
       }
-      
+
     } catch (Exception e) {
       if (ftp.LOG.isWarnEnabled()) {
-        ftp.LOG.warn(""+e);
+        ftp.LOG.warn("" + e);
         e.printStackTrace(LogUtil.getWarnStream(ftp.LOG));
       }
       // for any un-foreseen exception (run time exception or not),
@@ -292,21 +293,21 @@
       ftp.client = null;
       // or do explicit garbage collection?
       // System.gc();
-// can we be less dramatic, using the following instead?
-// probably unnecessary for our practical purpose here
-//      try {
-//        ftp.client.logout();
-//        ftp.client.disconnect();
-//      }
+      // can we be less dramatic, using the following instead?
+      // probably unnecessary for our practical purpose here
+      // try {
+      // ftp.client.logout();
+      // ftp.client.disconnect();
+      // }
       throw new FtpException(e);
-      //throw e;
+      // throw e;
     }
 
   }
 
   // get ftp file as http response
   private void getFileAsHttpResponse(String path, long lastModified)
-    throws IOException {
+      throws IOException {
 
     ByteArrayOutputStream os = null;
     List list = null;
@@ -318,9 +319,9 @@
 
       FTPFile ftpFile = (FTPFile) list.get(0);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Long(ftpFile.getSize()).toString());
+          new Long(ftpFile.getSize()).toString());
       this.headers.set(Response.LAST_MODIFIED,
-                       HttpDateFormat.toString(ftpFile.getTimestamp()));
+          HttpDateFormat.toString(ftpFile.getTimestamp()));
       // don't retrieve the file if not changed.
       if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
         code = 304;
@@ -331,11 +332,11 @@
 
       this.content = os.toByteArray();
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -344,64 +345,64 @@
       // control connection is off, clean up
       // ftp.client.disconnect();
       if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-        Ftp.LOG.info("delete client because server cut off control channel: "+e);
+        Ftp.LOG.info("delete client because server cut off control channel: "
+            + e);
       }
       ftp.client = null;
 
       // in case this FtpExceptionControlClosedByForcedDataClose is
       // thrown by retrieveList() (not retrieveFile()) above,
       if (os == null) { // indicating throwing by retrieveList()
-        //throw new FtpException("fail to get attibutes: "+path);
+        // throw new FtpException("fail to get attributes: "+path);
         if (Ftp.LOG.isWarnEnabled()) {
-          Ftp.LOG.warn(
-              "Please try larger maxContentLength for ftp.client.retrieveList(). "
-            + e);
+          Ftp.LOG
+              .warn("Please try larger maxContentLength for ftp.client.retrieveList(). "
+                  + e);
         }
         // in a way, this is our request fault
-        this.code = 400;  // http Bad request
+        this.code = 400; // http Bad request
         return;
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Long(ftpFile.getSize()).toString());
-      //this.headers.put("content-type", "text/html");
+          new Long(ftpFile.getSize()).toString());
+      // this.headers.put("content-type", "text/html");
       this.headers.set(Response.LAST_MODIFIED,
-                      HttpDateFormat.toString(ftpFile.getTimestamp()));
+          HttpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
       if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
         code = 304;
         return;
       }
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
     } catch (FtpExceptionCanNotHaveDataConnection e) {
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
-      // it is not a file, but dir, so redirect as a dir
+        // it is not a file, but dir, so redirect as a dir
         this.headers.set(Response.LOCATION, path + "/");
-        this.code = 300;  // http redirect
+        this.code = 300; // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
-      // it is not a dir either
-        this.code = 404;  // http Not Found
+        // it is not a dir either
+        this.code = 404; // http Not Found
       }
 
     } catch (FtpExceptionUnknownForcedDataClose e) {
       // Please note control channel is still live.
       // in a way, this is our request fault
       if (Ftp.LOG.isWarnEnabled()) {
-        Ftp.LOG.warn(
-            "Unrecognized reply after forced close of data channel. "
-          + "If this is acceptable, please modify Client.java accordingly. "
-          + e);
+        Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+            + "If this is acceptable, please modify Client.java accordingly. "
+            + e);
       }
       this.code = 400; // http Bad Request
     }
@@ -410,14 +411,14 @@
 
   // get ftp dir list as http response
   private void getDirAsHttpResponse(String path, long lastModified)
-    throws IOException {
+      throws IOException {
     List list = new LinkedList();
 
     try {
 
       // change to that dir first
       if (!FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
-        this.code = 404;  // http Not Found
+        this.code = 404; // http Not Found
         return;
       }
 
@@ -426,15 +427,15 @@
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Integer(this.content.length).toString());
+          new Integer(this.content.length).toString());
       this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -443,21 +444,22 @@
       // control connection is off, clean up
       // ftp.client.disconnect();
       if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-        Ftp.LOG.info("delete client because server cut off control channel: "+e);
+        Ftp.LOG.info("delete client because server cut off control channel: "
+            + e);
       }
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Integer(this.content.length).toString());
+          new Integer(this.content.length).toString());
       this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -465,14 +467,15 @@
       // Please note control channel is still live.
       // in a way, this is our request fault
       if (Ftp.LOG.isWarnEnabled()) {
-        Ftp.LOG.warn(
-            "Unrecognized reply after forced close of data channel. "
-          + "If this is acceptable, please modify Client.java accordingly. "
-          + e);
+        Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+            + "If this is acceptable, please modify Client.java accordingly. "
+            + e);
       }
       this.code = 400; // http Bad Request
     } catch (FtpExceptionCanNotHaveDataConnection e) {
-      if (Ftp.LOG.isWarnEnabled()) { Ftp.LOG.warn(""+ e); }
+      if (Ftp.LOG.isWarnEnabled()) {
+        Ftp.LOG.warn("" + e);
+      }
       this.code = 500; // http Iternal Server Error
     }
 
@@ -481,16 +484,17 @@
   // generate html page from ftp dir list
   private byte[] list2html(List list, String path, boolean includeDotDot) {
 
-    //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
+    // StringBuffer x = new
+    // StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
     StringBuffer x = new StringBuffer("<html><head>");
-    x.append("<title>Index of "+path+"</title></head>\n");
-    x.append("<body><h1>Index of "+path+"</h1><pre>\n");
+    x.append("<title>Index of " + path + "</title></head>\n");
+    x.append("<body><h1>Index of " + path + "</h1><pre>\n");
 
     if (includeDotDot) {
       x.append("<a href='../'>../</a>\t-\t-\t-\n");
     }
 
-    for (int i=0; i<list.size(); i++) {
+    for (int i = 0; i < list.size(); i++) {
       FTPFile f = (FTPFile) list.get(i);
       String name = f.getName();
       String time = HttpDateFormat.toString(f.getTimestamp());
@@ -498,11 +502,11 @@
         // some ftp server LIST "." and "..", we skip them here
         if (name.equals(".") || name.equals(".."))
           continue;
-        x.append("<a href='"+name+"/"+"'>"+name+"/</a>\t");
-        x.append(time+"\t-\n");
+        x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t");
+        x.append(time + "\t-\n");
       } else if (f.isFile()) {
-        x.append("<a href='"+name+    "'>"+name+"</a>\t");
-        x.append(time+"\t"+f.getSize()+"\n");
+        x.append("<a href='" + name + "'>" + name + "</a>\t");
+        x.append(time + "\t" + f.getSize() + "\n");
       } else {
         // ignore isSymbolicLink()
         // ignore isUnknown()
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java	(working copy)
@@ -19,7 +19,7 @@
 
 /**
  * Exception indicating failure of opening data connection.
- *
+ * 
  * @author John Xing
  */
 public class FtpExceptionCanNotHaveDataConnection extends FtpException {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java	(working copy)
@@ -18,9 +18,9 @@
 package org.apache.nutch.protocol.ftp;
 
 /**
- * Exception indicating unrecognizable reply from server after
- * forced closure of data channel by client (our) side.
- *
+ * Exception indicating unrecognizable reply from server after forced closure of
+ * data channel by client (our) side.
+ * 
  * @author John Xing
  */
 public class FtpExceptionUnknownForcedDataClose extends FtpException {
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java	(revision 1188268)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java	(working copy)
@@ -40,535 +40,521 @@
 import org.apache.commons.net.ftp.FTPConnectionClosedException;
 
 /***********************************************
- * Client.java encapsulates functionalities necessary for nutch to
- * get dir list and retrieve file from an FTP server.
- * This class takes care of all low level details of interacting
- * with an FTP server and provides a convenient higher level interface.
- *
+ * Client.java encapsulates functionalities necessary for nutch to get dir list
+ * and retrieve file from an FTP server. This class takes care of all low level
+ * details of interacting with an FTP server and provides a convenient higher
+ * level interface.
+ * 
  * Modified from FtpClient.java in apache commons-net.
  * 
- * Notes by John Xing:
- * ftp server implementations are hardly uniform and none seems to follow
- * RFCs whole-heartedly. We have no choice, but assume common denominator
- * as following:
- * (1) Use stream mode for data tranfer. Block mode will be better for
- *     multiple file downloading and partial file downloading. However
- *     not every ftpd has block mode support.
- * (2) Use passive mode for data connection.
- *     So nutch will work if we run behind firewall.
- * (3) Data connection is opened/closed per ftp command for the reasons
- *     listed in (1). There are ftp servers out there,
- *     when partial downloading is enforeced by closing data channel
- *     socket on our client side, the server side immediately closes
- *     control channel (socket). Our codes deal with such a bad behavior.
- * (4) LIST is used to obtain remote file attributes if possible.
- *     MDTM & SIZE would be nice, but not as ubiquitously implemented as LIST.
- * (5) Avoid using ABOR in single thread? Do not use it at all.
- *
- * About exceptions:
- * Some specific exceptions are re-thrown as one of FtpException*.java
- * In fact, each function throws FtpException*.java or pass IOException.
- *
+ * Notes by John Xing: ftp server implementations are hardly uniform and none
+ * seems to follow RFCs whole-heartedly. We have no choice, but assume common
+ * denominator as following: (1) Use stream mode for data transfer. Block mode
+ * will be better for multiple file downloading and partial file downloading.
+ * However not every ftpd has block mode support. (2) Use passive mode for data
+ * connection. So nutch will work if we run behind firewall. (3) Data connection
+ * is opened/closed per ftp command for the reasons listed in (1). There are ftp
+ * servers out there, when partial downloading is enforced by closing data
+ * channel socket on our client side, the server side immediately closes control
+ * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
+ * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but
+ * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
+ * thread? Do not use it at all.
+ * 
+ * About exceptions: Some specific exceptions are re-thrown as one of
+ * FtpException*.java In fact, each function throws FtpException*.java or pass
+ * IOException.
+ * 
  * @author John Xing
  ***********************************************/
 
-public class Client extends FTP
-{
-    private int __dataTimeout;
-    private int __passivePort;
-    private String __passiveHost;
-    private int __fileType, __fileFormat;
-    private boolean __remoteVerificationEnabled;
-    private FTPFileEntryParser __entryParser;
-    private String __systemName;
+public class Client extends FTP {
+  private int __dataTimeout;
+  private int __passivePort;
+  private String __passiveHost;
+  private int __fileType, __fileFormat;
+  private boolean __remoteVerificationEnabled;
+  private FTPFileEntryParser __entryParser;
+  private String __systemName;
 
-    // constructor
-    public Client()
-    {
-        __initDefaults();
-        __dataTimeout = -1;
-        __remoteVerificationEnabled = true;
-    }
+  // constructor
+  public Client() {
+    __initDefaults();
+    __dataTimeout = -1;
+    __remoteVerificationEnabled = true;
+  }
 
-    // defaults when initialize
-    private void __initDefaults()
-    {
-        __passiveHost        = null;
-        __passivePort        = -1;
-        __fileType           = FTP.ASCII_FILE_TYPE;
-        __fileFormat         = FTP.NON_PRINT_TEXT_FORMAT;
-        __systemName         = null;
-        __entryParser        = null;
-    }
+  // defaults when initialize
+  private void __initDefaults() {
+    __passiveHost = null;
+    __passivePort = -1;
+    __fileType = FTP.ASCII_FILE_TYPE;
+    __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+    __systemName = null;
+    __entryParser = null;
+  }
 
-    // parse reply for pass()
-    private void __parsePassiveModeReply(String reply)
-    throws MalformedServerReplyException
-    {
-        int i, index, lastIndex;
-        String octet1, octet2;
-        StringBuffer host;
+  // parse reply for pass()
+  private void __parsePassiveModeReply(String reply)
+      throws MalformedServerReplyException {
+    int i, index, lastIndex;
+    String octet1, octet2;
+    StringBuffer host;
 
-        reply = reply.substring(reply.indexOf('(') + 1,
-                                reply.indexOf(')')).trim();
+    reply = reply.substring(reply.indexOf('(') + 1, reply.indexOf(')')).trim();
 
-        host = new StringBuffer(24);
-        lastIndex = 0;
-        index = reply.indexOf(',');
-        host.append(reply.substring(lastIndex, index));
+    host = new StringBuffer(24);
+    lastIndex = 0;
+    index = reply.indexOf(',');
+    host.append(reply.substring(lastIndex, index));
 
-        for (i = 0; i < 3; i++)
-        {
-            host.append('.');
-            lastIndex = index + 1;
-            index = reply.indexOf(',', lastIndex);
-            host.append(reply.substring(lastIndex, index));
-        }
+    for (i = 0; i < 3; i++) {
+      host.append('.');
+      lastIndex = index + 1;
+      index = reply.indexOf(',', lastIndex);
+      host.append(reply.substring(lastIndex, index));
+    }
 
-        lastIndex = index + 1;
-        index = reply.indexOf(',', lastIndex);
+    lastIndex = index + 1;
+    index = reply.indexOf(',', lastIndex);
 
-        octet1 = reply.substring(lastIndex, index);
-        octet2 = reply.substring(index + 1);
+    octet1 = reply.substring(lastIndex, index);
+    octet2 = reply.substring(index + 1);
 
-        // index and lastIndex now used as temporaries
-        try
-        {
-            index = Integer.parseInt(octet1);
-            lastIndex = Integer.parseInt(octet2);
-        }
-        catch (NumberFormatException e)
-        {
-            throw new MalformedServerReplyException(
-                "Could not parse passive host information.\nServer Reply: " + reply);
-        }
+    // index and lastIndex now used as temporaries
+    try {
+      index = Integer.parseInt(octet1);
+      lastIndex = Integer.parseInt(octet2);
+    } catch (NumberFormatException e) {
+      throw new MalformedServerReplyException(
+          "Could not parse passive host information.\nServer Reply: " + reply);
+    }
 
-        index <<= 8;
-        index |= lastIndex;
+    index <<= 8;
+    index |= lastIndex;
 
-        __passiveHost = host.toString();
-        __passivePort = index;
-    }
+    __passiveHost = host.toString();
+    __passivePort = index;
+  }
 
-    // open passive data connection socket
-    protected Socket __openPassiveDataConnection(int command, String arg)
+  // open passive data connection socket
+  protected Socket __openPassiveDataConnection(int command, String arg)
       throws IOException, FtpExceptionCanNotHaveDataConnection {
-        Socket socket;
+    Socket socket;
 
-//        // 20040317, xing, accommodate ill-behaved servers, see below
-//        int port_previous = __passivePort;
+    // // 20040317, xing, accommodate ill-behaved servers, see below
+    // int port_previous = __passivePort;
 
-        if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-          throw new FtpExceptionCanNotHaveDataConnection(
-            "pasv() failed. " + getReplyString());
+    if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+      throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. "
+          + getReplyString());
 
-        try {
-          __parsePassiveModeReply(getReplyStrings()[0]);
-        } catch (MalformedServerReplyException e) {
-          throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-        }
+    try {
+      __parsePassiveModeReply(getReplyStrings()[0]);
+    } catch (MalformedServerReplyException e) {
+      throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+    }
 
-//        // 20040317, xing, accommodate ill-behaved servers, see above
-//        int count = 0;
-//        System.err.println("__passivePort "+__passivePort);
-//        System.err.println("port_previous "+port_previous);
-//        while (__passivePort == port_previous) {
-//          // just quit if too many tries. make it an exception here?
-//          if (count++ > 10)
-//            return null;
-//          // slow down further for each new try
-//          Thread.sleep(500*count);
-//          if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-//            throw new FtpExceptionCanNotHaveDataConnection(
-//              "pasv() failed. " + getReplyString());
-//            //return null;
-//          try {
-//            __parsePassiveModeReply(getReplyStrings()[0]);
-//          } catch (MalformedServerReplyException e) {
-//            throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-//          }
-//        }
+    // // 20040317, xing, accommodate ill-behaved servers, see above
+    // int count = 0;
+    // System.err.println("__passivePort "+__passivePort);
+    // System.err.println("port_previous "+port_previous);
+    // while (__passivePort == port_previous) {
+    // // just quit if too many tries. make it an exception here?
+    // if (count++ > 10)
+    // return null;
+    // // slow down further for each new try
+    // Thread.sleep(500*count);
+    // if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+    // throw new FtpExceptionCanNotHaveDataConnection(
+    // "pasv() failed. " + getReplyString());
+    // //return null;
+    // try {
+    // __parsePassiveModeReply(getReplyStrings()[0]);
+    // } catch (MalformedServerReplyException e) {
+    // throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+    // }
+    // }
 
-        socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
+    socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
 
-        if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
-          socket.close();
-          return null;
-        }
+    if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
+      socket.close();
+      return null;
+    }
 
-        if (__remoteVerificationEnabled && !verifyRemote(socket))
-        {
-            InetAddress host1, host2;
+    if (__remoteVerificationEnabled && !verifyRemote(socket)) {
+      InetAddress host1, host2;
 
-            host1 = socket.getInetAddress();
-            host2 = getRemoteAddress();
+      host1 = socket.getInetAddress();
+      host2 = getRemoteAddress();
 
-            socket.close();
+      socket.close();
 
-            // our precaution
-            throw new FtpExceptionCanNotHaveDataConnection(
-                "Host attempting data connection " + host1.getHostAddress() +
-                " is not same as server " + host2.getHostAddress() +
-                " So we intentionally close it for security precaution."
-                );
-        }
+      // our precaution
+      throw new FtpExceptionCanNotHaveDataConnection(
+          "Host attempting data connection " + host1.getHostAddress()
+              + " is not same as server " + host2.getHostAddress()
+              + " So we intentionally close it for security precaution.");
+    }
 
-        if (__dataTimeout >= 0)
-            socket.setSoTimeout(__dataTimeout);
+    if (__dataTimeout >= 0)
+      socket.setSoTimeout(__dataTimeout);
 
-        return socket;
-    }
+    return socket;
+  }
 
-    /***
-     * Sets the timeout in milliseconds to use for data connection.
-     * set immediately after opening the data connection.
-     ***/
-    public void setDataTimeout(int timeout)
-    {
-        __dataTimeout = timeout;
-    }
+  /***
+   * Sets the timeout in milliseconds to use for data connection. set
+   * immediately after opening the data connection.
+   ***/
+  public void setDataTimeout(int timeout) {
+    __dataTimeout = timeout;
+  }
 
-    /***
-     * Closes the connection to the FTP server and restores
-     * connection parameters to the default values.
-     * <p>
-     * @exception IOException If an error occurs while disconnecting.
-     ***/
-    public void disconnect() throws IOException
-    {
-        __initDefaults();
-        super.disconnect();
-        // no worry for data connection, since we always close it
-        // in every ftp command that invloves data connection
-    }
+  /***
+   * Closes the connection to the FTP server and restores connection parameters
+   * to the default values.
+   * <p>
+   * 
+   * @exception IOException
+   *              If an error occurs while disconnecting.
+   ***/
+  public void disconnect() throws IOException {
+    __initDefaults();
+    super.disconnect();
+    // no worry for data connection, since we always close it
+    // in every ftp command that involves data connection
+  }
 
-    /***
-     * Enable or disable verification that the remote host taking part
-     * of a data connection is the same as the host to which the control
-     * connection is attached.  The default is for verification to be
-     * enabled.  You may set this value at any time, whether the
-     * FTPClient is currently connected or not.
-     * <p>
-     * @param enable True to enable verification, false to disable verification.
-     ***/
-    public void setRemoteVerificationEnabled(boolean enable)
-    {
-        __remoteVerificationEnabled = enable;
-    }
+  /***
+   * Enable or disable verification that the remote host taking part of a data
+   * connection is the same as the host to which the control connection is
+   * attached. The default is for verification to be enabled. You may set this
+   * value at any time, whether the FTPClient is currently connected or not.
+   * <p>
+   * 
+   * @param enable
+   *          True to enable verification, false to disable verification.
+   ***/
+  public void setRemoteVerificationEnabled(boolean enable) {
+    __remoteVerificationEnabled = enable;
+  }
 
-    /***
-     * Return whether or not verification of the remote host participating
-     * in data connections is enabled.  The default behavior is for
-     * verification to be enabled.
-     * <p>
-     * @return True if verification is enabled, false if not.
-     ***/
-    public boolean isRemoteVerificationEnabled()
-    {
-        return __remoteVerificationEnabled;
-    }
+  /***
+   * Return whether or not verification of the remote host participating in data
+   * connections is enabled. The default behavior is for verification to be
+   * enabled.
+   * <p>
+   * 
+   * @return True if verification is enabled, false if not.
+   ***/
+  public boolean isRemoteVerificationEnabled() {
+    return __remoteVerificationEnabled;
+  }
 
-    /***
-     * Login to the FTP server using the provided username and password.
-     * <p>
-     * @param username The username to login under.
-     * @param password The password to use.
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean login(String username, String password) throws IOException
-    {
-        user(username);
+  /***
+   * Login to the FTP server using the provided username and password.
+   * <p>
+   * 
+   * @param username
+   *          The username to login under.
+   * @param password
+   *          The password to use.
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean login(String username, String password) throws IOException {
+    user(username);
 
-        if (FTPReply.isPositiveCompletion(getReplyCode()))
-            return true;
+    if (FTPReply.isPositiveCompletion(getReplyCode()))
+      return true;
 
-        // If we get here, we either have an error code, or an intermmediate
-        // reply requesting password.
-        if (!FTPReply.isPositiveIntermediate(getReplyCode()))
-            return false;
+    // If we get here, we either have an error code, or an intermediate
+    // reply requesting password.
+    if (!FTPReply.isPositiveIntermediate(getReplyCode()))
+      return false;
 
-        return FTPReply.isPositiveCompletion(pass(password));
-    }
+    return FTPReply.isPositiveCompletion(pass(password));
+  }
 
-    /***
-     * Logout of the FTP server by sending the QUIT command.
-     * <p>
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean logout() throws IOException
-    {
-        return FTPReply.isPositiveCompletion(quit());
-    }
+  /***
+   * Logout of the FTP server by sending the QUIT command.
+   * <p>
+   * 
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean logout() throws IOException {
+    return FTPReply.isPositiveCompletion(quit());
+  }
 
-    // retrieve list reply for path
-    public void retrieveList(String path, List entries, int limit,
-      FTPFileEntryParser parser)
-      throws IOException,
-        FtpExceptionCanNotHaveDataConnection,
-        FtpExceptionUnknownForcedDataClose,
-        FtpExceptionControlClosedByForcedDataClose {
-      Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
+  // retrieve list reply for path
+  public void retrieveList(String path, List entries, int limit,
+      FTPFileEntryParser parser) throws IOException,
+      FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
+      FtpExceptionControlClosedByForcedDataClose {
+    Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
 
-      if (socket == null)
-        throw new FtpExceptionCanNotHaveDataConnection("LIST "
+    if (socket == null)
+      throw new FtpExceptionCanNotHaveDataConnection("LIST "
           + ((path == null) ? "" : path));
 
-      BufferedReader reader =
-          new BufferedReader(new InputStreamReader(socket.getInputStream()));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        socket.getInputStream()));
 
-      // force-close data channel socket, when download limit is reached
-      boolean mandatory_close = false;
+    // force-close data channel socket, when download limit is reached
+    boolean mandatory_close = false;
 
-      //List entries = new LinkedList();
-      int count = 0;
-      String line = parser.readNextEntry(reader);
-      while (line != null) {
-        FTPFile ftpFile = parser.parseFTPEntry(line);
-        // skip non-formatted lines
-        if (ftpFile == null) {
-          line = parser.readNextEntry(reader);
-          continue;
-        }
-        entries.add(ftpFile);
-        count += line.length();
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is up to the line when total bytes is just over limit
-        if (limit >= 0 && count > limit) {
-          mandatory_close = true;
-          break;
-        }
+    // List entries = new LinkedList();
+    int count = 0;
+    String line = parser.readNextEntry(reader);
+    while (line != null) {
+      FTPFile ftpFile = parser.parseFTPEntry(line);
+      // skip non-formatted lines
+      if (ftpFile == null) {
         line = parser.readNextEntry(reader);
+        continue;
       }
+      entries.add(ftpFile);
+      count += line.length();
+      // impose download limit if limit >= 0, otherwise no limit
+      // here, cut off is up to the line when total bytes is just over limit
+      if (limit >= 0 && count > limit) {
+        mandatory_close = true;
+        break;
+      }
+      line = parser.readNextEntry(reader);
+    }
 
-      //if (mandatory_close)
-      // you always close here, no matter mandatory_close or not.
-      // however different ftp servers respond differently, see below.
-      socket.close();
+    // if (mandatory_close)
+    // you always close here, no matter mandatory_close or not.
+    // however different ftp servers respond differently, see below.
+    socket.close();
 
-      // scenarios:
-      // (1) mandatory_close is false, download limit not reached
-      //     no special care here
-      // (2) mandatory_close is true, download limit is reached
-      //     different servers have different reply codes:
+    // scenarios:
+    // (1) mandatory_close is false, download limit not reached
+    // no special care here
+    // (2) mandatory_close is true, download limit is reached
+    // different servers have different reply codes:
 
-      try {
-        int reply = getReply();
-        if (!_notBadReply(reply))
-          throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-      } catch (FTPConnectionClosedException e) {
-        // some ftp servers will close control channel if data channel socket
-        // is closed by our end before all data has been read out. Check:
-        // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-        // so must catch FTPConnectionClosedException thrown by getReply() above
-        //disconnect();
-        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-      }
-
+    try {
+      int reply = getReply();
+      if (!_notBadReply(reply))
+        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+    } catch (FTPConnectionClosedException e) {
+      // some ftp servers will close control channel if data channel socket
+      // is closed by our end before all data has been read out. Check:
+      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+      // so must catch FTPConnectionClosedException thrown by getReply() above
+      // disconnect();
+      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
     }
 
-    // retrieve file for path
-    public void retrieveFile(String path, OutputStream os, int limit)
-      throws IOException,
-        FtpExceptionCanNotHaveDataConnection,
-        FtpExceptionUnknownForcedDataClose,
-        FtpExceptionControlClosedByForcedDataClose {
+  }
 
-      Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
+  // retrieve file for path
+  public void retrieveFile(String path, OutputStream os, int limit)
+      throws IOException, FtpExceptionCanNotHaveDataConnection,
+      FtpExceptionUnknownForcedDataClose,
+      FtpExceptionControlClosedByForcedDataClose {
 
-      if (socket == null)
-        throw new FtpExceptionCanNotHaveDataConnection("RETR "
+    Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
+
+    if (socket == null)
+      throw new FtpExceptionCanNotHaveDataConnection("RETR "
           + ((path == null) ? "" : path));
 
-      InputStream input = socket.getInputStream();
+    InputStream input = socket.getInputStream();
 
-      // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
-      // do we ever need ASCII_FILE_TYPE?
-      //if (__fileType == ASCII_FILE_TYPE)
-      // input = new FromNetASCIIInputStream(input);
+    // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
+    // do we ever need ASCII_FILE_TYPE?
+    // if (__fileType == ASCII_FILE_TYPE)
+    // input = new FromNetASCIIInputStream(input);
 
-      // fixme, should we instruct server here for binary file type?
+    // fixme, should we instruct server here for binary file type?
 
-      // force-close data channel socket
-      boolean mandatory_close = false;
+    // force-close data channel socket
+    boolean mandatory_close = false;
 
-      int len; int count = 0;
-      byte[] buf =
-        new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
-      while((len=input.read(buf,0,buf.length)) != -1){
-        count += len;
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is exactly of limit bytes
-        if (limit >= 0 && count > limit) {
-          os.write(buf,0,len-(count-limit));
-          mandatory_close = true;
-          break;
-        }
-        os.write(buf,0,len);
-        os.flush();
+    int len;
+    int count = 0;
+    byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
+    while ((len = input.read(buf, 0, buf.length)) != -1) {
+      count += len;
+      // impose download limit if limit >= 0, otherwise no limit
+      // here, cut off is exactly of limit bytes
+      if (limit >= 0 && count > limit) {
+        os.write(buf, 0, len - (count - limit));
+        mandatory_close = true;
+        break;
       }
+      os.write(buf, 0, len);
+      os.flush();
+    }
 
-      //if (mandatory_close)
-      // you always close here, no matter mandatory_close or not.
-      // however different ftp servers respond differently, see below.
-      socket.close();
+    // if (mandatory_close)
+    // you always close here, no matter mandatory_close or not.
+    // however different ftp servers respond differently, see below.
+    socket.close();
 
-      // scenarios:
-      // (1) mandatory_close is false, download limit not reached
-      //     no special care here
-      // (2) mandatory_close is true, download limit is reached
-      //     different servers have different reply codes:
+    // scenarios:
+    // (1) mandatory_close is false, download limit not reached
+    // no special care here
+    // (2) mandatory_close is true, download limit is reached
+    // different servers have different reply codes:
 
-      // do not need this
-      //sendCommand("ABOR");
+    // do not need this
+    // sendCommand("ABOR");
 
-      try {
-        int reply = getReply();
-        if (!_notBadReply(reply))
-          throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-      } catch (FTPConnectionClosedException e) {
-        // some ftp servers will close control channel if data channel socket
-        // is closed by our end before all data has been read out. Check:
-        // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-        // so must catch FTPConnectionClosedException thrown by getReply() above
-        //disconnect();
-        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-      }
-
+    try {
+      int reply = getReply();
+      if (!_notBadReply(reply))
+        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+    } catch (FTPConnectionClosedException e) {
+      // some ftp servers will close control channel if data channel socket
+      // is closed by our end before all data has been read out. Check:
+      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+      // so must catch FTPConnectionClosedException thrown by getReply() above
+      // disconnect();
+      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
     }
 
-    // reply check after closing data connection
-    private boolean _notBadReply(int reply) {
+  }
 
-      if (FTPReply.isPositiveCompletion(reply)) {
-        // do nothing
-      } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
+  // reply check after closing data connection
+  private boolean _notBadReply(int reply) {
+
+    if (FTPReply.isPositiveCompletion(reply)) {
+      // do nothing
+    } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
       // some ftp servers reply 426, e.g.,
       // foggy FTP server (Version wu-2.6.2(2)
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
+      // is there a second reply waiting? no!
+      // getReply();
+    } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
       // some ftp servers reply 450, e.g.,
       // ProFTPD [ftp.kernel.org]
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 451) { // FTPReply.ACTION_ABORTED
+      // is there a second reply waiting? no!
+      // getReply();
+    } else if (reply == 451) { // FTPReply.ACTION_ABORTED
       // some ftp servers reply 451, e.g.,
       // ProFTPD [ftp.kernel.org]
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 451) { // FTPReply.ACTION_ABORTED
-      } else {
+      // is there a second reply waiting? no!
+      // getReply();
+    // note: removed a duplicate, unreachable "else if (reply == 451)" branch
+    } else {
       // what other kind of ftp server out there?
-        return false;
-      }
+      return false;
+    }
 
+    return true;
+  }
+
+  /***
+   * Sets the file type to be transferred. This should be one of
+   * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
+   * etc. The file type only needs to be set when you want to change the type.
+   * After changing it, the new type stays in effect until you change it again.
+   * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method
+   * is never called.
+   * <p>
+   * 
+   * @param fileType
+   *          The <code> _FILE_TYPE </code> constant indicating the type of file.
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean setFileType(int fileType) throws IOException {
+    if (FTPReply.isPositiveCompletion(type(fileType))) {
+      __fileType = fileType;
+      __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
       return true;
     }
+    return false;
+  }
 
-    /***
-     * Sets the file type to be transferred.  This should be one of 
-     * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
-     * etc.  The file type only needs to be set when you want to change the
-     * type.  After changing it, the new type stays in effect until you change
-     * it again.  The default file type is <code> FTP.ASCII_FILE_TYPE </code>
-     * if this method is never called.
-     * <p>
-     * @param fileType The <code> _FILE_TYPE </code> constant indcating the
-     *                 type of file.
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean setFileType(int fileType) throws IOException
-    {
-        if (FTPReply.isPositiveCompletion(type(fileType)))
-        {
-            __fileType = fileType;
-            __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
-            return true;
-        }
-        return false;
+  /***
+   * Fetches the system type name from the server and returns the string. This
+   * value is cached for the duration of the connection after the first call to
+   * this method. In other words, only the first time that you invoke this
+   * method will it issue a SYST command to the FTP server. FTPClient will
+   * remember the value and return the cached value until a call to disconnect.
+   * <p>
+   * 
+   * @return The system type name obtained from the server. null if the
+   *         information could not be obtained.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
+    // FTP servers deviate from the expected NAME_SYSTEM_TYPE reply, so accept
+    // any positive completion. Issue SYST only when nothing is cached yet;
+    // on later calls return the cached name instead of throwing (bug fix).
+    if (__systemName == null) {
+      if (!FTPReply.isPositiveCompletion(syst())) {
+        throw new FtpExceptionBadSystResponse("Bad response of SYST: "
+            + getReplyString());
+      }
+      __systemName = (getReplyStrings()[0]).substring(4);
+    }
 
-    /***
-     * Fetches the system type name from the server and returns the string.
-     * This value is cached for the duration of the connection after the
-     * first call to this method.  In other words, only the first time
-     * that you invoke this method will it issue a SYST command to the
-     * FTP server.  FTPClient will remember the value and return the
-     * cached value until a call to disconnect.
-     * <p>
-     * @return The system type name obtained from the server.  null if the
-     *       information could not be obtained.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *  command to the server or receiving a reply from the server.
-     ***/
-    public String getSystemName()
-      throws IOException, FtpExceptionBadSystResponse
-    {
-      //if (syst() == FTPReply.NAME_SYSTEM_TYPE)
-      // Technically, we should expect a NAME_SYSTEM_TYPE response, but
-      // in practice FTP servers deviate, so we soften the condition to
-      // a positive completion.
-        if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
-            __systemName = (getReplyStrings()[0]).substring(4);
-        } else {
-            throw new FtpExceptionBadSystResponse(
-              "Bad response of SYST: " + getReplyString());
-        }
+    return __systemName;
+  }
 
-        return __systemName;
-    }
+  /***
+   * Sends a NOOP command to the FTP server. This is useful for preventing
+   * server timeouts.
+   * <p>
+   * 
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean sendNoOp() throws IOException {
+    return FTPReply.isPositiveCompletion(noop());
+  }
 
-    /***
-     * Sends a NOOP command to the FTP server.  This is useful for preventing
-     * server timeouts.
-     * <p>
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean sendNoOp() throws IOException
-    {
-        return FTPReply.isPositiveCompletion(noop());
-    }
+  // client.stat(path);
+  // client.sendCommand("STAT");
+  // client.sendCommand("STAT",path);
+  // client.sendCommand("MDTM",path);
+  // client.sendCommand("SIZE",path);
+  // client.sendCommand("HELP","SITE");
+  // client.sendCommand("SYST");
+  // client.setRestartOffset(120);
 
-//    client.stat(path);
-//    client.sendCommand("STAT");
-//    client.sendCommand("STAT",path);
-//    client.sendCommand("MDTM",path);
-//    client.sendCommand("SIZE",path);
-//    client.sendCommand("HELP","SITE");
-//    client.sendCommand("SYST");
-//    client.setRestartOffset(120);
-
 }
Index: src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(revision 1188268)
+++ src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(working copy)
@@ -37,55 +37,53 @@
 
 public class TestCCParseFilter extends TestCase {
 
-	private static final File testDir = new File(
-			System.getProperty("test.input"));
+  private static final File testDir = new File(System.getProperty("test.input"));
 
-	public void testPages() throws Exception {
-		pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
-		// Tika returns <a> whereas parse-html returns <rel>
-		// check later
-		pageTest(new File(testDir, "rel.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
-		// Tika returns <a> whereas parse-html returns <rdf>
-		// check later
-		pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
-				"http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
-	}
+  public void testPages() throws Exception {
+    pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
+    // Tika returns <a> whereas parse-html returns <rel>
+    // check later
+    pageTest(new File(testDir, "rel.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
+    // Tika returns <a> whereas parse-html returns <rdf>
+    // check later
+    pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
+        "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
+  }
 
-	public void pageTest(File file, String url, String license,
-			String location, String type) throws Exception {
+  public void pageTest(File file, String url, String license, String location,
+      String type) throws Exception {
 
-		String contentType = "text/html";
-		InputStream in = new FileInputStream(file);
-		ByteArrayOutputStream out = new ByteArrayOutputStream(
-				(int) file.length());
-		byte[] buffer = new byte[1024];
-		int i;
-		while ((i = in.read(buffer)) != -1) {
-			out.write(buffer, 0, i);
-		}
-		in.close();
-		byte[] bytes = out.toByteArray();
-		Configuration conf = NutchConfiguration.create();
+    String contentType = "text/html";
+    InputStream in = new FileInputStream(file);
+    ByteArrayOutputStream out = new ByteArrayOutputStream((int) file.length());
+    byte[] buffer = new byte[1024];
+    int i;
+    while ((i = in.read(buffer)) != -1) {
+      out.write(buffer, 0, i);
+    }
+    in.close();
+    byte[] bytes = out.toByteArray();
+    Configuration conf = NutchConfiguration.create();
 
-		WebPage page = new WebPage();
-		page.setBaseUrl(new Utf8(url));
-		page.setContent(ByteBuffer.wrap(bytes));
-		MimeUtil mimeutil = new MimeUtil(conf);
-		MimeType mtype = mimeutil.getMimeType(file);
-		page.setContentType(new Utf8(mtype.getName()));
+    WebPage page = new WebPage();
+    page.setBaseUrl(new Utf8(url));
+    page.setContent(ByteBuffer.wrap(bytes));
+    MimeUtil mimeutil = new MimeUtil(conf);
+    MimeType mtype = mimeutil.getMimeType(file);
+    page.setContentType(new Utf8(mtype.getName()));
 
-		new ParseUtil(conf).parse(url, page);
+    new ParseUtil(conf).parse(url, page);
 
-		ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
-		assertEquals(license, new String(bb.array()));
-		bb = page.getFromMetadata(new Utf8("License-Location"));
-		assertEquals(location, new String(bb.array()));
-		bb = page.getFromMetadata(new Utf8("Work-Type"));
-		if (bb == null)
-			assertEquals(type, null);
-		else
-			assertEquals(type, new String(bb.array()));
-	}
+    ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
+    assertEquals(license, new String(bb.array()));
+    bb = page.getFromMetadata(new Utf8("License-Location"));
+    assertEquals(location, new String(bb.array()));
+    bb = page.getFromMetadata(new Utf8("Work-Type"));
+    if (bb == null)
+      assertEquals(type, null);
+    else
+      assertEquals(type, new String(bb.array()));
+  }
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(revision 1188268)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(working copy)
@@ -37,101 +37,101 @@
 
 /** Adds basic searchable fields to a document. */
 public class CCIndexingFilter implements IndexingFilter {
-	public static final Logger LOG = LoggerFactory.getLogger(CCIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CCIndexingFilter.class);
 
-	/** The name of the document field we use. */
-	public static String FIELD = "cc";
+  /** The name of the document field we use. */
+  public static String FIELD = "cc";
 
-	private Configuration conf;
+  private Configuration conf;
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	/**
-	 * Add the features represented by a license URL. Urls are of the form
-	 * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
-	 * license feature.
-	 */
-	public void addUrlFeatures(NutchDocument doc, String urlString) {
-		try {
-			URL url = new URL(urlString);
+  /**
+   * Add the features represented by a license URL. Urls are of the form
+   * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
+   * license feature.
+   */
+  public void addUrlFeatures(NutchDocument doc, String urlString) {
+    try {
+      URL url = new URL(urlString);
 
-			// tokenize the path of the url, breaking at slashes and dashes
-			StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
+      // tokenize the path of the url, breaking at slashes and dashes
+      StringTokenizer names = new StringTokenizer(url.getPath(), "/-");
 
-			if (names.hasMoreTokens())
-				names.nextToken(); // throw away "licenses"
+      if (names.hasMoreTokens())
+        names.nextToken(); // throw away "licenses"
 
-			// add a feature per component after "licenses"
-			while (names.hasMoreTokens()) {
-				String feature = names.nextToken();
-				addFeature(doc, feature);
-			}
-		} catch (MalformedURLException e) {
-			if (LOG.isWarnEnabled()) {
-				LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
-			}
-		}
-	}
+      // add a feature per component after "licenses"
+      while (names.hasMoreTokens()) {
+        String feature = names.nextToken();
+        addFeature(doc, feature);
+      }
+    } catch (MalformedURLException e) {
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
+      }
+    }
+  }
 
-	private void addFeature(NutchDocument doc, String feature) {
-		doc.add(FIELD, feature);
-	}
+  private void addFeature(NutchDocument doc, String feature) {
+    doc.add(FIELD, feature);
+  }
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
 
-	@Override
-	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
-			throws IndexingException {
+  @Override
+  public NutchDocument filter(NutchDocument doc, String url, WebPage page)
+      throws IndexingException {
 
-		ByteBuffer blicense = page.getFromMetadata(new Utf8(
-				CreativeCommons.LICENSE_URL));
-		if (blicense != null) {
-			String licenseUrl = new String(blicense.array());
-			if (LOG.isInfoEnabled()) {
-				LOG.info("CC: indexing " + licenseUrl + " for: "
-						+ url.toString());
-			}
+    ByteBuffer blicense = page.getFromMetadata(new Utf8(
+        CreativeCommons.LICENSE_URL));
+    if (blicense != null) {
+      String licenseUrl = new String(blicense.array());
+      if (LOG.isInfoEnabled()) {
+        LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
+      }
 
-			// add the entire license as cc:license=xxx
-			addFeature(doc, "license=" + licenseUrl);
+      // add the entire license as cc:license=xxx
+      addFeature(doc, "license=" + licenseUrl);
 
-			// index license attributes extracted of the license url
-			addUrlFeatures(doc, licenseUrl);
-		}
+      // index license attributes extracted of the license url
+      addUrlFeatures(doc, licenseUrl);
+    }
 
-		// index the license location as cc:meta=xxx
-		ByteBuffer blicenseloc = page.getFromMetadata(new Utf8(
-				CreativeCommons.LICENSE_LOCATION));
-		if (blicenseloc != null) {
-			String licenseLocation = new String(blicenseloc.array());
-			addFeature(doc, "meta=" + licenseLocation);
-		}
+    // index the license location as cc:meta=xxx
+    ByteBuffer blicenseloc = page.getFromMetadata(new Utf8(
+        CreativeCommons.LICENSE_LOCATION));
+    if (blicenseloc != null) {
+      String licenseLocation = new String(blicenseloc.array());
+      addFeature(doc, "meta=" + licenseLocation);
+    }
 
-		// index the work type cc:type=xxx
-		ByteBuffer bworkType = page.getFromMetadata(new Utf8(
-				CreativeCommons.WORK_TYPE));
-		if (bworkType != null) {
-			String workType = new String(bworkType.array());
-			addFeature(doc, workType);
-		}
+    // index the work type cc:type=xxx
+    ByteBuffer bworkType = page.getFromMetadata(new Utf8(
+        CreativeCommons.WORK_TYPE));
+    if (bworkType != null) {
+      String workType = new String(bworkType.array());
+      addFeature(doc, workType);
+    }
 
-		return doc;
-	}
+    return doc;
+  }
 
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 1188268)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -50,282 +50,277 @@
 
 /** Adds metadata identifying the Creative Commons license used, if any. */
 public class CCParseFilter implements ParseFilter {
-	public static final Logger LOG = LoggerFactory.getLogger(CCParseFilter.class);
+  public static final Logger LOG = LoggerFactory.getLogger(CCParseFilter.class);
 
-	/** Walks DOM tree, looking for RDF in comments and licenses in anchors. */
-	public static class Walker {
-		private URL base; // base url of page
-		private String rdfLicense; // subject url found, if any
-		private URL relLicense; // license url found, if any
-		private URL anchorLicense; // anchor url found, if any
-		private String workType; // work type URI
+  /** Walks DOM tree, looking for RDF in comments and licenses in anchors. */
+  public static class Walker {
+    private URL base; // base url of page
+    private String rdfLicense; // subject url found, if any
+    private URL relLicense; // license url found, if any
+    private URL anchorLicense; // anchor url found, if any
+    private String workType; // work type URI
 
-		private Walker(URL base) {
-			this.base = base;
-		}
+    private Walker(URL base) {
+      this.base = base;
+    }
 
-		/** Scan the document adding attributes to metadata. */
-		public static void walk(Node doc, URL base, WebPage page,
-				Configuration conf) throws ParseException {
+    /** Scan the document adding attributes to metadata. */
+    public static void walk(Node doc, URL base, WebPage page, Configuration conf)
+        throws ParseException {
 
-			// walk the DOM tree, scanning for license data
-			Walker walker = new Walker(base);
-			walker.walk(doc);
+      // walk the DOM tree, scanning for license data
+      Walker walker = new Walker(base);
+      walker.walk(doc);
 
-			// interpret results of walk
-			String licenseUrl = null;
-			String licenseLocation = null;
-			if (walker.rdfLicense != null) { // 1st choice: subject in RDF
-				licenseLocation = "rdf";
-				licenseUrl = walker.rdfLicense;
-			} else if (walker.relLicense != null) { // 2nd: anchor w/
-													// rel=license
-				licenseLocation = "rel";
-				licenseUrl = walker.relLicense.toString();
-			} else if (walker.anchorLicense != null) { // 3rd: anchor w/ CC
-														// license
-				licenseLocation = "a";
-				licenseUrl = walker.anchorLicense.toString();
-			} else if (conf.getBoolean("creativecommons.exclude.unlicensed",
-					false)) {
-				throw new ParseException("No CC license.  Excluding.");
-			}
+      // interpret results of walk
+      String licenseUrl = null;
+      String licenseLocation = null;
+      if (walker.rdfLicense != null) { // 1st choice: subject in RDF
+        licenseLocation = "rdf";
+        licenseUrl = walker.rdfLicense;
+      } else if (walker.relLicense != null) { // 2nd: anchor w/
+        // rel=license
+        licenseLocation = "rel";
+        licenseUrl = walker.relLicense.toString();
+      } else if (walker.anchorLicense != null) { // 3rd: anchor w/ CC
+        // license
+        licenseLocation = "a";
+        licenseUrl = walker.anchorLicense.toString();
+      } else if (conf.getBoolean("creativecommons.exclude.unlicensed", false)) {
+        throw new ParseException("No CC license.  Excluding.");
+      }
 
-			// add license to metadata
-			if (licenseUrl != null) {
-				if (LOG.isInfoEnabled()) {
-					LOG.info("CC: found " + licenseUrl + " in "
-							+ licenseLocation + " of " + base);
-				}
-				page.putToMetadata(new Utf8(CreativeCommons.LICENSE_URL),
-						ByteBuffer.wrap(licenseUrl.getBytes()));
-				page.putToMetadata(new Utf8(CreativeCommons.LICENSE_LOCATION),
-						ByteBuffer.wrap(licenseLocation.getBytes()));
-			}
+      // add license to metadata
+      if (licenseUrl != null) {
+        if (LOG.isInfoEnabled()) {
+          LOG.info("CC: found " + licenseUrl + " in " + licenseLocation
+              + " of " + base);
+        }
+        page.putToMetadata(new Utf8(CreativeCommons.LICENSE_URL),
+            ByteBuffer.wrap(licenseUrl.getBytes()));
+        page.putToMetadata(new Utf8(CreativeCommons.LICENSE_LOCATION),
+            ByteBuffer.wrap(licenseLocation.getBytes()));
+      }
 
-			if (walker.workType != null) {
-				if (LOG.isInfoEnabled()) {
-					LOG.info("CC: found " + walker.workType + " in " + base);
-				}
-				page.putToMetadata(new Utf8(CreativeCommons.WORK_TYPE),
-						ByteBuffer.wrap(walker.workType.getBytes()));
-			}
+      if (walker.workType != null) {
+        if (LOG.isInfoEnabled()) {
+          LOG.info("CC: found " + walker.workType + " in " + base);
+        }
+        page.putToMetadata(new Utf8(CreativeCommons.WORK_TYPE),
+            ByteBuffer.wrap(walker.workType.getBytes()));
+      }
 
-		}
+    }
 
-		/** Scan the document looking for RDF in comments and license elements. */
-		private void walk(Node node) {
+    /** Scan the document looking for RDF in comments and license elements. */
+    private void walk(Node node) {
 
-			// check element nodes for license URL
-			if (node instanceof Element) {
-				findLicenseUrl((Element) node);
-			}
+      // check element nodes for license URL
+      if (node instanceof Element) {
+        findLicenseUrl((Element) node);
+      }
 
-			// check comment nodes for license RDF
-			if (node instanceof Comment) {
-				findRdf(((Comment) node).getData());
-			}
+      // check comment nodes for license RDF
+      if (node instanceof Comment) {
+        findRdf(((Comment) node).getData());
+      }
 
-			// recursively walk child nodes
-			NodeList children = node.getChildNodes();
-			for (int i = 0; children != null && i < children.getLength(); i++) {
-				walk(children.item(i));
-			}
-		}
+      // recursively walk child nodes
+      NodeList children = node.getChildNodes();
+      for (int i = 0; children != null && i < children.getLength(); i++) {
+        walk(children.item(i));
+      }
+    }
 
-		/**
-		 * Extract license url from element, if any. Thse are the href attribute
-		 * of anchor elements with rel="license". These must also point to
-		 * http://creativecommons.org/licenses/.
-		 */
-		private void findLicenseUrl(Element element) {
-			// only look in Anchor elements
-			if (!"a".equalsIgnoreCase(element.getTagName()))
-				return;
+    /**
+     * Extract license url from element, if any. These are the href attribute of
+     * anchor elements with rel="license". These must also point to
+     * http://creativecommons.org/licenses/.
+     */
+    private void findLicenseUrl(Element element) {
+      // only look in Anchor elements
+      if (!"a".equalsIgnoreCase(element.getTagName()))
+        return;
 
-			// require an href
-			String href = element.getAttribute("href");
-			if (href == null)
-				return;
+      // require an href
+      String href = element.getAttribute("href");
+      if (href == null)
+        return;
 
-			try {
-				URL url = new URL(base, href); // resolve the url
+      try {
+        URL url = new URL(base, href); // resolve the url
 
-				// check that it's a CC license URL
-				if ("http".equalsIgnoreCase(url.getProtocol())
-						&& "creativecommons.org"
-								.equalsIgnoreCase(url.getHost())
-						&& url.getPath() != null
-						&& url.getPath().startsWith("/licenses/")
-						&& url.getPath().length() > "/licenses/".length()) {
+        // check that it's a CC license URL
+        if ("http".equalsIgnoreCase(url.getProtocol())
+            && "creativecommons.org".equalsIgnoreCase(url.getHost())
+            && url.getPath() != null && url.getPath().startsWith("/licenses/")
+            && url.getPath().length() > "/licenses/".length()) {
 
-					// check rel="license"
-					String rel = element.getAttribute("rel");
-					if (rel != null && "license".equals(rel)
-							&& this.relLicense == null) {
-						this.relLicense = url; // found rel license
-					} else if (this.anchorLicense == null) {
-						this.anchorLicense = url; // found anchor license
-					}
-				}
-			} catch (MalformedURLException e) { // ignore malformed urls
-			}
-		}
+          // check rel="license"
+          String rel = element.getAttribute("rel");
+          if (rel != null && "license".equals(rel) && this.relLicense == null) {
+            this.relLicense = url; // found rel license
+          } else if (this.anchorLicense == null) {
+            this.anchorLicense = url; // found anchor license
+          }
+        }
+      } catch (MalformedURLException e) { // ignore malformed urls
+      }
+    }
 
-		/** Configure a namespace aware XML parser. */
-		private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory
-				.newInstance();
-		static {
-			FACTORY.setNamespaceAware(true);
-		}
+    /** Configure a namespace aware XML parser. */
+    private static final DocumentBuilderFactory FACTORY = DocumentBuilderFactory
+        .newInstance();
+    static {
+      FACTORY.setNamespaceAware(true);
+    }
 
-		/** Creative Commons' namespace URI. */
-		private static final String CC_NS = "http://web.resource.org/cc/";
+    /** Creative Commons' namespace URI. */
+    private static final String CC_NS = "http://web.resource.org/cc/";
 
-		/** Dublin Core namespace URI. */
-		private static final String DC_NS = "http://purl.org/dc/elements/1.1/";
+    /** Dublin Core namespace URI. */
+    private static final String DC_NS = "http://purl.org/dc/elements/1.1/";
 
-		/** RDF syntax namespace URI. */
-		private static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+    /** RDF syntax namespace URI. */
+    private static final String RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
 
-		private void findRdf(String comment) {
-			// first check for likely RDF in comment
-			int rdfPosition = comment.indexOf("RDF");
-			if (rdfPosition < 0)
-				return; // no RDF, abort
-			int nsPosition = comment.indexOf(CC_NS);
-			if (nsPosition < 0)
-				return; // no RDF, abort
+    private void findRdf(String comment) {
+      // first check for likely RDF in comment
+      int rdfPosition = comment.indexOf("RDF");
+      if (rdfPosition < 0)
+        return; // no RDF, abort
+      int nsPosition = comment.indexOf(CC_NS);
+      if (nsPosition < 0)
+        return; // no RDF, abort
 
-			// try to parse the XML
-			Document doc;
-			try {
-				DocumentBuilder parser = FACTORY.newDocumentBuilder();
-				doc = parser.parse(new InputSource(new StringReader(comment)));
-			} catch (Exception e) {
-				if (LOG.isWarnEnabled()) {
-					LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
-				}
-				// e.printStackTrace();
-				return;
-			}
+      // try to parse the XML
+      Document doc;
+      try {
+        DocumentBuilder parser = FACTORY.newDocumentBuilder();
+        doc = parser.parse(new InputSource(new StringReader(comment)));
+      } catch (Exception e) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: Failed to parse RDF in " + base + ": " + e);
+        }
+        // e.printStackTrace();
+        return;
+      }
 
-			// check that root is rdf:RDF
-			NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
-			if (roots.getLength() != 1) {
-				if (LOG.isWarnEnabled()) {
-					LOG.warn("CC: No RDF root in " + base);
-				}
-				return;
-			}
-			Element rdf = (Element) roots.item(0);
+      // check that root is rdf:RDF
+      NodeList roots = doc.getElementsByTagNameNS(RDF_NS, "RDF");
+      if (roots.getLength() != 1) {
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("CC: No RDF root in " + base);
+        }
+        return;
+      }
+      Element rdf = (Element) roots.item(0);
 
-			// get cc:License nodes inside rdf:RDF
-			NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
-			for (int i = 0; i < licenses.getLength(); i++) {
+      // get cc:License nodes inside rdf:RDF
+      NodeList licenses = rdf.getElementsByTagNameNS(CC_NS, "License");
+      for (int i = 0; i < licenses.getLength(); i++) {
 
-				Element l = (Element) licenses.item(i);
+        Element l = (Element) licenses.item(i);
 
-				// license is rdf:about= attribute from cc:License
-				this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about")
-						.getValue();
+        // license is rdf:about= attribute from cc:License
+        this.rdfLicense = l.getAttributeNodeNS(RDF_NS, "about").getValue();
 
-				// walk predicates of cc:License
-				NodeList predicates = l.getChildNodes();
-				for (int j = 0; j < predicates.getLength(); j++) {
-					Node predicateNode = predicates.item(j);
-					if (!(predicateNode instanceof Element))
-						continue;
-					Element predicateElement = (Element) predicateNode;
+        // walk predicates of cc:License
+        NodeList predicates = l.getChildNodes();
+        for (int j = 0; j < predicates.getLength(); j++) {
+          Node predicateNode = predicates.item(j);
+          if (!(predicateNode instanceof Element))
+            continue;
+          Element predicateElement = (Element) predicateNode;
 
-					// extract predicates of cc:xxx predicates
-					if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
-						continue;
-					}
-					String predicate = predicateElement.getLocalName();
+          // extract predicates of cc:xxx predicates
+          if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
+            continue;
+          }
+          String predicate = predicateElement.getLocalName();
 
-					// object is rdf:resource from cc:xxx predicates
-					String object = predicateElement.getAttributeNodeNS(RDF_NS,
-							"resource").getValue();
+          // object is rdf:resource from cc:xxx predicates
+          String object = predicateElement.getAttributeNodeNS(RDF_NS,
+              "resource").getValue();
 
-					// add object and predicate to metadata
-					// metadata.put(object, predicate);
-					// if (LOG.isInfoEnabled()) {
-					// LOG.info("CC: found: "+predicate+"="+object);
-					// }
-				}
-			}
+          // add object and predicate to metadata
+          // metadata.put(object, predicate);
+          // if (LOG.isInfoEnabled()) {
+          // LOG.info("CC: found: "+predicate+"="+object);
+          // }
+        }
+      }
 
-			// get cc:Work nodes from rdf:RDF
-			NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
-			for (int i = 0; i < works.getLength(); i++) {
-				Element l = (Element) works.item(i);
+      // get cc:Work nodes from rdf:RDF
+      NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
+      for (int i = 0; i < works.getLength(); i++) {
+        Element l = (Element) works.item(i);
 
-				// get dc:type nodes from cc:Work
-				NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
-				for (int j = 0; j < types.getLength(); j++) {
-					Element type = (Element) types.item(j);
-					String workUri = type
-							.getAttributeNodeNS(RDF_NS, "resource").getValue();
-					this.workType = (String) WORK_TYPE_NAMES.get(workUri);
-					break;
-				}
-			}
-		}
-	}
+        // get dc:type nodes from cc:Work
+        NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
+        for (int j = 0; j < types.getLength(); j++) {
+          Element type = (Element) types.item(j);
+          String workUri = type.getAttributeNodeNS(RDF_NS, "resource")
+              .getValue();
+          this.workType = (String) WORK_TYPE_NAMES.get(workUri);
+          break;
+        }
+      }
+    }
+  }
 
-	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
+  private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();
 
-	static {
-		FIELDS.add(WebPage.Field.BASE_URL);
-		FIELDS.add(WebPage.Field.METADATA);
-	}
+  static {
+    FIELDS.add(WebPage.Field.BASE_URL);
+    FIELDS.add(WebPage.Field.METADATA);
+  }
 
-	private static final HashMap<String,String> WORK_TYPE_NAMES = new HashMap<String,String>();
-	static {
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive",
-				"interactive");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
-		WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
-	}
+  private static final HashMap<String, String> WORK_TYPE_NAMES = new HashMap<String, String>();
+  static {
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive",
+        "interactive");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
+    WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
+  }
 
-	private Configuration conf;
+  private Configuration conf;
 
-	public void setConf(Configuration conf) {
-		this.conf = conf;
-	}
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
 
-	public Configuration getConf() {
-		return this.conf;
-	}
+  public Configuration getConf() {
+    return this.conf;
+  }
 
-	@Override
-	public Collection<Field> getFields() {
-		return FIELDS;
-	}
+  @Override
+  public Collection<Field> getFields() {
+    return FIELDS;
+  }
 
-	/**
-	 * Adds metadata or otherwise modifies a parse of an HTML document, given
-	 * the DOM tree of a page.
-	 */
-	@Override
-	public Parse filter(String url, WebPage page, Parse parse,
-			HTMLMetaTags metaTags, DocumentFragment doc) {
-		// construct base url
-		URL base;
-		try {
-			base = new URL(page.getBaseUrl().toString());
-			// extract license metadata
-			Walker.walk(doc, base, page, getConf());
-		} catch (Exception e) {
-			LOG.error("Error parsing " + url, e);
-			return ParseStatusUtils.getEmptyParse(e, getConf());
-		}
+  /**
+   * Adds metadata or otherwise modifies a parse of an HTML document, given the
+   * DOM tree of a page.
+   */
+  @Override
+  public Parse filter(String url, WebPage page, Parse parse,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+    // construct base url
+    URL base;
+    try {
+      base = new URL(page.getBaseUrl().toString());
+      // extract license metadata
+      Walker.walk(doc, base, page, getConf());
+    } catch (Exception e) {
+      LOG.error("Error parsing " + url, e);
+      return ParseStatusUtils.getEmptyParse(e, getConf());
+    }
 
-		return parse;
-	}
+    return parse;
+  }
 }
Index: src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java
===================================================================
--- src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java	(revision 1188268)
