Index: conf/log4j.properties
===================================================================
--- conf/log4j.properties	(revision 1461861)
+++ conf/log4j.properties	(working copy)
@@ -38,6 +38,8 @@
 log4j.logger.org.apache.nutch.indexer.DeleteDuplicates=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.WebTableReader=INFO,cmdstdout
 log4j.logger.org.apache.nutch.host.HostDbReader=INFO,cmdstdout
+log4j.logger.org.apache.nutch.parse.ParserChecker=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.IndexingFiltersChecker=INFO,cmdstdout
 
 log4j.logger.org.apache.nutch=INFO
 log4j.logger.org.apache.hadoop=WARN
Index: src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(revision 1461861)
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(working copy)
@@ -37,6 +37,7 @@
 import org.apache.nutch.protocol.ProtocolStatusUtils;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.URLUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -67,7 +68,7 @@
       return -1;
     }
 
-    url = args[0];
+    url = URLUtil.toASCII(args[0]);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("fetching: " + url);
Index: src/java/org/apache/nutch/parse/ParserChecker.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserChecker.java	(revision 1461861)
+++ src/java/org/apache/nutch/parse/ParserChecker.java	(working copy)
@@ -28,6 +28,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.nutch.crawl.SignatureFactory;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolFactory;
@@ -36,10 +37,32 @@
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.Bytes;
 import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.StringUtil;
+import org.apache.nutch.util.URLUtil;
 
 /**
  * Parser checker, useful for testing parser.
- * 
+ * It also accurately reports possible fetching and 
+ * parsing failures and presents protocol status signals to aid 
+ * debugging. The tool enables us to retrieve the following data from 
+ * any url:
+ * <ol>
+ * <li><tt>contentType</tt>: The URL {@link org.apache.nutch.protocol.Content} type.</li>
+ * <li><tt>signature</tt>: Digest is used to identify pages (like unique ID) and is used to remove
+ * duplicates during the dedup procedure. 
+ * It is calculated using {@link org.apache.nutch.crawl.MD5Signature} or
+ * {@link org.apache.nutch.crawl.TextProfileSignature}.</li>
+ * <li><tt>Version</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
+ * <li><tt>Status</tt>: From {@link org.apache.nutch.parse.ParseData}.</li>
+ * <li><tt>Title</tt>: of the URL</li>
+ * <li><tt>Outlinks</tt>: associated with the URL</li>
+ * <li><tt>Content Metadata</tt>: such as <i>X-AspNet-Version</i>, <i>Date</i>,
+ * <i>Content-length</i>, <i>servedBy</i>, <i>Content-Type</i>, <i>Cache-Control</i>, etc.</li>
+ * <li><tt>Parse Metadata</tt>: such as <i>CharEncodingForConversion</i>,
+ * <i>OriginalCharEncoding</i>, <i>language</i>, etc.</li>
+ * <li><tt>ParseText</tt>: The page parse text which varies in length depending on 
+ * <code>content.length</code> configuration.</li>
+ * </ol>
  * @author John Xing
  */
 
@@ -60,7 +83,7 @@
     String usage = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";
 
     if (args.length == 0) {
-      System.err.println(usage);
+      LOG.error(usage);
       return (-1);
     }
 
@@ -71,10 +94,10 @@
       } else if (args[i].equals("-dumpText")) {
         dumpText = true;
       } else if (i != args.length - 1) {
-        System.err.println(usage);
+        LOG.error(usage);
         System.exit(-1);
       } else {
-        url = args[i];
+        url = URLUtil.toASCII(args[i]);
       }
     }
 
@@ -110,15 +133,10 @@
     }
 
     if (contentType == null) {
-      System.err.println("");
+      LOG.error("Failed to determine content type!");
       return (-1);
     }
 
-    if (LOG.isInfoEnabled()) {
-      LOG.info("parsing: " + url);
-      LOG.info("contentType: " + contentType);
-    }
-
     page.setContentType(new Utf8(contentType));
 
     if (ParserJob.isTruncated(url, page)) {
@@ -128,13 +146,23 @@
     Parse parse = new ParseUtil(conf).parse(url, page);
 
     if (parse == null) {
-      System.err.println("Problem with parse - check log");
+      LOG.error("Problem with parse - check log");
       return (-1);
     }
+    
+    // Calculate the signature
+    byte[] signature = SignatureFactory.getSignature(getConf()).calculate(page);
+    
+    if (LOG.isInfoEnabled()) {
+      LOG.info("parsing: " + url);
+      LOG.info("contentType: " + contentType);
+      LOG.info("signature: " + StringUtil.toHexString(signature));
+    }
 
-    System.out.print("---------\nUrl\n---------------\n");
+
+    LOG.info("---------\nUrl\n---------------\n");
     System.out.print(url + "\n");
-    System.out.print("---------\nMetadata\n---------\n");
+    LOG.info("---------\nMetadata\n---------\n");
     Map<Utf8, ByteBuffer> metadata = page.getMetadata();
     StringBuffer sb = new StringBuffer();
     if (metadata != null) {
@@ -148,7 +176,7 @@
       System.out.print(sb.toString());
     }
     if (dumpText) {
-      System.out.print("---------\nParseText\n---------\n");
+      LOG.info("---------\nParseText\n---------\n");
       System.out.print(parse.getText());
     }
 
@@ -170,4 +198,5 @@
         args);
     System.exit(res);
   }
+
 }
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1461861)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -18,7 +18,7 @@
 package org.apache.nutch.util;
 
 import java.net.MalformedURLException;
-import java.net.URL;
+import java.net.*;
 import java.util.regex.Pattern;
 
 import org.apache.nutch.util.domain.DomainSuffix;
@@ -333,6 +333,96 @@
     }
   }
   
+  /**
+   * Converts the host name of a URL to its ASCII (Punycode) form using
+   * {@link java.net.IDN#toASCII(String)}.
+   * <p>
+   * URLs without a host name (for example <code>file:/tmp/x</code>) are
+   * returned unchanged. User info and port are carried over to the result.
+   * </p>
+   *
+   * @param url
+   *          the URL to convert
+   * @return the URL with an ASCII-only host name, or <code>null</code> if
+   *         <code>url</code> cannot be parsed or converted
+   */
+  public static String toASCII(String url) {
+    try {
+      URL u = new URL(url);
+      String host = u.getHost();
+      if (host == null || host.length() == 0) {
+        // No host name means nothing to encode; rebuilding the URL would
+        // only add an empty authority ("file:/x" becomes "file:///x").
+        return url;
+      }
+      // NOTE(review): the multi-argument URI constructors always quote '%',
+      // so an already escaped path such as "/a%20b" comes back as
+      // "/a%2520b" - confirm whether callers pass escaped URLs.
+      URI p = new URI(u.getProtocol(),
+        u.getUserInfo(),
+        IDN.toASCII(host),
+        u.getPort(),
+        u.getPath(),
+        u.getQuery(),
+        u.getRef());
+
+      return p.toString();
+    }
+    catch (Exception e) {
+      // Malformed URL, invalid host label or URI syntax error.
+      return null;
+    }
+  }
+
+  /**
+   * Converts the host name of a URL from its ASCII (Punycode) form back to
+   * Unicode using {@link java.net.IDN#toUnicode(String)}.
+   * <p>
+   * URLs without a host name are returned unchanged. The result is
+   * assembled as a plain string because a Unicode host name is rejected by
+   * the multi-argument {@link java.net.URI} constructors.
+   * </p>
+   *
+   * @param url
+   *          the URL to convert
+   * @return the URL with a Unicode host name, or <code>null</code> if
+   *         <code>url</code> cannot be parsed
+   */
+  public static String toUNICODE(String url) {
+    try {
+      URL u = new URL(url);
+      String host = u.getHost();
+      if (host == null || host.length() == 0) {
+        // No host name means nothing to decode.
+        return url;
+      }
+      StringBuilder sb = new StringBuilder();
+      sb.append(u.getProtocol());
+      sb.append("://");
+      if (u.getUserInfo() != null) {
+        sb.append(u.getUserInfo());
+        sb.append('@');
+      }
+      sb.append(IDN.toUnicode(host));
+      if (u.getPort() != -1) {
+        sb.append(':');
+        sb.append(u.getPort());
+      }
+      sb.append(u.getFile()); // path plus "?query", if any
+      if (u.getRef() != null) {
+        sb.append('#');
+        sb.append(u.getRef());
+      }
+
+      return sb.toString();
+    }
+    catch (Exception e) {
+      // Malformed URL.
+      return null;
+    }
+  }
+
+
   /** For testing */
   public static void main(String[] args){
     
