Index: src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java	(revision 1692121)
+++ src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java	(working copy)
@@ -23,6 +23,8 @@
 import java.net.MalformedURLException;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configured;
@@ -49,9 +51,19 @@
    */
   private final static Pattern hasNormalizablePathPattern = Pattern
       .compile("/[./]|[.]/");
+      
+  /**
+   * NUTCH-1098: matches a percent-escape, capturing its two hex digits
+   */
+  private final static Pattern unescapeRulePattern = Pattern
+      .compile("%([0-9A-Fa-f]{2})");
+
+  // charset used to encode URLs into bytes before escaping; constant, so shared
+  private final static Charset utf8 = Charset.forName("UTF-8");
 
   public String normalize(String urlString, String scope)
       throws MalformedURLException {
+    
     if ("".equals(urlString)) // permit empty
       return urlString;
 
@@ -100,9 +112,17 @@
         changed = true;
         file = file2;
       }
-
     }
+    
+    // decode %nn characters
+    urlString = unescapeURL(urlString);
+   
+    // replace spaces in URL
+    urlString = urlString.replace(" ","%20");
 
+    // encode non ascii characters
+    urlString = escapeURL(urlString);
+
     if (changed)
       urlString = new URL(protocol, host, port, file).toString();
 
@@ -141,7 +161,85 @@
 
     return file;
   }
+  
+  /**
+   * Decodes percent-escapes that stand for RFC 3986 "unreserved" characters
+   * (ASCII letters, digits, '-', '.', '_' and '~'); every other escape is
+   * kept, with its hex digits upper-cased. Reserved characters such as '%',
+   * '/', '?', '#', '&', ':' and '=' must stay encoded: decoding them would
+   * change the meaning of the URL, and decoding "%25" would let a second
+   * normalization pass decode whatever followed it (e.g. "%2541" to "A").
+   *
+   * @param url URL string that may contain %XX escapes
+   * @return the URL with escaped unreserved characters decoded
+   */
+  private String unescapeURL(String url) {
+    StringBuilder sb = new StringBuilder(url.length());
+    Matcher matcher = unescapeRulePattern.matcher(url);
+
+    // Index of the first character not yet copied to the output
+    int copied = 0;
+    // Traverse over all encoded groups
+    while (matcher.find()) {
+      // Append everything up to this group
+      sb.append(url, copied, matcher.start());
+
+      // Get the integer value of this hexadecimal encoded character
+      int letter = Integer.parseInt(matcher.group(1), 16);
+
+      // Only unreserved characters can be decoded without changing the URL
+      boolean unreserved = (letter >= 'a' && letter <= 'z')
+          || (letter >= 'A' && letter <= 'Z') || (letter >= '0' && letter <= '9')
+          || letter == '-' || letter == '.' || letter == '_' || letter == '~';
+
+      // Decode unreserved characters, keep the rest as upper-case escapes
+      if (unreserved) {
+        sb.append((char) letter);
+      } else {
+        sb.append(matcher.group().toUpperCase());
+      }
+      copied = matcher.end();
+    }
+
+    // Append the rest if there's anything
+    return sb.append(url, copied, url.length()).toString();
+  }
+
+  /**
+   * Encodes the URL as UTF-8 and percent-escapes every byte that is not
+   * printable ASCII: control characters (0x00-0x1F and DEL, 0x7F) and all
+   * bytes with the high bit set, i.e. the UTF-8 encoding of non-ASCII
+   * characters. Escapes are written as '%' plus two upper-case hex digits.
+   * Bytes 0x20-0x7E, including space and '%', are copied unchanged.
+   *
+   * @param url URL string, possibly containing non-ASCII characters
+   * @return the URL with control and non-ASCII bytes percent-escaped
+   */
+  private String escapeURL(String url) {
+    // Lookup table for upper-case hexadecimal digits
+    final char[] hexDigits = "0123456789ABCDEF".toCharArray();
+    StringBuilder sb = new StringBuilder(url.length());
+
+    // Traverse over all bytes in this URL
+    for (byte b : url.getBytes(utf8)) {
+      // Work on the unsigned value: Java bytes are signed, so the bytes of
+      // multi-byte UTF-8 sequences (0x80-0xFF) would otherwise be negative
+      int value = b & 0xFF;
+
+      if (value < 0x20 || value >= 0x7F) {
+        // Control character, DEL or high-bit byte: always two hex digits
+        sb.append('%');
+        sb.append(hexDigits[value >> 4]);
+        sb.append(hexDigits[value & 0x0F]);
+      } else {
+        // Printable ASCII, append this character as-is
+        sb.append((char) value);
+      }
+    }
+
+    return sb.toString();
+  }
+
   public static void main(String args[]) throws IOException {
     BasicURLNormalizer normalizer = new BasicURLNormalizer();
     normalizer.setConf(NutchConfiguration.create());
Index: src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java	(revision 1692121)
+++ src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java	(working copy)
@@ -34,8 +34,58 @@
     conf = NutchConfiguration.create();
     normalizer.setConf(conf);
   }
+  
+  @Test
+  public void testNUTCH1098() throws Exception {
+    // check that a percent-encoded letter is decoded
+    normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html");
 
+    // check that % decoding works correctly at and near the end of the URL
+    normalizeTest("http://foo.com/%66oo.htm%6c", "http://foo.com/foo.html");
+    normalizeTest("http://foo.com/%66oo.ht%6dl", "http://foo.com/foo.html");
+
+    // check that adjacent % escapes are each decoded independently
+    normalizeTest("http://foo.com/%66oo.ht%6d%6c", "http://foo.com/foo.html");
+    
+    // check that % decoder leaves high bit chars alone
+    normalizeTest("http://foo.com/%66oo.htm%C0", "http://foo.com/foo.htm%C0");
+
+    // check that % decoder leaves control chars alone
+    normalizeTest("http://foo.com/%66oo.htm%1A", "http://foo.com/foo.htm%1A");
+
+    // check that hex digits of a retained escape are converted to upper case
+    normalizeTest("http://foo.com/%66oo.htm%c0", "http://foo.com/foo.htm%C0");
+
+    // check that % decoder leaves encoded spaces alone
+    normalizeTest("http://foo.com/you%20too.html", "http://foo.com/you%20too.html");
+
+    // check that spaces are encoded into %20
+    normalizeTest("http://foo.com/you too.html", "http://foo.com/you%20too.html");
+
+    // check that encoded # are not decoded
+    normalizeTest("http://foo.com/file.html%23cz", "http://foo.com/file.html%23cz");
+
+    // check that encoded / are not decoded (only the hex digits are upper-cased)
+    normalizeTest("http://foo.com/fast/dir%2fcz", "http://foo.com/fast/dir%2Fcz");
+
+    // check that control chars are encoded
+    normalizeTest("http://foo.com/\u001a!", "http://foo.com/%1A!");
+
+    // check that control chars are always encoded into 2 digits
+    normalizeTest("http://foo.com/\u0001!", "http://foo.com/%01!");
+
+    // check that a non-ASCII char (Spanish n with tilde) is escaped as UTF-8 bytes
+    normalizeTest("http://mydomain.com/en Espa\u00F1ol.aspx", "http://mydomain.com/en%20Espa%C3%B1ol.aspx");
+  }
+  
   @Test
+  public void testNUTCH2064() throws Exception {
+    // Encoded ampersand (%26), colon (%3A) and slash (%2F) must stay escaped
+    normalizeTest("http://x.com/s?q=a%26b&m=10", "http://x.com/s?q=a%26b&m=10");
+    normalizeTest("http://x.com/show?http%3A%2F%2Fx.com%2Fb", "http://x.com/show?http%3A%2F%2Fx.com%2Fb");
+  }
+
+  @Test
   public void testNormalizer() throws Exception {
     // check that leading and trailing spaces are removed
     normalizeTest(" http://foo.com/ ", "http://foo.com/");
