Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 777361)
+++ conf/nutch-default.xml	(working copy)
@@ -826,7 +826,7 @@
 
 <property>
   <name>urlnormalizer.order</name>
-  <value>org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer</value>
+  <value>org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer org.apache.nutch.net.urlnormalizer.unalias.UnaliasURLNormalizer</value>
   <description>Order in which normalizers will run. If any of these isn't
   activated it will be silently skipped. If other normalizers not on the
   list are activated, they will run in random order after the ones
@@ -835,6 +835,13 @@
 </property>
 
 <property>
+  <name>urlnormalizer.unalias.file</name>
+  <value>unalias-urlnormalizer.txt</value>
+  <description>Name of the file on the CLASSPATH containing the host aliases
+  used by the urlnormalizer-unalias (UnaliasURLNormalizer) plugin.</description>
+</property>
+
+<property>
   <name>urlnormalizer.regex.file</name>
   <value>regex-normalize.xml</value>
   <description>Name of the config file used by the RegexUrlNormalizer class.
Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 777361)
+++ src/plugin/build.xml	(working copy)
@@ -85,6 +85,7 @@
      <ant dir="urlnormalizer-basic" target="deploy"/>
      <ant dir="urlnormalizer-pass" target="deploy"/>
      <ant dir="urlnormalizer-regex" target="deploy"/>
+     <ant dir="urlnormalizer-unalias" target="deploy"/>
   </target>
 
   <!-- ====================================================== -->
@@ -120,6 +121,7 @@
      <ant dir="urlnormalizer-basic" target="test"/>
      <ant dir="urlnormalizer-pass" target="test"/>
      <ant dir="urlnormalizer-regex" target="test"/>
+     <ant dir="urlnormalizer-unalias" target="test"/>
     </parallel>
   </target>
 
@@ -189,5 +191,6 @@
     <ant dir="urlnormalizer-basic" target="clean"/>
     <ant dir="urlnormalizer-pass" target="clean"/>
     <ant dir="urlnormalizer-regex" target="clean"/>
+    <ant dir="urlnormalizer-unalias" target="clean"/>
   </target>
 </project>
Index: src/plugin/urlnormalizer-unalias/src/test/org/apache/nutch/net/urlnormalizer/unalias/TestUnaliasURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-unalias/src/test/org/apache/nutch/net/urlnormalizer/unalias/TestUnaliasURLNormalizer.java	(revision 0)
+++ src/plugin/urlnormalizer-unalias/src/test/org/apache/nutch/net/urlnormalizer/unalias/TestUnaliasURLNormalizer.java	(revision 0)
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.net.urlnormalizer.unalias;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLNormalizers;
+import org.apache.nutch.util.NutchConfiguration;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import junit.framework.TestCase;
+
+/** JUnit test case for UnaliasURLNormalizer, driven by the sample aliases.txt file. */
+public class TestUnaliasURLNormalizer extends TestCase {
+  protected static final Log LOG = LogFactory.getLog(TestUnaliasURLNormalizer.class);
+
+  // FILE_PREFIX is the directory named by the "test.data" property (default ".")
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+  private final static String FILE_PREFIX = SAMPLES + SEPARATOR;
+
+  private UnaliasURLNormalizer normalizer;
+  private Configuration conf;
+
+  /** Creates the normalizer under test from FILE_PREFIX + "aliases.txt". */
+  public TestUnaliasURLNormalizer(String name) {
+    super(name);
+    String aliasesPath = FILE_PREFIX + "aliases.txt";
+    normalizer = new UnaliasURLNormalizer(aliasesPath);
+    normalizer.setConf(NutchConfiguration.create());
+  }
+
+  /** Normalizes each {input, expected} pair and compares the outcome. */
+  public void testNormalizer() throws Exception {
+    String[][] cases = {
+      {"http://www.knigi.tomsk.ru/", "http://knigi.tomsk.ru/"},
+      {"http://www.knigi.tomsk.ru/go/www.knigi.tomsk.ru", "http://knigi.tomsk.ru/go/www.knigi.tomsk.ru"},
+      {"http://www.tusur.ru", "http://www.tusur.ru"},
+      {"http://www.gov.tomsk.ru", "http://tomsk.gov.ru"}
+    };
+    for (String[] pair : cases) {
+      normalizeTest(pair[0], pair[1]);
+    }
+  }
+
+  /** Asserts that normalizing weird in the default scope yields normal. */
+  private void normalizeTest(String weird, String normal) throws Exception {
+    assertEquals(normal, normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT));
+  }
+
+  /** Runs testNormalizer directly, without a JUnit runner. */
+  public static void main(String[] args) throws Exception {
+    new TestUnaliasURLNormalizer("test").testNormalizer();
+  }
+}
Index: src/plugin/urlnormalizer-unalias/src/java/org/apache/nutch/net/urlnormalizer/unalias/UnaliasURLNormalizer.java
===================================================================
--- src/plugin/urlnormalizer-unalias/src/java/org/apache/nutch/net/urlnormalizer/unalias/UnaliasURLNormalizer.java	(revision 0)
+++ src/plugin/urlnormalizer-unalias/src/java/org/apache/nutch/net/urlnormalizer/unalias/UnaliasURLNormalizer.java	(revision 0)
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.net.urlnormalizer.unalias;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+
+import java.net.InetAddress;
+import java.net.MalformedURLException;
+
+import java.util.Map;
+import java.util.HashMap;
+
+// Commons Lang imports
+import org.apache.commons.lang.StringUtils;
+
+// Commons Logging imports
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+// Nutch imports
+import org.apache.nutch.net.URLNormalizer;
+import org.apache.nutch.util.LogUtil;
+import org.apache.nutch.util.URLUtil;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.oro.text.regex.*;
+
+/**
+ * URL normalizer that replaces the host of a URL with its canonical host name.
+ *
+ * The host mapping is loaded in {@link #setConf(Configuration)} from an aliases
+ * file. Every line that is neither blank nor a comment (starting with
+ * {@code #}) lists whitespace-separated host names: the first one is the
+ * canonical host, the others are its aliases. For each listed host that does
+ * not already start with {@code www.}, the {@code www.}-prefixed variant is
+ * registered as an alias too.
+ */
+public class UnaliasURLNormalizer implements URLNormalizer {
+  public static final Log LOG = LogFactory.getLog(UnaliasURLNormalizer.class);
+
+  /** Maps each known host name to its canonical host name. */
+  private Map<String, String> aliases = new HashMap<String, String>();
+
+  /** File-system path of the aliases file, or null to use the configured resource. */
+  private String aliasesFile;
+
+  private Configuration conf;
+
+  /** Creates a normalizer that loads the aliases resource named in the configuration. */
+  public UnaliasURLNormalizer() {
+    super();
+  }
+
+  /**
+   * Creates a normalizer that loads its aliases from the given file.
+   *
+   * @param aliasesFile file-system path of the aliases file; it is read when
+   *        {@link #setConf(Configuration)} is called
+   */
+  public UnaliasURLNormalizer(String aliasesFile) {
+    this.aliasesFile = aliasesFile;
+  }
+
+  /**
+   * Replaces the host of the URL with its canonical host, if an alias is known.
+   *
+   * @param urlString the URL to normalize
+   * @param scope normalization scope; not used by this normalizer
+   * @return the URL with its host replaced, or urlString unchanged when the
+   *         host has no registered alias
+   */
+  public String normalize(String urlString, String scope) throws MalformedURLException {
+    String host = URLUtil.getHost(urlString);
+    String canonical = (host == null) ? null : aliases.get(host);
+    if (canonical == null) {
+      return urlString;
+    }
+    // Substitute the first literal occurrence of the host. String.replaceFirst
+    // is avoided on purpose: it would treat the host as a regular expression,
+    // so every '.' in it would match any character.
+    int start = urlString.indexOf(host);
+    if (start < 0) {
+      return urlString;
+    }
+    return urlString.substring(0, start) + canonical + urlString.substring(start + host.length());
+  }
+
+  /**
+   * Stores the configuration and loads the aliases. The file passed to the
+   * constructor is used if there is one; otherwise the resource named by the
+   * {@code urlnormalizer.unalias.file} property (default
+   * {@code unalias-urlnormalizer.txt}) is read. Failures are logged and
+   * leave the normalizer with whatever aliases were loaded so far.
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+
+    Reader reader = null;
+    try {
+      // an explicitly given aliases file takes precedence over the configured one
+      if (aliasesFile != null) {
+        reader = new FileReader(aliasesFile);
+      } else {
+        reader = conf.getConfResourceAsReader(conf.get("urlnormalizer.unalias.file", "unalias-urlnormalizer.txt"));
+      }
+      // NOTE(review): getConfResourceAsReader is expected to return null when
+      // the resource cannot be found - confirm against the Hadoop version in use.
+      if (reader == null) {
+        LOG.error("Unable to open the aliases file of UnaliasURLNormalizer");
+      } else {
+        readConfigurationFile(reader);
+      }
+    } catch (IOException e) {
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+    } finally {
+      // close the reader so the underlying file handle is not leaked
+      if (reader != null) {
+        try {
+          reader.close();
+        } catch (IOException e) {
+          LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
+        }
+      }
+    }
+  }
+
+  /** Returns the configuration given to {@link #setConf(Configuration)}. */
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  /**
+   * Parses the aliases file and registers every host found in it.
+   *
+   * @param configReader source of the aliases file; it is not closed here
+   * @throws IOException if reading fails
+   */
+  private void readConfigurationFile(Reader configReader) throws IOException {
+    BufferedReader reader = new BufferedReader(configReader);
+    String line = null;
+    while ((line = reader.readLine()) != null) {
+      // skip blank lines and comments; trimming lets indented comments count too
+      if (StringUtils.isBlank(line) || line.trim().startsWith("#")) {
+        continue;
+      }
+      // split on runs of whitespace so that tabs or repeated blanks do not
+      // produce empty host names
+      String[] hosts = line.trim().split("\\s+");
+      for (int i = 0; i < hosts.length; i++) {
+        // the canonical host (i == 0) is mapped to itself as well, which also
+        // registers its "www." variant below
+        aliases.put(hosts[i], hosts[0]);
+        if (!hosts[i].startsWith("www.")) {
+          aliases.put("www." + hosts[i], hosts[0]);
+        }
+      }
+    }
+  }
+}
+
Index: src/plugin/urlnormalizer-unalias/data/aliases.txt
===================================================================
--- src/plugin/urlnormalizer-unalias/data/aliases.txt	(revision 0)
+++ src/plugin/urlnormalizer-unalias/data/aliases.txt	(revision 0)
@@ -0,0 +1,2 @@
+tomsk.gov.ru gov.tomsk.ru
+knigi.tomsk.ru
Index: src/plugin/urlnormalizer-unalias/plugin.xml
===================================================================
--- src/plugin/urlnormalizer-unalias/plugin.xml	(revision 0)
+++ src/plugin/urlnormalizer-unalias/plugin.xml	(revision 0)
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="urlnormalizer-unalias"
+   name="Unalias URL Normalizer"
+   version="1.0.0"
+   provider-name="nutch.org">
+   <!-- Library (jar) of this plugin; the export element uses the "*" wildcard. -->
+   <runtime>
+      <library name="urlnormalizer-unalias.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+   <!-- Imports nutch-extensionpoints (presumably where the extension point used below is declared; verify). -->
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+   <!-- Plugs UnaliasURLNormalizer into the org.apache.nutch.net.URLNormalizer extension point. -->
+   <extension id="org.apache.nutch.net.urlnormalizer.unalias"
+              name="Nutch Unalias URL Normalizer"
+              point="org.apache.nutch.net.URLNormalizer">
+      <implementation id="UnaliasURLNormalizer"
+                      class="org.apache.nutch.net.urlnormalizer.unalias.UnaliasURLNormalizer"/>
+   </extension>
+
+</plugin>
Index: src/plugin/urlnormalizer-unalias/build.xml
===================================================================
--- src/plugin/urlnormalizer-unalias/build.xml	(revision 0)
+++ src/plugin/urlnormalizer-unalias/build.xml	(revision 0)
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="urlnormalizer-unalias" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- for junit test: copy the contents of the data directory to ${build.test}/data -->
+  <mkdir dir="${build.test}/data"/>
+  <copy todir="${build.test}/data">
+    <fileset dir="data" />
+  </copy>
+
+</project>
