Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 505282)
+++ src/plugin/build.xml	(working copy)
@@ -29,6 +29,7 @@
      <ant dir="clustering-carrot2" target="deploy"/>
      <ant dir="creativecommons" target="deploy"/>
      <ant dir="index-basic" target="deploy"/>
+     <ant dir="index-domain" target="deploy"/>
      <ant dir="index-more" target="deploy"/>
      <ant dir="languageidentifier" target="deploy"/>
      <ant dir="lib-http" target="deploy"/>
@@ -61,6 +62,7 @@
      <ant dir="parse-text" target="deploy"/>
      <ant dir="parse-zip" target="deploy"/>
      <ant dir="query-basic" target="deploy"/>
+     <ant dir="query-domain" target="deploy"/>
      <ant dir="query-more" target="deploy"/>
      <ant dir="query-site" target="deploy"/>
      <ant dir="query-url" target="deploy"/>
@@ -117,6 +119,7 @@
     <ant dir="clustering-carrot2" target="clean"/>
     <ant dir="creativecommons" target="clean"/>
     <ant dir="index-basic" target="clean"/>
+    <ant dir="index-domain" target="clean"/>
     <ant dir="index-more" target="clean"/>
     <ant dir="languageidentifier" target="clean"/>
     <ant dir="lib-commons-httpclient" target="clean"/>
@@ -150,6 +153,7 @@
     <ant dir="parse-text" target="clean"/>
     <ant dir="parse-zip" target="clean"/>
     <ant dir="query-basic" target="clean"/>
+    <ant dir="query-domain" target="clean"/>
     <ant dir="query-more" target="clean"/>
     <ant dir="query-site" target="clean"/>
     <ant dir="query-url" target="clean"/>
Index: src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/DomainIndexingFilter.java
===================================================================
--- src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/DomainIndexingFilter.java	(revision 0)
+++ src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/DomainIndexingFilter.java	(revision 0)
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer.domain;
+
+import java.net.URL;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.indexer.IndexingException;
+import org.apache.nutch.indexer.IndexingFilter;
+import org.apache.nutch.parse.Parse;
+
+/**
+ * Adds the domain(hostname) and all super domains to the index. 
+ * <br> For <code>http://lucene.apache.org/nutch/</code> the 
+ * following will be added to the index : <br> 
+ * <ul>
+ * <li><code>lucene.apache.org</code> </li>
+ * <li><code>apache.org</code> </li>
+ * <li><code>org</code> </li>
+ * </ul>
+ * All hostnames are domain names, but not all the domain names are 
+ * hostnames. In the above example hostname <code>lucene</code> is a 
+ * subdomain of <code>apache.org</code>, which is itself a subdomain of 
+ * <code>org</code> <br>
+ * 
+ * Indexing the domain this way makes it possible to search by domain. Unlike 
+ * a search on the site field (indexed by BasicIndexingFilter), a search on the 
+ * domain field retrieves lucene.apache.org for the query 
+ * <code>domain:apache.org</code>
+ *  
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ * @see RFC 920 - http://tools.ietf.org/html/rfc920
+ * @see query-domain plugin - DomainQueryFilter
+ */
+public class DomainIndexingFilter implements IndexingFilter {
+  private static final Log LOG = LogFactory.getLog(DomainIndexingFilter.class);
+  private static final String FIELD_NAME = "domain";
+  private Configuration conf;
+
+  /** Adds the host of <code>urlText</code> and each of its super domains to <code>doc</code>. */
+  public Document filter(Document doc, Parse parse, Text urlText, CrawlDatum datum,
+      Inlinks inlinks) throws IndexingException {
+    try {
+      String host = new URL(urlText.toString()).getHost();
+      if (host.length() == 0) { // no host (e.g. a file: URL): nothing to index
+        return doc;
+      }
+      String[] parts = host.split("\\.");
+      StringBuilder domain = new StringBuilder(host.length()+1);
+      for(int i=parts.length-1; i>=0; i--) {
+        domain.insert(0,parts[i]);
+        //insert as tokenized, but the analyzer will be WhiteSpaceAnalyzer
+        doc.add(new Field(FIELD_NAME, domain.toString(), Field.Store.NO, Field.Index.TOKENIZED));
+        domain.insert(0,'.');
+      }
+    } catch (Exception ex) {
+      // best effort: the document is still returned; log the URL and the full stack trace
+      LOG.warn("Could not add domain fields for " + urlText, ex);
+    }
+    return doc;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+}
Index: src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/package.html
===================================================================
--- src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/package.html	(revision 0)
+++ src/plugin/index-domain/src/java/org/apache/nutch/indexer/domain/package.html	(revision 0)
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>Contains the domain indexing plugin.</p><p></p>
+</body>
+</html>
Index: src/plugin/index-domain/plugin.xml
===================================================================
--- src/plugin/index-domain/plugin.xml	(revision 0)
+++ src/plugin/index-domain/plugin.xml	(revision 0)
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin id="index-domain" name="Domain indexing Plugin"
+	version="1.0.0" provider-name="org.apache">
+
+	<runtime>
+		<library name="index-domain.jar">
+			<export name="*" />
+		</library>
+	</runtime>
+
+	<requires>
+		<import plugin="nutch-extensionpoints" />
+	</requires>
+
+	<extension id="org.apache.nutch.indexer.domain" 
+		   name="Domain Indexing Filter"
+		   point="org.apache.nutch.indexer.IndexingFilter">
+
+		<implementation 
+			id="org.apache.nutch.indexer.domain.DomainIndexingFilter"
+			class="org.apache.nutch.indexer.domain.DomainIndexingFilter">
+		</implementation>
+	</extension>
+</plugin>
+
Index: src/plugin/index-domain/build.xml
===================================================================
--- src/plugin/index-domain/build.xml	(revision 0)
+++ src/plugin/index-domain/build.xml	(revision 0)
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="index-domain" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- Build compilation dependencies -->
+  <target name="deps-jar">
+	  
+  </target>
+
+  
+  
+</project>
Index: src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/DomainQueryFilter.java
===================================================================
--- src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/DomainQueryFilter.java	(revision 0)
+++ src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/DomainQueryFilter.java	(revision 0)
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.searcher.domain;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.TranslatingRawFieldQueryFilter;
+
+/**
+ * Allows querying the domain field. This is a subclass of
+ * {@link TranslatingRawFieldQueryFilter}, so queries of the 
+ * form <code>site:&lt;sitename&gt;</code> are translated to 
+ * <code>domain:&lt;sitename&gt;</code>.
+ * <br> This makes it possible to retrieve results from <code>http://lucene.apache.org/</code>
+ * for the query <code>site:apache.org</code>.
+ * 
+ * <br><strong> Note: </strong> query-site should not be included together with
+ * query-domain, since that can cause strange behaviour depending on the order of 
+ * plugin loading.
+ * 
+ * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
+ * @see index-domain plugin - DomainIndexingFilter
+ */
+public class DomainQueryFilter extends TranslatingRawFieldQueryFilter {
+  // Queries typed against "site" are answered from the indexed "domain" field.
+  private static final String INDEXED_FIELD = "domain";
+  private static final String QUERIED_FIELD = "site";
+  // Default boost is small but not 0, to avoid Lucene truncating 0-scoring docs.
+  private static final float DEFAULT_BOOST = 0.000001f;
+
+  public DomainQueryFilter() {
+    super(INDEXED_FIELD, QUERIED_FIELD);
+  }
+
+  /** Passes the configuration on and sets the boost from <code>query.domain.boost</code>. */
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    setBoost(conf.getFloat("query.domain.boost", DEFAULT_BOOST));
+  }
+}
Index: src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/package.html
===================================================================
--- src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/package.html	(revision 0)
+++ src/plugin/query-domain/src/java/org/apache/nutch/searcher/domain/package.html	(revision 0)
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>Contains the domain query plugin.</p><p></p>
+</body>
+</html>
Index: src/plugin/query-domain/src/java/com/agmlab/bilgi/indexer/DomainIndexingFilter.java
===================================================================
--- src/plugin/query-domain/src/java/com/agmlab/bilgi/indexer/DomainIndexingFilter.java	(revision 0)
+++ src/plugin/query-domain/src/java/com/agmlab/bilgi/indexer/DomainIndexingFilter.java	(revision 0)
@@ -0,0 +1,66 @@
+package com.agmlab.bilgi.indexer;
+
+import java.net.URL;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.indexer.IndexingException;
+import org.apache.nutch.indexer.IndexingFilter;
+import org.apache.nutch.parse.Parse;
+
+/**
+ * Adds the domain(hostname) and all super domains to the index. 
+ * <br> For <code>http://www.blog.bilgi.com/</code> the 
+ * following will be added to the index : <br> 
+ * <ul>
+ * <li><code>www.blog.bilgi.com</code> </li>
+ * <li><code>blog.bilgi.com</code> </li>
+ * <li><code>bilgi.com</code> </li>
+ * <li><code>com</code> </li>
+ * </ul>
+ * @author enis.soztutar@agmlab.com
+ * @see RFC 920 - http://tools.ietf.org/html/rfc920
+ */
+public class DomainIndexingFilter implements IndexingFilter {
+  // NOTE(review): looks like a stray copy of the index-domain filter; query-domain/plugin.xml does not register it - confirm.
+  private static final Log LOG = LogFactory.getLog(DomainIndexingFilter.class);
+  private static final String FIELD_NAME = "domain";
+  private Configuration conf;
+
+  /** Adds the host of <code>urlText</code> and each of its super domains to <code>doc</code>. */
+  public Document filter(Document doc, Parse parse, Text urlText, CrawlDatum datum,
+      Inlinks inlinks) throws IndexingException {
+    try {
+      String host = new URL(urlText.toString()).getHost();
+      if (host.length() == 0) { // no host (e.g. a file: URL): nothing to index
+        return doc;
+      }
+      String[] parts = host.split("\\.");
+      StringBuilder domain = new StringBuilder(host.length()+1);
+      for(int i=parts.length-1; i>=0; i--) {
+        domain.insert(0,parts[i]);
+        //insert as tokenized, but the analyzer will be WhiteSpaceAnalyzer
+        doc.add(new Field(FIELD_NAME, domain.toString(), Field.Store.NO, Field.Index.TOKENIZED));
+        domain.insert(0,'.');
+      }
+    } catch (Exception ex) {
+      // best effort: the document is still returned; log the URL and the full stack trace
+      LOG.warn("Could not add domain fields for " + urlText, ex);
+    }
+    return doc;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+}
Index: src/plugin/query-domain/plugin.xml
===================================================================
--- src/plugin/query-domain/plugin.xml	(revision 0)
+++ src/plugin/query-domain/plugin.xml	(revision 0)
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<plugin id="query-domain" name="Domain Query Filter Plugin"
+	version="1.0.0" provider-name="org.apache.nutch">
+
+	<runtime>
+		<library name="query-domain.jar">
+			<export name="*" />
+		</library>
+	</runtime>
+
+	<requires>
+		<import plugin="nutch-extensionpoints" />
+	</requires>
+
+	<extension id="org.apache.nutch.searcher.domain" 
+		   name="Domain Query Filter"
+		   point="org.apache.nutch.searcher.QueryFilter">
+
+		<implementation 
+			id="org.apache.nutch.searcher.domain.DomainQueryFilter"
+			class="org.apache.nutch.searcher.domain.DomainQueryFilter">
+		</implementation>
+	</extension>
+</plugin>
Index: src/plugin/query-domain/build.xml
===================================================================
--- src/plugin/query-domain/build.xml	(revision 0)
+++ src/plugin/query-domain/build.xml	(revision 0)
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="query-domain" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- Build compilation dependencies -->
+  <target name="deps-jar">
+	  
+  </target>
+
+</project>
Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 505282)
+++ conf/nutch-default.xml	(working copy)
@@ -738,7 +738,7 @@
 
 <property>
   <name>plugin.includes</name>
-  <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
+  <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-(basic|domain)|query-(basic|site|url)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
   <description>Regular expression naming plugin directory names to
   include.  Any plugin not matching this expression is excluded.
   In any case you need at least include the nutch-extensionpoints plugin. By
@@ -962,6 +962,17 @@
   </description>
 </property>
 
+<!-- query-domain plugin properties -->
+
+<property>
+  <name>query.domain.boost</name>
+  <value>0.000001</value>
+  <description> Used as a boost for the domain field in the Lucene query.
+  The default boost is a small constant instead of 0, to keep Lucene from 
+  truncating 0-scoring docs as an optimization. 
+  </description>
+</property>
+
 <!-- microformats-reltag plugin properties -->
 
 <property>
