Index: src/plugin/subcollection/src/java/org/apache/nutch/collection/Subcollection.java
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/collection/Subcollection.java	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/collection/Subcollection.java	(revision 0)
@@ -0,0 +1,215 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.collection;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.nutch.net.URLFilter;
+import org.apache.xerces.util.DOMUtil;
+import org.w3c.dom.Element;
+
+/**
+ * SubCollection represents a subset of index, you can define url patterns that
+ * will indicate that particular page (url) is part of SubCollection.
+ */
+public class Subcollection extends Configured implements URLFilter{
+  
+  public static final String TAG_COLLECTIONS="subcollections";
+  public static final String TAG_COLLECTION="subcollection";
+  public static final String TAG_WHITELIST="whitelist";
+  public static final String TAG_BLACKLIST="blacklist";
+  public static final String TAG_NAME="name";
+  public static final String TAG_ID="id";
+
+  ArrayList blackList = new ArrayList();
+
+  ArrayList whiteList = new ArrayList();
+
+  /** 
+   * SubCollection identifier
+   */
+  String id;
+
+  /** 
+   * SubCollection name
+   */
+  String name;
+
+  /** 
+   * SubCollection whitelist as String
+   */
+  String wlString;
+
+  /**
+   * SubCollection blacklist as String
+   */
+  String blString;
+
+  /** Creates a SubCollection with the given id and name; both lists start empty.
+   * @param id id of SubCollection
+   * @param name name of SubCollection
+   * @param conf configuration, passed on to the single-argument constructor
+   */
+  public Subcollection(String id, String name, Configuration conf) {
+    this(conf);
+    this.id=id;
+    this.name = name;
+  }
+  /** Creates a SubCollection that has no id or name yet; conf is handed to the superclass. */
+  public Subcollection(Configuration conf){
+    super(conf);
+  }
+  
+  /**
+   * @return the name of this SubCollection, null when none has been set
+   */
+  public String getName() {
+    return name;
+  }
+
+  /**
+   * @return the id of this SubCollection, null when none has been set
+   */
+  public String getId() {
+    return id;
+  }
+
+  /**
+   * Returns the whitelist patterns as the internal list, not a copy.
+   * 
+   * @return Whitelist entries; changes to the list affect {@link #filter(String)}
+   */
+  public ArrayList getWhiteList() {
+    return whiteList;
+  }
+
+  /**
+   * Returns the text the whitelist was last set from (see initialize and
+   * setWhiteList(String)); it is not rebuilt from the list entries.
+   * @return Whitelist String, null when the whitelist was never set from text
+   */
+  public String getWhiteListString() {
+    return wlString;
+  }
+
+  /**
+   * Returns the text the blacklist was last set from (see initialize and
+   * setBlackList(String)); it is not rebuilt from the list entries.
+   * @return Blacklist String, null when the blacklist was never set from text
+   */
+  public String getBlackListString() {
+    return blString;
+  }
+
+  /**
+   * Replaces the whitelist entries; the whitelist String is left as it was.
+   * @param whiteList The whiteList to set; null clears the whitelist.
+   */
+  public void setWhiteList(ArrayList whiteList) {
+    this.whiteList = (whiteList != null) ? whiteList : new ArrayList();
+  }
+
+  /**
+   * Decides whether a url belongs to this SubCollection using plain substring
+   * ("indexOf") matching against the configured patterns.
+   * 
+   * <pre>
+   *  rules for evaluation are as follows:
+   *  1. if pattern matches in blacklist then url is rejected
+   *  2. if pattern matches in whitelist then url is allowed
+   *  3. url is rejected
+   * </pre>
+   * 
+   * @param urlString url to test; null is rejected
+   * @return urlString when accepted, null when rejected
+   * @see org.apache.nutch.net.URLFilter#filter(java.lang.String)
+   */
+  public String filter(String urlString) {
+    // a null url can never contain a pattern (and must not throw here)
+    if (urlString == null || containsAny(urlString, blackList)) {
+      return null;
+    }
+    return containsAny(urlString, whiteList) ? urlString : null;
+  }
+
+  /** Tells whether url contains at least one of the (String) patterns. */
+  private static boolean containsAny(String url, ArrayList patterns) {
+    for (Iterator i = patterns.iterator(); i.hasNext();) {
+      if (url.indexOf((String) i.next()) != -1) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Initialize SubCollection from dom element
+   * 
+   * @param collection
+   */
+  public void initialize(Element collection) {
+    this.name = DOMUtil.getChildText(
+        collection.getElementsByTagName(TAG_NAME).item(0)).trim();
+    this.wlString = DOMUtil.getChildText(
+        collection.getElementsByTagName(TAG_WHITELIST).item(0)).trim();
+    this.blString = DOMUtil.getChildText(
+        collection.getElementsByTagName(TAG_BLACKLIST).item(0)).trim();
+
+    parseList(this.whiteList, wlString);
+    parseList(this.blackList, blString);
+  }
+
+  /**
+   * Create a list of patterns from chunk of text, patterns are separated with
+   * newline. Blank lines are skipped: an empty pattern would match every url.
+   * 
+   * @param list receives the trimmed patterns; previous content is discarded
+   * @param text newline separated patterns, null is treated as no patterns
+   */
+  protected void parseList(ArrayList list, String text) {
+    list.clear();
+    if (text == null) return; // nothing to parse, leave the list empty
+
+    StringTokenizer st = new StringTokenizer(text, "\n\r");
+    while (st.hasMoreTokens()) {
+      String line = st.nextToken().trim();
+      if (line.length() > 0) list.add(line);
+    }
+  }
+
+  /**
+   * Set contents of blacklist from String: keeps the text and re-parses it
+   * into the pattern list, replacing the previous entries.
+   * @param list the blacklist contents, one pattern per line
+   */
+  public void setBlackList(String list) {
+    this.blString = list;
+    parseList(blackList, list);
+  }
+
+  /**
+   * Set contents of whitelist from String: keeps the text and re-parses it
+   * into the pattern list, replacing the previous entries.
+   * @param list the whitelist contents, one pattern per line
+   */
+  public void setWhiteList(String list) {
+    this.wlString = list;
+    parseList(whiteList, list);
+  }
+}
Index: src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/collection/CollectionManager.java	(revision 0)
@@ -0,0 +1,223 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.collection;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.nutch.util.DomUtil;
+import org.apache.xerces.dom.DocumentImpl;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+public class CollectionManager extends Configured {
+
+  public static final String DEFAULT_FILE_NAME = "subcollections.xml";
+
+  static final Logger LOG = org.apache.hadoop.util.LogFormatter.getLogger(CollectionManager.class
+      .getName());
+
+  transient Map collectionMap = new HashMap();
+
+  transient URL configfile;
+  
+  public CollectionManager(Configuration conf) {
+    super(conf);
+    init(); // loads the subcollection definitions; failures are logged, not thrown
+  }
+
+  protected void init(){
+    try {
+      LOG.info("initializing CollectionManager");
+      // initialize known subcollections
+      configfile = getConf().getResource(
+          getConf().get("subcollections.config", DEFAULT_FILE_NAME));
+
+      InputStream input = getConf().getConfResourceAsInputStream(
+          getConf().get("subcollections.config", DEFAULT_FILE_NAME));
+      Element collections = DomUtil.getDom(input);
+
+      if (collections != null) {
+        NodeList nodeList = collections
+            .getElementsByTagName(Subcollection.TAG_COLLECTION);
+
+        LOG.info("file has" + nodeList.getLength() + " elements");
+        
+        for (int i = 0; i < nodeList.getLength(); i++) {
+          Element scElem = (Element) nodeList.item(i);
+          Subcollection subCol = new Subcollection(getConf());
+          subCol.initialize(scElem);
+          collectionMap.put(subCol.name, subCol);
+        }
+      } else {
+        LOG.info("Cannot find collections");
+      }
+    } catch (Exception e) {
+      LOG.info("Error occured:" + e);
+      e.printStackTrace(System.out);
+    }
+  }
+  
+  /** Returns the CollectionManager cached in conf, creating it on first use. */
+  public static CollectionManager getCollectionManager(Configuration conf) {
+    final String cacheKey = "collectionmanager";
+    CollectionManager cached = (CollectionManager) conf.getObject(cacheKey);
+    if (cached != null) return cached;
+    try {
+      LOG.info("Instantiating CollectionManager");
+      cached = new CollectionManager(conf);
+      conf.setObject(cacheKey, cached);
+    } catch (Exception e) {
+      throw new RuntimeException("Couldn't create CollectionManager",e);
+    }
+    return cached;
+  }
+
+  /**
+   * Returns the subcollection stored under the given key. Collections loaded
+   * by init() are keyed by name, those from createSubCollection() by id.
+   * @param id key to look up
+   * @return Named SubCollection (or null if not existing)
+   */
+  public Subcollection getSubColection(final String id) {
+    return (Subcollection) collectionMap.get(id);
+  }
+
+  /**
+   * Removes the subcollection stored under the given key, if there is one.
+   * Only the in-memory map is changed here.
+   * 
+   * @param id
+   *          Id of SubCollection to delete
+   */
+  public void deleteSubCollection(final String id) throws IOException {
+    if (getSubColection(id) != null) {
+      collectionMap.remove(id);
+    }
+  }
+
+  /**
+   * Create a new subcollection and register it under its id.
+   * 
+   * @param id
+   *          Id of SubCollection to create, used as the lookup key
+   * @param name
+   *          Name of SubCollection to create
+   * @return Created SubCollection or null if already existed
+   */
+  public Subcollection createSubCollection(final String id, final String name) {
+    if (collectionMap.containsKey(id)) {
+      return null;
+    }
+    final Subcollection created = new Subcollection(id, name, getConf());
+    collectionMap.put(id, created);
+    return created;
+  }
+
+  /**
+   * Return names of collections url is part of
+   * 
+   * @param url
+   *          The url to test against Collections
+   * @return Names of the matching collections, each one preceded by a single
+   *         space; empty String when url is part of no collection
+   */
+  public String getSubCollections(final String url) {
+    final StringBuffer names = new StringBuffer();
+
+    for (Iterator it = collectionMap.values().iterator(); it.hasNext();) {
+      final Subcollection candidate = (Subcollection) it.next();
+      if (candidate.filter(url) != null) {
+        names.append(' ').append(candidate.name);
+      }
+    }
+    final String collections = names.toString();
+    LOG.fine("subcollections:" + collections);
+    return collections;
+  }
+
+  /**
+   * Returns all collections as the values view of the internal map (no copy).
+   * 
+   * @return All collections CollectionManager knows about
+   */
+  public Collection getAll() {
+    return collectionMap.values();
+  }
+
+  /**
+   * Save collections into the file they were loaded from. Values are written as
+   * text nodes; setNodeValue() has no effect on an Element, so nothing was stored.
+   * 
+   * @throws IOException if the file cannot be opened or closed
+   */
+  public void save() throws IOException {
+    final Document doc = new DocumentImpl();
+    final Element collections = doc
+        .createElement(Subcollection.TAG_COLLECTIONS);
+    for (Iterator it = collectionMap.values().iterator(); it.hasNext();) {
+      final Subcollection subCol = (Subcollection) it.next();
+      final Element collection = doc
+          .createElement(Subcollection.TAG_COLLECTION);
+      collections.appendChild(collection);
+      appendText(collection, Subcollection.TAG_NAME, subCol.getName());
+      if (subCol.getId() != null) appendText(collection, Subcollection.TAG_ID, subCol.getId());
+      appendText(collection, Subcollection.TAG_WHITELIST, subCol.getWhiteListString());
+      appendText(collection, Subcollection.TAG_BLACKLIST, subCol.getBlackListString());
+    }
+    FileOutputStream fos = null;
+    try {
+      // opened only after the tree is built, so a failure above keeps the old file
+      fos = new FileOutputStream(new File(configfile.getFile()));
+      DomUtil.saveDom(fos, collections);
+      fos.flush();
+    } catch (FileNotFoundException e) {
+      throw new IOException(e.toString());
+    } finally {
+      if (fos != null) fos.close(); // the stream used to leak when writing failed
+    }
+  }
+
+  /** Adds a tag element holding text to parent; null text is written as empty. */
+  private static void appendText(Element parent, String tag, String text) {
+    final Element child = parent.getOwnerDocument().createElement(tag);
+    child.appendChild(parent.getOwnerDocument().createTextNode(text == null ? "" : text));
+    parent.appendChild(child);
+  }
+
+  public static void main(String args) {
+    // NOTE(review): takes String, not String[], so the JVM will not treat this
+    // as a program entry point. Placeholder for collection management:
+    // add
+    // update
+    // delete
+    // list
+
+  }
+}
Index: src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/collection/package.html	(revision 0)
@@ -0,0 +1,30 @@
+<html>
+<body>
+<p>
+Subcollection is a subset of an index. Subcollections are defined
+by urlpatterns in form of white/blacklist. So to get the page into
+subcollection it must match the whitelist and not the blacklist.
+</p>
+<p>
+Subcollection definitions are read from a file subcollections.xml
+and the format is as follows (imagine here that you are crawling all
+the virtualhosts from apache.org and you want to tag pages with
+url pattern "http://lucene.apache.org/" to be part of subcollection
+lucene):
+</p>
+<p>
+<pre>
+&lt;?xml version="1.0" encoding="UTF-8"?>
+&lt;subcollections>
+	&lt;subcollection>
+		&lt;name>lucene&lt;/name>
+		&lt;id>lucene&lt;/id>
+		&lt;whitelist>http://lucene.apache.org/&lt;/whitelist>
+		&lt;blacklist />
+	&lt;/subcollection>
+&lt;/subcollections>
+</pre>
+</p>
+
+</body>
+</html>
Index: src/plugin/subcollection/src/java/org/apache/nutch/searcher/subcollection/SubcollectionQueryFilter.java
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/searcher/subcollection/SubcollectionQueryFilter.java	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/searcher/subcollection/SubcollectionQueryFilter.java	(revision 0)
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.searcher.subcollection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter;
+import org.apache.nutch.searcher.RawFieldQueryFilter;
+
+/** Handles "subcollection:" query clauses, causing them to search the
+ * "subcollection" field indexed by SubcollectionIndexingFilter. */
+public class SubcollectionQueryFilter extends RawFieldQueryFilter {
+  private Configuration conf;
+
+  public SubcollectionQueryFilter() {
+    super(SubcollectionIndexingFilter.FIELD_NAME);
+  }
+
+  public void setConf(Configuration conf) {
+    this.conf = conf; // kept only so that getConf() can hand it back
+  }
+
+  /** @return the configuration last given to setConf, null before that */
+  public Configuration getConf() { return conf; }
+}
Index: src/plugin/subcollection/src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/util/DomUtil.java	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/util/DomUtil.java	(revision 0)
@@ -0,0 +1,98 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.xerces.parsers.DOMParser;
+import org.w3c.dom.Element;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class DomUtil {
+
+  /**
+   * Returns the root element of the parsed dom tree or null if any error.
+   * Comments or processing instructions before the root element are skipped.
+   * 
+   * @param is stream holding the xml document, read as UTF-8
+   * @return the document element, or null when is is null or parsing failed
+   */
+  public static Element getDom(InputStream is) {
+    if (is == null) {
+      return null;
+    }
+
+    Element element = null;
+    DOMParser parser = new DOMParser();
+    try {
+      InputSource input = new InputSource(is);
+      input.setEncoding("UTF-8");
+      parser.parse(input);
+      // item(0) of the child nodes may be a comment; ask for the element itself
+      element = parser.getDocument().getDocumentElement();
+    } catch (SAXException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return element;
+  }
+
+  /**
+   * Writes the dom tree below e to the stream as indented xml. Errors are not
+   * reported to the caller; their stack traces are printed and the method returns.
+   * 
+   * @param os stream to write to; it is flushed, not closed
+   * @param e root element of the tree to write
+   */
+  public static void saveDom(OutputStream os, Element e) {
+
+    final DOMSource tree = new DOMSource(e);
+    final TransformerFactory factory = TransformerFactory.newInstance();
+    try {
+      // a transformer created without stylesheet copies its input to the output
+      final Transformer serializer = factory.newTransformer();
+      serializer.setOutputProperty("indent", "yes");
+      serializer.transform(tree, new StreamResult(os));
+      os.flush();
+    } catch (UnsupportedEncodingException encodingProblem) {
+      // subclass of IOException, handled the same way
+      encodingProblem.printStackTrace();
+    } catch (IOException ioProblem) {
+      // raised by flush()
+      ioProblem.printStackTrace();
+    } catch (TransformerConfigurationException setupProblem) {
+      // raised by newTransformer()
+      setupProblem.printStackTrace();
+    } catch (TransformerException writeProblem) {
+      // raised by transform()
+      writeProblem.printStackTrace();
+    }
+  }
+}
Index: src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java
===================================================================
--- src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java	(revision 0)
+++ src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java	(revision 0)
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.indexer.subcollection;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.util.NutchConfiguration;
+
+import org.apache.nutch.indexer.IndexingFilter;
+import org.apache.nutch.indexer.IndexingException;
+
+import org.apache.nutch.collection.CollectionManager;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+
+import java.util.logging.Logger;
+
+public class SubcollectionIndexingFilter extends Configured implements IndexingFilter {
+
+  public SubcollectionIndexingFilter(){
+    super(NutchConfiguration.create()); // no conf given: use a newly created one
+  }
+  /** Creates the filter on top of the supplied configuration. */
+  public SubcollectionIndexingFilter(Configuration conf) {
+    super(conf);
+  }
+
+  /**
+   * Doc field name
+   */
+  public static final String FIELD_NAME = "subcollection";
+
+  /**
+   * Logger
+   */
+  public static final Logger LOG = LogFormatter
+      .getLogger(SubcollectionIndexingFilter.class.getName());
+
+  /**
+   * Stores the names of all subcollections matching url in the
+   * {@link #FIELD_NAME} field of doc (stored and tokenized).
+   * @param doc document that receives the field
+   * @param url url used to look up the subcollections
+   */
+  private void addSubCollectionField(Document doc, String url) {
+    CollectionManager manager = CollectionManager.getCollectionManager(getConf());
+    doc.add(new Field(FIELD_NAME, manager.getSubCollections(url), Field.Store.YES, Field.Index.TOKENIZED));
+  }
+
+  public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
+    // only the url is used; parse, datum and inlinks are ignored
+    addSubCollectionField(doc, url.toString());
+    return doc;
+  }
+}
Index: src/plugin/subcollection/plugin.xml
===================================================================
--- src/plugin/subcollection/plugin.xml	(revision 0)
+++ src/plugin/subcollection/plugin.xml	(revision 0)
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<plugin
+   id="subcollection"
+   name="Subcollection indexing and query filter"
+   version="1.0.0"
+   provider-name="Sami Siren">
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <runtime>
+      <library name="subcollection.jar"/>
+   </runtime>
+
+   <extension id="org.apache.nutch.searcher.subcollection.query"
+              name="Subcollection Query Filter"
+              point="org.apache.nutch.searcher.QueryFilter">
+    <implementation id="SubcollectionQueryFilter"
+               class="org.apache.nutch.searcher.subcollection.SubcollectionQueryFilter"
+               raw-fields="subcollection"/>
+   </extension>      
+
+   <extension id="org.apache.nutch.indexer.subcollection.indexing"
+              name="Subcollection Indexing Filter"
+              point="org.apache.nutch.indexer.IndexingFilter">
+      <implementation id="SubcollectionIndexingFilter"
+                      class="org.apache.nutch.indexer.subcollection.SubcollectionIndexingFilter"/>
+                      
+   </extension>
+</plugin>
Index: src/plugin/subcollection/build.xml
===================================================================
--- src/plugin/subcollection/build.xml	(revision 0)
+++ src/plugin/subcollection/build.xml	(revision 0)
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+
+<project name="subcollection" default="jar">
+
+  <import file="../build-plugin.xml"/>
+
+</project>
Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 374884)
+++ src/plugin/build.xml	(working copy)
@@ -36,6 +36,7 @@
      <ant dir="query-more" target="deploy"/>
      <ant dir="query-site" target="deploy"/>
      <ant dir="query-url" target="deploy"/>
+     <ant dir="subcollection" target="deploy"/>
      <ant dir="urlfilter-prefix" target="deploy"/>
      <ant dir="urlfilter-regex" target="deploy"/>
   </target>
Index: conf/subcollections.xml
===================================================================
--- conf/subcollections.xml	(revision 0)
+++ conf/subcollections.xml	(revision 0)
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<subcollections>
+	<subcollection>
+		<name>httpd</name>
+		<id>httpd</id>
+		<whitelist>http://httpd.apache.org/</whitelist>
+		<blacklist />
+	</subcollection>
+	<subcollection>
+		<name>lucene</name>
+		<id>lucene</id>
+		<whitelist>
+http://lucene.apache.org/
+http://wiki.apache.org/nutch/
+</whitelist>
+		<blacklist />
+	</subcollection>
+	<subcollection>
+		<name>ant</name>
+		<id>ant</id>
+		<whitelist>http://ant.apache.org/</whitelist>
+		<blacklist />
+	</subcollection>
+	<subcollection>
+		<name>db</name>
+		<id>db</id>
+		<whitelist>http://db.apache.org/</whitelist>
+		<blacklist />
+	</subcollection>
+	<subcollection>
+		<name>foundation</name>
+		<id>foundation</id>
+		<whitelist>http://www.apache.org/foundation</whitelist>
+		<blacklist />
+	</subcollection>
+</subcollections>
