Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 1357700)
+++ src/plugin/build.xml	(working copy)
@@ -27,6 +27,7 @@
   <!-- ====================================================== -->
   <target name="deploy">
      <ant dir="creativecommons" target="deploy"/>
+     <ant dir="date" target="deploy"/>
      <ant dir="feed" target="deploy"/>
      <ant dir="headings" target="deploy"/>
      <ant dir="index-basic" target="deploy"/>
@@ -76,6 +77,7 @@
   <target name="test">
     <parallel threadCount="2">
      <ant dir="creativecommons" target="test"/>
+     <ant dir="date" target="test"/>
      <ant dir="index-more" target="test"/>
      <ant dir="language-identifier" target="test"/>
      <ant dir="lib-http" target="test"/>
@@ -106,6 +108,7 @@
   <!-- ====================================================== -->
   <target name="clean">
     <ant dir="creativecommons" target="clean"/>
+    <ant dir="date" target="clean"/>
     <ant dir="feed" target="clean"/>
     <ant dir="headings" target="clean"/>
     <ant dir="index-basic" target="clean"/>
Index: src/plugin/date/plugin.xml
===================================================================
--- src/plugin/date/plugin.xml	(revision 0)
+++ src/plugin/date/plugin.xml	(revision 0)
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="date"
+   name="Date Parse Filter"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="date.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.parse.date"
+              name="Nutch Date Parse Filter"
+              point="org.apache.nutch.parse.HtmlParseFilter">
+
+      <implementation id="DateParseFilter"
+                      class="org.apache.nutch.parse.date.DateParseFilter">
+        <parameter name="formats-file" value="dateparsefilter-formats.txt"/>
+        <parameter name="patterns-file" value="dateparsefilter-patterns.txt"/>
+      </implementation>
+   </extension>
+</plugin>
Index: src/plugin/date/build.xml
===================================================================
--- src/plugin/date/build.xml	(revision 0)
+++ src/plugin/date/build.xml	(revision 0)
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="date" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+  <!-- copy the sample data for the junit tests into the test build directory -->
+  <mkdir dir="${build.test}/data"/>
+  <copy todir="${build.test}/data">
+    <fileset dir="data" />
+  </copy>
+</project>
Index: src/plugin/date/ivy.xml
===================================================================
--- src/plugin/date/ivy.xml	(revision 0)
+++ src/plugin/date/ivy.xml	(revision 0)
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+      <include file="${nutch.root}/ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!-- no name given: the artifact is named after the module -->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>
Index: src/plugin/date/src/test/org/apache/nutch/parse/date/TestDateParseFilter.java
===================================================================
--- src/plugin/date/src/test/org/apache/nutch/parse/date/TestDateParseFilter.java	(revision 0)
+++ src/plugin/date/src/test/org/apache/nutch/parse/date/TestDateParseFilter.java	(revision 0)
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse.date;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+import junit.framework.TestCase;
+
+public class TestDateParseFilter extends TestCase {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  /**
+   * Expected result for 17 May 2012; the offset follows the JVM default time zone, it is not always +0200.
+   */
+  private final static String EXPECTED = new java.text.SimpleDateFormat("yyyy-MM-dd'T00:00:00'Z")
+    .format(new java.util.GregorianCalendar(2012, java.util.Calendar.MAY, 17).getTime());
+
+  public void testDateParseFilter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+    String patternsFile = SAMPLES + SEPARATOR + "test-patterns.txt";
+    String formatsFile = SAMPLES + SEPARATOR + "test-formats.txt";
+
+    DateParseFilter dpf = new DateParseFilter(patternsFile, formatsFile);
+    dpf.setConf(conf);
+
+    /**
+     * Some common numeric date formats
+     */
+    String[] tests = new String[]{
+      "test 17-05-2012 test",
+      "test 17/05/2012 test",
+      "test 17.05.2012 test",
+      "test 2012-05-17 test",
+      "test 2012/05/17 test",
+      "test 05/17/2012 test",
+      "test 05-17-2012 test",
+      "test 17-05-12 test",
+      "test 17.5.12 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test));
+    }
+
+    /**
+     * Dates in Dutch
+     */
+    tests = new String[]{
+      "test 17 mei 2012 test",
+      "test 17 mei 12 test",
+      "test mei 17 2012 test",
+      "test mei 17, 2012 test",
+      "test 17 mei, 2012 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test, "nl"));
+    }
+
+    /**
+     * Dates in English
+     */
+    tests = new String[]{
+      "test 17 may 2012 test",
+      "test 17 may 12 test",
+      "test may 17 2012 test",
+      "test may 17, 2012 test",
+      "test 17 may, 2012 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test, "en"));
+    }
+
+    /**
+     * Dates in German
+     */
+    tests = new String[]{
+      "test 17 mai 2012 test",
+      "test 17 mai 12 test",
+      "test mai 17 2012 test",
+      "test mai 17, 2012 test",
+      "test 17 mai, 2012 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test, "de"));
+    }
+
+    /**
+     * Dates in Italian
+     */
+    tests = new String[]{
+      "test 17 Maggio 2012 test",
+      "test Maggio 17 2012 test",
+      "test Maggio 17, 2012 test",
+      "test 17 Maggio, 2012 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test, "it"));
+    }
+
+    /**
+     * Dates in French
+     */
+    tests = new String[]{
+      "test 17 mai 2012 test",
+      "test 17 mai 12 test",
+      "test mai 17 2012 test",
+      "test mai 17, 2012 test",
+      "test 17 mai, 2012 test",
+    };
+
+    for (String test : tests) {
+      assertEquals(EXPECTED, dpf.obtainDate(test, "fr"));
+    }
+
+//     /**
+//      * Dates in Spanish
+//      */
+//     tests = new String[]{
+//       "test 17 de Mayo de 2012 test",
+//       "test de Mayo 17 de 2012 test",
+//       "test de Mayo 17, de 2012 test",
+//       "test 17 de Mayo, de 2012 test",
+//     };
+// 
+//     for (String test : tests) {
+//       assertEquals(EXPECTED, dpf.obtainDate(test, "es"));
+//     }
+// 
+//     /**
+//      * Dates in Russian
+//      */
+//     tests = new String[]{
+//       "test 17 Май 2012 test",
+//       "test 17 Май 12 test",
+//       "test Май 17 2012 test",
+//       "test Май 17, 2012 test",
+//       "test 17 Май, 2012 test",
+//     };
+// 
+//     for (String test : tests) {
+//       assertEquals(EXPECTED, dpf.obtainDate(test, "ru"));
+//     }
+
+    /**
+     * Chinese and Japanese dates ("ja" is the language code for Japanese)
+     */
+    assertEquals(EXPECTED, dpf.obtainDate("test 2012年5月17日 test", "zh"));
+    assertEquals(EXPECTED, dpf.obtainDate("test 2012年5月17日 test", "ja"));
+
+    // Test ignore future date, 2099 should be high enough
+    assertEquals(EXPECTED, dpf.obtainDate("test 17/05/2099 test test 17/05/2012 test", "en"));
+
+    // Test ignore ancient date
+    assertEquals(EXPECTED, dpf.obtainDate("test 17/05/1975 test test 17/05/2012 test", "en"));
+
+    // Check if we can find the proper date in this mess
+    assertEquals(EXPECTED, dpf.obtainDate("Period 2012-2013 17-05-2012 bla bla", "nl"));
+
+//     /**
+//      * Some Thai dates
+//      */
+//     assertEquals(EXPECTED, dpf.obtainDate("17 พฤษภาคม 2555", "th"));
+//     
+//     tests = new String[]{
+//       "test 17 พฤษภาคม พ.ศ.2555 test",
+//       "test พฤษภาคม 17 พ.ศ.2555 test",
+//     };
+// 
+//     for (String test : tests) {
+//     System.out.println(test);
+//       assertEquals(EXPECTED, dpf.obtainDate(test, "th"));
+//     }
+  }
+}
Index: src/plugin/date/src/java/org/apache/nutch/parse/date/DateParseFilter.java
===================================================================
--- src/plugin/date/src/java/org/apache/nutch/parse/date/DateParseFilter.java	(revision 0)
+++ src/plugin/date/src/java/org/apache/nutch/parse/date/DateParseFilter.java	(revision 0)
@@ -0,0 +1,504 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.parse.date;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.HtmlParseFilter;
+import org.apache.nutch.parse.ParseResult;
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.NutchConfiguration;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.time.DateUtils;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * HtmlParseFilter to retrieve an arbitrary date string from the page. Using some
+ * kind of boiler plate text filter is recommended.
+ *
+ * @see http://en.wikipedia.org/wiki/Date_format_by_country
+ * @author markus@apache.org
+ */
+public class DateParseFilter implements HtmlParseFilter {
+
+  private Configuration conf;
+  public static final Logger LOG = LoggerFactory.getLogger(DateParseFilter.class);
+
+  protected String patternsFile = null; // set by the two-argument constructor only
+  protected String formatsFile = null; // set by the two-argument constructor only
+
+  /**
+   * Maximum length of the head and of the tail taken by getFragment(); setConf() replaces this value.
+   */
+  protected int maxFragmentLength = 256;
+
+  /**
+   * Whether to ignore dates in the future, that is dates after the today field.
+   */
+  protected boolean ignoreFutureDate = true;
+
+  /**
+   * Extracted dates before this one are ignored; assigned in setConf().
+   */
+  protected Date lowerDateThresHold;;
+
+  /**
+   * The parse metadata key for the extracted date.
+   */
+  protected String metaDataKey = "extractedDate";
+
+  /**
+   * A list of regular expressions to use on the text fragment; static, so shared by all instances.
+   */
+  protected static ArrayList<String> patterns = new ArrayList<String>();
+
+  /**
+   * Our preconfigured date formats, warning, order matters! Static, so shared by all instances.
+   */
+  protected static ArrayList<String> formats = new ArrayList<String>();
+
+  /**
+   * The moment this filter was instantiated, used to check if a date is in the future.
+   */
+  protected Date today = new Date();
+
+  /**
+   * URL of the document last passed to filter(). Initialized to "" for unit tests.
+   */
+  protected String url = "";
+
+  /**
+   * Our desired output format: the date, the literal T00:00:00 and the time zone offset.
+   */
+  protected SimpleDateFormat formattedDate = new SimpleDateFormat("yyyy-MM-dd'T00:00:00'Z");
+
+  /**
+   * Default constructor; setConf() then takes the file names from plugin.xml.
+   */
+  public DateParseFilter() { }
+
+  /**
+   * Creates a filter that prefers the given files over those named in plugin.xml.
+   */
+  public DateParseFilter(String patternsFile, String formatsFile) {
+    this.formatsFile = formatsFile;
+    this.patternsFile = patternsFile;
+  }
+
+  /**
+   * Extracts a date from the parsed text and stores it in the parse metadata.
+   *
+   * @param content the fetched content; its URL selects the parse to inspect
+   * @param parseResult the parse result to read from and to annotate
+   * @param metaTags the HTML meta tags of the page (unused)
+   * @param doc the DOM of the page (unused)
+   * @return the given parseResult; a found date is stored under metaDataKey
+   */
+  public ParseResult filter(Content content, ParseResult parseResult,
+    HTMLMetaTags metaTags, DocumentFragment doc) {
+    // Remember the URL, parseDate() mentions it in its debug output
+    url = content.getUrl();
+
+    // Look the parse up once; without a parse or text there is nothing to scan
+    Parse parse = parseResult.get(url);
+    if (parse == null || parse.getText() == null) {
+      return parseResult;
+    }
+
+    // Language of the page as found in the parse metadata, may be absent
+    String language = parse.getData().getParseMeta().get("language");
+
+    // Only the head and the tail of the text are searched for a date
+    String fragment = getFragment(parse.getText());
+
+    // Extract, parse and validate the date, null means nothing usable was found
+    String date = obtainDate(fragment, language);
+    if (date != null) {
+      parse.getData().getParseMeta().set(metaDataKey, date);
+    }
+
+    // Return the whole
+    return parseResult;
+  }
+
+  /**
+   * Configures this filter: reads the dateParseFilter.* settings and loads the
+   * regular expressions and date formats. Files passed to the constructor take
+   * precedence over the patterns-file and formats-file attributes of plugin.xml.
+   *
+   * @param conf the configuration to read the settings and resources from
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+
+    this.maxFragmentLength = conf.getInt("dateParseFilter.fragment.length", 340);
+    this.ignoreFutureDate = conf.getBoolean("dateParseFilter.ignore.future.date", true);
+    this.metaDataKey = conf.get("dateParseFilter.metadata.key", "extractedDate");
+
+    // Never leave the threshold unset, obtainDate() compares every date with it
+    String defaultThreshold = "2000-05-17";
+    SimpleDateFormat thresholdFormat = new SimpleDateFormat("yyyy-MM-dd");
+    try {
+      this.lowerDateThresHold = thresholdFormat.parse(
+        conf.get("dateParseFilter.lower.date.threshold", defaultThreshold));
+    } catch (ParseException e) {
+      LOG.error("Could not parse dateParseFilter.lower.date.threshold");
+      this.lowerDateThresHold = thresholdFormat.parse(defaultThreshold,
+        new java.text.ParsePosition(0));
+    }
+
+    // The file names declared for this plugin in plugin.xml, if any
+    String attrPatternsFile = null;
+    String attrFormatsFile = null;
+    String pluginName = "date";
+    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
+      HtmlParseFilter.class.getName()).getExtensions();
+    for (Extension extension : extensions) {
+      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
+        attrPatternsFile = extension.getAttribute("patterns-file");
+        attrFormatsFile = extension.getAttribute("formats-file");
+        break;
+      }
+    }
+
+    // handle blank non empty input
+    if (attrPatternsFile != null && attrPatternsFile.trim().equals("")) {
+      attrPatternsFile = null;
+    }
+    if (attrFormatsFile != null && attrFormatsFile.trim().equals("")) {
+      attrFormatsFile = null;
+    }
+
+    logAttribute("patterns-file", attrPatternsFile, pluginName);
+    logAttribute("formats-file", attrFormatsFile, pluginName);
+
+    // Files handed to the constructor win over those named in plugin.xml
+    loadResource(patternsFile != null ? patternsFile : attrPatternsFile, true);
+    loadResource(formatsFile != null ? formatsFile : attrFormatsFile, false);
+  }
+
+  /** Logs whether the given plugin.xml attribute is defined for the plugin. */
+  private void logAttribute(String name, String value, String pluginName) {
+    if (value != null) {
+      LOG.info("Attribute \"" + name + "\" is defined for plugin " + pluginName
+        + " as " + value);
+    } else {
+      LOG.warn("Attribute \"" + name + "\" is not defined in plugin.xml for plugin "
+        + pluginName);
+    }
+  }
+
+  /** Loads patterns or formats from the named resource or file; failures are logged, the reader is closed. */
+  private void loadResource(String file, boolean isPatternsFile) {
+    Reader reader = null;
+    try {
+      if (file == null) {
+        throw new IOException("No file given in the constructor or in plugin.xml");
+      }
+      reader = conf.getConfResourceAsReader(file);
+      if (reader == null) {
+        reader = new FileReader(file);
+      }
+      if (isPatternsFile) {
+        readPatterns(reader);
+      } else {
+        readFormats(reader);
+      }
+    } catch (IOException e) {
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+    } finally {
+      if (reader != null) {
+        try {
+          reader.close();
+        } catch (IOException e) {
+          LOG.warn("Could not close " + file + ": " + e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Reads the regular expressions, one per line, into the shared pattern list.
+   * Blank lines and lines starting with # are skipped. A filled list is left as is.
+   *
+   * @param patternReader the reader to read the patterns from
+   * @throws IOException if reading fails
+   */
+  protected synchronized void readPatterns(Reader patternReader)
+    throws IOException {
+    // The list is static, another instance may have filled it already
+    if (!patterns.isEmpty()) {
+      return;
+    }
+
+    BufferedReader reader = new BufferedReader(patternReader);
+    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+      // Trim first so that indented comment lines are recognised as comments
+      String pattern = line.trim();
+      if (StringUtils.isNotBlank(pattern) && !pattern.startsWith("#")) {
+        patterns.add(pattern);
+      }
+    }
+  }
+
+  /**
+   * Reads the date formats, one per line, into the shared format list.
+   * Blank lines and lines starting with # are skipped. A filled list is left as is.
+   *
+   * @param formatReader the reader to read the formats from
+   * @throws IOException if reading fails
+   */
+  protected synchronized void readFormats(Reader formatReader)
+    throws IOException {
+    // The list is static, another instance may have filled it already
+    if (!formats.isEmpty()) {
+      return;
+    }
+
+    BufferedReader reader = new BufferedReader(formatReader);
+    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+      // Trim first so that indented comment lines are recognised as comments
+      String format = line.trim();
+      if (StringUtils.isNotBlank(format) && !format.startsWith("#")) {
+        formats.add(format);
+      }
+    }
+  }
+
+  /**
+   * Returns the configuration that was handed to setConf.
+   */
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * Builds the text fragment that is searched for dates: the first
+   * maxFragmentLength characters of the text, the separator "|||" and, for
+   * texts longer than head and tail together, the last maxFragmentLength
+   * characters. Shorter texts get an empty tail.
+   *
+   * @param text the text to take the fragment from
+   * @return head and tail of the text joined by "|||"
+   */
+  public String getFragment(String text) {
+    final int textLength = text.length();
+    final int headLength = Math.min(textLength, maxFragmentLength);
+
+    StringBuilder buffer = new StringBuilder();
+    // The head, at most maxFragmentLength characters from the start
+    buffer.append(text.substring(0, headLength));
+    buffer.append("|||");
+
+    // The tail, only if it cannot overlap with the head
+    if (textLength > maxFragmentLength + headLength) {
+      buffer.append(text.substring(textLength - maxFragmentLength));
+    }
+
+    String fragment = buffer.toString();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Fragment: " + fragment);
+    }
+    return fragment;
+  }
+
+  /**
+   * Obtains a checked and valid date from the fragment, using "nl" as the language.
+   * @param fragment the text to search for a date
+   * @return the formatted date, or null if no usable date was found
+   */
+  public String obtainDate(String fragment) {
+    final String language = "nl";
+    return obtainDate(fragment, language);
+  }
+
+  /**
+   * Obtains a checked and valid date from the fragment. Every date string that
+   * matches one of the patterns is parsed, in order of extraction, and the
+   * first one that is neither in the future (if ignoreFutureDate is set) nor
+   * before the lower date threshold is returned.
+   * Dates that follow the first usable one are not considered.
+   *
+   * @param fragment the text to search for a date
+   * @param language language code used to build the Locale for parsing
+   * @return the date as formatted by formatDate, or null if there is no usable date
+   */
+  public String obtainDate(String fragment, String language) {
+    // Extract the date strings from the fragment
+    List<String> extractedDates = extractDate(fragment);
+
+    // Did we find any?
+    if (extractedDates.isEmpty()) {
+      return null;
+    }
+
+    // Traverse over the list of extracted dates
+    for (String extractedDate : extractedDates) {
+      // Parse the extracted date string
+      Date parsedDate = parseDate(extractedDate, language);
+
+      // The first date that passes all checks wins
+      if (parsedDate != null && isAcceptable(parsedDate)) {
+        return formatDate(parsedDate);
+      }
+    }
+
+    // None of the extracted dates was usable
+    return null;
+  }
+
+  /**
+   * Checks a parsed date against the future and the lower threshold limits.
+   *
+   * @param parsedDate the date to check
+   * @return true if the date may be used
+   */
+  private boolean isAcceptable(Date parsedDate) {
+    // Check if date is in future
+    if (ignoreFutureDate && parsedDate.after(today)) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Future date [" + formatDate(parsedDate) + "] ignored");
+      }
+      return false;
+    }
+
+    // The threshold is null if setConf() did not, or could not, set it; there
+    // is no lower limit to check against in that case.
+    if (lowerDateThresHold != null && lowerDateThresHold.after(parsedDate)) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Ancient date [" + formatDate(parsedDate) + "] ignored");
+      }
+      return false;
+    }
+
+    // Nice!
+    return true;
+  }
+
+  /**
+   * Extracts all date strings from the text using the configured regular
+   * expressions. The text is lower cased before matching; the matches are
+   * returned per pattern, in the order in which they occur in the text.
+   *
+   * @param text the text to search
+   * @return the matched strings, empty if nothing matched
+   */
+  public List<String> extractDate(String text) {
+    // The list of extracted dates we'll return
+    List<String> extractedDates = new ArrayList<String>();
+
+    // Lower case only once, not for every pattern
+    String lowerCased = text.toLowerCase();
+
+    // Traverse over the available patterns
+    for (String pattern : patterns) {
+      Matcher matcher = Pattern.compile(pattern).matcher(lowerCased);
+
+      // find() resumes after the previous match, so each occurrence is
+      // reported exactly once. Do not use find(int) with a stepped index here:
+      // it restarts the search and yields the same match over and over again.
+      while (matcher.find()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Extracted: [" + matcher.group() + "] using: " + pattern);
+        }
+        extractedDates.add(matcher.group());
+      }
+    }
+
+    return extractedDates;
+  }
+
+  /**
+   * Attempts to parse the date string with the configured formats, in order.
+   * A format must consume the whole string; parsing is strict (not lenient).
+   *
+   * @param date the date string to parse
+   * @param language language code for the Locale, null for the default Locale
+   * @return the parsed date, or null if no format matches
+   */
+  public Date parseDate(String date, String language) {
+    // Use a per-call Locale; Locale.setDefault() would change the whole JVM
+    // and new Locale(null) throws a NullPointerException.
+    Locale locale = language == null ? Locale.getDefault() : new Locale(language);
+    for (String format : formats) {
+      SimpleDateFormat parser = new SimpleDateFormat(format, locale);
+      parser.setLenient(false);
+      java.text.ParsePosition position = new java.text.ParsePosition(0);
+      Date parsedDate = parser.parse(date, position);
+      if (parsedDate != null && position.getIndex() == date.length()) {
+        return parsedDate;
+      }
+    }
+    // Very bad but very common
+    LOG.debug("Unable to parse extracted date: " + date +
+      " using locale " + language + " for URL: " + url);
+    return null;
+  }
+
+  /**
+   * Formats the date with the output format of this filter.
+   *
+   * @param date the date to format
+   * @return the date formatted by the formattedDate field
+   */
+  public String formatDate(Date date) {
+    final String formatted = formattedDate.format(date);
+    return formatted;
+  }
+
+  /**
+   * Command line test: reads lines from standard input and prints the date
+   * obtained from each line, or null if there is none.
+   */
+  public static void main(String[] args) throws Exception {
+    Configuration configuration = NutchConfiguration.create();
+    DateParseFilter filter = new DateParseFilter();
+    filter.setConf(configuration);
+
+    BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
+    System.out.println("DateParseFilter test. Enter free text to extract dates.");
+    System.out.print("> ");
+    for (String input = stdin.readLine(); input != null; input = stdin.readLine()) {
+      System.out.print(filter.obtainDate(input) + "\n> ");
+    }
+  }
+}
