Index: src/java/org/apache/nutch/net/UrlValidator.java
===================================================================
--- src/java/org/apache/nutch/net/UrlValidator.java	(revision 572258)
+++ src/java/org/apache/nutch/net/UrlValidator.java	(working copy)
@@ -1,354 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.net;
-
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * <p>Validates URLs.</p>
- *
- * <p>Originally based in on php script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02,
- * http://javascript.internet.com. However, this validation now bears little resemblance
- * to the php original.</p>
- * <pre>
- *   Example of usage:
- *    UrlValidator urlValidator = UrlValidator.get();
- *    if (urlValidator.isValid("ftp://foo.bar.com/")) {
- *       System.out.println("url is valid");
- *    } else {
- *       System.out.println("url is invalid");
- *    }
- *
- *   prints out "url is valid"
- *  </pre>
- *  
- * <p>Based on UrlValidator code from Apache commons-validator.</p>
- *
- * @see
- * <a href='http://www.ietf.org/rfc/rfc2396.txt' >
- *  Uniform Resource Identifiers (URI): Generic Syntax
- * </a>
- * 
- */
-public class UrlValidator {
-
-  private static final String ALPHA_CHARS = "a-zA-Z";
-
-  private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";
-
-  private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
-
-  private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
-
-  private static final String SCHEME_CHARS = ALPHA_CHARS;
-
-  // Drop numeric, and  "+-." for now
-  private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
-
-  private static final String ATOM = VALID_CHARS + '+';
-
-  /**
-   * This expression derived/taken from the BNF for URI (RFC2396).
-   */
-  private static final Pattern URL_PATTERN =
-    Pattern.compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
-
-  /**
-   * Schema/Protocol (ie. http:, ftp:, file:, etc).
-   */
-  private static final int PARSE_URL_SCHEME = 2;
-
-  /**
-   * Includes hostname/ip and port number.
-   */
-  private static final int PARSE_URL_AUTHORITY = 4;
-
-  private static final int PARSE_URL_PATH = 5;
-
-  private static final int PARSE_URL_QUERY = 7;
-
-  /**
-   * Protocol (ie. http:, ftp:,https:).
-   */
-  private static final Pattern SCHEME_PATTERN = 
-    Pattern.compile("^[" + SCHEME_CHARS + "]+");
-
-  private static final Pattern AUTHORITY_PATTERN =
-    Pattern.compile("^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
-
-  private static final int PARSE_AUTHORITY_HOST_IP = 1;
-
-  private static final int PARSE_AUTHORITY_PORT = 2;
-
-  /**
-   * Should always be empty.
-   */
-  private static final int PARSE_AUTHORITY_EXTRA = 3;
-
-  private static final Pattern PATH_PATTERN = 
-    Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
-
-  private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
-
-  private static final Pattern LEGAL_ASCII_PATTERN = 
-    Pattern.compile("^[\\x21-\\x7E]+$");
-
-  private static final Pattern IP_V4_DOMAIN_PATTERN =
-    Pattern.compile("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$");
-
-  private static final Pattern DOMAIN_PATTERN =
-    Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
-
-  private static final Pattern PORT_PATTERN = 
-    Pattern.compile("^:(\\d{1,5})$");
-
-  private static final Pattern ATOM_PATTERN = 
-    Pattern.compile("(" + ATOM + ")");
-
-  private static final Pattern ALPHA_PATTERN = 
-    Pattern.compile("^[" + ALPHA_CHARS + "]");
-  
-  private static final UrlValidator VALIDATOR = new UrlValidator();
-
-  private UrlValidator() { 
-  }
-  
-  public static UrlValidator get() {
-    return VALIDATOR;
-  }
-
-  /**
-   * <p>Checks if a field has a valid url address.</p>
-   *
-   * @param value The value validation is being performed on.  A <code>null</code>
-   * value is considered invalid.
-   * @return true if the url is valid.
-   */
-  public boolean isValid(String value) {
-    if (value == null) {
-      return false;
-    }
-
-    Matcher matchUrlPat = URL_PATTERN.matcher(value);
-    if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
-      return false;
-    }
-
-    // Check the whole url address structure
-    if (!matchUrlPat.matches()) {
-      return false;
-    }
-
-    if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) {
-      return false;
-    }
-
-    if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) {
-      return false;
-    }
-
-    if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) {
-      return false;
-    }
-
-    if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) {
-      return false;
-    }
-
-    return true;
-  }
-
-  /**
-   * Validate scheme. If schemes[] was initialized to a non null,
-   * then only those scheme's are allowed.  Note this is slightly different
-   * than for the constructor.
-   * @param scheme The scheme to validate.  A <code>null</code> value is considered
-   * invalid.
-   * @return true if valid.
-   */
-  protected boolean isValidScheme(String scheme) {
-    if (scheme == null) {
-      return false;
-    }
-
-    return SCHEME_PATTERN.matcher(scheme).matches();
-  }
-
-  /**
-   * Returns true if the authority is properly formatted.  An authority is the combination
-   * of hostname and port.  A <code>null</code> authority value is considered invalid.
-   * @param authority Authority value to validate.
-   * @return true if authority (hostname and port) is valid.
-   */
-  protected boolean isValidAuthority(String authority) {
-    if (authority == null) {
-      return false;
-    }
-
-    Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
-    if (!authorityMatcher.matches()) {
-      return false;
-    }
-
-    boolean ipV4Address = false;
-    boolean hostname = false;
-    // check if authority is IP address or hostname
-    String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
-    Matcher matchIPV4Pat = IP_V4_DOMAIN_PATTERN.matcher(hostIP);
-    ipV4Address = matchIPV4Pat.matches();
-
-    if (ipV4Address) {
-      // this is an IP address so check components
-      for (int i = 1; i <= 4; i++) {
-        String ipSegment = matchIPV4Pat.group(i);
-        if (ipSegment == null || ipSegment.length() <= 0) {
-          return false;
-        }
-
-        try {
-          if (Integer.parseInt(ipSegment) > 255) {
-            return false;
-          }
-        } catch(NumberFormatException e) {
-          return false;
-        }
-
-      }
-    } else {
-      // Domain is hostname name
-      hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
-    }
-
-    // rightmost hostname will never start with a digit.
-    if (hostname) {
-      // LOW-TECH FIX FOR VALIDATOR-202
-      // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203
-      char[] chars = hostIP.toCharArray();
-      int size = 1;
-      for(int i=0; i<chars.length; i++) {
-        if(chars[i] == '.') {
-          size++;
-        }
-      }
-      String[] domainSegment = new String[size];
-      int segCount = 0;
-      int segLen = 0;
-      Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);
-
-      while (atomMatcher.find()) {
-        domainSegment[segCount] = atomMatcher.group();
-        segLen = domainSegment[segCount].length() + 1;
-        hostIP = (segLen >= hostIP.length()) ? "" 
-                                             : hostIP.substring(segLen);
-        segCount++;
-      }
-      String topLevel = domainSegment[segCount - 1];
-      if (topLevel.length() < 2 || topLevel.length() > 4) {
-        return false;
-      }
-
-      // First letter of top level must be a alpha
-      if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) {
-        return false;
-      }
-
-      // Make sure there's a host name preceding the authority.
-      if (segCount < 2) {
-        return false;
-      }
-    }
-
-    if (!hostname && !ipV4Address) {
-      return false;
-    }
-
-    String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
-    if (port != null) {
-      if (!PORT_PATTERN.matcher(port).matches()) {
-        return false;
-      }
-    }
-
-    String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
-    return isBlankOrNull(extra);
-  }
-
-  /**
-   * <p>Checks if the field isn't null and length of the field is greater 
-   * than zero not including whitespace.</p>
-   *
-   * @param value The value validation is being performed on.
-   * @return true if blank or null.
-   */
-  private boolean isBlankOrNull(String value) {
-    return ((value == null) || (value.trim().length() == 0));
-  }
-
-  /**
-   * Returns true if the path is valid.  A <code>null</code> value is considered invalid.
-   * @param path Path value to validate.
-   * @return true if path is valid.
-   */
-  protected boolean isValidPath(String path) {
-    if (path == null) {
-      return false;
-    }
-
-    if (!PATH_PATTERN.matcher(path).matches()) {
-      return false;
-    }
-
-    int slash2Count = countToken("//", path);
-    int slashCount = countToken("/", path);
-    int dot2Count = countToken("..", path);
-    
-    return (dot2Count <= 0) || ((slashCount - slash2Count - 1) > dot2Count);
-  }
-
-  /**
-   * Returns true if the query is null or it's a properly formatted query string.
-   * @param query Query value to validate.
-   * @return true if query is valid.
-   */
-  protected boolean isValidQuery(String query) {
-    if (query == null) {
-      return true;
-    }
-
-    return QUERY_PATTERN.matcher(query).matches();
-  }
-
-  /**
-   * Returns the number of times the token appears in the target.
-   * @param token Token value to be counted.
-   * @param target Target value to count tokens in.
-   * @return the number of tokens.
-   */
-  protected int countToken(String token, String target) {
-    int tokenIndex = 0;
-    int count = 0;
-    while (tokenIndex != -1) {
-      tokenIndex = target.indexOf(token, tokenIndex);
-      if (tokenIndex > -1) {
-        tokenIndex++;
-        count++;
-      }
-    }
-    return count;
-  }
-}
Index: src/java/org/apache/nutch/crawl/Injector.java
===================================================================
--- src/java/org/apache/nutch/crawl/Injector.java	(revision 572335)
+++ src/java/org/apache/nutch/crawl/Injector.java	(working copy)
@@ -52,7 +52,6 @@
     private URLFilters filters;
     private ScoringFilters scfilters;
     private long curTime;
-    final UrlValidator validator = UrlValidator.get();
 
     public void configure(JobConf job) {
       this.jobConf = job;
@@ -73,10 +72,7 @@
       String url = value.toString();              // value is line of text
       // System.out.println("url: " +url);
       try {
-        url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);       // normalize the url
-        if (!validator.isValid(url)) {
-    	  return;
-        }
+        url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
         url = filters.filter(url);             // filter the url
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) { LOG.warn("Skipping " +url+":"+e); }
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 572335)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -81,7 +81,6 @@
 
     this.filters = new URLFilters(job);
     this.scfilters = new ScoringFilters(job);
-    final UrlValidator validator = UrlValidator.get();
     final int interval = job.getInt("db.fetch.interval.default", 2592000);
     final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
     final int maxOutlinks = job.getInt("db.max.outlinks.per.page", 100);
@@ -164,9 +163,6 @@
                 continue; // skip it
               }
             }
-            if (!validator.isValid(toUrl)) {
-              continue;
-            }
             try {
               // normalizing here is not necessary since outlinks 
               // are already normalized in Outlink's constructor
Index: src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java
===================================================================
--- src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java	(revision 0)
+++ src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java	(revision 0)
@@ -0,0 +1,371 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.validator;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.URLFilter;
+
+/**
+ * <p>Validates URLs.</p>
+ *
+ * <p>Originally based in on php script by Debbie Dyer, validation.php v1.2b,
+ * Date: 03/07/02,
+ * http://javascript.internet.com. However, this validation now bears little
+ * resemblance to the php original.</p>
+ * <pre>
+ *   Example of usage:
+ *    UrlValidator urlValidator = UrlValidator.get();
+ *    if (urlValidator.isValid("ftp://foo.bar.com/")) {
+ *       System.out.println("url is valid");
+ *    } else {
+ *       System.out.println("url is invalid");
+ *    }
+ *
+ *   prints out "url is valid"
+ *  </pre>
+ *
+ * <p>Based on UrlValidator code from Apache commons-validator.</p>
+ *
+ * @see
+ * <a href='http://www.ietf.org/rfc/rfc2396.txt' >
+ *  Uniform Resource Identifiers (URI): Generic Syntax
+ * </a>
+ *
+ */
+public class UrlValidator implements URLFilter {
+
+  private static final String ALPHA_CHARS = "a-zA-Z";
+
+  private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";
+
+  private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
+
+  private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
+
+  private static final String SCHEME_CHARS = ALPHA_CHARS;
+
+  // Drop numeric, and  "+-." for now
+  private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
+
+  private static final String ATOM = VALID_CHARS + '+';
+
+  /**
+   * This expression derived/taken from the BNF for URI (RFC2396).
+   */
+  private static final Pattern URL_PATTERN =
+    Pattern.compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)" +
+                    "(\\?([^#]*))?(#(.*))?");
+
+  /**
+   * Schema/Protocol (ie. http:, ftp:, file:, etc).
+   */
+  private static final int PARSE_URL_SCHEME = 2;
+
+  /**
+   * Includes hostname/ip and port number.
+   */
+  private static final int PARSE_URL_AUTHORITY = 4;
+
+  private static final int PARSE_URL_PATH = 5;
+
+  private static final int PARSE_URL_QUERY = 7;
+
+  /**
+   * Protocol (ie. http:, ftp:,https:).
+   */
+  private static final Pattern SCHEME_PATTERN =
+    Pattern.compile("^[" + SCHEME_CHARS + "]+");
+
+  private static final Pattern AUTHORITY_PATTERN =
+    Pattern.compile("^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
+
+  private static final int PARSE_AUTHORITY_HOST_IP = 1;
+
+  private static final int PARSE_AUTHORITY_PORT = 2;
+
+  /**
+   * Should always be empty.
+   */
+  private static final int PARSE_AUTHORITY_EXTRA = 3;
+
+  private static final Pattern PATH_PATTERN =
+    Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
+
+  private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
+
+  private static final Pattern LEGAL_ASCII_PATTERN =
+    Pattern.compile("^[\\x21-\\x7E]+$");
+
+  private static final Pattern IP_V4_DOMAIN_PATTERN =
+    Pattern.compile("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$");
+
+  private static final Pattern DOMAIN_PATTERN =
+    Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
+
+  private static final Pattern PORT_PATTERN =
+    Pattern.compile("^:(\\d{1,5})$");
+
+  private static final Pattern ATOM_PATTERN =
+    Pattern.compile("(" + ATOM + ")");
+
+  private static final Pattern ALPHA_PATTERN =
+    Pattern.compile("^[" + ALPHA_CHARS + "]");
+
+  private Configuration conf;
+
+  @Override
+  public String filter(String urlString) {
+    return isValid(urlString) ? urlString : null;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * <p>Checks if a field has a valid url address.</p>
+   *
+   * @param value The value validation is being performed on.
+   * A <code>null</code> value is considered invalid.
+   * @return true if the url is valid.
+   */
+  private boolean isValid(String value) {
+    if (value == null) {
+      return false;
+    }
+
+    Matcher matchUrlPat = URL_PATTERN.matcher(value);
+    if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
+      return false;
+    }
+
+    // Check the whole url address structure
+    if (!matchUrlPat.matches()) {
+      return false;
+    }
+
+    if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) {
+      return false;
+    }
+
+    if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) {
+      return false;
+    }
+
+    if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) {
+      return false;
+    }
+
+    if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Validate scheme. If schemes[] was initialized to a non null,
+   * then only those scheme's are allowed.  Note this is slightly different
+   * than for the constructor.
+   * @param scheme The scheme to validate.  A <code>null</code> value is
+   * considered invalid.
+   * @return true if valid.
+   */
+  private boolean isValidScheme(String scheme) {
+    if (scheme == null) {
+      return false;
+    }
+
+    return SCHEME_PATTERN.matcher(scheme).matches();
+  }
+
+  /**
+   * Returns true if the authority is properly formatted.  An authority is
+   * the combination of hostname and port.  A <code>null</code> authority
+   * value is considered invalid.
+   * @param authority Authority value to validate.
+   * @return true if authority (hostname and port) is valid.
+   */
+  private boolean isValidAuthority(String authority) {
+    if (authority == null) {
+      return false;
+    }
+
+    Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
+    if (!authorityMatcher.matches()) {
+      return false;
+    }
+
+    boolean ipV4Address = false;
+    boolean hostname = false;
+    // check if authority is IP address or hostname
+    String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
+    Matcher matchIPV4Pat = IP_V4_DOMAIN_PATTERN.matcher(hostIP);
+    ipV4Address = matchIPV4Pat.matches();
+
+    if (ipV4Address) {
+      // this is an IP address so check components
+      for (int i = 1; i <= 4; i++) {
+        String ipSegment = matchIPV4Pat.group(i);
+        if (ipSegment == null || ipSegment.length() <= 0) {
+          return false;
+        }
+
+        try {
+          if (Integer.parseInt(ipSegment) > 255) {
+            return false;
+          }
+        } catch(NumberFormatException e) {
+          return false;
+        }
+
+      }
+    } else {
+      // Domain is hostname name
+      hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
+    }
+
+    // rightmost hostname will never start with a digit.
+    if (hostname) {
+      // LOW-TECH FIX FOR VALIDATOR-202
+      // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203
+      char[] chars = hostIP.toCharArray();
+      int size = 1;
+      for(int i=0; i<chars.length; i++) {
+        if(chars[i] == '.') {
+          size++;
+        }
+      }
+      String[] domainSegment = new String[size];
+      int segCount = 0;
+      int segLen = 0;
+      Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);
+
+      while (atomMatcher.find()) {
+        domainSegment[segCount] = atomMatcher.group();
+        segLen = domainSegment[segCount].length() + 1;
+        hostIP = (segLen >= hostIP.length()) ? ""
+                                             : hostIP.substring(segLen);
+        segCount++;
+      }
+      String topLevel = domainSegment[segCount - 1];
+      if (topLevel.length() < 2 || topLevel.length() > 4) {
+        return false;
+      }
+
+      // First letter of top level must be a alpha
+      if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) {
+        return false;
+      }
+
+      // Make sure there's a host name preceding the authority.
+      if (segCount < 2) {
+        return false;
+      }
+    }
+
+    if (!hostname && !ipV4Address) {
+      return false;
+    }
+
+    String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
+    if (port != null) {
+      if (!PORT_PATTERN.matcher(port).matches()) {
+        return false;
+      }
+    }
+
+    String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
+    return isBlankOrNull(extra);
+  }
+
+  /**
+   * <p>Checks if the field isn't null and length of the field is greater
+   * than zero not including whitespace.</p>
+   *
+   * @param value The value validation is being performed on.
+   * @return true if blank or null.
+   */
+  private boolean isBlankOrNull(String value) {
+    return ((value == null) || (value.trim().length() == 0));
+  }
+
+  /**
+   * Returns true if the path is valid.  A <code>null</code> value is
+   * considered invalid.
+   * @param path Path value to validate.
+   * @return true if path is valid.
+   */
+  private boolean isValidPath(String path) {
+    if (path == null) {
+      return false;
+    }
+
+    if (!PATH_PATTERN.matcher(path).matches()) {
+      return false;
+    }
+
+    int slash2Count = countToken("//", path);
+    int slashCount = countToken("/", path);
+    int dot2Count = countToken("..", path);
+
+    return (dot2Count <= 0) || ((slashCount - slash2Count - 1) > dot2Count);
+  }
+
+  /**
+   * Returns true if the query is null or it's a properly formatted query
+   * string.
+   * @param query Query value to validate.
+   * @return true if query is valid.
+   */
+  private boolean isValidQuery(String query) {
+    if (query == null) {
+      return true;
+    }
+
+    return QUERY_PATTERN.matcher(query).matches();
+  }
+
+  /**
+   * Returns the number of times the token appears in the target.
+   * @param token Token value to be counted.
+   * @param target Target value to count tokens in.
+   * @return the number of tokens.
+   */
+  private int countToken(String token, String target) {
+    int tokenIndex = 0;
+    int count = 0;
+    while (tokenIndex != -1) {
+      tokenIndex = target.indexOf(token, tokenIndex);
+      if (tokenIndex > -1) {
+        tokenIndex++;
+        count++;
+      }
+    }
+    return count;
+  }
+
+}
Index: src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html
===================================================================
--- src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html	(revision 0)
+++ src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/package.html	(revision 0)
@@ -0,0 +1,9 @@
+<html>
+<body>
+<p>A url filter plugin that validates given urls.</p>
+<p>This plugin runs a series of tests for the given url to make sure that given
+url is valid and 'fetchable'.</p>
+<p>Note: This plugin should <b>only</b> be used for web-related protocols such
+as http, https and ftp.</p>
+</body>
+</html>
Index: src/plugin/urlfilter-validator/plugin.xml
===================================================================
--- src/plugin/urlfilter-validator/plugin.xml	(revision 0)
+++ src/plugin/urlfilter-validator/plugin.xml	(revision 0)
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="urlfilter-validator"
+   name="URL Validator"
+   version="1.0.0"
+   provider-name="nutch.org">
+
+   <runtime>
+      <library name="urlfilter-validator.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.net.urlfilter.validator"
+              name="Nutch URL Validatorr"
+              point="org.apache.nutch.net.URLFilter">
+      <implementation id="URLValidator"
+                      class="org.apache.nutch.urlfilter.validator.UrlValidator"/>
+   </extension>
+
+</plugin>
Index: src/plugin/urlfilter-validator/build.xml
===================================================================
--- src/plugin/urlfilter-validator/build.xml	(revision 0)
+++ src/plugin/urlfilter-validator/build.xml	(revision 0)
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="urlfilter-validator" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+</project>
Index: src/plugin/build.xml
===================================================================
--- src/plugin/build.xml	(revision 572258)
+++ src/plugin/build.xml	(working copy)
@@ -73,6 +73,7 @@
      <ant dir="urlfilter-prefix" target="deploy"/>
      <ant dir="urlfilter-regex" target="deploy"/>
      <ant dir="urlfilter-suffix" target="deploy"/>
+     <ant dir="urlfilter-validator" target="deploy"/>
      <ant dir="urlnormalizer-basic" target="deploy"/>
      <ant dir="urlnormalizer-pass" target="deploy"/>
      <ant dir="urlnormalizer-regex" target="deploy"/>
@@ -164,6 +165,7 @@
     <ant dir="urlfilter-prefix" target="clean"/>
     <ant dir="urlfilter-regex" target="clean"/>
     <ant dir="urlfilter-suffix" target="clean"/>
+    <ant dir="urlfilter-validator" target="clean"/>
     <ant dir="urlnormalizer-basic" target="clean"/>
     <ant dir="urlnormalizer-pass" target="clean"/>
     <ant dir="urlnormalizer-regex" target="clean"/>
