Index: ivy/ivy.xml
===================================================================
--- ivy/ivy.xml	(revision 1674078)
+++ ivy/ivy.xml	(working copy)
@@ -43,6 +43,8 @@
 		
 		<dependency org="commons-lang" name="commons-lang" rev="2.6"
 			conf="*->default" />
+		<dependency org="commons-validator" name="commons-validator" rev="1.4.1"
+			conf="*->default" /> <!-- provides UrlValidator, imported by CommonCrawlDataDumper -->
 		<dependency org="commons-collections" name="commons-collections"
 			rev="3.1" conf="*->default" />
 		<dependency org="commons-httpclient" name="commons-httpclient"
Index: src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java	(revision 1674078)
+++ src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java	(working copy)
@@ -49,6 +49,7 @@
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.FilenameUtils;
 
+import org.apache.commons.validator.routines.UrlValidator;
 //Hadoop
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -384,6 +385,12 @@
 					reader.getCurrentValue(content);
 					Metadata metadata = content.getMetadata();
 					String url = key.toString();
+					
+					// Warn only, processing continues; the shared default validator avoids a per-record allocation.
+					if (!UrlValidator.getInstance().isValid(url)) {
+						LOG.warn("Not valid URL detected: " + url);
+					}
+					
 					String baseName = FilenameUtils.getBaseName(url);
 					String extension = FilenameUtils.getExtension(url);
 					
