Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml	(revision 1620951)
+++ conf/nutch-default.xml	(working copy)
@@ -1193,6 +1193,17 @@
   </description>
 </property>
 
+<property>
+   <name>solr.exclude.from.dedup.regex.file</name>
+   <value>regex-exclude-urls-from-dedup.txt</value>
+   <description>
+      Name of the configuration file containing the regular expressions that specify URLs (ids) to be excluded from the Solr deduplication process.
+      I.e., any URL matching one of the regular expressions will not be subject to potential deduplication.
+      Each pattern must be on its own line and that line must begin with a "-" character; all other lines are ignored.
+      A URL is excluded only if the entire URL matches a pattern (a full match, not a substring search).
+   </description>
+</property>
+
 <!-- elasticsearch index properties -->
 <property>
   <name>elastic.host</name>
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1620951)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -26,6 +26,10 @@
   public static final String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
 
   public static final String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
+  /** Configuration key naming the file that lists the regular expressions for ids excluded from dedup. */
+  public static final String EXCLUDE_FROM_DEDUP_REGEX_FILE = SOLR_PREFIX + "exclude.from.dedup.regex.file";
+  /** Configuration key holding the combined exclusion pattern string that the dedup job reads in setup(). */
+  public static final String EXCLUDE_FROM_DEDUP_PATTERN = SOLR_PREFIX + "exclude.from.dedup.pattern";
 
   public static final String USE_AUTH = SOLR_PREFIX + "auth";
 
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1620951)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -24,6 +24,9 @@
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
+import java.util.regex.Pattern;
+import java.io.Reader;
+import java.io.BufferedReader;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -294,6 +297,7 @@
     }
   }
 
+
   private Configuration conf;
 
   private SolrServer solr;
@@ -301,7 +305,10 @@
   private int numDeletes = 0;
 
   private UpdateRequest updateRequest = new UpdateRequest();
+  /** Compiled id exclusion pattern; stays null when no pattern string is present in the configuration. */
+  private Pattern idExcludePattern;
 
+
   @Override
   public Configuration getConf() {
     return conf;
@@ -315,6 +322,13 @@
   @Override
   public void setup(Context job) throws IOException {
     Configuration conf = job.getConfiguration();
+    // The combined exclusion pattern string, if any, is stored in the configuration by setIdExcludePattern().
+    String idExcludePatternString = conf.get(SolrConstants.EXCLUDE_FROM_DEDUP_PATTERN,null);
+    // Compile once per task; when the key is absent idExcludePattern stays null and no record is skipped.
+    if (idExcludePatternString != null){
+      idExcludePattern = Pattern.compile(idExcludePatternString);
+    }
+       
     solr = SolrUtils.getHttpSolrServer(conf);
   }
 
@@ -337,6 +351,11 @@
   throws IOException {
     Iterator<SolrRecord> iterator = values.iterator();
     SolrRecord recordToKeep = iterator.next();
+    
+    if (shouldSkipRecord(recordToKeep)){
+      return;
+    }
+    
     while (iterator.hasNext()) {
       SolrRecord solrRecord = iterator.next();
       if (solrRecord.getBoost() > recordToKeep.getBoost() ||
@@ -359,6 +378,17 @@
       }
     }
   }
+  
+  /**
+   * Returns true when an exclusion pattern is configured and the record's id matches it in full; the skipped id is logged.
+   */
+  public boolean shouldSkipRecord(SolrRecord recordToKeep){
+    if (idExcludePattern == null || !idExcludePattern.matcher(recordToKeep.id).matches()){
+      return false;
+    }
+    LOG.info("Skipping: {} as it matched pattern for exclusion from dedup.", recordToKeep.id);
+    return true;
+  }
 
   public boolean dedup(String solrUrl)
   throws IOException, InterruptedException, ClassNotFoundException {
@@ -367,6 +397,8 @@
     
     getConf().set(SolrConstants.SERVER_URL, solrUrl);
     
+    setIdExcludePattern();
+    
     Job job = new Job(getConf(), "solrdedup");
 
     job.setInputFormatClass(SolrInputFormat.class);
@@ -378,6 +410,60 @@
 
     return job.waitForCompletion(true);    
   }
+  
+  /** Loads the exclusion regex file named in the configuration and stores the combined pattern string in the conf;
+   *  the conf is left untouched when the name is blank, the file cannot be found, or it holds no pattern lines. */
+  public void setIdExcludePattern() throws IOException{
+      String regexFileName = getConf().get(SolrConstants.EXCLUDE_FROM_DEDUP_REGEX_FILE, null);
+      LOG.info("Regex. ID exclude pattern file name: {}", regexFileName);
+      if (regexFileName == null || regexFileName.trim().isEmpty()){
+          return;
+      }
+      // getConfResourceAsReader returns null for a resource it cannot locate; guard so a missing file is not an NPE
+      Reader confReader = getConf().getConfResourceAsReader(regexFileName);
+      String idExcludePatternString = confReader == null ? null : createPatternStringFromConfiguration(confReader);
+      LOG.info("Full exclude ID pattern string: {}", idExcludePatternString);
+      if (idExcludePatternString != null){
+          getConf().set(SolrConstants.EXCLUDE_FROM_DEDUP_PATTERN, idExcludePatternString);
+      }
+  }
+  
+  /** Returns the "-"-prefixed lines of the reader (prefix stripped) combined into one pattern, or null if none; closes the reader. */
+  public String createPatternStringFromConfiguration(Reader rawConfReader) throws IOException{
+      final String patternLineIndicator = "-";
+      List<String> patternStrings = new ArrayList<String>();
+      BufferedReader confReader = new BufferedReader(rawConfReader);
+      try {
+        for (String line = confReader.readLine(); line != null; line = confReader.readLine()) {
+          if (line.startsWith(patternLineIndicator)) {
+            patternStrings.add(line.substring(patternLineIndicator.length()));
+          }
+        }
+      } finally { confReader.close(); } // also closes the wrapped reader, which was previously leaked
+      return getCombinedPatternString(patternStrings);
+  }
+  
+  /**
+   * Joins the given pattern strings into a single regular expression in which
+   * the individual patterns are alternatives separated by "|".
+   *
+   * @param patternStrings the individual patterns, joined in list order
+   * @return the combined pattern string, or null when the list is empty
+   */
+  public String getCombinedPatternString(List<String> patternStrings){
+      Iterator<String> remaining = patternStrings.iterator();
+      if (!remaining.hasNext()){
+          return null;
+      }
+
+      // the first pattern goes in bare; every later one is preceded by the separator
+      StringBuilder alternation = new StringBuilder().append(remaining.next());
+      while (remaining.hasNext()){
+          alternation.append("|").append(remaining.next());
+      }
+
+      return alternation.toString();
+  }
 
   public int run(String[] args)
   throws IOException, InterruptedException, ClassNotFoundException {
