Index: src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
===================================================================
--- src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(revision 1237621)
+++ src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(working copy)
@@ -40,13 +40,20 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 
+import org.apache.commons.lang.StringUtils;
+
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
 import java.util.Date;
 import java.util.regex.*;
+import java.util.HashMap;
 
-
 import org.apache.commons.lang.time.DateUtils;
 
 /**
@@ -71,6 +78,10 @@
   /** Get the MimeTypes resolver instance. */
   private MimeUtil MIME;
 
+  /** Map for mime-type substitution */
+  private HashMap<String,String> mimeMap = null;
+  private boolean mapMimes = false;
+
   public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
 
@@ -187,6 +198,7 @@
   private NutchDocument addType(NutchDocument doc, ParseData data, String url) {
     String mimeType = null;
     String contentType = data.getMeta(Response.CONTENT_TYPE);
+
     if (contentType == null) {
       // Note by Jerome Charron on 20050415:
       // Content Type not solved by a previous plugin
@@ -209,6 +221,15 @@
       return doc;
     }
 
+    // Check if we have to map mime types
+    if (mapMimes) {
+      // Check if the current mime is mapped
+      if (mimeMap.containsKey(mimeType)) {
+        // It's mapped, let's replace it
+        mimeType = mimeMap.get(mimeType);
+      }
+    }
+
     contentType = mimeType;
 
     doc.add("type", contentType);
@@ -280,10 +301,69 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
     MIME = new MimeUtil(conf);
+    // Mime-type mapping is opt-in. The flag is raised only after the mapping
+    // file has loaded completely, so addType() never dereferences a null map
+    // when the file is missing or unreadable.
+    mapMimes = false;
+    if (conf.getBoolean("moreIndexingFilter.mapMimeTypes", false)) {
+      try {
+        readConfiguration();
+        mapMimes = true;
+      } catch (Exception e) {
+        LOG.error("Mime-type mapping disabled, could not load contenttype-mapping.txt: "
+            + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      }
+    }
   }
 
   public Configuration getConf() {
     return this.conf;
   }
 
+  /**
+   * Loads the mime-type substitution table from the
+   * <code>contenttype-mapping.txt</code> configuration resource.
+   *
+   * Each non-blank line that does not start with '#' holds a target type
+   * followed by one or more source types, separated by tabs. Every source
+   * type becomes a key of {@link #mimeMap} mapped to the target type.
+   *
+   * @throws IOException if the resource cannot be found or read
+   */
+  private void readConfiguration() throws IOException {
+    Reader resource = conf.getConfResourceAsReader("contenttype-mapping.txt");
+    if (resource == null) {
+      throw new IOException("contenttype-mapping.txt not found in configuration resources");
+    }
+
+    HashMap<String,String> loaded = new HashMap<String,String>();
+    BufferedReader reader = new BufferedReader(resource);
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        // Strings are immutable: keep the trimmed copy, otherwise an indented
+        // comment line slips past the '#' test below.
+        line = line.trim();
+        if (StringUtils.isBlank(line) || line.startsWith("#")) {
+          continue;
+        }
+
+        String[] parts = line.split("\t");
+        String target = parts[0].trim();
+        for (int i = 1; i < parts.length; i++) {
+          String source = parts[i].trim();
+          // Consecutive tabs yield empty fields; they are not mime types
+          if (source.length() > 0) {
+            loaded.put(source, target);
+          }
+        }
+      }
+    } finally {
+      reader.close();
+    }
+
+    // Publish only a completely parsed table
+    mimeMap = loaded;
+  }
+
 }
Index: conf/contenttype-mapping.txt
===================================================================
--- conf/contenttype-mapping.txt	(revision 0)
+++ conf/contenttype-mapping.txt	(revision 0)
@@ -0,0 +1,2 @@
+# Target content type <TAB> type1 [<TAB> type2 ...]
+text/html	application/xhtml+xml
