Index: pom.xml
===================================================================
--- pom.xml	(revision 1446836)
+++ pom.xml	(working copy)
@@ -161,6 +161,12 @@
 		</plugins>
 	</build>
 	<dependencies>
+		<dependency>
+			<groupId>org.mongodb</groupId>
+			<artifactId>mongo-java-driver</artifactId>
+			<version>2.10.1</version>
+			<optional>true</optional>
+		</dependency>
 		<dependency>
 			<groupId>org.apache.solr</groupId>
 			<artifactId>solr-solrj</artifactId>
Index: ivy/ivy.xml
===================================================================
--- ivy/ivy.xml	(revision 1446836)
+++ ivy/ivy.xml	(working copy)
@@ -30,6 +30,9 @@
 	</publications>
 
 	<dependencies>
+		<!-- Dependency for the MongoDB indexer (remove if MongoDB support is not needed) -->
+		<dependency org="org.mongodb" name="mongo-java-driver" rev="2.10.1"
+			conf="*->default"/>
 		<dependency org="org.apache.solr" name="solr-solrj" rev="3.4.0"
 			conf="*->default"/>
 		<dependency org="org.slf4j" name="slf4j-log4j12" rev="1.6.1" conf="*->master" />
Index: src/bin/nutch
===================================================================
--- src/bin/nutch	(revision 1446836)
+++ src/bin/nutch	(working copy)
@@ -61,6 +61,7 @@
   echo "  updatedb          update crawl db from segments after fetching"
   echo "  invertlinks       create a linkdb from parsed segments"
   echo "  mergelinkdb       merge linkdb-s, with optional filtering"
+  echo "  mongodbindex      run the mongodb indexer on parsed segments and linkdb"
   echo "  solrindex         run the solr indexer on parsed segments and linkdb"
   echo "  solrdedup         remove duplicates from solr"
   echo "  solrclean         remove HTTP 301 and 404 documents from solr"
@@ -218,6 +219,8 @@
   CLASS=org.apache.nutch.crawl.LinkDb
 elif [ "$COMMAND" = "mergelinkdb" ] ; then
   CLASS=org.apache.nutch.crawl.LinkDbMerger
+elif [ "$COMMAND" = "mongodbindex" ] ; then
+  CLASS=org.apache.nutch.indexer.mongodb.MongoDbIndexer
 elif [ "$COMMAND" = "solrindex" ] ; then
   CLASS=org.apache.nutch.indexer.solr.SolrIndexer
 elif [ "$COMMAND" = "solrdedup" ] ; then
Index: src/java/org/apache/nutch/indexer/mongodb/MongoDbWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/mongodb/MongoDbWriter.java	(revision 0)
+++ src/java/org/apache/nutch/indexer/mongodb/MongoDbWriter.java	(revision 0)
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.indexer.mongodb;
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.NutchField;
+import org.apache.nutch.indexer.NutchIndexWriter;
+import org.apache.solr.common.util.DateUtil;
+
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.Mongo;
+
+/**
+ * A {@link NutchIndexWriter} that stores Nutch documents in MongoDB.
+ * The MongoDB server is read from the job property
+ * {@link MongoDbConstants#SERVER_URL}; documents are inserted into the
+ * "index" collection of the "nutch" database.
+ */
+public class MongoDbWriter implements NutchIndexWriter {
+
+  private Mongo mongo;
+  // Cached handle to the target collection, created once in open().
+  private DBCollection collection;
+
+  @Override
+  public void open(JobConf job, String name) throws IOException {
+    // Connect once per task; write() reuses the cached collection handle.
+    mongo = new Mongo(job.get(MongoDbConstants.SERVER_URL));
+    DB db = mongo.getDB("nutch");
+    collection = db.getCollection("index");
+  }
+
+  @Override
+  public void write(NutchDocument doc) throws IOException {
+    final BasicDBObject mongoDoc = new BasicDBObject();
+    for (final Entry<String, NutchField> e : doc) {
+      final String key = e.getKey();
+      for (final Object val : e.getValue().getValues()) {
+        // Normalise Date values to the thread-local Solr date format so
+        // dates are stored the same way across all Nutch indexers.
+        if (val instanceof Date) {
+          mongoDoc.put(key, DateUtil.getThreadLocalDateFormat().format(val));
+        } else {
+          mongoDoc.put(key, val);
+        }
+      }
+    }
+    collection.insert(mongoDoc);
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (mongo != null) {
+      mongo.close();
+      mongo = null;
+    }
+  }
+
+  @Override
+  public void delete(String key) throws IOException {
+    // Deletion is not supported by this writer yet.
+  }
+
+}
Index: src/java/org/apache/nutch/indexer/mongodb/MongoDbConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/mongodb/MongoDbConstants.java	(revision 0)
+++ src/java/org/apache/nutch/indexer/mongodb/MongoDbConstants.java	(revision 0)
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.indexer.mongodb;
+
+/**
+ * Configuration keys shared by {@link MongoDbIndexer} and
+ * {@link MongoDbWriter}.
+ */
+public interface MongoDbConstants {
+
+  /** Prefix for all MongoDB related configuration properties. */
+  public static final String MONGODB_PREFIX = "mongodb.";
+
+  /** Job property holding the URL of the MongoDB server to index into. */
+  public static final String SERVER_URL = MONGODB_PREFIX + "server.url";
+
+}
Index: src/java/org/apache/nutch/indexer/mongodb/MongoDbIndexer.java
===================================================================
--- src/java/org/apache/nutch/indexer/mongodb/MongoDbIndexer.java	(revision 0)
+++ src/java/org/apache/nutch/indexer/mongodb/MongoDbIndexer.java	(revision 0)
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.indexer.mongodb;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.nutch.indexer.IndexerMapReduce;
+import org.apache.nutch.indexer.NutchIndexWriterFactory;
+import org.apache.nutch.util.HadoopFSUtil;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
+
+/**
+ * Map-reduce driver that indexes crawled segments into MongoDB via
+ * {@link MongoDbWriter}. Invoked from bin/nutch as "mongodbindex".
+ */
+public class MongoDbIndexer extends Configured implements Tool {
+
+  public static Log LOG = LogFactory.getLog(MongoDbIndexer.class);
+
+  public MongoDbIndexer() {
+    super(null);
+  }
+
+  public MongoDbIndexer(Configuration conf) {
+    super(conf);
+  }
+
+  /**
+   * Runs the indexing job against the given MongoDB server.
+   *
+   * @param mongodbUrl host (and optional port) of the MongoDB server
+   * @param crawlDb path to the crawl database
+   * @param linkDb path to the link database
+   * @param segments parsed segments to index
+   * @throws IOException if the map-reduce job fails
+   */
+  public void indexMongodb(String mongodbUrl, Path crawlDb, Path linkDb,
+      List<Path> segments) throws IOException {
+
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("MongodbIndexer: starting at " + sdf.format(start));
+
+    final JobConf job = new NutchJob(getConf());
+    job.setJobName("index-mongodb " + mongodbUrl);
+
+    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);
+
+    // Pass the server URL down to the MongoDbWriter in the reduce tasks.
+    job.set(MongoDbConstants.SERVER_URL, mongodbUrl);
+
+    NutchIndexWriterFactory.addClassToConf(job, MongoDbWriter.class);
+
+    // Speculative reduce tasks would insert duplicate documents.
+    job.setReduceSpeculativeExecution(false);
+
+    // Records go straight to MongoDB; the file output path is only a
+    // throw-away temporary directory required by the output format.
+    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
+        new Random().nextInt());
+    FileOutputFormat.setOutputPath(job, tmp);
+
+    try {
+      // Let failures propagate so the command exits non-zero when the
+      // job fails, instead of logging the error and reporting success.
+      JobClient.runJob(job);
+      long end = System.currentTimeMillis();
+      LOG.info("MongodbIndexer: finished at " + sdf.format(end)
+          + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    } finally {
+      FileSystem.get(job).delete(tmp, true);
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length < 4) {
+      System.err.println("Usage: MongodbIndexer <mongodb url> <crawldb> <linkdb> (<segment> ... | -dir <segments>)");
+      return -1;
+    }
+
+    final Path crawlDb = new Path(args[1]);
+    final Path linkDb = new Path(args[2]);
+
+    final List<Path> segments = new ArrayList<Path>();
+    for (int i = 3; i < args.length; i++) {
+      if ("-dir".equals(args[i])) {
+        // Expand a directory of segments into the individual segments.
+        Path dir = new Path(args[++i]);
+        FileSystem fs = dir.getFileSystem(getConf());
+        FileStatus[] fstats = fs.listStatus(dir,
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
+        for (Path p : HadoopFSUtil.getPaths(fstats)) {
+          segments.add(p);
+        }
+      } else {
+        segments.add(new Path(args[i]));
+      }
+    }
+
+    try {
+      indexMongodb(args[0], crawlDb, linkDb, segments);
+      return 0;
+    } catch (final Exception e) {
+      LOG.fatal("MongodbIndexer: " + StringUtils.stringifyException(e));
+      return -1;
+    }
+  }
+
+  // Example:
+  // ./bin/nutch mongodbindex localhost crawldb crawldb/linkdb crawldb/segments/*
+  public static void main(String[] args) throws Exception {
+    final int res = ToolRunner.run(NutchConfiguration.create(),
+        new MongoDbIndexer(), args);
+    System.exit(res);
+  }
+}
