Index: build.xml
===================================================================
--- build.xml	(revision 1669716)
+++ build.xml	(working copy)
@@ -990,7 +990,7 @@
         <source path="${plugins.dir}/lib-http/src/java/" />
         <source path="${plugins.dir}/lib-http/src/test/" />
         <source path="${plugins.dir}/lib-selenium/src/java/" />
-        <source path="${plugins.dir}/lib-selenium/src/test/" />
+        <!--<source path="${plugins.dir}/lib-selenium/src/test/" />-->
         <source path="${plugins.dir}/lib-regex-filter/src/java/" />
         <source path="${plugins.dir}/lib-regex-filter/src/test/" />
         <source path="${plugins.dir}/microformats-reltag/src/java/" />
@@ -1015,7 +1015,7 @@
         <source path="${plugins.dir}/protocol-http/src/java/" />
         <source path="${plugins.dir}/protocol-http/src/test/" />
         <source path="${plugins.dir}/protocol-selenium/src/java"/>
-        <source path="${plugins.dir}/protocol-selenium/src/test"/>
+        <!--<source path="${plugins.dir}/protocol-selenium/src/test"/>-->
         <source path="${plugins.dir}/scoring-depth/src/java/" />
         <source path="${plugins.dir}/scoring-link/src/java/" />
         <source path="${plugins.dir}/scoring-opic/src/java/" />
Index: src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/AbstractCommonCrawlFormat.java	(working copy)
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.text.ParseException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.metadata.Metadata;
@@ -27,6 +28,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.ibm.icu.text.SimpleDateFormat;
+
 /**
  * Abstract class that implements {@see CommonCrawlFormat} interface. 
  *
@@ -44,14 +47,27 @@
 	
 	protected String keyPrefix;
 	
-	public AbstractCommonCrawlFormat(String url, byte[] content, Metadata metadata, Configuration conf, String keyPrefix) throws IOException {
+	protected boolean simpleDateFormat;
+	
+	protected boolean jsonArray;
+	
+	protected boolean reverseKey;
+	
+	protected String reverseKeyValue;
+
+	public AbstractCommonCrawlFormat(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException {
 		this.url = url;
 		this.content = content;
 		this.metadata = metadata;
-		this.conf = conf;
-		this.keyPrefix = keyPrefix;
+		this.conf = nutchConf;
+		
+		this.keyPrefix = config.getKeyPrefix();
+		this.simpleDateFormat = config.getSimpleDateFormat();
+		this.jsonArray = config.getJsonArray();
+		this.reverseKey = config.getReverseKey();
+		this.reverseKeyValue = config.getReverseKeyValue();
 	}
-	
+
 	@Override
 	public String getJsonData() throws IOException {
 		try {
@@ -76,12 +92,14 @@
 			writeKeyValue("email", getRequestContactEmail());
 			closeObject("contact");
 			closeObject("client");
-			startObject("headers");
-			writeKeyValue("Accept", getRequestAccept());
-			writeKeyValue("Accept-Encoding", getRequestAcceptEncoding());
-			writeKeyValue("Accept-Language", getRequestAcceptLanguage());
-			writeKeyValue("User-Agent", getRequestUserAgent());
-			closeObject("headers");
+			// start request headers
+			startHeaders("headers", false, true);
+			writeKeyValueWrapper("Accept", getRequestAccept());
+			writeKeyValueWrapper("Accept-Encoding", getRequestAcceptEncoding());
+			writeKeyValueWrapper("Accept-Language", getRequestAcceptLanguage());
+			writeKeyValueWrapper("User-Agent", getRequestUserAgent());
+			//closeObject("headers");
+			closeHeaders("headers", false, true);
 			writeKeyNull("body");
 			closeObject("request");
 			
@@ -92,18 +110,19 @@
 			writeKeyValue("hostname", getResponseHostName());
 			writeKeyValue("address", getResponseAddress());
 			closeObject("server");
-			startObject("headers");
-			writeKeyValue("Content-Encoding", getResponseContentEncoding());
-			writeKeyValue("Content-Type", getResponseContentType());
-			writeKeyValue("Date", getResponseDate());
-			writeKeyValue("Server", getResponseServer());
+			// start response headers
+			startHeaders("headers", false, true);
+			writeKeyValueWrapper("Content-Encoding", getResponseContentEncoding());
+			writeKeyValueWrapper("Content-Type", getResponseContentType());
+			writeKeyValueWrapper("Date", getResponseDate());
+			writeKeyValueWrapper("Server", getResponseServer());
 			for (String name : metadata.names()) {
 				if (name.equalsIgnoreCase("Content-Encoding") || name.equalsIgnoreCase("Content-Type") || name.equalsIgnoreCase("Date") || name.equalsIgnoreCase("Server")) {
 					continue;
 				}
-				writeKeyValue(name, metadata.get(name));
+				writeKeyValueWrapper(name, metadata.get(name));
 			}
-			closeObject("headers");
+			closeHeaders("headers", false, true);
 			writeKeyValue("body", getResponseContent());
 			closeObject("response");
 			
@@ -132,6 +151,12 @@
 	
 	protected abstract void writeKeyNull(String key) throws IOException;
 	
+	protected abstract void startArray(String key, boolean nested, boolean newline) throws IOException;
+	
+	protected abstract void closeArray(String key, boolean nested, boolean newline) throws IOException;
+	
+	protected abstract void writeArrayValue(String value) throws IOException;
+	
 	protected abstract void startObject(String key) throws IOException;
 	
 	protected abstract void closeObject(String key) throws IOException;
@@ -145,7 +170,18 @@
 	}
 	
 	protected String getTimestamp() {
-		return metadata.get(ifNullString(Metadata.LAST_MODIFIED));
+		if (this.simpleDateFormat) {
+			String timestamp = null;
+			try {
+				long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get(Metadata.LAST_MODIFIED))).getTime();
+				timestamp = String.valueOf(epoch);
+			} catch (ParseException pe) {
+				LOG.warn(pe.getMessage());
+			}
+			return timestamp;
+		} else {
+			return ifNullString(metadata.get(Metadata.LAST_MODIFIED));
+		}
 	}
 	
 	protected String getMethod() {
@@ -225,7 +261,18 @@
 	}
 	
 	protected String getResponseDate() {
-		return ifNullString(metadata.get("Date"));
+		if (this.simpleDateFormat) {
+			String timestamp = null;
+			try {
+				long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
+				timestamp = String.valueOf(epoch);
+			} catch (ParseException pe) {
+				LOG.warn(pe.getMessage());
+			}
+			return timestamp;
+		} else {
+			return ifNullString(metadata.get("Date"));
+		}
 	}
 	
 	protected String getResponseServer() {
@@ -237,14 +284,60 @@
 	}
 	
 	protected String getKey() {
-		return url;
+		if (this.reverseKey) {
+			return this.reverseKeyValue;
+		}
+		else {
+			return url;
+		}
 	}
 	
 	protected String getImported() {
-		return new String(""); // TODO
+		if (this.simpleDateFormat) {
+			String timestamp = null;
+			try {
+				long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(ifNullString(metadata.get("Date"))).getTime();
+				timestamp = String.valueOf(epoch);
+			} catch (ParseException pe) {
+				LOG.warn(pe.getMessage());
+			}
+			return timestamp;
+		} else {
+			return ifNullString(metadata.get("Date"));
+		}
 	}
 	
 	private static String ifNullString(String value) {
 		return (value != null) ? value : "";
 	}
+	
+	private void startHeaders(String key, boolean nested, boolean newline) throws IOException {
+		if (this.jsonArray) {
+			startArray(key, nested, newline);
+		}
+		else {
+			startObject(key);
+		}
+	}
+	
+	private void closeHeaders(String key, boolean nested, boolean newline) throws IOException {
+		if (this.jsonArray) {
+			closeArray(key, nested, newline);
+		}
+		else {
+			closeObject(key);
+		}
+	}
+	
+	private void writeKeyValueWrapper(String key, String value) throws IOException {
+		if (this.jsonArray) {
+			startArray(null, true, false);
+			writeArrayValue(key);
+			writeArrayValue(value);
+			closeArray(null, true, false);
+		}
+		else {
+			writeKeyValue(key, value);
+		}
+	}
 }
Index: src/java/org/apache/nutch/tools/CommonCrawlConfig.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlConfig.java	(revision 0)
+++ src/java/org/apache/nutch/tools/CommonCrawlConfig.java	(working copy)
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.tools;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Properties;
+
+public class CommonCrawlConfig implements Serializable {
+
+	/**
+	 * Serial version UID
+	 */
+	private static final long serialVersionUID = 5235013733207799661L;
+	
+	// Prefix for key value in the output format
+	private String keyPrefix = "";
+	
+	private boolean simpleDateFormat = false;
+	
+	private boolean jsonArray = false;
+	
+	private boolean reverseKey = false;
+	
+	private String reverseKeyValue = "";
+	
+	/**
+	 * Default constructor
+	 */
+	public CommonCrawlConfig() {
+		//init(this.getClass().getResourceAsStream("CommonCrawlConfig.properties"));
+	}
+	
+	public CommonCrawlConfig(InputStream stream) {
+		init(stream);
+	}
+	
+	private void init(InputStream stream) {
+		if (stream == null) {
+			return;
+		}
+		Properties properties = new Properties();
+		
+		try {
+			properties.load(stream);
+		} catch (IOException e) {
+			// TODO
+		} finally {
+			try {
+				stream.close();
+			} catch (IOException e) {
+				// TODO
+			}
+		}
+
+		setKeyPrefix(properties.getProperty("keyPrefix", ""));
+		setSimpleDateFormat(Boolean.parseBoolean(properties.getProperty("simpleDateFormat", "False")));
+	}
+	
+	public void setKeyPrefix(String keyPrefix) {
+		this.keyPrefix = keyPrefix;
+	}
+	
+	public void setSimpleDateFormat(boolean simpleDateFormat) {
+		this.simpleDateFormat = simpleDateFormat;
+	}
+	
+	public void setJsonArray(boolean jsonArray) {
+		this.jsonArray = jsonArray;
+	}
+	
+	public void setReverseKey(boolean reverseKey) {
+		this.reverseKey = reverseKey;
+	}
+	
+	public void setReverseKeyValue(String reverseKeyValue) {
+		this.reverseKeyValue = reverseKeyValue;
+	}
+	
+	public String getKeyPrefix() {
+		return this.keyPrefix;
+	}
+	
+	public boolean getSimpleDateFormat() {
+		return this.simpleDateFormat;
+	}
+	
+	public boolean getJsonArray() {
+		return this.jsonArray;
+	}
+	
+	public boolean getReverseKey() {
+		return this.reverseKey;
+	}
+	
+	public String getReverseKeyValue() {
+		return this.reverseKeyValue;
+	}
+}
Index: src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java	(working copy)
@@ -25,6 +25,9 @@
 import java.io.FileFilter;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
@@ -38,6 +41,7 @@
 import org.apache.commons.cli.Option;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
 import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
@@ -52,10 +56,10 @@
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.DumpFileUtil;
 import org.apache.nutch.util.NutchConfiguration;
-
 //Tika imports
 import org.apache.tika.Tika;
 
@@ -65,6 +69,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.ibm.icu.text.DateFormat;
 import com.ibm.icu.text.SimpleDateFormat;
 
 /**
@@ -165,6 +170,8 @@
 
 	private static final Logger LOG = LoggerFactory.getLogger(CommonCrawlDataDumper.class.getName());
 	
+	private CommonCrawlConfig config = null;
+	
 	// Gzip initialization
 	private FileOutputStream fileOutput = null;
 	private BufferedOutputStream bufOutput = null;
@@ -218,6 +225,26 @@
 				.hasArg(true)
 				.withDescription("an optional prefix for key in the output format.")
 				.create("keyPrefix");
+		Option simpleDateFormatOpt = OptionBuilder
+				.withArgName("SimpleDateFormat")
+				.hasArg(false)
+				.withDescription("an optional format for timestamp in GMT epoch milliseconds.")
+				.create("SimpleDateFormat");
+		Option epochFilenameOpt = OptionBuilder
+				.withArgName("epochFilename")
+				.hasArg(false)
+				.withDescription("an optional format for output filename.")
+				.create("epochFilename");
+		Option jsonArrayOpt = OptionBuilder
+				.withArgName("jsonArray")
+				.hasArg(false)
+				.withDescription("an optional format for JSON output.")
+				.create("jsonArray");
+		Option reverseKeyOpt = OptionBuilder
+				.withArgName("reverseKey")
+				.hasArg(false)
+				.withDescription("an optional format for key value in JSON output.")
+				.create("reverseKey");
 
 		// create the options
 		Options options = new Options();
@@ -229,6 +256,11 @@
 		options.addOption(gzipOpt);
 		// create keyPrefix option
 		options.addOption(keyPrefixOpt);
+		// create SimpleDateFormat, epochFilename, jsonArray and reverseKey options
+		options.addOption(simpleDateFormatOpt);
+		options.addOption(epochFilenameOpt);
+		options.addOption(jsonArrayOpt);
+		options.addOption(reverseKeyOpt);
 
 		CommandLineParser parser = new GnuParser();
 		try {
@@ -243,7 +275,18 @@
 			File segmentRootDir = new File(line.getOptionValue("segment"));
 			String[] mimeTypes = line.getOptionValues("mimetype");
 			boolean gzip = line.hasOption("gzip");
+			boolean epochFilename = line.hasOption("epochFilename");
+			
 			String keyPrefix = line.getOptionValue("keyPrefix", "");
+			boolean simpleDateFormat = line.hasOption("SimpleDateFormat");
+			boolean jsonArray = line.hasOption("jsonArray");
+			boolean reverseKey = line.hasOption("reverseKey");
+			
+			CommonCrawlConfig config = new CommonCrawlConfig();
+			config.setKeyPrefix(keyPrefix);
+			config.setSimpleDateFormat(simpleDateFormat);
+			config.setJsonArray(jsonArray);
+			config.setReverseKey(reverseKey);
 
 			if (!outputDir.exists()) {
 				LOG.warn("Output directory: [" + outputDir.getAbsolutePath() + "]: does not exist, creating it.");
@@ -251,9 +294,9 @@
 					throw new Exception("Unable to create: [" + outputDir.getAbsolutePath() + "]");
 			}
 
-			CommonCrawlDataDumper dumper = new CommonCrawlDataDumper();
+			CommonCrawlDataDumper dumper = new CommonCrawlDataDumper(config);
 			
-			dumper.dump(outputDir, segmentRootDir, gzip, mimeTypes, keyPrefix);
+			dumper.dump(outputDir, segmentRootDir, gzip, mimeTypes, epochFilename);
 			
 		} catch (Exception e) {
 			LOG.error(CommonCrawlDataDumper.class.getName() + ": " + StringUtils.stringifyException(e));
@@ -263,6 +306,13 @@
 	}
 	
 	/**
+	 * Constructor
+	 */
+	public CommonCrawlDataDumper(CommonCrawlConfig config) {
+		this.config = config;
+	}
+	
+	/**
 	 * Dumps the reverse engineered CBOR content from the provided segment
 	 * directories if a parent directory contains more than one segment,
 	 * otherwise a single segment can be passed as an argument. If the boolean
@@ -281,8 +331,8 @@
      *            filtered out.
 	 * @throws Exception
 	 */
-	public void dump(File outputDir, File segmentRootDir, boolean gzip,	String[] mimeTypes, String keyPrefix) throws Exception {
+	public void dump(File outputDir, File segmentRootDir, boolean gzip,	String[] mimeTypes, boolean epochFilename) throws Exception {
 		if (!gzip) {
 			LOG.info("Gzipping CBOR data has been skipped");
 		}
 		// total file counts
@@ -290,8 +340,8 @@
 		// filtered file counters
 		Map<String, Integer> filteredCounts = new HashMap<String, Integer>();
 		
-		Configuration conf = NutchConfiguration.create();
-		FileSystem fs = FileSystem.get(conf);
+		Configuration nutchConfig = NutchConfiguration.create();
+		FileSystem fs = FileSystem.get(nutchConfig);
 		File[] segmentDirs = segmentRootDir.listFiles(new FileFilter() {
 			@Override
 			public boolean accept(File file) {
@@ -311,8 +361,6 @@
 
 		for (File segment : segmentDirs) {
 			LOG.info("Processing segment: [" + segment.getAbsolutePath() + "]");
-			// GIUSEPPE: Never used (also in FileDumper.java)!
-			//DataOutputStream doutputStream = null;
 			try {
 				String segmentContentPath = segment.getAbsolutePath() + File.separator + Content.DIR_NAME + "/part-00000/data";
 				Path file = new Path(segmentContentPath);
@@ -321,7 +369,7 @@
 					LOG.warn("Skipping segment: [" + segmentContentPath	+ "]: no data directory present");
 					continue;
 				}
-				SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
+				SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, nutchConfig);
 
 				if (!new File(file.toString()).exists()) {
 					LOG.warn("Skipping segment: [" + segmentContentPath	+ "]: no data directory present");
@@ -334,22 +382,50 @@
 				while (reader.next(key)) {
 					content = new Content();
 					reader.getCurrentValue(content);
+					Metadata metadata = content.getMetadata();
 					String url = key.toString();
 					String baseName = FilenameUtils.getBaseName(url);
 					String extension = FilenameUtils.getExtension(url);
 					if (extension == null || extension.equals("")) {
 						extension = "html";
 					}
-
-					String md5Ofurl = DumpFileUtil.getUrlMD5(url);
-					String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(), md5Ofurl, !gzip);
-					String filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extension);
-					String outputFullPath = String.format("%s/%s", fullDir, filename);
-
-					String [] fullPathLevels = fullDir.split(File.separator);
-					String firstLevelDirName = fullPathLevels[fullPathLevels.length-2]; 
-					String secondLevelDirName = fullPathLevels[fullPathLevels.length-1];
 					
+					String outputFullPath = null;
+					String outputRelativePath = null;
+					String filename = null;
+					String timestamp = null;
+					String reverseKey = null;
+					
+					if (epochFilename || config.getReverseKey()) {	
+						try {
+							long epoch = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z").parse(getDate(metadata.get("Date"))).getTime();
+							timestamp = String.valueOf(epoch);
+						} catch (ParseException pe) {
+							LOG.warn(pe.getMessage());
+						}
+						
+						reverseKey = reverseUrl(url);
+						config.setReverseKeyValue(reverseKey.replace("/", "_") + "_" + DigestUtils.shaHex(url));
+					}	
+					
+					if (epochFilename) {
+						//outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(), url, timestamp, extension, !gzip);
+						outputFullPath = DumpFileUtil.createFileNameFromUrl(outputDir.getAbsolutePath(), reverseKey, url, timestamp, extension, !gzip);
+						outputRelativePath = outputFullPath.substring(0, outputFullPath.lastIndexOf(File.separator)); // directory part; substring's end index already excludes the separator
+						filename = content.getMetadata().get(Metadata.DATE) + "." + extension;
+					}
+					else {
+						String md5Ofurl = DumpFileUtil.getUrlMD5(url);
+						String fullDir = DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(), md5Ofurl, !gzip);
+						filename = DumpFileUtil.createFileName(md5Ofurl, baseName, extension);
+						outputFullPath = String.format("%s/%s", fullDir, filename);
+	
+						String [] fullPathLevels = fullDir.split(File.separator);
+						String firstLevelDirName = fullPathLevels[fullPathLevels.length-2]; 
+						String secondLevelDirName = fullPathLevels[fullPathLevels.length-1];
+						outputRelativePath = firstLevelDirName + File.separator + secondLevelDirName; // same two-level layout the tar entry name used before
+					}
+					
 					// Encode all filetypes if no mimetypes have been given
 					Boolean filter = (mimeTypes == null);
 					
@@ -357,7 +433,7 @@
 					try {
 						String mimeType = new Tika().detect(content.getContent());
 						// Maps file to JSON-based structure
-						CommonCrawlFormat format = CommonCrawlFormatFactory.getCommonCrawlFormat("JACKSON", url, content.getContent(), content.getMetadata(), conf, keyPrefix);
+						CommonCrawlFormat format = CommonCrawlFormatFactory.getCommonCrawlFormat("JACKSON", url, content.getContent(), metadata, nutchConfig, config);
 						jsonData = format.getJsonData();
 
 						collectStats(typeCounts, mimeType);
@@ -375,7 +451,6 @@
 						byte[] byteData = serializeCBORData(jsonData);
 						
 						if (!gzip) {
-							//String outputFullPath = outputDir + File.separator + filename;
 							File outputFile = new File(outputFullPath);
 							if (outputFile.exists()) {
 								LOG.info("Skipping writing: [" + outputFullPath	+ "]: file already exists");
@@ -392,7 +467,8 @@
 							else {
 								fileList.add(outputFullPath);
 								LOG.info("Compressing: [" + outputFullPath + "]");
-								TarArchiveEntry tarEntry = new TarArchiveEntry(firstLevelDirName + File.separator + secondLevelDirName + File.separator + filename);
+								//TarArchiveEntry tarEntry = new TarArchiveEntry(firstLevelDirName + File.separator + secondLevelDirName + File.separator + filename);
+								TarArchiveEntry tarEntry = new TarArchiveEntry(outputRelativePath + File.separator + filename);
 								tarEntry.setSize(byteData.length);
 								tarOutput.putArchiveEntry(tarEntry);
 								tarOutput.write(byteData);
@@ -500,4 +576,49 @@
 		}
 		return builder.toString();
 	}
+	
+	/**
+	 * Gets the current date if the given timestamp is empty or null.
+	 * @param timestamp the timestamp
+	 * @return the current timestamp if the given one is null.
+	 */
+	private String getDate(String timestamp) {
+		if (timestamp == null || timestamp.isEmpty()) {
+			DateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss z");
+			timestamp = dateFormat.format(new Date());
+		}
+		return timestamp;
+			
+	}
+	
+	public static String reverseUrl(String urlString) {
+    	URL url = null;
+		String reverseKey = null;
+		try {
+			url = new URL(urlString);
+			
+			String[] hostPart = url.getHost().replace('.', '/').split("/");
+			String[] pathPart = url.getPath().split("/");
+			if (pathPart != null && pathPart.length > 0) {
+				pathPart = Arrays.copyOf(pathPart, pathPart.length-1);
+			}
+			
+			StringBuilder sb = new StringBuilder();
+			sb.append(hostPart[hostPart.length-1]);
+			for (int i = hostPart.length-2; i >= 0; i--) {
+				sb.append("/" + hostPart[i]);
+			}
+			for (int i = 0; i < pathPart.length; i++) {
+				if (pathPart[i].equals(""))
+					continue;
+				sb.append("/" + pathPart[i]);
+			}
+			
+			reverseKey = sb.toString();
+		} catch (MalformedURLException e) {
+			LOG.error("Failed to parse URL: {}", urlString);
+		}
+		
+		return reverseKey;
+    }
 }
Index: src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/CommonCrawlFormatFactory.java	(working copy)
@@ -34,24 +34,24 @@
 	 * @param url the url.
 	 * @param content the content.
 	 * @param metadata the metadata.
-	 * @param conf the configuration.
+	 * @param nutchConf the configuration.
+	 * @param config the CommonCrawl output configuration.
 	 * @return the new {@see CommonCrawlFormat} object.
 	 * @throws IOException If any I/O error occurs.
 	 */
-	public static CommonCrawlFormat getCommonCrawlFormat(String formatType, String url, byte[] content,
-			Metadata metadata, Configuration conf, String keyPrefix) throws IOException {
+	public static CommonCrawlFormat getCommonCrawlFormat(String formatType, String url, byte[] content,	Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException {
 		if (formatType == null) {
 			return null;
 		}
 		
 		if (formatType.equalsIgnoreCase("jackson")) {
-			return new CommonCrawlFormatJackson(url, content, metadata, conf, keyPrefix);
+			return new CommonCrawlFormatJackson(url, content, metadata, nutchConf, config);
 		}
 		else if (formatType.equalsIgnoreCase("jettinson")) {
-			return new CommonCrawlFormatJettinson(url, content, metadata, conf, keyPrefix);
+			return new CommonCrawlFormatJettinson(url, content, metadata, nutchConf, config);
 		}
 		else if (formatType.equalsIgnoreCase("simple")) {
-			return new CommonCrawlFormatSimple(url, content, metadata, conf, keyPrefix);
+			return new CommonCrawlFormatSimple(url, content, metadata, nutchConf, config);
 		}
 		
 		return null;
Index: src/java/org/apache/nutch/tools/CommonCrawlFormatJackson.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlFormatJackson.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/CommonCrawlFormatJackson.java	(working copy)
@@ -38,9 +38,9 @@
 	
 	private JsonGenerator generator;
 
-	public CommonCrawlFormatJackson(String url, byte[] content,
-			Metadata metadata, Configuration conf, String keyPrefix) throws IOException {
-		super(url, content, metadata, conf, keyPrefix);
+	
+	public CommonCrawlFormatJackson(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException {
+		super(url, content, metadata, nutchConf, config);
 		
 		JsonFactory factory = new JsonFactory();
 		this.out = new ByteArrayOutputStream();
@@ -58,10 +58,28 @@
 	@Override
 	protected void writeKeyNull(String key) throws IOException {
 		generator.writeFieldName(key);
-		generator.writeNull();;
+		generator.writeNull();
 	}
 	
 	@Override
+	protected void startArray(String key, boolean nested, boolean newline) throws IOException {
+		if (key != null) {
+			generator.writeFieldName(key);
+		}
+		generator.writeStartArray();
+	}
+	
+	@Override
+	protected void closeArray(String key, boolean nested, boolean newline) throws IOException {
+		generator.writeEndArray();
+	}
+	
+	@Override
+	protected void writeArrayValue(String value) throws IOException {
+		generator.writeString(value);
+	}
+	
+	@Override
 	protected void startObject(String key) throws IOException {
 		if (key != null) {
 			generator.writeFieldName(key);
Index: src/java/org/apache/nutch/tools/CommonCrawlFormatJettinson.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlFormatJettinson.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/CommonCrawlFormatJettinson.java	(working copy)
@@ -23,6 +23,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.metadata.Metadata;
+import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
 
@@ -32,19 +33,20 @@
  */
 public class CommonCrawlFormatJettinson extends AbstractCommonCrawlFormat {
 	
-	private Deque<JSONObject> stack;
+	private Deque<JSONObject> stackObjects;
+	
+	private Deque<JSONArray> stackArrays = new ArrayDeque<JSONArray>(); // arrays currently open; initialised here because the constructor only sets stackObjects
 
-	public CommonCrawlFormatJettinson(String url, byte[] content,
-			Metadata metadata, Configuration conf, String keyPrefix) throws IOException {
-		super(url, content, metadata, conf, keyPrefix);
+	public CommonCrawlFormatJettinson(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException {
+		super(url, content, metadata, nutchConf, config);
 		
-		stack = new ArrayDeque<JSONObject>();
+		stackObjects = new ArrayDeque<JSONObject>();
 	}
 	
 	@Override
 	protected void writeKeyValue(String key, String value) throws IOException {
 		try {
-			stack.getFirst().put(key, value);
+			stackObjects.getFirst().put(key, value);
 		} catch (JSONException jsone) {
 			throw new IOException(jsone.getMessage());
 		}
@@ -53,24 +55,54 @@
 	@Override
 	protected void writeKeyNull(String key) throws IOException {
 		try {
-			stack.getFirst().put(key, JSONObject.NULL);
+			stackObjects.getFirst().put(key, JSONObject.NULL);
 		} catch (JSONException jsone) {
 			throw new IOException(jsone.getMessage());
 		}
 	}
 	
 	@Override
+	protected void startArray(String key, boolean nested, boolean newline) throws IOException {
+		JSONArray array = new JSONArray();
+		stackArrays.push(array);
+	}
+	
+	@Override
+	protected void closeArray(String key, boolean nested, boolean newline) throws IOException {
+		try {
+			if (!stackArrays.isEmpty()) { // unlike stackObjects there is no root array to keep, so any open array can be closed
+				JSONArray array = stackArrays.pop();
+				if (nested) {
+					stackArrays.getFirst().put(array); // append to the enclosing array
+				}
+				else {
+					stackObjects.getFirst().put(key, array); // attach to the current object under key
+				}
+			}
+		} catch (JSONException jsone) {
+			throw new IOException(jsone.getMessage());
+		}
+	}
+	
+	@Override
+	protected void writeArrayValue(String value) throws IOException {
+		if (!stackArrays.isEmpty()) { // write into the innermost open array, even when it is the only one
+			stackArrays.getFirst().put(value);
+		}
+	}
+	
+	@Override
 	protected void startObject(String key) throws IOException {
 		JSONObject object = new JSONObject();
-		stack.push(object);
+		stackObjects.push(object);
 	}
 	
 	@Override
 	protected void closeObject(String key) throws IOException {
 		try {
-			if (stack.size() > 1) {
-				JSONObject object = stack.pop();
-				stack.getFirst().put(key, object);
+			if (stackObjects.size() > 1) {
+				JSONObject object = stackObjects.pop();
+				stackObjects.getFirst().put(key, object);
 			}
 		} catch (JSONException jsone) {
 			throw new IOException(jsone.getMessage());
@@ -80,7 +112,7 @@
 	@Override
 	protected String generateJson() throws IOException {
 		try {
-			return stack.getFirst().toString(2);
+			return stackObjects.getFirst().toString(2);
 		} catch (JSONException jsone) {
 			throw new IOException(jsone.getMessage());
 		}
Index: src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java
===================================================================
--- src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java	(revision 1669716)
+++ src/java/org/apache/nutch/tools/CommonCrawlFormatSimple.java	(working copy)
@@ -32,22 +32,53 @@
 	
 	private int tabCount;
 	
-	public CommonCrawlFormatSimple(String url, byte[] content, Metadata metadata,
-			Configuration conf, String keyPrefix) throws IOException {
-		super(url, content, metadata, conf, keyPrefix);
+	public CommonCrawlFormatSimple(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException {
+		super(url, content, metadata, nutchConf, config);
 		
 		this.sb = new StringBuilder();
 		this.tabCount = 0;
 	}
 	
+	@Override
 	protected void writeKeyValue(String key, String value) throws IOException {
 		sb.append(printTabs() + "\"" + key + "\": " + quote(value) + ",\n");
 	}
 	
+	@Override
 	protected void writeKeyNull(String key) throws IOException {
 		sb.append(printTabs() + "\"" + key + "\": null,\n");
 	}
 	
+	@Override
+	protected void startArray(String key, boolean nested, boolean newline) throws IOException {
+		String name = (key != null) ? "\"" + key + "\": " : "";
+		String nl = (newline) ? "\n" : "";
+		sb.append(printTabs() + name + "[" + nl);
+		if (newline) {
+			this.tabCount++;
+		}
+	}
+	
+	@Override
+	protected void closeArray(String key, boolean nested, boolean newline) throws IOException {
+		if (sb.charAt(sb.length()-1) == ',') {
+			sb.deleteCharAt(sb.length()-1); // delete comma left directly after the last array value
+		}
+		else if (sb.charAt(sb.length()-2) == ',') {
+			sb.deleteCharAt(sb.length()-2); // delete comma that precedes the trailing newline
+		}
+		if (newline) {
+			this.tabCount--; // undo the indent added by startArray before printing the closing bracket
+		}
+		String nl = (newline) ? printTabs() : "";
+		sb.append(nl + "],\n");
+	}
+	
+	@Override
+	protected void writeArrayValue(String value) throws IOException {
+		sb.append(quote(value) + ","); // same quoting helper writeKeyValue uses for values
+	}
+	
 	protected void startObject(String key) throws IOException {
 		String name = "";
 		if (key != null) {
@@ -58,7 +89,9 @@
 	}
 	
 	protected void closeObject(String key) throws IOException {
-		sb.deleteCharAt(sb.length()-2); // delete comma
+		if (sb.charAt(sb.length()-2) == ',') {
+			sb.deleteCharAt(sb.length()-2); // delete comma
+		}
 		this.tabCount--;
 		sb.append(printTabs() + "},\n");
 	}
Index: src/java/org/apache/nutch/util/DumpFileUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DumpFileUtil.java	(revision 1669716)
+++ src/java/org/apache/nutch/util/DumpFileUtil.java	(working copy)
@@ -17,6 +17,7 @@
 
 package org.apache.nutch.util;
 
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.io.MD5Hash;
@@ -25,6 +26,9 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Arrays;
 
 public class DumpFileUtil {
     private static final Logger LOG = LoggerFactory.getLogger(DumpFileUtil.class
@@ -81,4 +85,21 @@
 
         return String.format(FILENAME_PATTERN, md5, fileBaseName, fileExtension);
     }
+    
+    public static String createFileNameFromUrl(String basePath, String reverseKey, String urlString, String epochScrapeTime, String fileExtension, boolean makeDir) {
+		String fullDirPath = basePath + File.separator + reverseKey + File.separator + DigestUtils.shaHex(urlString);
+		
+		if (makeDir) {
+	        try {
+	            FileUtils.forceMkdir(new File(fullDirPath));
+	        } catch (IOException e) {
+	            LOG.error("Failed to create dir: {}", fullDirPath);
+	            fullDirPath = null;
+	        }
+        }
+		
+		String outputFullPath = fullDirPath + File.separator + epochScrapeTime + "." + fileExtension;
+		
+		return outputFullPath;
+    }
 }
