Index: src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(revision 1296159)
+++ src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(working copy)
@@ -315,7 +315,7 @@
         // the arc file,  TODO: currently this doesn't handle text of errors
         // pages (i.e. 404, etc.). We assume we won't get those.
         ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
-        Content content = new Content(urlStr, urlStr, bytes.get(), contentType,
+        Content content = new Content(urlStr, urlStr, bytes.getBytes(), contentType,
           new Metadata(), getConf());
         
         // set the url version into the metadata
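
A note on the replacement above: BytesWritable.get() was deprecated in favor
of getBytes(), and both return the backing buffer, so the swap is
behavior-preserving. That buffer may be padded beyond the valid region; a
minimal sketch of the caveat, assuming nothing beyond Hadoop's BytesWritable:

    import java.util.Arrays;
    import org.apache.hadoop.io.BytesWritable;

    public class BytesWritableSketch {
      public static void main(String[] args) {
        BytesWritable bytes = new BytesWritable("hello".getBytes());
        // getBytes() returns the backing array, which can be longer than
        // the valid data; copy out getLength() bytes for an exact fit.
        byte[] exact = Arrays.copyOf(bytes.getBytes(), bytes.getLength());
        System.out.println(exact.length); // 5
      }
    }
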
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1296159)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -33,7 +33,6 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.ArrayFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.VersionMismatchException;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.GenericOptionsParser;
@@ -93,21 +92,21 @@
     switch (oldVersion) {
     case 0:
     case 1:
-      url = UTF8.readString(in); // read url
-      base = UTF8.readString(in); // read base
+      url = Text.readString(in); // read url
+      base = Text.readString(in); // read base
 
       content = new byte[in.readInt()]; // read content
       in.readFully(content);
 
-      contentType = UTF8.readString(in); // read contentType
+      contentType = Text.readString(in); // read contentType
       // reconstruct metadata
       int keySize = in.readInt();
       String key;
       for (int i = 0; i < keySize; i++) {
-        key = UTF8.readString(in);
+        key = Text.readString(in);
         int valueSize = in.readInt();
         for (int j = 0; j < valueSize; j++) {
-          metadata.add(key, UTF8.readString(in));
+          metadata.add(key, Text.readString(in));
         }
       }
       break;
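
Text.readString and Text.writeString are the drop-in replacements used above.
One hedged caveat: UTF8 wrote a two-byte length followed by modified UTF-8,
while Text writes a vint length followed by standard UTF-8, so version 0/1
records serialized by the old UTF8 code are not guaranteed to be
byte-compatible with this reader. A self-contained round-trip sketch:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.io.Text;

    public class TextStringSketch {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buf);
        // writeString emits a vint length, then standard UTF-8 bytes.
        Text.writeString(out, "http://example.com/");
        DataInputStream in =
            new DataInputStream(new ByteArrayInputStream(buf.toByteArray()));
        System.out.println(Text.readString(in)); // http://example.com/
      }
    }
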
Index: src/java/org/apache/nutch/segment/SegmentReader.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentReader.java	(revision 1296159)
+++ src/java/org/apache/nutch/segment/SegmentReader.java	(working copy)
@@ -43,7 +43,6 @@
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.FileInputFormat;
@@ -86,8 +85,9 @@
 
     public void map(WritableComparable key, Writable value,
         OutputCollector<Text, NutchWritable> collector, Reporter reporter) throws IOException {
-      // convert on the fly from old formats with UTF8 keys
-      if (key instanceof UTF8) {
+      // UTF8 is deprecated and has been replaced by Text; normalize the
+      // key into a single reusable Text instance.
+      if (key instanceof Text) {
         newKey.set(key.toString());
         key = newKey;
       }
@@ -252,7 +252,7 @@
         writer.close();
       }
     }
-    fs.delete(tempDir);
+    fs.delete(tempDir, true);
     if (LOG.isInfoEnabled()) { LOG.info("SegmentReader: done"); }
   }
 
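
FileSystem.delete(Path) was deprecated in favor of the two-argument form,
which makes recursion explicit rather than implied. A minimal sketch,
assuming a default Hadoop configuration and a hypothetical temp path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class DeleteSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path tempDir = new Path("/tmp/segread-temp"); // hypothetical path
        // true: remove the directory and everything beneath it;
        // false: fail on a non-empty directory.
        boolean deleted = fs.delete(tempDir, true);
        System.out.println("deleted: " + deleted);
      }
    }
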
Index: src/java/org/apache/nutch/crawl/CrawlDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbReader.java	(revision 1296159)
+++ src/java/org/apache/nutch/crawl/CrawlDbReader.java	(working copy)
@@ -19,6 +19,7 @@
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.Closeable;
 import java.net.URL;
 import java.util.Date;
 import java.util.Iterator;
@@ -35,7 +36,6 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Closeable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.MapFile;
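
org.apache.hadoop.io.Closeable was deprecated as a duplicate of
java.io.Closeable; the contract is the same single close() method, so only
the import changes. A sketch of the shape CrawlDbReader keeps:

    import java.io.Closeable;
    import java.io.IOException;

    public class ReaderSketch implements Closeable {
      @Override
      public void close() throws IOException {
        // release any open MapFile/SequenceFile readers here
      }
    }
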
Index: src/java/org/apache/nutch/crawl/CrawlDatum.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDatum.java	(revision 1296159)
+++ src/java/org/apache/nutch/crawl/CrawlDatum.java	(working copy)
@@ -262,7 +262,7 @@
     if (version > 3) {
       boolean hasMetadata = false;
       if (version < 7) {
-        MapWritable oldMetaData = new MapWritable();
+        org.apache.hadoop.io.MapWritable oldMetaData = new org.apache.hadoop.io.MapWritable();
         if (in.readBoolean()) {
           hasMetadata = true;
           metaData = new org.apache.hadoop.io.MapWritable();
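
While both the old org.apache.nutch.crawl.MapWritable and Hadoop's own
MapWritable are on the classpath, the fully qualified name above avoids an
import clash. Basic usage of the Hadoop class, with hypothetical key/value
names, as a sketch:

    import org.apache.hadoop.io.MapWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class MapWritableSketch {
      public static void main(String[] args) {
        // MapWritable is a Map<Writable, Writable> that serializes itself.
        MapWritable meta = new MapWritable();
        meta.put(new Text("key"), new Text("value"));
        Writable value = meta.get(new Text("key"));
        System.out.println(value); // value
      }
    }
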
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1296159)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -31,12 +31,12 @@
       org.apache.hadoop.io.BytesWritable.class,
       org.apache.hadoop.io.FloatWritable.class,
       org.apache.hadoop.io.IntWritable.class,
+      org.apache.hadoop.io.MapWritable.class,
       org.apache.hadoop.io.Text.class,
       org.apache.hadoop.io.MD5Hash.class,
       org.apache.nutch.crawl.CrawlDatum.class,
       org.apache.nutch.crawl.Inlink.class,
       org.apache.nutch.crawl.Inlinks.class,
-      org.apache.nutch.crawl.MapWritable.class,
       org.apache.nutch.fetcher.FetcherOutput.class,
       org.apache.nutch.metadata.Metadata.class,
       org.apache.nutch.parse.Outlink.class,
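
NutchWritable dispatches through GenericWritable, which writes each value as
a one-byte index into this array followed by the value's own bytes, so the
array order is part of the on-disk format and moving entries shifts the ids
of everything after them. A minimal sketch of that mechanism, under the
assumption that NutchWritable keeps GenericWritable's semantics:

    import org.apache.hadoop.io.GenericWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class DemoWritable extends GenericWritable {
      // Each value is serialized as writeByte(index into TYPES) + payload,
      // so reordering TYPES changes how previously written data decodes.
      @SuppressWarnings("unchecked")
      private static final Class<? extends Writable>[] TYPES =
          (Class<? extends Writable>[]) new Class[] {
              Text.class, IntWritable.class };

      @Override
      protected Class<? extends Writable>[] getTypes() {
        return TYPES;
      }
    }
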
Index: src/java/org/apache/nutch/crawl/LinkDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbReader.java	(revision 1296159)
+++ src/java/org/apache/nutch/crawl/LinkDbReader.java	(working copy)
@@ -37,6 +37,7 @@
 
 import java.text.SimpleDateFormat;
 import java.util.Iterator;
+import java.io.Closeable;
 
 /** . */
 public class LinkDbReader extends Configured implements Tool, Closeable {
Index: src/java/org/apache/nutch/parse/ParseSegment.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseSegment.java	(revision 1296159)
+++ src/java/org/apache/nutch/parse/ParseSegment.java	(working copy)
@@ -75,7 +75,7 @@
                   OutputCollector<Text, ParseImpl> output, Reporter reporter)
     throws IOException {
     // convert on the fly from old UTF8 keys
-    if (key instanceof UTF8) {
+    if (key instanceof Text) {
       newKey.set(key.toString());
       key = newKey;
     }
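
This mirrors the SegmentReader change above: with the UTF8 import gone,
"key instanceof UTF8" no longer compiles, and the branch now normalizes Text
keys into a reusable instance (keys genuinely serialized as UTF8 are
presumably no longer converted here). The pattern in isolation, as a sketch
with a hypothetical helper class:

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;

    public class KeyNormalizer {
      private final Text newKey = new Text();

      // Copy any Text key into one reusable instance so downstream code
      // always sees the same object; other key types pass through as-is.
      public WritableComparable<?> normalize(WritableComparable<?> key) {
        if (key instanceof Text) {
          newKey.set(key.toString());
          key = newKey;
        }
        return key;
      }
    }
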
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1296159)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -142,10 +142,10 @@
    *          The byte data, returned from the crawl, if any.
    * @return The correctly, automatically guessed {@link MimeType} name.
    */
-  public String autoResolveContentType(String typeName, String url, byte[] data) {
+  public String autoResolveContentType(String typeName, String url, byte[] data) throws MimeTypeException {
     String retType = null;
     String magicType = null;
-    MimeType type = null;
+    String mimeType = null;
     String cleanedMimeType = null;
 
     try {
@@ -158,22 +158,21 @@
 
     // first try to get the type from the cleaned type name
     try {
-      type = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType)
+      mimeType = cleanedMimeType != null ? this.mimeTypes.forName(cleanedMimeType).getName()
           : null;
     } catch (MimeTypeException e) {
-      type = null;
+      mimeType = null;
     }
 
     // if returned null, or if it's the default type then try url resolution
-    if (type == null
-        || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
+    if (mimeType == null
+        || (mimeType != null && mimeType.equals(MimeTypes.OCTET_STREAM))) {
       // If no mime-type header, or cannot find a corresponding registered
       // mime-type, then guess a mime-type from the url pattern
-      type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
-          .getMimeType(url) : type;
+      mimeType = tika.detect(url) != null ? tika.detect(url) : mimeType;
     }
 
-    retType= type.getName();
+    retType = mimeType;
 
     // if magic is enabled use mime magic to guess if the mime type returned
     // from the magic guess is different than the one that's already set so far
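
The name-based lookup keeps MimeTypes.forName, whose checked
MimeTypeException justifies the catch above, while the URL fallback moves to
the Tika facade. Tika.detect(String) throws no checked exception and, as
documented, falls back to application/octet-stream rather than returning
null, which makes the null guard above defensive. A short sketch using only
tika-core, with hypothetical file names:

    import org.apache.tika.Tika;

    public class TikaDetectSketch {
      public static void main(String[] args) {
        Tika tika = new Tika();
        // Name-based detection: looks at the name/extension, not content.
        System.out.println(tika.detect("report.pdf"));    // application/pdf
        System.out.println(tika.detect("logo.png"));      // image/png
        System.out.println(tika.detect("no-extension"));  // application/octet-stream
      }
    }
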
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1296159)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -21,6 +21,7 @@
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URI;
 import java.net.URLDecoder;
 import java.util.HashMap;
 import java.util.Map;
@@ -147,7 +148,7 @@
   private PluginDescriptor parseManifestFile(String pManifestPath)
       throws MalformedURLException, SAXException, IOException,
       ParserConfigurationException {
-    Document document = parseXML(new File(pManifestPath).toURL());
+    Document document = parseXML(new File(pManifestPath).toURI().toURL());
     String pPath = new File(pManifestPath).getParent();
     return parsePlugin(document, pPath);
   }
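
File.toURL() was deprecated precisely because it does not escape characters
that are illegal in URLs (a space in a plugin path being the typical case);
going through toURI() first percent-escapes them. A quick sketch with a
hypothetical path:

    import java.io.File;
    import java.net.URL;

    public class FileUrlSketch {
      public static void main(String[] args) throws Exception {
        File f = new File("/opt/nutch/my plugins/plugin.xml");
        // toURI() percent-escapes the space; toURL() alone leaves it raw.
        URL url = f.toURI().toURL();
        System.out.println(url); // file:/opt/nutch/my%20plugins/plugin.xml
      }
    }
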
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1296159)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Locale;
@@ -214,12 +215,17 @@
 
   /**
    * Adds a exported library with a relative path to the plugin directory.
+   * Illegal characters in the path are automatically escaped: the
+   * abstract pathname is first converted into a URI via the toURI
+   * method, and that URI is then converted into a URL via the
+   * URI.toURL method.
    * 
    * @param pLibPath
    */
   public void addExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
+    URI uri = new File(getPluginPath() + File.separator + pLibPath).toURI();
+    URL url = uri.toURL();
     fExportedLibs.add(url);
   }
 
@@ -242,13 +248,18 @@
   }
 
   /**
-   * Adds a not exported library with a plugin directory relative path.
+   * Adds a non-exported library with a path relative to the plugin
+   * directory. Illegal characters in the path are automatically escaped:
+   * the abstract pathname is first converted into a URI via the toURI
+   * method, and that URI is then converted into a URL via the
+   * URI.toURL method.
    * 
    * @param pLibPath
    */
   public void addNotExportedLibRelative(String pLibPath)
       throws MalformedURLException {
-    URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
+    URI uri = new File(getPluginPath() + File.separator + pLibPath).toURI();
+    URL url = uri.toURL();
     fNotExportedLibs.add(url);
   }
 
@@ -279,7 +290,7 @@
     try {
       for (File file2 : file.listFiles()) {
         if (file2.getAbsolutePath().endsWith("properties"))
-          arrayList.add(file2.getParentFile().toURL());
+          arrayList.add(file2.getParentFile().toURI().toURL());
       }
     } catch (MalformedURLException e) {
       LOG.debug(getPluginId() + " " + e.toString());
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(revision 1296159)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(working copy)
@@ -19,6 +19,7 @@
 
 // JDK imports
 import java.net.URL;
+import java.net.URI;
 import java.util.Date;
 import java.util.TreeMap;
 import java.io.IOException;
@@ -151,7 +152,12 @@
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
         //hdrs.put("Location", f.getCanonicalFile().toURI());
-        headers.set(Response.LOCATION, f.getCanonicalFile().toURL().toString());
+        //
+        // we want characters that are illegal in URLs to be escaped
+        // automatically, so convert the abstract pathname into a URI
+        // via the toURI method first, and then convert that URI into
+        // a URL via the URI.toURL method.
+        headers.set(Response.LOCATION, f.getCanonicalFile().toURI().toURL().toString());
 
         this.code = 300;  // http redirect
         return;
