Index: conf/parse-plugins.xml
===================================================================
--- conf/parse-plugins.xml	(revision 414852)
+++ conf/parse-plugins.xml	(working copy)
@@ -36,8 +36,7 @@
 	</mimeType>
 
 	<mimeType name="application/pdf">
-		<plugin id="parse-pdf" />
-		<plugin id="parse-text" />
+		<plugin id="parse-ext" />
 	</mimeType>
 
 	<mimeType name="application/postscript">
Index: src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
===================================================================
--- src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java	(revision 414852)
+++ src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java	(working copy)
@@ -154,6 +154,7 @@
         }
       }        
     }
+    System.out.println("OPTIMIZE: " + query.toString());
     if (sortField == null && !reverse) {
 
       // no hit limit
Index: src/java/org/apache/nutch/searcher/NutchBean.java
===================================================================
--- src/java/org/apache/nutch/searcher/NutchBean.java	(revision 414852)
+++ src/java/org/apache/nutch/searcher/NutchBean.java	(working copy)
@@ -134,9 +134,11 @@
     }
 
     LOG.info("opening segments in " + segmentsDir);
-    FetchedSegments segments = new FetchedSegments(this.fs, segmentsDir.toString(),this.conf);
-    
+    FetchedSegments segments =
+	new FetchedSegments(this.fs, segmentsDir.toString(), this.conf);
+    System.out.println("SEGMENTS " + segments);
     this.segmentNames = segments.getSegmentNames();
+    System.out.println("SEGMENTS " + this.segmentNames);
 
     this.searcher = indexSearcher;
     this.detailer = indexSearcher;
Index: src/java/org/apache/nutch/searcher/IndexSearcher.java
===================================================================
--- src/java/org/apache/nutch/searcher/IndexSearcher.java	(revision 414852)
+++ src/java/org/apache/nutch/searcher/IndexSearcher.java	(working copy)
@@ -57,6 +57,7 @@
     this.conf = conf;
     this.fs = FileSystem.get(conf);
     for (int i = 0; i < indexDirs.length; i++) {
+      System.out.println("READER " + indexDirs[i]);
       readers[i] = IndexReader.open(getDirectory(indexDirs[i]));
     }
     init(new MultiReader(readers), conf);
@@ -92,6 +93,7 @@
     throws IOException {
     org.apache.lucene.search.BooleanQuery luceneQuery =
       this.queryFilters.filter(query);
+    System.out.println("QUERY: " + luceneQuery.toString());
     return translateHits
       (optimizer.optimize(luceneQuery, luceneSearcher, numHits,
                           sortField, reverse),
Index: src/java/org/apache/nutch/searcher/OpenSearchServlet.java
===================================================================
--- src/java/org/apache/nutch/searcher/OpenSearchServlet.java	(revision 414852)
+++ src/java/org/apache/nutch/searcher/OpenSearchServlet.java	(working copy)
@@ -262,23 +262,64 @@
   private static void addNode(Document doc, Node parent,
                               String name, String text) {
     Element child = doc.createElement(name);
-    child.appendChild(doc.createTextNode(text));
+    child.appendChild(doc.createTextNode(getLegalXml(text))); // legal XML only
     parent.appendChild(child);
   }
 
   private static void addNode(Document doc, Node parent,
                               String ns, String name, String text) {
     Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name);
-    child.appendChild(doc.createTextNode(text));
+    child.appendChild(doc.createTextNode(getLegalXml(text))); // legal XML only
     parent.appendChild(child);
   }
 
   private static void addAttribute(Document doc, Element node,
                                    String name, String value) {
     Attr attribute = doc.createAttribute(name);
-    attribute.setValue(value);
+    attribute.setValue(getLegalXml(value)); // legal XML only
     node.getAttributes().setNamedItem(attribute);
   }
 
+  /**
+   * Ensure string is legal xml; surrogate pairs (#x10000-#x10FFFF) are kept.
+   * @param text String to verify; may be <code>null</code>.
+   * @return Passed <code>text</code> or a new string with illegal
+   * characters removed if any found in <code>text</code>.
+   * @see <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">Char</a>
+   */
+  private static String getLegalXml(final String text) {
+    if (text == null) {
+      return null;
+    }
+    final int length = text.length();
+    StringBuffer buffer = null; // Created lazily at the first illegal char.
+    for (int i = 0, step = 1; i < length; i += step) { // step = chars used
+      final char c = text.charAt(i);
+      step = (c >= 0xd800 && c <= 0xdbff && i + 1 < length
+          && text.charAt(i + 1) >= 0xdc00
+          && text.charAt(i + 1) <= 0xdfff) ? 2 : 1;
+      if (step == 1 && !isLegalXml(c)) {
+        if (buffer == null) {
+          buffer = new StringBuffer(length).append(text.substring(0, i));
+        }
+      } else if (buffer != null) {
+        buffer.append(text.substring(i, i + step));
+      }
+    }
+    return (buffer != null) ? buffer.toString() : text;
+  }
+
+  /** True if <code>c</code>, a single UTF-16 unit, is a legal XML char. */
+  private static boolean isLegalXml(final char c) {
+    return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff)
+        || (c >= 0xe000 && c <= 0xfffd);
+  }
+
+  public static void main(final String [] args) { // Smoke test of getLegalXml.
+    final String [] samples = {"hello", "he\u0000llo", "\u0000he\u0000llo",
+      "\u0000he\u0000llo\u0000"};
+    for (int i = 0; i < samples.length; i++) {
+      System.out.println(getLegalXml(samples[i]));
+    }
+  }
 }
-
Index: src/java/org/apache/nutch/searcher/QueryFilters.java
===================================================================
--- src/java/org/apache/nutch/searcher/QueryFilters.java	(revision 414852)
+++ src/java/org/apache/nutch/searcher/QueryFilters.java	(working copy)
@@ -103,6 +103,7 @@
     // then run each plugin
     BooleanQuery output = new BooleanQuery();
     for (int i = 0; i < this.queryFilters.length; i++) {
+      System.out.println("FILTER: " + this.queryFilters[i]);
       output = this.queryFilters[i].filter(input, output);
     }
     return output;
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 414852)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -357,6 +357,9 @@
   }
 
   private boolean match(Extension extension, String id, String type) {
+	System.out.println("EXT " + extension.getId() + " "  +
+		extension.getAttribute("contentType"));
+		
     return ((id.equals(extension.getId())) &&
             (type.equals(extension.getAttribute("contentType")) ||
              type.equals(DEFAULT_PLUGIN)));
@@ -364,7 +367,9 @@
   
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
+	System.out.println("ID " + id + " TYPE " + type);
     for (int i=0; i<list.length; i++) {
+	System.out.println("LIST " + list[i]);
       if (match(list[i], id, type)) {
         return list[i];
       }
Index: bin/nutch
===================================================================
--- bin/nutch	(revision 414852)
+++ bin/nutch	(working copy)
@@ -95,6 +95,9 @@
 for f in $NUTCH_HOME/build/nutch-*.job; do
   CLASSPATH=${CLASSPATH}:$f;
 done
+if [ -d "$NUTCH_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/classes
+fi
 if [ -d "$NUTCH_HOME/build/test/classes" ]; then
   CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/test/classes
 fi
