Index: src/java/org/apache/nutch/fetcher/Fetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/Fetcher.java	(revision 168215)
+++ src/java/org/apache/nutch/fetcher/Fetcher.java	(working copy)
@@ -68,6 +68,10 @@
 
   private int threadCount =                       // max number of threads
     NutchConf.get().getInt("fetcher.threads.fetch", 10);
+  private static final float NEW_INJECTED_PAGE_SCORE =
+    NutchConf.get().getFloat("db.score.injected", 2.0f);
+  private static final int MAX_REDIRECT =
+    NutchConf.get().getInt("http.redirect.max", 3);
 
   // All threads (FetcherThread or thread started by it) belong to
   // group "fetcher". Each FetcherThread is named as "fetcherXX",
@@ -115,36 +119,64 @@
           }
 
           LOG.info("fetching " + url);            // fetch the page
-
-          Protocol protocol = ProtocolFactory.getProtocol(url);
-          Content content = protocol.getContent(url);
-
-          handleFetch(url, fle, content);
-
-          synchronized (Fetcher.this) {           // update status
-            pages++;
-            bytes += content.getContent().length;
-            if ((pages % 100) == 0) {             // show status every 100pp
-              status();
+          
+          // support multiple redirects, if requested by protocol
+          // or content meta-tags (the latter requires running Fetcher
+          // in parsing mode). Protocol-level redirects take precedence over
+          // content-level redirects. Some plugins can handle redirects
+          // automatically, so that only the final success or failure will be
+          // shown here.
+          boolean refetch = false;
+          int redirCnt = 0;
+          do {
+            refetch = false;  // re-armed only by a redirect below; otherwise one redirect would loop forever
+            Protocol protocol = ProtocolFactory.getProtocol(url);
+            ProtocolOutput output = protocol.getProtocolOutput(fle);
+            ProtocolStatus pstat = output.getStatus();
+            Content content = output.getContent();
+            switch(pstat.getCode()) {
+              case ProtocolStatus.SUCCESS:
+                if (content != null) {
+                  synchronized (Fetcher.this) {           // update status
+                    pages++;
+                    bytes += content.getContent().length;
+                    if ((pages % 100) == 0) {             // show status every 100pp
+                      status();
+                    }
+                  }
+                  ParseStatus ps = handleFetch(url, fle, content);
+                  if (ps != null && ps.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                    url = ps.getMessage();
+                    if (url != null) {
+                      refetch = true;
+                      redirCnt++;
+                      fle = new FetchListEntry(true, new Page(url, NEW_INJECTED_PAGE_SCORE), new String[0]);
+                    }
+                  }
+                }
+                break;
+              case ProtocolStatus.TEMP_MOVED: // try to redirect immediately
+                url = pstat.getMessage();
+                if (url != null) {
+                  refetch = true;
+                  redirCnt++;
+                  // XXX create new entry.
+                  fle = new FetchListEntry(true, new Page(url, NEW_INJECTED_PAGE_SCORE), new String[0]);
+                }
+                break;
+              case ProtocolStatus.GONE:
+                // resource is gone: record it as not found, don't retry
+                handleNoFetch(fle, FetcherOutput.NOT_FOUND);
+                break;
+              case ProtocolStatus.RETRY:
+                // temporary failure: record the entry for a later retry
+                handleNoFetch(fle, FetcherOutput.RETRY);
+                break;
+              default:
+                throw new Exception("Unknown ProtocolStatus: " + pstat.getCode());
             }
-          }
-        } catch (ResourceGone e) {                // don't retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.NOT_FOUND);
+          } while (refetch && redirCnt < MAX_REDIRECT);
 
-        // dealt with in handleFetch() below
-        //} catch (ParseException e) {              // don't retry
-        //  logError(url, fle, e);
-        //  handleNoFetch(fle, FetcherOutput.CANT_PARSE);
-
-        } catch (RetryLater e) {                  // explicit retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.RETRY);
-
-        } catch (ProtocolException e) {           // implicit retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.RETRY);
-
         } catch (Throwable t) {                   // an unchecked exception
           if (fle != null) {
             logError(url, fle, t);                // retry?
@@ -176,33 +208,39 @@
       }
     }
 
-    private void handleFetch(String url, FetchListEntry fle, Content content) {
+    private ParseStatus handleFetch(String url, FetchListEntry fle, Content content) {
       if (!Fetcher.this.parsing) {
         outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
                                     FetcherOutput.SUCCESS),
                 content, null, null);
-        return;
+        return null;
       }
 
-      try {
         String contentType = content.getContentType();
-        Parser parser = ParserFactory.getParser(contentType, url);
-        Parse parse = parser.getParse(content);
-        outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
-                                    FetcherOutput.SUCCESS),
-                content, new ParseText(parse.getText()), parse.getData());
-      } catch (ParseException e) {
-        // 20041026, xing
-        // If fetching succeeds, but parsing fails, content should be saved
-        // so that we can try to parse again in separate pass, possibly
-        // using better/alternative parser.
-        LOG.info("fetch okay, but can't parse " + url + ", reason: "
-          + e.getMessage());
-        outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
-                                    FetcherOutput.CANT_PARSE),
-                content, new ParseText(""),
-                new ParseData("", new Outlink[0], new Properties()));
-      }
+        Parser parser = null;
+        Parse parse = null;
+        ParseStatus status = null;
+        try {
+          parser = ParserFactory.getParser(contentType, url);
+          parse = parser.getParse(content);
+          status = parse.getStatus();
+        } catch (Exception e) {
+          LOG.log(Level.WARNING, "Error parsing " + url, e);  // keep the stack trace in the fetcher log
+          status = new ParseStatus(e);
+        }
+        if (status.isSuccess()) {
+          outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
+                  FetcherOutput.SUCCESS),
+                  content, new ParseText(parse.getText()), parse.getData());
+        } else {
+          LOG.info("fetch okay, but can't parse " + url + ", reason: "
+                  + status.toString());
+          outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
+                  FetcherOutput.CANT_PARSE),
+                  content, new ParseText(""),
+                  new ParseData("", new Outlink[0], new Properties()));
+        }
+        return status;
     }
 
     private void handleNoFetch(FetchListEntry fle, int status) {
@@ -429,7 +467,7 @@
     }
 
     // set log level
-    fetcher.setLogLevel(Level.parse(logLevel.toUpperCase()));
+    setLogLevel(Level.parse(logLevel.toUpperCase()));
 
     if (showThreadID) {
       LogFormatter.setShowThreadIDs(showThreadID);
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 168215)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -18,13 +18,21 @@
 
 import java.io.IOException;
 
+import org.apache.nutch.pagedb.FetchListEntry;
+
 /** A retriever of url content.  Implemented by protocol extensions. */
 public interface Protocol {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
 
-  /** Returns the {@link Content} for a url.
+  /** Returns the {@link ProtocolOutput} for a url. This method may be
+   * more limited than {@link #getProtocolOutput(FetchListEntry)}.
    * @throws IOException for any errors.
    */
-  Content getContent(String url) throws ProtocolException;
+  ProtocolOutput getProtocolOutput(String url);
+
+  /** Returns the {@link ProtocolOutput} for a fetchlist entry. Failures are
+   * reported through the status of the returned output.
+   */
+  ProtocolOutput getProtocolOutput(FetchListEntry fle);
 }
Index: src/java/org/apache/nutch/protocol/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatus.java	(revision 0)
+++ src/java/org/apache/nutch/protocol/ProtocolStatus.java	(revision 0)
@@ -0,0 +1,126 @@
+/*
+ * Created on May 4, 2005
+ * Author: Andrzej Bialecki &lt;ab@getopt.org&gt;
+ *
+ */
+package org.apache.nutch.protocol;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.nutch.io.Writable;
+import org.apache.nutch.io.WritableUtils;
+
+/**
+ * Outcome of a protocol-level fetch: a numeric status code plus optional
+ * string arguments, the first of which is treated as the message.
+ *
+ * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
+ */
+public class ProtocolStatus implements Writable {
+
+  /** Content was retrieved without errors. */
+  public static final int SUCCESS              = 1;
+  /** Content was not retrieved. Any further errors may be indicated in args. */
+  public static final int FAILED               = 2;
+
+  /** This protocol was not found.  Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND      = 10;
+  /** Resource is gone. */
+  public static final int GONE                 = 11;
+  /** Resource has moved permanently. */
+  public static final int MOVED                = 12;
+  /** Resource has moved temporarily. */
+  public static final int TEMP_MOVED           = 13;
+  /** Resource was not found. */
+  public static final int NOT_FOUND            = 14;
+  /** Temporary failure. Application may retry immediately. */
+  public static final int RETRY                = 15;
+  /** Unspecified exception occurred. Further information may be provided in args. */
+  public static final int EXCEPTION            = 16;
+
+  /** Shared, argument-less status carrying {@link #SUCCESS}. */
+  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(SUCCESS);
+  /** Shared, argument-less status carrying {@link #FAILED}. */
+  public static final ProtocolStatus STATUS_FAILED = new ProtocolStatus(FAILED);
+
+  private int code;
+  private String[] args;
+
+  /** Creates a status with the given code and arguments (may be null). */
+  public ProtocolStatus(int code, String[] args) {
+    this.code = code;
+    this.args = args;
+  }
+
+  /** Creates a status with the given code and no arguments. */
+  public ProtocolStatus(int code) {
+    this(code, (String[])null);
+  }
+
+  /** Creates a status whose single argument is <code>String.valueOf(message)</code>. */
+  public ProtocolStatus(int code, Object message) {
+    this.code = code;
+    this.args = new String[]{String.valueOf(message)};
+  }
+
+  /** Creates an {@link #EXCEPTION} status describing the given throwable. */
+  public ProtocolStatus(Throwable t) {
+    this(EXCEPTION, t);
+  }
+
+  /**
+   * Restores the code and arguments written by {@link #write(DataOutput)}.
+   * Absent arguments come back as an empty array. Do not call this on the
+   * shared <code>STATUS_*</code> constants, as it overwrites their state.
+   */
+  public void readFields(DataInput in) throws IOException {
+    code = in.readInt();
+    args = WritableUtils.readCompressedStringArray(in);
+  }
+
+  /**
+   * Writes the code followed by the arguments. Null or empty arguments are
+   * written as a zero count, the same convention ParseStatus uses.
+   */
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(code);
+    if (args != null && args.length > 0)
+      WritableUtils.writeCompressedStringArray(out, args);
+    else out.writeInt(0);
+  }
+
+  /** Returns the arguments; may be null. */
+  public String[] getArgs() {
+    return args;
+  }
+
+  /** Returns the status code. */
+  public int getCode() {
+    return code;
+  }
+
+  /** Returns the first argument, or null if there are no arguments. */
+  public String getMessage() {
+    if (args != null && args.length > 0) return args[0];
+    return null;
+  }
+
+  /** Formats the status as "(code)" followed by its arguments, if any. */
+  public String toString() {
+    StringBuffer res = new StringBuffer();
+    res.append("(" + code + ")");
+    if (args != null) {
+      if (args.length == 1) {
+        res.append(": " + String.valueOf(args[0]));
+      } else {
+        for (int i = 0; i < args.length; i++) {
+          if (args[i] != null)
+            res.append(", args[" + i + "]=" + String.valueOf(args[i]));
+        }
+      }
+    }
+    return res.toString();
+  }
+}

Property changes on: src/java/org/apache/nutch/protocol/ProtocolStatus.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/parse/ParserChecker.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserChecker.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/ParserChecker.java	(working copy)
@@ -67,7 +67,7 @@
     LOG.info("fetching: "+url);
 
     Protocol protocol = ProtocolFactory.getProtocol(url);
-    Content content = protocol.getContent(url);
+    Content content = protocol.getProtocolOutput(url).getContent();
 
     if (force) {
       content.setContentType(contentType);
Index: src/java/org/apache/nutch/parse/ParseData.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseData.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/ParseData.java	(working copy)
@@ -21,7 +21,6 @@
 
 import org.apache.nutch.io.*;
 import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
 import org.apache.nutch.tools.UpdateDatabaseTool;
 
 
Index: src/java/org/apache/nutch/parse/HtmlParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilter.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/HtmlParseFilter.java	(working copy)
@@ -30,6 +30,5 @@
 
   /** Adds metadata or otherwise modifies a parse of HTML content, given
    * the DOM tree of a page. */
-  Parse filter(Content content, Parse parse, DocumentFragment doc)
-    throws ParseException;
+  Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc);
 }
Index: src/java/org/apache/nutch/parse/HtmlParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilters.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/HtmlParseFilters.java	(working copy)
@@ -45,11 +45,11 @@
   private  HtmlParseFilters() {}                  // no public ctor
 
   /** Run all defined filters. */
-  public static Parse filter(Content content,Parse parse,DocumentFragment doc)
-    throws ParseException {
+  public static Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) {
 
     for (int i = 0 ; i < CACHE.length; i++) {
-      parse = CACHE[i].filter(content, parse, doc);
+      parse = CACHE[i].filter(content, parse, metaTags, doc);
+      if (!parse.getStatus().isSuccess()) break;
     }
 
     return parse;
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -20,10 +20,14 @@
  * @see Parser#getParse(FetcherOutput,Content)
  */
 public interface Parse {
+  
   /** The textual content of the page. This is indexed, searched, and used when
    * generating snippets.*/ 
   String getText();
 
   /** Other data extracted from the page. */
   ParseData getData();
+  
+  /** Status of parsing. */
+  ParseStatus getStatus();
 }
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 0)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 0)
@@ -0,0 +1,166 @@
+/*
+ * Created on Apr 28, 2005
+ * Author: Andrzej Bialecki &lt;ab@getopt.org&gt;
+ *
+ */
+package org.apache.nutch.parse;
+
+import java.net.URL;
+import java.util.Properties;
+
+/**
+ * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
+ */
+public class HTMLMetaTags {
+  private boolean noIndex = false;
+
+  private boolean noFollow = false;
+
+  private boolean noCache = false;
+
+  private URL baseHref = null;
+
+  private boolean refresh = false;
+
+  private int refreshTime = 0;
+
+  private URL refreshHref = null;
+
+  private Properties generalTags = new Properties();
+
+  private Properties httpEquivTags = new Properties();
+
+  /**
+   * Sets all boolean values to <code>false</code>. Clears all other tags.
+   */
+  public void reset() {
+    noIndex = false;
+    noFollow = false;
+    noCache = false;
+    refresh = false;
+    refreshTime = 0;
+    baseHref = null;
+    refreshHref = null;
+    generalTags.clear();
+    httpEquivTags.clear();
+  }
+
+  /**
+   * Sets <code>noFollow</code> to <code>true</code>.
+   */
+  public void setNoFollow() {
+    noFollow = true;
+  }
+
+  /**
+   * Sets <code>noIndex</code> to <code>true</code>.
+   */
+  public void setNoIndex() {
+    noIndex = true;
+  }
+
+  /**
+   * Sets <code>noCache</code> to <code>true</code>.
+   */
+  public void setNoCache() {
+    noCache = true;
+  }
+
+  /**
+   * Sets <code>refresh</code> to <code>true</code>.
+   */
+  public void setRefresh() {
+    refresh = true;
+  }
+
+  /**
+   * Sets the <code>baseHref</code>.
+   */
+  public void setBaseHref(URL baseHref) {
+    this.baseHref = baseHref;
+  }
+
+  /**
+   * Sets the <code>refreshHref</code>.
+   */
+  public void setRefreshHref(URL refreshHref) {
+    this.refreshHref = refreshHref;
+  }
+
+  /**
+   * Sets the <code>refreshTime</code>.
+   */
+  public void setRefreshTime(int refreshTime) {
+    this.refreshTime = refreshTime;
+  }
+
+  /**
+   * A convenience method. Returns the current value of <code>noIndex</code>.
+   */
+  public boolean getNoIndex() {
+    return noIndex;
+  }
+
+  /**
+   * A convenience method. Returns the current value of <code>noFollow</code>.
+   */
+  public boolean getNoFollow() {
+    return noFollow;
+  }
+
+  /**
+   * A convenience method. Returns the current value of <code>noCache</code>.
+   */
+  public boolean getNoCache() {
+    return noCache;
+  }
+
+  /**
+   * A convenience method. Returns the current value of <code>refresh</code>.
+   */
+  public boolean getRefresh() {
+    return refresh;
+  }
+
+  /**
+   * A convenience method. Returns the <code>baseHref</code>, if set, or
+   * <code>null</code> otherwise.
+   */
+  public URL getBaseHref() {
+    return baseHref;
+  }
+
+  /**
+   * A convenience method. Returns the <code>refreshHref</code>, if set, or
+   * <code>null</code> otherwise. The value may be invalid if
+   * {@link #getRefresh()} returns <code>false</code>.
+   */
+  public URL getRefreshHref() {
+    return refreshHref;
+  }
+
+  /**
+   * A convenience method. Returns the current value of <code>refreshTime</code>.
+   * The value may be invalid if {@link #getRefresh()} returns
+   * <code>false</code>.
+   */
+  public int getRefreshTime() {
+    return refreshTime;
+  }
+
+  /**
+   * Returns all collected values of the general meta tags. Property names are
+   * tag names, property values are "content" values.
+   */
+  public Properties getGeneralTags() {
+    return generalTags;
+  }
+
+  /**
+   * Returns all collected values of the "http-equiv" meta tags. Property names
+   * are tag names, property values are "content" values.
+   */
+  public Properties getHttpEquivTags() {
+    return httpEquivTags;
+  }
+}
Index: src/java/org/apache/nutch/parse/ParseImpl.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseImpl.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/ParseImpl.java	(working copy)
@@ -22,13 +22,23 @@
 public class ParseImpl implements Parse {
   private String text;
   private ParseData data;
+  private ParseStatus status;
 
+  public ParseImpl(String text, ParseData data, ParseStatus status) {
+    this.text = text;
+    this.data = data;
+    this.status = status;
+  }
+  
   public ParseImpl(String text, ParseData data) {
     this.text = text;
     this.data = data;
+    this.status = new ParseStatus(ParseStatus.SUCCESS);
   }
 
   public String getText() { return text; }
 
   public ParseData getData() { return data; }
+
+  public ParseStatus getStatus() { return status; }
 }
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 168215)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -27,5 +27,5 @@
   public final static String X_POINT_ID = Parser.class.getName();
 
   /** Creates the parse for some content. */
-  Parse getParse(Content c) throws ParseException;
+  Parse getParse(Content c);
 }
Index: src/java/org/apache/nutch/parse/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatus.java	(revision 0)
+++ src/java/org/apache/nutch/parse/ParseStatus.java	(revision 0)
@@ -0,0 +1,211 @@
+/*
+ * Created on Apr 28, 2005
+ * Author: Andrzej Bialecki &lt;ab@getopt.org&gt;
+ *
+ */
+package org.apache.nutch.parse;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.nutch.io.Writable;
+import org.apache.nutch.io.WritableUtils;
+
+/**
+ * Outcome of parsing a piece of content: a major code (success or failure),
+ * a more specific minor code, and optional string arguments such as an error
+ * message or a redirect target.
+ *
+ * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
+ */
+public class ParseStatus implements Writable {
+
+  // Primary status codes:
+
+  /** Parsing succeeded. */
+  public static final byte SUCCESS         = 1;
+  /** General failure. There may be a more specific error message in arguments. */
+  public static final byte FAILED          = 2;
+
+  /** Human-readable names of the major codes, indexed by code value. */
+  public static final String[] majorCodes = {
+          "undefined",
+          "success",
+          "failed"
+  };
+
+  // Secondary success codes go here:
+
+  /** Parsed content contains a directive to redirect to another URL.
+   * The target URL can be retrieved from the arguments.
+   */
+  public static final short SUCCESS_REDIRECT          = 100;
+
+  // Secondary failure codes go here:
+
+  /** Parsing failed. An Exception occurred (which may be retrieved from the arguments). */
+  public static final short FAILED_EXCEPTION          = 200;
+  /** Parsing failed. The content was truncated, so the parser could not handle it. */
+  public static final short FAILED_TRUNCATED          = 202;
+  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
+  public static final short FAILED_INVALID_FORMAT     = 203;
+  /** Parsing failed. Other related parts of the content are needed to complete
+   * parsing. The list of URLs to missing parts may be provided in arguments.
+   * The Fetcher may decide to fetch these parts at once, then put them into
+   * Content.metadata, and supply them for re-parsing.
+   */
+  public static final short FAILED_MISSING_PARTS      = 204;
+
+  /** Shared, argument-less status with major code {@link #SUCCESS}. */
+  public static final ParseStatus STATUS_SUCCESS = new ParseStatus(SUCCESS);
+  /** Shared, argument-less status with major code {@link #FAILED}. */
+  public static final ParseStatus STATUS_FAILURE = new ParseStatus(FAILED);
+
+  private byte majorCode = 0;
+  private short minorCode = 0;
+  private String[] args = null;
+
+  /** Creates a status from a major code, a minor code and arguments (may be null). */
+  public ParseStatus(int majorCode, int minorCode, String[] args) {
+    this.args = args;
+    this.majorCode = (byte)majorCode;
+    this.minorCode = (short)minorCode;
+  }
+
+  /** Creates a status with only a major code. */
+  public ParseStatus(int majorCode) {
+    this(majorCode, 0, (String[])null);
+  }
+
+  /** Creates a status with a major code and arguments; the minor code is 0. */
+  public ParseStatus(int majorCode, String[] args) {
+    this(majorCode, 0, args);
+  }
+
+  /** Creates a status with major and minor codes and no arguments. */
+  public ParseStatus(int majorCode, int minorCode) {
+    this(majorCode, minorCode, (String[])null);
+  }
+
+  /** Simplified constructor for passing just a text message. */
+  public ParseStatus(int majorCode, int minorCode, String message) {
+    this(majorCode, minorCode, new String[]{message});
+  }
+
+  /** Simplified constructor for passing just a text message. */
+  public ParseStatus(int majorCode, String message) {
+    this(majorCode, 0, new String[]{message});
+  }
+
+  /** Creates a FAILED / FAILED_EXCEPTION status whose message is <code>t.toString()</code>. */
+  public ParseStatus(Throwable t) {
+    this(FAILED, FAILED_EXCEPTION, new String[]{t.toString()});
+  }
+
+  /**
+   * Restores the fields in the order {@link #write(DataOutput)} emits them.
+   * Absent arguments come back as an empty array. Do not call this on the
+   * shared <code>STATUS_*</code> constants, as it overwrites their state.
+   */
+  public void readFields(DataInput in) throws IOException {
+    majorCode = in.readByte();
+    minorCode = in.readShort();
+    args = WritableUtils.readCompressedStringArray(in);
+  }
+
+  /**
+   * Writes the major code, the minor code and then the arguments. Null or
+   * empty arguments are written as a zero count.
+   */
+  public void write(DataOutput out) throws IOException {
+    out.writeByte(majorCode);
+    out.writeShort(minorCode);
+    if (args != null && args.length > 0)
+      WritableUtils.writeCompressedStringArray(out, args);
+    else out.writeInt(0);
+  }
+
+  /** A convenience method. Returns true if majorCode is SUCCESS, false
+   * otherwise.
+   */
+  public boolean isSuccess() {
+    return majorCode == SUCCESS;
+  }
+
+  /** A convenience method. Return a String representation of the first
+   * argument, or null.
+   */
+  public String getMessage() {
+    if (args != null && args.length > 0 && args[0] != null)
+      return args[0];
+    return null;
+  }
+
+  /** Returns the arguments; may be null. */
+  public String[] getArgs() {
+    return args;
+  }
+
+  /** Returns the major code. */
+  public int getMajorCode() {
+    return majorCode;
+  }
+
+  /** Returns the minor code. */
+  public int getMinorCode() {
+    return minorCode;
+  }
+
+  /** A convenience method. Creates an empty Parse instance,
+   * which returns this status.
+   */
+  public Parse getEmptyParse() {
+    return new EmptyParseImpl(this);
+  }
+
+  /**
+   * Formats the status as "name(major,minor)" followed by its arguments.
+   * A major code without an entry in {@link #majorCodes} is shown as "unknown".
+   */
+  public String toString() {
+    StringBuffer res = new StringBuffer();
+    String name = (majorCode >= 0 && majorCode < majorCodes.length)
+            ? majorCodes[majorCode] : "unknown";
+    res.append(name + "(" + majorCode + "," + minorCode + ")");
+    if (args != null) {
+      if (args.length == 1) {
+        res.append(": " + String.valueOf(args[0]));
+      } else {
+        for (int i = 0; i < args.length; i++) {
+          if (args[i] != null)
+            res.append(", args[" + i + "]=" + String.valueOf(args[i]));
+        }
+      }
+    }
+    return res.toString();
+  }
+}
+
+/** Parse that carries only a status; its text and data are null. */
+class EmptyParseImpl implements Parse {
+
+  private ParseStatus status = null;
+
+  public EmptyParseImpl(ParseStatus status) {
+    this.status = status;
+  }
+
+  public ParseData getData() {
+    return null;
+  }
+
+  public ParseStatus getStatus() {
+    return status;
+  }
+
+  public String getText() {
+    return null;
+  }
+}
+
Index: src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
===================================================================
--- src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java	(revision 168215)
+++ src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java	(working copy)
@@ -64,7 +64,7 @@
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
       protocol = ProtocolFactory.getProtocol(urlString);
-      content = protocol.getContent(urlString);
+      content = protocol.getProtocolOutput(urlString).getContent();
 
       parser = ParserFactory.getParser(content.getContentType(), urlString);
       parse = parser.getParse(content);
Index: src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
===================================================================
--- src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(revision 168215)
+++ src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(working copy)
@@ -27,6 +27,7 @@
 
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
@@ -79,13 +80,13 @@
     rootLogger.addAppender(appender);
   }
 
-  public Parse getParse(Content content) throws ParseException {
+  public Parse getParse(Content content) {
 
     // check that contentType is one we can handle
     String contentType = content.getContentType();
     if (contentType != null && !contentType.startsWith("application/pdf"))
-      throw new ParseException(
-        "Content-Type not application/pdf: "+contentType);
+      return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_INVALID_FORMAT,
+        "Content-Type not application/pdf: " + contentType).getEmptyParse();
 
     // in memory representation of pdf file
     PDDocument pdf = null;
@@ -100,8 +101,9 @@
       String contentLength = content.get("Content-Length");
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
-          throw new ParseException("Content truncated at "+raw.length
-            +" bytes. Parser can't handle incomplete pdf file.");
+          return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
+                  "Content truncated at "+raw.length
+            +" bytes. Parser can't handle incomplete pdf file.").getEmptyParse();
       }
 
       PDFParser parser = new PDFParser(
@@ -134,14 +136,15 @@
       // formatDate(info.getCreationDate())
       // formatDate(info.getModificationDate())
 
-    } catch (ParseException e) {
-      throw e;
     } catch (CryptographyException e) {
-      throw new ParseException("Error decrypting document. "+e);
+      return new ParseStatus(ParseStatus.FAILED,
+              "Error decrypting document. " + e).getEmptyParse();
     } catch (InvalidPasswordException e) {
-      throw new ParseException("Can't decrypt document. "+e);
+      return new ParseStatus(ParseStatus.FAILED,
+              "Can't decrypt document - invalid password. " + e).getEmptyParse();
     } catch (Exception e) { // run time exception
-      throw new ParseException("Can't be handled as pdf document. "+e);
+      return new ParseStatus(ParseStatus.FAILED,
+              "Can't be handled as pdf document. " + e).getEmptyParse();
     } finally {
       try {
         if (pdf != null)
@@ -165,7 +168,7 @@
     metadata.putAll(content.getMetadata()); // copy through
 
     ParseData parseData = new ParseData(title, outlinks, metadata);
-    return new ParseImpl(text, parseData);
+    return new ParseImpl(text, parseData, ParseStatus.STATUS_SUCCESS);
     // any filter?
     //return HtmlParseFilters.filter(content, parse, root);
   }
Index: src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
===================================================================
--- src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java	(revision 168215)
+++ src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java	(working copy)
@@ -64,7 +64,7 @@
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
       protocol = ProtocolFactory.getProtocol(urlString);
-      content = protocol.getContent(urlString);
+      content = protocol.getProtocolOutput(urlString).getContent();
 
       parser = ParserFactory.getParser(content.getContentType(), urlString);
       parse = parser.getParse(content);
Index: src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
===================================================================
--- src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(revision 168215)
+++ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(working copy)
@@ -18,6 +18,7 @@
 
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
@@ -52,13 +53,13 @@
 
   public MSWordParser () {}
 
-  public Parse getParse(Content content) throws ParseException {
+  public Parse getParse(Content content) {
 
     // check that contentType is one we can handle
     String contentType = content.getContentType();
     if (contentType != null && !contentType.startsWith("application/msword"))
-      throw new ParseException(
-        "Content-Type not application/msword: "+contentType);
+      return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_INVALID_FORMAT,
+        "Content-Type not application/msword: " + contentType).getEmptyParse();
 
     String text = null;
     String title = null;
@@ -71,8 +72,9 @@
       String contentLength = content.get("Content-Length");
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
-          throw new ParseException("Content truncated at "+raw.length
-            +" bytes. Parser can't handle incomplete msword file.");
+          return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
+                  "Content truncated at " + raw.length
+            +" bytes. Parser can't handle incomplete msword file.").getEmptyParse();
       }
 
       WordExtractor extractor = new WordExtractor();
@@ -86,13 +88,14 @@
       extractor = null;
 
     } catch (ParseException e) {
-      throw e;
+      return new ParseStatus(e).getEmptyParse();
     } catch (FastSavedException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     } catch (PasswordProtectedException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     } catch (Exception e) { // run time exception
-      throw new ParseException("Can't be handled as msword document. "+e);
+      return new ParseStatus(ParseStatus.FAILED,
+              "Can't be handled as msword document. " + e).getEmptyParse();
     } finally {
       // nothing so far
     }
@@ -117,7 +120,7 @@
     Outlink[] outlinks = new Outlink[0];
 
     ParseData parseData = new ParseData(title, outlinks, metadata);
-    return new ParseImpl(text, parseData);
+    return new ParseImpl(text, parseData, ParseStatus.STATUS_SUCCESS);
     // any filter?
     //return HtmlParseFilters.filter(content, parse, root);
   }
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(revision 168215)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(working copy)
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 package org.apache.nutch.analysis.lang;
+import org.apache.nutch.parse.HTMLMetaTags;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.HtmlParseFilter;
-import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.protocol.Content;
 import org.w3c.dom.*;
 
@@ -38,8 +38,7 @@
    * <li>3. meta http-equiv (content-language) (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2)
    * <br>Only the first occurence of language is stored.
    */
-  public Parse filter(Content content, Parse parse, DocumentFragment doc)
-    throws ParseException {
+  public Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) {
     String lang = findLanguage(doc);
 
     if (lang != null) {
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java	(revision 168215)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java	(working copy)
@@ -239,7 +239,7 @@
     Protocol protocol;
     try {
       protocol = ProtocolFactory.getProtocol(url);
-      Content content = protocol.getContent(url);
+      Content content = protocol.getProtocolOutput(url).getContent();
       String contentType = content.getContentType();
       Parser parser = ParserFactory.getParser(contentType, url);
       Parse parse = parser.getParse(content);
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java	(revision 168215)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java	(working copy)
@@ -19,22 +19,24 @@
 
 import org.apache.commons.net.ftp.FTPFileEntryParser;
 
+import org.apache.nutch.db.Page;
 import org.apache.nutch.net.protocols.HttpDateFormat;
 
 import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.util.NutchConf;
 
+import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.protocol.ProtocolStatus;
 
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import java.net.MalformedURLException;
 import java.net.URL;
 
-import java.io.InputStream;
-// 20040528, xing, disabled for now
-//import java.io.Reader;
 import java.io.IOException;
 
 /************************************
@@ -91,13 +93,13 @@
   }
 
   /** Set the timeout. */
-  public void setTimeout(int timeout) {
-    this.timeout = timeout;
+  public void setTimeout(int to) {
+    timeout = to;
   }
 
   /** Set the point at which content is truncated. */
   public void setMaxContentLength(int length) {
-    this.maxContentLength = length;
+    maxContentLength = length;
   }
 
   /** Set followTalk */
@@ -110,8 +112,19 @@
     this.keepConnection = keepConnection;
   }
 
-  public Content getContent(String urlString) throws FtpException {
+  public ProtocolOutput getProtocolOutput(String urlString) {
+    ProtocolOutput output = null;
     try {
+      return getProtocolOutput(new FetchListEntry(true,
+            new Page(urlString, 1.0f), new String[0]));
+    } catch (MalformedURLException mue) {
+      return new ProtocolOutput(null, new ProtocolStatus(mue));
+    }
+  }
+  
+  public ProtocolOutput getProtocolOutput(FetchListEntry fle) {
+    String urlString = fle.getUrl().toString();
+    try {
       URL url = new URL(urlString);
   
       int redirects = 0;
@@ -123,7 +136,7 @@
         int code = response.getCode();
   
         if (code == 200) {                          // got a good response
-          return response.toContent();              // return it
+          return new ProtocolOutput(response.toContent());              // return it
   
         } else if (code >= 300 && code < 400) {     // handle redirect
           if (redirects == MAX_REDIRECTS)
@@ -137,8 +150,8 @@
           throw new FtpError(code);
         }
       } 
-    } catch (IOException e) {
-      throw new FtpException(e);
+    } catch (Exception e) {
+      return new ProtocolOutput(null, new ProtocolStatus(e));
     }
   }
 
@@ -205,7 +218,7 @@
     // set log level
     LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
 
-    Content content = ftp.getContent(urlString);
+    Content content = ftp.getProtocolOutput(urlString).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
     System.err.println("Content-Length: " + content.get("Content-Length"));
Index: src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
===================================================================
--- src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(revision 168215)
+++ src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(working copy)
@@ -23,7 +23,7 @@
 import org.apache.nutch.util.*;
 
 public class TextParser implements Parser {
-  public Parse getParse(Content content) throws ParseException {
+  public Parse getParse(Content content) {
     // copy content meta data through
     Properties metadata = new Properties();
     metadata.putAll(content.getMetadata());
@@ -37,7 +37,7 @@
       try {                                       // try to use named encoding
         text = new String(content.getContent(), encoding);
       } catch (java.io.UnsupportedEncodingException e) {
-        throw new ParseException(e);
+        return new ParseStatus(e).getEmptyParse();
       }
     } else {
       // FIXME: implement charset detector. This code causes problem when 
@@ -45,6 +45,6 @@
       text = new String(content.getContent());    // use default encoding
     }
 
-    return new ParseImpl(text, parseData);
+    return new ParseImpl(text, parseData, ParseStatus.STATUS_SUCCESS);
   }
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 168215)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -252,19 +252,22 @@
 
   /** Adds metadata or otherwise modifies a parse of an HTML document, given
    * the DOM tree of a page. */
-  public Parse filter(Content content, Parse parse, DocumentFragment doc)
-    throws ParseException {
+  public Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) {
 
     // construct base url
     URL base;
     try {
       base = new URL(content.getBaseUrl());
     } catch (MalformedURLException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     }
 
-    // extract license metadata
-    Walker.walk(doc, base, parse.getData().getMetadata());
+    try {
+      // extract license metadata
+      Walker.walk(doc, base, parse.getData().getMetadata());
+    } catch (ParseException e) {
+      return new ParseStatus(e).getEmptyParse();
+    }
 
     return parse;
   }
Index: src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
===================================================================
--- src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java	(revision 168215)
+++ src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java	(working copy)
@@ -79,7 +79,7 @@
 
     // get nutch content
     Protocol protocol = ProtocolFactory.getProtocol(urlString);
-    content = protocol.getContent(urlString);
+    content = protocol.getProtocolOutput(urlString).getContent();
     protocol = null;
   }
 
Index: src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
===================================================================
--- src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(revision 168215)
+++ src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(working copy)
@@ -17,15 +17,14 @@
 package org.apache.nutch.parse.ext;
 
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.parse.ParseException;
 
 import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
 import org.apache.nutch.util.CommandRunner;
 
 import org.apache.nutch.plugin.Extension;
@@ -88,14 +87,15 @@
 
   public ExtParser () {}
 
-  public Parse getParse(Content content) throws ParseException {
+  public Parse getParse(Content content) {
 
     String contentType = content.getContentType();
 
     String[] params = (String[]) TYPE_PARAMS_MAP.get(contentType);
     if (params == null)
-      throw new ParseException(
-        "No external command defined for contentType: " + contentType);
+      return new ParseStatus(ParseStatus.FAILED,
+              "No external command defined for contentType: "
+              + contentType).getEmptyParse();
 
     String command = params[0];
     int timeout = Integer.parseInt(params[1]);
@@ -114,8 +114,10 @@
         (String)content.getMetadata().get("Content-Length");
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
-          throw new ParseException("Content truncated at "+raw.length
-            +" bytes. Parser can't handle incomplete "+contentType+" file.");
+          return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
+                "Content truncated at " + raw.length
+            +" bytes. Parser can't handle incomplete "
+            + contentType + " file.").getEmptyParse();
       }
 
       ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);
@@ -133,15 +135,14 @@
       cr.evaluate();
 
       if (cr.getExitValue() != 0)
-        throw new ParseException("External command "+command
-          +" failed with error: "+es.toString());
+        return new ParseStatus(ParseStatus.FAILED,
+                        "External command " + command
+                        + " failed with error: " + es.toString()).getEmptyParse();
 
       text = os.toString();
 
-    } catch (ParseException e) {
-      throw e;
     } catch (Exception e) { // run time exception
-      throw new ParseException("ExtParser failed. "+e);
+      return new ParseStatus(e).getEmptyParse();
     }
 
     if (text == null)
@@ -158,7 +159,7 @@
     metaData.putAll(content.getMetadata()); // copy through
 
     ParseData parseData = new ParseData(title, outlinks, metaData);
-    return new ParseImpl(text, parseData);
+    return new ParseImpl(text, parseData, ParseStatus.STATUS_SUCCESS);
   }
 
 }
Index: src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java
===================================================================
--- src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java	(revision 168215)
+++ src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java	(working copy)
@@ -18,7 +18,8 @@
 
 import junit.framework.TestCase;
 
-import org.apache.nutch.parse.html.RobotsMetaProcessor.*;
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.html.HTMLMetaProcessor.*;
 
 import java.io.ByteArrayInputStream;
 import java.net.URL;
@@ -28,7 +29,7 @@
 import org.w3c.dom.*;
 import org.apache.html.dom.*;
 
-/** Unit tests for RobotsMetaProcessor. */
+/** Unit tests for HTMLMetaProcessor. */
 public class TestRobotsMetaProcessor extends TestCase {
   public TestRobotsMetaProcessor(String name) { 
     super(name); 
@@ -157,8 +158,8 @@
         e.printStackTrace();
       }
 
-      RobotsMetaIndicator robotsMeta= new RobotsMetaIndicator();
-      RobotsMetaProcessor.getRobotsMetaDirectives(robotsMeta, node, 
+      HTMLMetaTags robotsMeta= new HTMLMetaTags();
+      HTMLMetaProcessor.getMetaTags(robotsMeta, node, 
                                                   currURLsAndAnswers[i][0]);
 
       assertTrue("got index wrong on test " + i,
Index: src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
===================================================================
--- src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(revision 168215)
+++ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(working copy)
@@ -35,7 +35,7 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.*;
 import org.apache.nutch.parse.*;
-import org.apache.nutch.parse.html.RobotsMetaProcessor.*;
+import org.apache.nutch.parse.html.HTMLMetaProcessor.*;
 
 
 public class HtmlParser implements Parser {
@@ -94,7 +94,7 @@
   private static String defaultCharEncoding =
     NutchConf.get().get("parser.character.encoding.default", "windows-1252");
 
-  public Parse getParse(Content content) throws ParseException {
+  public Parse getParse(Content content) {
     DOMParser parser = new DOMParser();
     
     // some plugins, e.g., creativecommons, need to examine html comments
@@ -103,13 +103,13 @@
                         true);
     } catch (SAXException e) {}
 
-    RobotsMetaIndicator robotsMeta = new RobotsMetaIndicator();
+    HTMLMetaTags metaTags = new HTMLMetaTags();
 
     URL base;
     try {
       base = new URL(content.getBaseUrl());
     } catch (MalformedURLException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     }
 
     String text = "";
@@ -120,7 +120,8 @@
     // check that contentType is one we can handle
     String contentType = content.getContentType();
     if (!"".equals(contentType) && !contentType.startsWith("text/html"))
-      throw new ParseException("Content-Type not text/html: " + contentType);
+      return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_INVALID_FORMAT,
+              "Content-Type not text/html: " + contentType).getEmptyParse();
     
     // parse the content
     DocumentFragment root;
@@ -173,18 +174,18 @@
       root = doc.createDocumentFragment();
       root.appendChild(doc.getDocumentElement());
     } catch (IOException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     } catch (DOMException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     } catch (SAXException e) {
-      throw new ParseException(e);
+      return new ParseStatus(e).getEmptyParse();
     }
       
     // get meta directives
-    RobotsMetaProcessor.getRobotsMetaDirectives(robotsMeta, root, base);
+    HTMLMetaProcessor.getMetaTags(metaTags, root, base);
       
     // check meta directives
-    if (!robotsMeta.getNoIndex()) {               // okay to index
+    if (!metaTags.getNoIndex()) {               // okay to index
       StringBuffer sb = new StringBuffer();
       LOG.fine("Getting text...");
       DOMContentUtils.getText(sb, root);          // extract text
@@ -195,7 +196,7 @@
       title = sb.toString().trim();
     }
       
-    if (!robotsMeta.getNoFollow()) {              // okay to follow links
+    if (!metaTags.getNoFollow()) {              // okay to follow links
       ArrayList l = new ArrayList();              // extract outlinks
       URL baseTag = DOMContentUtils.getBase(root);
       LOG.fine("Getting links...");
@@ -204,7 +205,7 @@
       LOG.fine("found "+outlinks.length+" outlinks in "+content.getUrl());
     }
     
-    if (!robotsMeta.getNoCache()) {             // okay to cache
+    if (!metaTags.getNoCache()) {             // okay to cache
       // ??? FIXME ???
     }
     
@@ -212,10 +213,10 @@
     metadata.putAll(content.getMetadata());
 
     ParseData parseData = new ParseData(title, outlinks, metadata);
-    Parse parse = new ParseImpl(text, parseData);
+    Parse parse = new ParseImpl(text, parseData, new ParseStatus(ParseStatus.SUCCESS));
 
     // run filters on parse
-    return HtmlParseFilters.filter(content, parse, root);
+    return HtmlParseFilters.filter(content, parse, metaTags, root);
   }
 
   public static void main(String[] args) throws Exception {
Index: src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
===================================================================
--- src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java	(revision 0)
+++ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java	(revision 0)
@@ -0,0 +1,223 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.parse.html;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Properties;
+
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.w3c.dom.*;
+
+/**
+ * Class for parsing META Directives from DOM trees.  This class
+ * handles specifically Robots META directives (all, none, nofollow,
+ * noindex), finding BASE HREF tags, and HTTP-EQUIV no-cache and refresh
+ * instructions. All meta directives are stored in a HTMLMetaTags instance.
+ */
+public class HTMLMetaProcessor {
+
+  /**
+   * Sets the indicators in <code>metaTags</code> to appropriate
+   * values, based on any META tags found under the given
+   * <code>node</code>.
+   *
+   * @param metaTags holder that is reset and then filled in
+   * @param node root of the DOM (sub)tree to scan
+   * @param currURL URL of the current page, used to resolve relative BASE
+   *        and refresh targets; may be <code>null</code>
+   */
+  public static final void getMetaTags (
+    HTMLMetaTags metaTags, Node node, URL currURL) {
+
+    metaTags.reset();
+    getMetaTagsHelper(metaTags, node, currURL);
+  }
+
+  /**
+   * Recursively scans <code>node</code> and its children, recording META
+   * and BASE information. A BODY element and everything below it is skipped.
+   */
+  private static final void getMetaTagsHelper(
+    HTMLMetaTags metaTags, Node node, URL currURL) {
+
+    if (node.getNodeType() == Node.ELEMENT_NODE) {
+      String tagName = node.getNodeName();
+
+      // element names are compared case-insensitively throughout, so the
+      // result does not depend on how the DOM builder cases them
+      if ("BODY".equalsIgnoreCase(tagName)) {
+        // META tags should not be under body
+        return;
+      }
+
+      if ("META".equalsIgnoreCase(tagName)) {
+        NamedNodeMap attrs = node.getAttributes();
+        Node nameNode = attrs.getNamedItem("name");
+        Node equivNode = attrs.getNamedItem("http-equiv");
+        Node contentNode = attrs.getNamedItem("content");
+
+        // a META tag without a content attribute is ignored
+        if (contentNode != null) {
+          String content = contentNode.getNodeValue();
+          if (nameNode != null) {
+            handleNameTag(metaTags,
+              nameNode.getNodeValue().toLowerCase(), content);
+          }
+          if (equivNode != null) {
+            handleHttpEquivTag(metaTags,
+              equivNode.getNodeValue().toLowerCase(), content, currURL);
+          }
+        }
+
+      } else if ("BASE".equalsIgnoreCase(tagName)) {
+        Node hrefNode = node.getAttributes().getNamedItem("href");
+
+        if (hrefNode != null) {
+          String urlString = hrefNode.getNodeValue();
+
+          URL url = null;
+          try {
+            if (currURL == null)
+              url = new URL(urlString);
+            else
+              url = new URL(currURL, urlString);
+          } catch (MalformedURLException e) {
+            // unusable href: no base URL is recorded
+          }
+
+          if (url != null)
+            metaTags.setBaseHref(url);
+        }
+
+      }
+
+    }
+
+    NodeList children = node.getChildNodes();
+    if (children != null) {
+      int len = children.getLength();
+      for (int i = 0; i < len; i++) {
+        getMetaTagsHelper(metaTags, children.item(i), currURL);
+      }
+    }
+  }
+
+  /**
+   * Records a <code>name</code>/<code>content</code> META pair and, for
+   * the "robots" name, applies its directives.
+   */
+  private static void handleNameTag(
+    HTMLMetaTags metaTags, String name, String content) {
+
+    metaTags.getGeneralTags().setProperty(name, content);
+    if (!"robots".equals(name)) {
+      return;
+    }
+
+    String directives = content.toLowerCase();
+
+    // "none" stands for both noindex and nofollow; "all" needs no action
+    if (directives.indexOf("none") >= 0) {
+      metaTags.setNoIndex();
+      metaTags.setNoFollow();
+    }
+    if (directives.indexOf("noindex") >= 0) {
+      metaTags.setNoIndex();
+    }
+    if (directives.indexOf("nofollow") >= 0) {
+      metaTags.setNoFollow();
+    }
+  }
+
+  /**
+   * Records an <code>http-equiv</code>/<code>content</code> META pair and
+   * interprets the "pragma" (no-cache) and "refresh" headers.
+   */
+  private static void handleHttpEquivTag(
+    HTMLMetaTags metaTags, String name, String content, URL currURL) {
+
+    metaTags.getHttpEquivTags().setProperty(name, content);
+    if ("pragma".equals(name)) {
+      if (content.toLowerCase().indexOf("no-cache") >= 0) {
+        metaTags.setNoCache();
+      }
+    } else if ("refresh".equals(name)) {
+      handleRefresh(metaTags, content, currURL);
+    }
+  }
+
+  /**
+   * Interprets a refresh value of the form
+   * <code>seconds[; URL=target]</code>.
+   */
+  private static void handleRefresh(
+    HTMLMetaTags metaTags, String content, URL currURL) {
+
+    int sep = content.indexOf(';');
+    // without a separator the whole value is just the refresh time
+    String time = (sep == -1) ? content : content.substring(0, sep);
+    try {
+      int seconds = Integer.parseInt(time.trim());
+      metaTags.setRefreshTime(seconds);
+      metaTags.setRefresh();
+    } catch (NumberFormatException e) {
+      // skip this if we couldn't parse the time
+    }
+    if (!metaTags.getRefresh() || sep == -1) {
+      return;
+    }
+
+    // the keyword is matched ignoring case, so "url=" works like "URL="
+    int idx = indexOfIgnoreCase(content, "URL=");
+    if (idx == -1) {
+      return;
+    }
+    String target = content.substring(idx + 4);
+
+    URL refreshUrl = null;
+    try {
+      refreshUrl = new URL(target);
+    } catch (MalformedURLException e) {
+      // not an absolute URL: try resolving it against the current page
+      try {
+        refreshUrl = new URL(currURL, target);
+      } catch (MalformedURLException e1) {
+        // still unusable; no target is recorded
+      }
+    }
+    // like BASE handling, never record a target that could not be built
+    if (refreshUrl != null) {
+      metaTags.setRefreshHref(refreshUrl);
+    }
+  }
+
+  /**
+   * Returns the index of the first occurrence of <code>key</code> in
+   * <code>text</code>, ignoring case, or -1 if there is none.
+   */
+  private static int indexOfIgnoreCase(String text, String key) {
+    int last = text.length() - key.length();
+    for (int i = 0; i <= last; i++) {
+      if (text.regionMatches(true, i, key, 0, key.length())) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
+}
Index: src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
===================================================================
--- src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java	(revision 168215)
+++ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java	(working copy)
@@ -16,7 +16,7 @@
 
 package org.apache.nutch.protocol.http;
 
-import java.io.*;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
@@ -28,13 +28,15 @@
 import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.util.NutchConf;
 
+import org.apache.nutch.db.Page;
+import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.protocol.*;
 
 /** An implementation of the Http protocol. */
 public class Http implements Protocol {
 
   public static final Logger LOG =
-    LogFormatter.getLogger("org.apache.nutch.net.Http");
+    LogFormatter.getLogger("org.apache.nutch.net.Https");
 
   static {
     if (NutchConf.get().getBoolean("http.verbose", false))
@@ -170,8 +172,19 @@
     }
   }
 
-  public Content getContent(String urlString) throws ProtocolException {
+  public ProtocolOutput getProtocolOutput(String urlString) {
+    ProtocolOutput output = null;
     try {
+      return getProtocolOutput(new FetchListEntry(true,
+            new Page(urlString, 1.0f), new String[0]));
+    } catch (MalformedURLException mue) {
+      return new ProtocolOutput(null, new ProtocolStatus(mue));
+    }
+  }
+  
+  public ProtocolOutput getProtocolOutput(FetchListEntry fle) {
+    String urlString = fle.getUrl().toString();
+    try {
       URL url = new URL(urlString);
 
       int redirects = 0;
@@ -191,10 +204,10 @@
         int code = response.getCode();
         
         if (code == 200) {                        // got a good response
-          return response.toContent();            // return it
+          return new ProtocolOutput(response.toContent());            // return it
           
         } else if (code == 410) {                 // page is gone
-          throw new ResourceGone(url, "Http: " + code);
+          throw new ResourceGone(url, "Https: " + code);
 
         } else if (code >= 300 && code < 400) {   // handle redirect
           if (redirects == MAX_REDIRECTS)
@@ -207,8 +220,8 @@
           throw new HttpError(code);
         }
       }
-    } catch (IOException e) {
-      throw new HttpException(e);
+    } catch (Exception e) {
+      return new ProtocolOutput(null, new ProtocolStatus(e));
     } 
   }
 
@@ -285,7 +298,7 @@
       LOG.setLevel(Level.FINE);
     }
 
-    Content content = http.getContent(url);
+    Content content = http.getProtocolOutput(url).getContent();
 
     System.out.println("Content Type: " + content.getContentType());
     System.out.println("Content Length: " + content.get("Content-Length"));
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(revision 168215)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(working copy)
@@ -17,24 +17,24 @@
 package org.apache.nutch.protocol.file;
 
 
+import org.apache.nutch.db.Page;
 import org.apache.nutch.net.protocols.HttpDateFormat;
 
 import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.util.NutchConf;
 
+import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.protocol.ProtocolStatus;
 
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import java.net.MalformedURLException;
 import java.net.URL;
 
-import java.io.InputStream;
-// 20040528, xing, disabled for now
-//import java.io.Reader;
-import java.io.IOException;
-
 /************************************
  * File.java deals with file: scheme.
  *
@@ -65,10 +65,21 @@
   }
 
   /** Set the point at which content is truncated. */
-  public void setMaxContentLength(int length) {this.maxContentLength = length;}
+  public void setMaxContentLength(int length) {maxContentLength = length;}
 
-  public Content getContent(String urlString) throws FileException {
+  public ProtocolOutput getProtocolOutput(String urlString) {
+    ProtocolOutput output = null;
     try {
+      return getProtocolOutput(new FetchListEntry(true,
+            new Page(urlString, 1.0f), new String[0]));
+    } catch (MalformedURLException mue) {
+      return new ProtocolOutput(null, new ProtocolStatus(mue));
+    }
+  }
+  
+  public ProtocolOutput getProtocolOutput(FetchListEntry fle) {
+    String urlString = fle.getUrl().toString();
+    try {
       URL url = new URL(urlString);
   
       int redirects = 0;
@@ -80,7 +91,7 @@
         int code = response.getCode();
   
         if (code == 200) {                          // got a good response
-          return response.toContent();              // return it
+          return new ProtocolOutput(response.toContent());              // return it
   
         } else if (code >= 300 && code < 400) {     // handle redirect
           if (redirects == MAX_REDIRECTS)
@@ -94,8 +105,8 @@
           throw new FileError(code);
         }
       } 
-    } catch (IOException e) {
-      throw new FileException(e);
+    } catch (Exception e) {
+      return new ProtocolOutput(null, new ProtocolStatus(e));
     }
   }
 
@@ -139,7 +150,7 @@
     // set log level
     LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
 
-    Content content = file.getContent(urlString);
+    Content content = file.getProtocolOutput(urlString).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
     System.err.println("Content-Length: " + content.get("Content-Length"));
