Index: src/test/org/apache/nutch/metadata/TestMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestMetadata.java	(revision 0)
+++ src/test/org/apache/nutch/metadata/TestMetadata.java	(revision 0)
@@ -0,0 +1,268 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+// JDK imports
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Properties;
+import junit.framework.Test;
+
+// JUnit imports
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+// Nutch imports
+import org.apache.nutch.metadata.Metadata;
+
+
+/**
+ * JUnit based tests of class {@link org.apache.nutch.metadata.Metadata}.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestMetadata extends TestCase {
+
+  /** Creates the test case with the given name. */
+  public TestMetadata(String testName) {
+    super(testName);
+  }
+  /** Returns a suite built from this class (JUnit 3 <code>TestSuite(Class)</code>). */
+  public static Test suite() {
+    return new TestSuite(TestMetadata.class);
+  }
+  /** Command-line entry point: runs {@link #suite()} with the text-mode runner. */
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+  
+
+  /** Checks <code>getNormalizedName(String)</code> on several spellings of one name. */
+  public void testGetNormalizedName() {
+    // Exact, re-cased, unhyphenated and misspelled forms share one reference name.
+    String[] spellings = { "Content-Type", "ContentType", "Content-type",
+                           "contenttype", "contentype", "contntype" };
+    for (int i=0; i<spellings.length; i++) {
+      assertEquals("Content-Type", Metadata.getNormalizedName(spellings[i]));
+    }
+  }
+
+  /** Checks that <code>add(String, String)</code> appends values in order. */
+  public void testAdd() {
+    Metadata md = new Metadata();
+
+    // Nothing is stored yet for this (misspelled) name.
+    String[] found = md.getValues("contentype");
+    assertEquals(0, found.length);
+
+    md.add("contentype", "value1");
+    found = md.getValues("contentype");
+    assertEquals(1, found.length);
+    assertEquals("value1", found[0]);
+
+    md.add("Content-Type", "value2");
+    found = md.getValues("contentype");
+    assertEquals(2, found.length);
+    assertEquals("value1", found[0]);
+    assertEquals("value2", found[1]);
+
+    // NOTE: duplicates are currently kept, so the same value may appear
+    //       several times. Should it be changed?
+    md.add("ContentType", "value1");
+    found = md.getValues("Content-Type");
+    assertEquals(3, found.length);
+    assertEquals("value1", found[0]);
+    assertEquals("value2", found[1]);
+    assertEquals("value1", found[2]);
+  }
+
+  /** Checks that <code>set(String, String)</code> replaces the previous values. */
+  public void testSet() {
+    Metadata md = new Metadata();
+
+    String[] found = md.getValues("contentype");
+    assertEquals(0, found.length);
+
+    md.set("contentype", "value1");
+    found = md.getValues("contentype");
+    assertEquals(1, found.length);
+    assertEquals("value1", found[0]);
+
+    // set() replaces whatever was stored, whichever spelling of the name is used.
+    md.set("Content-Type", "value2");
+    found = md.getValues("contentype");
+    assertEquals(1, found.length);
+    assertEquals("value2", found[0]);
+
+    md.set("contenttype", "new value 1");
+    md.add("contenttype", "new value 2");
+    found = md.getValues("contentype");
+    assertEquals(2, found.length);
+    assertEquals("new value 1", found[0]);
+    assertEquals("new value 2", found[1]);
+  }
+  
+  /** Checks that <code>setAll(Properties)</code> copies each property as one value. */
+  public void testSetProperties() {
+    Metadata md = new Metadata();
+    Properties source = new Properties();
+
+    // Copying an empty Properties leaves the metadata empty.
+    md.setAll(source);
+    assertEquals(0, md.size());
+
+    source.setProperty("name-one", "value1.1");
+    md.setAll(source);
+    assertEquals(1, md.size());
+    String[] found = md.getValues("name-one");
+    assertEquals(1, found.length);
+    assertEquals("value1.1", found[0]);
+
+    source.setProperty("name-two", "value2.1");
+    md.setAll(source);
+    assertEquals(2, md.size());
+    found = md.getValues("name-one");
+    assertEquals(1, found.length);
+    assertEquals("value1.1", found[0]);
+    found = md.getValues("name-two");
+    assertEquals(1, found.length);
+    assertEquals("value2.1", found[0]);
+  }
+    
+  /** Checks that <code>get(String)</code> reports the first value of a name. */
+  public void testGet() {
+    Metadata md = new Metadata();
+    // A name that was never added has no value at all.
+    assertNull(md.get("a-name"));
+    md.add("a-name", "value-1");
+    assertEquals("value-1", md.get("a-name"));
+    // The first value is still the one returned once the name is multi-valued.
+    md.add("a-name", "value-2");
+    assertEquals("value-1", md.get("a-name"));
+  }
+    
+  /** Checks that <code>isMultiValued(String)</code> turns true at the second value. */
+  public void testIsMultiValued() {
+    Metadata md = new Metadata();
+    assertFalse(md.isMultiValued("key"));
+    md.add("key", "value1");
+    assertFalse(md.isMultiValued("key"));
+    md.add("key", "value2");
+    assertTrue(md.isMultiValued("key"));
+  }
+
+  /** Checks that <code>names()</code> lists each name that received a value. */
+  public void testNames() {
+    Metadata md = new Metadata();
+    String[] listed = md.names();
+    assertEquals(0, listed.length);
+
+    md.add("name-one", "value");
+    listed = md.names();
+    assertEquals(1, listed.length);
+    assertEquals("name-one", listed[0]);
+    // A second, distinct name must be counted as well.
+    md.add("name-two", "value");
+    listed = md.names();
+    assertEquals(2, listed.length);
+  }
+  
+  /** Checks that <code>remove(String)</code> drops a name with all its values. */
+  public void testRemove() {
+    Metadata md = new Metadata();
+    md.remove("name-one");
+    assertEquals(0, md.size());
+    md.add("name-one", "value-1.1");
+    md.add("name-one", "value-1.2");
+    md.add("name-two", "value-2.2");
+    assertEquals(2, md.size());
+    assertNotNull(md.get("name-one"));
+    assertNotNull(md.get("name-two"));
+    md.remove("name-one");
+    assertEquals(1, md.size());
+    assertNull(md.get("name-one"));
+    assertNotNull(md.get("name-two"));
+    md.remove("name-two");
+    assertEquals(0, md.size());
+    assertNull(md.get("name-one"));
+    assertNull(md.get("name-two"));
+  }
+
+  /** Checks <code>equals(Object)</code> while two instances are filled in turn. */
+  public void testObject() {
+    Metadata left = new Metadata();
+    Metadata right = new Metadata();
+    assertFalse(left.equals(null));
+    assertFalse(left.equals("String"));
+    assertTrue(left.equals(right));
+    left.add("name-one", "value-1.1");
+    assertFalse(left.equals(right));
+    right.add("name-one", "value-1.1");
+    assertTrue(left.equals(right));
+    left.add("name-one", "value-1.2");
+    assertFalse(left.equals(right));
+    right.add("name-one", "value-1.2");
+    assertTrue(left.equals(right));
+    left.add("name-two", "value-2.1");
+    assertFalse(left.equals(right));
+    right.add("name-two", "value-2.1");
+    assertTrue(left.equals(right));
+    left.add("name-two", "value-2.2");
+    assertFalse(left.equals(right));
+    right.add("name-two", "value-2.x");
+    assertFalse(left.equals(right));
+  }
+  
+  /** Checks that a write / readFields round trip preserves names and values. */
+  public void testWritable() {
+    Metadata md = new Metadata();
+    Metadata copy = writeRead(md);
+    assertEquals(0, copy.size());
+    // One single-valued name first, then a second name holding two values.
+    md.add("name-one", "value-1.1");
+    copy = writeRead(md);
+    assertEquals(1, copy.size());
+    assertEquals(1, copy.getValues("name-one").length);
+    assertEquals("value-1.1", copy.get("name-one"));
+    md.add("name-two", "value-2.1");
+    md.add("name-two", "value-2.2");
+    copy = writeRead(md);
+    assertEquals(2, copy.size());
+    assertEquals(1, copy.getValues("name-one").length);
+    assertEquals("value-1.1", copy.getValues("name-one")[0]);
+    assertEquals(2, copy.getValues("name-two").length);
+    assertEquals("value-2.1", copy.getValues("name-two")[0]);
+    assertEquals("value-2.2", copy.getValues("name-two")[1]);
+  }
+  /** Writes <code>meta</code> to an in-memory buffer and reads it into a new instance. */
+  private Metadata writeRead(Metadata meta) {
+    Metadata copy = new Metadata();
+    try {
+      ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+      meta.write(new DataOutputStream(buffer));
+      copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
+    } catch (IOException ioe) {
+      fail(ioe.toString());
+    }
+    return copy;
+  }
+  	
+}

Property changes on: src/test/org/apache/nutch/metadata/TestMetadata.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/test/org/apache/nutch/protocol/TestContentProperties.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContentProperties.java	(revision 375984)
+++ src/test/org/apache/nutch/protocol/TestContentProperties.java	(working copy)
@@ -1,65 +0,0 @@
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol;
-
-import org.apache.nutch.util.WritableTestUtils;
-import org.apache.nutch.util.NutchConfiguration;
-
-import junit.framework.TestCase;
-
-public class TestContentProperties extends TestCase {
-
-    public void testOneValue() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        String value = "aValue";
-        properties.setProperty("aKey", value);
-        assertEquals(value, properties.get("aKey"));
-        assertEquals(value, properties.get("akey"));
-    }
-
-    public void testMultiValue() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        String value = "aValue";
-        for (int i = 0; i < 100; i++) {
-            properties.setProperty("aKey", value + i);
-
-        }
-        assertEquals(value + 99, properties.get("aKey"));
-        assertEquals(value + 99, properties.getProperty("aKey"));
-        String[] propertie = properties.getProperties("aKey");
-        for (int i = 0; i < 100; i++) {
-            assertEquals(value + i, propertie[i]);
-
-        }
-    }
-
-    public void testSerialization() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        for (int i = 0; i < 10; i++) {
-            properties.setProperty("key", "" + i);
-        }
-        WritableTestUtils.testWritable(properties);
-        Content content = new Content("url", "url", new byte[0], "text/html",
-                new ContentProperties(), NutchConfiguration.create());
-        ContentProperties metadata = content.getMetadata();
-        for (int i = 0; i < 100; i++) {
-            metadata.setProperty("aKey", "" + i);
-        }
-        WritableTestUtils.testWritable(content);
-    }
-	
-}
Index: src/test/org/apache/nutch/protocol/TestContent.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContent.java	(revision 375984)
+++ src/test/org/apache/nutch/protocol/TestContent.java	(working copy)
@@ -16,13 +16,14 @@
 
 package org.apache.nutch.protocol;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
-
 import org.apache.nutch.util.WritableTestUtils;
 
 import junit.framework.TestCase;
 
+
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
@@ -37,9 +38,9 @@
 
     String url = "http://www.foo.com/";
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Host", "www.foo.com");
-    metaData.put("Content-Type", "text/html");
+    Metadata metaData = new Metadata();
+    metaData.add("Host", "www.foo.com");
+    metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
                             metaData, conf);
@@ -47,12 +48,13 @@
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
     assertEquals("text/html", r.getMetadata().get("content-type"));
+    assertEquals("text/html", r.getMetadata().get("CONTENTYPE"));
   }
 
   /** Unit tests for getContentType(String, String, byte[]) method. */
   public void testGetContentType() throws Exception {
     Content c = null;
-    ContentProperties p = new ContentProperties();
+    Metadata p = new Metadata();
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",
Index: src/test/org/apache/nutch/parse/TestParseData.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseData.java	(revision 375984)
+++ src/test/org/apache/nutch/parse/TestParseData.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 import org.apache.nutch.util.WritableTestUtils;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
 
 import junit.framework.TestCase;
 
@@ -41,9 +41,9 @@
       new Outlink("http://bar.com/", "Bar", conf)
     };
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Language", "en/us");
-    metaData.put("Charset", "UTF-8");
+    Metadata metaData = new Metadata();
+    metaData.add("Language", "en/us");
+    metaData.add("Charset", "UTF-8");
 
     ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
     r.setConf(conf);
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(revision 0)
@@ -0,0 +1,318 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+// JDK imports
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+// Commons Lang imports
+import org.apache.commons.lang.StringUtils;
+
+// Hadoop imports
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+
+
+/**
+ * A syntax tolerant and multi-valued metadata container.
+ *
+ * All the static String fields declared by this class are used as reference
+ * names for syntax correction on meta-data naming.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class Metadata implements CreativeCommons,
+                                 DublinCore,
+                                 HttpHeaders,
+                                 Nutch,
+                                 Office,
+                                 Writable {
+  
+  /** Used to format DC dates for the DATE metadata field (yyyy-MM-dd pattern).
+   *  NOTE: SimpleDateFormat is not synchronized; guard concurrent use of it. */
+  public final static SimpleDateFormat DATE_FORMAT = 
+          new SimpleDateFormat("yyyy-MM-dd");
+  
+  /** Maps the normalized form of each reference name to the reference name itself. */
+  private final static Map NAMES_IDX = new HashMap();
+  private static String[] normalized = null;  // the keys of NAMES_IDX, as an array
+
+  // Uses self introspection to fill the metanames index and the metanames
+  // list from the public static final String fields found on this class.
+  static {
+    Field[] fields = Metadata.class.getFields();
+    for (int i=0; i<fields.length; i++) {
+      int mods = fields[i].getModifiers();
+      if (Modifier.isFinal(mods) &&
+          Modifier.isPublic(mods) &&
+          Modifier.isStatic(mods) &&
+          fields[i].getType().equals(String.class)) {
+        try {
+          String val = (String) fields[i].get(null);
+          NAMES_IDX.put(normalize(val), val);
+        } catch (Exception e) {
+          // Best effort: a field that cannot be read is left out of the index.
+        }
+      }
+    }
+    normalized = (String[]) NAMES_IDX.keySet().toArray(new String[NAMES_IDX.size()]);
+  }
+  
+  
+  /** All metadata attributes: normalized name -> String, or List of Strings if multi-valued. */
+  private Map metadata = null;
+
+  
+  /** Creates a metadata container that holds no name/value mapping yet. */
+  public Metadata() {
+    this.metadata = new HashMap();
+  }
+
+  /** Tells whether more than one value is associated to the specified name. */
+  public boolean isMultiValued(String name) {
+    String[] values = getValues(name);
+    return values.length > 1;
+  }
+
+  /**
+   * Returns the normalized form of every name held by this metadata.
+   */
+  public String[] names() {
+    String[] result = new String[metadata.size()];
+    int next = 0;
+    for (Iterator keys = metadata.keySet().iterator(); keys.hasNext(); ) {
+      result[next++] = getNormalizedName((String) keys.next());
+    }
+    return result;
+  }
+  
+  /**
+   * Returns the first value associated to a metadata name: a name may hold
+   * several values, but only the one added first is reported here.
+   *
+   * @param name of the metadata.
+   * @return the first value of the specified name, or <code>null</code> if
+   *         no value is associated to it.
+   */
+  public String get(String name) {
+    Object stored = metadata.get(getNormalizedName(name));
+    if (stored instanceof List) {
+      return (String) ((List) stored).get(0);
+    }
+    // Either the single String value, or null when the name is unknown.
+    return (String) stored;
+  }
+
+  /**
+   * Returns every value associated to a metadata name, in insertion order.
+   * @param name of the metadata.
+   * @return the values of the name; an empty array when it has none.
+   */
+  public String[] getValues(String name) {
+    Object stored = metadata.get(getNormalizedName(name));
+    if (stored == null) {
+      return new String[0];
+    }
+    if (stored instanceof List) {
+      List list = (List) stored;
+      return (String[]) list.toArray(new String[list.size()]);
+    }
+    // A name with exactly one value is stored as a bare String.
+    return new String[] { (String) stored };
+  }
+  
+  /**
+   * Adds one more value to a metadata name.
+   * Values already associated to the name are kept; the new value is
+   * appended after them. The name is normalized before being stored.
+   *
+   * @param name the metadata name.
+   * @param value the metadata value.
+   */
+  public void add(String name, String value) {
+    String key = getNormalizedName(name);
+    Object stored = metadata.get(key);
+    if (stored == null) {
+      // First value for this name: kept as a bare String.
+      metadata.put(key, value);
+    } else if (stored instanceof List) {
+      ((List) stored).add(value);
+    } else if (stored instanceof String) {
+      // Second value: promote the single String to a list of values.
+      List promoted = new ArrayList();
+      promoted.add(stored);
+      promoted.add(value);
+      metadata.put(key, promoted);
+    }
+  }
+
+  /** Sets every property of <code>properties</code> as a single-valued metadata. */
+  public void setAll(Properties properties) {
+    for (Enumeration e = properties.propertyNames(); e.hasMoreElements(); ) {
+      String key = (String) e.nextElement();
+      set(key, properties.getProperty(key));
+    }
+  }
+  
+  /**
+   * Set a metadata name to a single value.
+   * Associate the specified value to the specified metadata name. If some
+   * previous values were associated to this name, they are removed first.
+   *
+   * @param name the metadata name.
+   * @param value the only value the name holds afterwards.
+   */
+  public void set(String name, String value) {
+    remove(name);
+    add(name, value);
+  }
+
+  /** Removes the specified metadata name together with all the values
+   *  associated to it. */
+  public void remove(String name) {
+    String key = getNormalizedName(name);
+    metadata.remove(key);
+  }
+  
+  /**
+   * Returns the number of distinct metadata names (not of values) in this metadata.
+   */
+  public int size() {
+    return metadata.size();
+  }
+  
+  /** Two metadata are equal when they hold the same names, each of them
+   *  associated to the same values in the same order. */
+  public boolean equals(Object o) {
+    if (!(o instanceof Metadata)) { return false; }  // also rejects null
+    Metadata other = (Metadata) o;
+    if (other.size() != size()) { return false; }
+    String[] names = names();
+    for (int i=0; i<names.length; i++) {
+      String[] otherValues = other.getValues(names[i]);
+      String[] thisValues = getValues(names[i]);
+      if (otherValues.length != thisValues.length) { return false; }
+      for (int j=0; j<otherValues.length; j++) {
+        if (!otherValues[j].equals(thisValues[j])) { return false; }
+      }
+    }
+    return true;
+  }
+
+  /** Hash code consistent with <code>equals</code>: equal metadata hash alike. */
+  public int hashCode() {
+    int hash = 0;
+    String[] names = names();
+    for (int i=0; i<names.length; i++) {
+      int h = names[i].hashCode();
+      String[] values = getValues(names[i]);
+      for (int j=0; j<values.length; j++) { h = 31 * h + values[j].hashCode(); }
+      hash += h;  // plain sum: the iteration order of names must not matter
+    }
+    return hash;
+  }
+  
+  /**
+   * Returns the reference spelling of a metadata attribute name.
+   * The name is compared with the well-known metadata names (the ones
+   * defined in this class), ignoring case and non-letter characters.
+   * The matching is error tolerant. For instance,
+   * <ul>
+   *  <li>content-type gives Content-Type</li>
+   *  <li>CoNtEntType  gives Content-Type</li>
+   *  <li>ConTnTtYpe   gives Content-Type</li>
+   * </ul>
+   * If no well-known metadata name matches, then the original name is
+   * returned unchanged.
+   */
+  public static String getNormalizedName(String name) {
+    String key = normalize(name);
+    String exact = (String) NAMES_IDX.get(key);
+    if (exact != null) { return exact; }
+    if (normalized == null) { return name; }
+    int maxDistance = key.length() / 3;  // tolerance of the fuzzy pass
+    for (int i=0; i<normalized.length; i++) {
+      if (StringUtils.getLevenshteinDistance(key, normalized[i]) < maxDistance) {
+        String match = (String) NAMES_IDX.get(normalized[i]);
+        if (match != null) { return match; }
+      }
+    }
+    return name;
+  }
+    
+  /** Keeps only the letters of <code>str</code>, converted to lower case. */
+  private final static String normalize(String str) {
+    StringBuffer letters = new StringBuffer(str.length());
+    char[] chars = str.toCharArray();
+    for (int i=0; i<chars.length; i++) {
+      if (Character.isLetter(chars[i])) {
+        letters.append(Character.toLowerCase(chars[i]));
+      }
+    }
+    return letters.toString();
+  }
+
+  
+  /* ------------------------- *
+   * <implementation:Writable> *
+   * ------------------------- */
+  
+  // Inherited Javadoc
+  public final void write(DataOutput out) throws IOException {
+    // Layout: names count, then for each name its values count and its values.
+    out.writeInt(size());
+    String[] keys = names();
+    for (int i=0; i<keys.length; i++) {
+      String[] held = getValues(keys[i]);
+      UTF8.writeString(out, keys[i]);
+      out.writeInt(held.length);
+      for (int j=0; j<held.length; j++) {
+        UTF8.writeString(out, held[j]);
+      }
+    }
+  }
+
+  // Inherited Javadoc. Replaces, rather than extends, the current content.
+  public final void readFields(DataInput in) throws IOException {
+    metadata.clear();  // a Writable may be reused: drop the previous record
+    int keySize = in.readInt();
+    for (int i=0; i<keySize; i++) {
+      String key = UTF8.readString(in);
+      int valueSize = in.readInt();
+      for (int j=0; j<valueSize; j++) {
+        add(key, UTF8.readString(in));
+      }
+    }
+  }
+
+  /* -------------------------- *
+   * </implementation:Writable> *
+   * -------------------------- */
+   
+}

Property changes on: src/java/org/apache/nutch/metadata/Metadata.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(revision 0)
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+
+/**
+ * A collection of Nutch internal metadata constants.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public interface Nutch {
+
+  /** Name of the Nutch internal metadata <code>OriginalCharEncoding</code>. */
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
+
+  /** Name of the Nutch internal metadata <code>CharEncodingForConversion</code>. */
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
+
+}

Property changes on: src/java/org/apache/nutch/metadata/Nutch.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(revision 0)
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+
+/**
+ * A collection of Dublin Core metadata names.
+ *
+ * @see <a href="http://dublincore.org">dublincore.org</a> 
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public interface DublinCore {
+  
+    
+  /**
+   * Typically, Format may include the media-type or dimensions of the
+   * resource. Format may be used to determine the software, hardware or other
+   * equipment needed to display or operate the resource. Examples of
+   * dimensions include size and duration. Recommended best practice is to
+   * select a value from a controlled vocabulary (for example, the list of
+   * Internet Media Types [MIME] defining computer media formats).
+   */
+  public static final String FORMAT = "format";
+  
+  /**
+   * Recommended best practice is to identify the resource by means of a
+   * string or number conforming to a formal identification system. Example
+   * formal identification systems include the Uniform Resource Identifier
+   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Identifier (DOI) and the International Standard Book Number (ISBN).
+   */
+  public static final String IDENTIFIER = "identifier";
+  
+  /**
+   * Date on which the resource was changed.
+   */
+  public static final String MODIFIED = "modified";
+  
+  /**
+   * An entity responsible for making contributions to the content of the
+   * resource. Examples of a Contributor include a person, an organisation, or
+   * a service. Typically, the name of a Contributor should be used to
+   * indicate the entity.
+   */
+  public static final String CONTRIBUTOR = "contributor";
+  
+  /**
+   * The extent or scope of the content of the resource. Coverage will
+   * typically include spatial location (a place name or geographic
+   * coordinates), temporal period (a period label, date, or date range) or
+   * jurisdiction (such as a named administrative entity). Recommended best
+   * practice is to select a value from a controlled vocabulary (for example,
+   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
+   * named places or time periods be used in preference to numeric identifiers
+   * such as sets of coordinates or date ranges.
+   */
+  public static final String COVERAGE = "coverage";
+  
+  /**
+   * An entity primarily responsible for making the content of the resource.
+   * Examples of a Creator include a person, an organisation, or a service.
+   * Typically, the name of a Creator should be used to indicate the entity.
+   */
+  public static final String CREATOR = "creator";
+  
+  /**
+   * A date associated with an event in the life cycle of the resource.
+   * Typically, Date will be associated with the creation or availability of
+   * the resource. Recommended best practice for encoding the date value is
+   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
+   * format (see {@link Metadata#DATE_FORMAT}).
+   */
+  public static final String DATE = "date";
+  
+  /**
+   * An account of the content of the resource. Description may include but is
+   * not limited to: an abstract, table of contents, reference to a graphical
+   * representation of content or a free-text account of the content.
+   */
+  public static final String DESCRIPTION = "description";
+  
+  /**
+   * A language of the intellectual content of the resource. Recommended best
+   * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
+   * [ISO639], defines two- and three-letter primary language tags with
+   * optional subtags. Examples include "en" or "eng" for English, "akk" for
+   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   */
+  public static final String LANGUAGE = "language";
+  
+  /**
+   * An entity responsible for making the resource available. Examples of a
+   * Publisher include a person, an organisation, or a service. Typically, the
+   * name of a Publisher should be used to indicate the entity.
+   */
+  public static final String PUBLISHER = "publisher";
+  
+  /**
+   * A reference to a related resource. Recommended best practice is to
+   * reference the resource by means of a string or number conforming to a
+   * formal identification system.
+   */
+  public static final String RELATION = "relation";
+  
+  /**
+   * Information about rights held in and over the resource. Typically, a
+   * Rights element will contain a rights management statement for the
+   * resource, or reference a service providing such information. Rights
+   * information often encompasses Intellectual Property Rights (IPR),
+   * Copyright, and various Property Rights. If the Rights element is absent,
+   * no assumptions can be made about the status of these and other rights
+   * with respect to the resource.
+   */
+  public static final String RIGHTS = "rights";
+  
+  /**
+   * A reference to a resource from which the present resource is derived. The
+   * present resource may be derived from the Source resource in whole or in
+   * part. Recommended best practice is to reference the resource by means of
+   * a string or number conforming to a formal identification system.
+   */
+  public static final String SOURCE = "source";
+  
+  /**
+   * The topic of the content of the resource. Typically, a Subject will be
+   * expressed as keywords, key phrases or classification codes that describe
+   * a topic of the resource. Recommended best practice is to select a value
+   * from a controlled vocabulary or formal classification scheme.
+   */
+  public static final String SUBJECT = "subject";
+  
+  /**
+   * A name given to the resource. Typically, a Title will be a name by which
+   * the resource is formally known.
+   */
+  public static final String TITLE = "title";
+  
+  /**
+   * The nature or genre of the content of the resource. Type includes terms
+   * describing general categories, functions, genres, or aggregation levels
+   * for content. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
+   * To describe the physical or digital manifestation of the resource, use
+   * the Format element.
+   */
+  public static final String TYPE = "type";
+  
+}

Property changes on: src/java/org/apache/nutch/metadata/DublinCore.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 0)
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+
+/**
+ * A collection of HTTP header names.
+ *
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
+ *      Protocol -- HTTP/1.1 (RFC 2616)</a>
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public interface HttpHeaders {
+  /** Name of the <code>Content-Encoding</code> header. */
+  public static final String CONTENT_ENCODING = "Content-Encoding";
+  /** Name of the <code>Content-Language</code> header. */
+  public static final String CONTENT_LANGUAGE = "Content-Language";
+  /** Name of the <code>Content-Length</code> header. */
+  public static final String CONTENT_LENGTH = "Content-Length";
+  /** Name of the <code>Content-Location</code> header. */
+  public static final String CONTENT_LOCATION = "Content-Location";
+  /** Name of the <code>Content-Disposition</code> header. */
+  public static final String CONTENT_DISPOSITION = "Content-Disposition";
+  /** Name of the <code>Content-MD5</code> header. */
+  public static final String CONTENT_MD5 = "Content-MD5";
+  /** Name of the <code>Content-Type</code> header. */
+  public static final String CONTENT_TYPE = "Content-Type";
+  /** Name of the <code>Last-Modified</code> header. */
+  public static final String LAST_MODIFIED = "Last-Modified";
+  /** Name of the <code>Location</code> header. */
+  public static final String LOCATION = "Location";
+
+}

Property changes on: src/java/org/apache/nutch/metadata/HttpHeaders.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/metadata/Office.java
===================================================================
--- src/java/org/apache/nutch/metadata/Office.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/Office.java	(revision 0)
@@ -0,0 +1,51 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+
+/**
+ * A collection of <i>"Office"</i> documents properties names.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public interface Office {
+  /** Property name under which a document's keywords are stored. */
+  public static final String KEYWORDS = "Keywords";
+  /** Property name under which a document's comments are stored. */
+  public static final String COMMENTS = "Comments";
+  /** Property name under which the last author is stored. */
+  public static final String LAST_AUTHOR = "Last-Author";
+  /** Property name under which the application name is stored. */
+  public static final String APPLICATION_NAME = "Application-Name";
+  /** Character count property; NOTE: value has a space, not a hyphen. */
+  public static final String CHARACTER_COUNT = "Character Count";
+  /** Property name under which the last-printed date is stored. */
+  public static final String LAST_PRINTED = "Last-Printed";
+  /** Property name under which the last-save date is stored. */
+  public static final String LAST_SAVED = "Last-Save-Date";
+  /** Property name under which the page count is stored. */
+  public static final String PAGE_COUNT = "Page-Count";
+  /** Property name under which the revision number is stored. */
+  public static final String REVISION_NUMBER = "Revision-Number";
+  /** Property name under which the word count is stored. */
+  public static final String WORD_COUNT = "Word-Count";
+  /** Property name under which the template is stored. */
+  public static final String TEMPLATE = "Template";
+  /** Property name under which the author is stored. */
+  public static final String AUTHOR = "Author";
+  
+}

Property changes on: src/java/org/apache/nutch/metadata/Office.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 0)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 0)
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.metadata;
+
+
+/**
+ * A collection of Creative Commons properties names.
+ *
+ * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public interface CreativeCommons {
+  /** Creative Commons property name <code>License-Url</code>. */
+  public static final String LICENSE_URL = "License-Url";
+  /** Creative Commons property name <code>License-Location</code>. */
+  public static final String LICENSE_LOCATION = "License-Location";
+  /** Creative Commons property name <code>Work-Type</code>. */
+  public static final String WORK_TYPE = "Work-Type";
+  
+}

Property changes on: src/java/org/apache/nutch/metadata/CreativeCommons.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/fetcher/Fetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/Fetcher.java	(revision 375984)
+++ src/java/org/apache/nutch/fetcher/Fetcher.java	(working copy)
@@ -27,6 +27,7 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.SignatureFactory;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.*;
 import org.apache.nutch.protocol.*;
 import org.apache.nutch.parse.*;
@@ -208,14 +209,14 @@
 
       if (content == null) {
         String url = key.toString();
-        content = new Content(url, url, new byte[0], "", new ContentProperties(), this.conf);
+        content = new Content(url, url, new byte[0], "", new Metadata(), this.conf);
       }
+      Metadata metadata = content.getMetadata();
+      // add segment to metadata
+      metadata.set(SEGMENT_NAME_KEY, segmentName);
+      // add score to metadata
+      metadata.set(SCORE_KEY, Float.toString(datum.getScore()));
 
-      content.getMetadata().setProperty           // add segment to metadata
-        (SEGMENT_NAME_KEY, segmentName);
-      content.getMetadata().setProperty           // add score to metadata
-        (SCORE_KEY, Float.toString(datum.getScore()));
-
       Parse parse = null;
       if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
         ParseStatus parseStatus;
@@ -232,11 +233,8 @@
         // Calculate page signature. For non-parsing fetchers this will
         // be done in ParseSegment
         byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
-        parse.getData().getMetadata().setProperty(SIGNATURE_KEY, StringUtil.toHexString(signature));
+        metadata.set(SIGNATURE_KEY, StringUtil.toHexString(signature));
         datum.setSignature(signature);
-        // add segment name and score to parseData metadata
-        parse.getData().getMetadata().setProperty(SEGMENT_NAME_KEY, segmentName);
-        parse.getData().getMetadata().setProperty(SCORE_KEY, Float.toString(datum.getScore()));
       }
 
       try {
Index: src/java/org/apache/nutch/servlet/Cached.java
===================================================================
--- src/java/org/apache/nutch/servlet/Cached.java	(revision 375984)
+++ src/java/org/apache/nutch/servlet/Cached.java	(working copy)
@@ -16,7 +16,8 @@
 
 package org.apache.nutch.servlet;
 
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.searcher.NutchBean;
 import org.apache.nutch.searcher.Hit;
 import org.apache.nutch.searcher.HitDetails;
@@ -74,10 +75,10 @@
     byte[] bytes = bean.getContent(details);
 
     // pass all original headers? only these for now.
-    ContentProperties metaData = bean.getParseData(details).getMetadata();
-    String contentType = (String) metaData.get("Content-Type");
-    //String lastModified = (String) metaData.get("Last-Modified");
-    //String contentLength = (String) metaData.get("Content-Length");
+    Metadata metadata = bean.getParseData(details).getContentMeta();
+    String contentType = metadata.get(Response.CONTENT_TYPE);
+    //String lastModified = metadata.get(Metadata.LAST_MODIFIED);
+    //String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
     // better use this, since it may have been truncated during fetch
     // or give warning if they don't match?
     int contentLength = bytes.length;
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 375984)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -19,14 +19,15 @@
 import java.net.URL;
 
 // Nutch imports
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.HttpHeaders;
+import org.apache.nutch.metadata.Metadata;
 
 
 /**
  * A response inteface.  Makes all protocols model HTTP.
  */
-public interface Response {
-
+public interface Response extends HttpHeaders {
+  
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -37,7 +38,7 @@
   public String getHeader(String name);
 
   /** Returns all the headers. */
-  public ContentProperties getHeaders();
+  public Metadata getHeaders();
   
   /** Returns the full content of the response. */
   public byte[] getContent();
Index: src/java/org/apache/nutch/indexer/Indexer.java
===================================================================
--- src/java/org/apache/nutch/indexer/Indexer.java	(revision 375984)
+++ src/java/org/apache/nutch/indexer/Indexer.java	(working copy)
@@ -38,6 +38,7 @@
 
 import org.apache.lucene.index.*;
 import org.apache.lucene.document.*;
+import org.apache.nutch.metadata.Metadata;
 
 /** Create indexes for segments. */
 public class Indexer extends Configured implements Reducer {
@@ -196,15 +197,15 @@
     }
 
     Document doc = new Document();
-    ContentProperties meta = parseData.getMetadata();
+    Metadata metadata = parseData.getContentMeta();
     String[] anchors = inlinks!=null ? inlinks.getAnchors() : new String[0];
 
     // add segment, used to map from merged index back to segment files
     doc.add(Field.UnIndexed("segment",
-                            meta.getProperty(Fetcher.SEGMENT_NAME_KEY)));
+                            metadata.get(Fetcher.SEGMENT_NAME_KEY)));
 
     // add digest, used by dedup
-    doc.add(Field.UnIndexed("digest", meta.getProperty(Fetcher.SIGNATURE_KEY)));
+    doc.add(Field.UnIndexed("digest", metadata.get(Fetcher.SIGNATURE_KEY)));
 
     // boost is opic
     float boost = (float)Math.pow(dbDatum.getScore(), scorePower);
Index: src/java/org/apache/nutch/protocol/ContentProperties.java
===================================================================
--- src/java/org/apache/nutch/protocol/ContentProperties.java	(revision 375984)
+++ src/java/org/apache/nutch/protocol/ContentProperties.java	(working copy)
@@ -1,203 +0,0 @@
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.Iterator;
-import java.util.Properties;
-import java.util.TreeMap;
-
-import org.apache.hadoop.io.UTF8;
-import org.apache.hadoop.io.Writable;
-
-/**
- * writable case insensitive properties
- */
-public class ContentProperties extends TreeMap implements Writable {
-
-    /**
-     * construct the TreeMap with a case insensitive comparator
-     */
-    public ContentProperties() {
-        super(String.CASE_INSENSITIVE_ORDER);
-    }
-
-    /**
-     * initialize with default values
-     * 
-     * @param defaults
-     */
-    public ContentProperties(Properties defaults) {
-        super(String.CASE_INSENSITIVE_ORDER);
-        putAll(defaults);
-    }
-
-    /**
-     * @param key
-     * @return the property value or null
-     */
-    public String getProperty(String key) {
-        return (String) get(key);
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see java.util.Map#get(java.lang.Object)
-     */
-    public Object get(Object arg0) {
-        Object object = super.get(arg0);
-        if (object != null && object instanceof ArrayList) {
-            ArrayList list = (ArrayList) object;
-            return list.get(list.size() - 1);
-        }
-        return object;
-    }
-
-    /**
-     * @param key
-     * @return the properties as a string array if there is no such property we
-     *         retunr a array with 0 entries
-     */
-    public String[] getProperties(String key) {
-        Object object = super.get(key);
-        if (object != null && !(object instanceof ArrayList)) {
-            return new String[] { (String) object };
-        } else if (object != null && object instanceof ArrayList) {
-            ArrayList list = (ArrayList) object;
-            return (String[]) list.toArray(new String[list.size()]);
-        }
-        return new String[0];
-    }
-
-    /**
-     * sets the key value tuple
-     * 
-     * @param key
-     * @param value
-     */
-    public void setProperty(String key, String value) {
-        Object object = super.get(key);
-        if (object != null && !(object instanceof ArrayList)) {
-            ArrayList arrayList = new ArrayList();
-            arrayList.add(object);
-            arrayList.add(value);
-            put(key, arrayList);
-        } else if (object instanceof ArrayList) {
-            ((ArrayList) object).add(value);
-        } else {
-            put(key, value);
-        }
-
-    }
-
-    public Enumeration propertyNames() {
-        return new KeyEnumeration(keySet().iterator());
-    }
-
-    class KeyEnumeration implements Enumeration {
-
-        private Iterator fIterator;
-
-        public KeyEnumeration(Iterator iterator) {
-            fIterator = iterator;
-        }
-
-        public boolean hasMoreElements() {
-            return fIterator.hasNext();
-
-        }
-
-        public Object nextElement() {
-            return fIterator.next();
-        }
-
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-     */
-    public final void write(DataOutput out) throws IOException {
-        out.writeInt(keySet().size());
-        Iterator iterator = keySet().iterator();
-        String key;
-        String[] properties;
-        while (iterator.hasNext()) {
-            key = (String) iterator.next();
-            UTF8.writeString(out, key);
-            properties = getProperties(key);
-            out.writeInt(properties.length);
-            for (int i = 0; i < properties.length; i++) {
-                UTF8.writeString(out, properties[i]);
-            }
-        }
-
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-     */
-    public final void readFields(DataInput in) throws IOException {
-        int keySize = in.readInt();
-        String key;
-        for (int i = 0; i < keySize; i++) {
-            key = UTF8.readString(in);
-            int valueSize = in.readInt();
-            for (int j = 0; j < valueSize; j++) {
-                setProperty(key, UTF8.readString(in));
-            }
-        }
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see java.lang.Object#equals(java.lang.Object)
-     */
-    public boolean equals(Object obj) {
-        if (!(obj instanceof ContentProperties)) {
-            return false;
-        }
-        ContentProperties properties = (ContentProperties) obj;
-        Enumeration enumeration = properties.propertyNames();
-        while (enumeration.hasMoreElements()) {
-            String key = (String) enumeration.nextElement();
-            String[] values = properties.getProperties(key);
-            String[] myValues = getProperties(key);
-            if (values.length != myValues.length) {
-                return false;
-            }
-            for (int i = 0; i < values.length; i++) {
-                if (!values[i].equals(myValues[i])) {
-                    return false;
-                }
-
-            }
-        }
-
-        return true;
-    }
-
-}
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 375984)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -22,6 +22,7 @@
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.mime.MimeType;
 import org.apache.nutch.util.mime.MimeTypes;
 import org.apache.nutch.util.mime.MimeTypeException;
@@ -38,14 +39,14 @@
   private String base;
   private byte[] content;
   private String contentType;
-  private ContentProperties metadata;
+  private Metadata metadata;
   private boolean mimeTypeMagic;
   private MimeTypes mimeTypes;
 
   public Content() {}
     
   public Content(String url, String base, byte[] content, String contentType,
-                 ContentProperties metadata, Configuration conf) {
+                 Metadata metadata, Configuration conf) {
 
     if (url == null) throw new IllegalArgumentException("null url");
     if (base == null) throw new IllegalArgumentException("null base");
@@ -74,7 +75,7 @@
 
     contentType = UTF8.readString(in);            // read contentType
 
-    metadata = new ContentProperties();
+    metadata = new Metadata();
     metadata.readFields(in);                    // read meta data
   }
 
@@ -140,17 +141,11 @@
   }
 
   /** Other protocol-specific data. */
-  public ContentProperties getMetadata() {
+  public Metadata getMetadata() {
     ensureInflated();
     return metadata;
   }
 
-  /** Return the value of a metadata property. */
-  public String get(String name) {
-    ensureInflated();
-    return getMetadata().getProperty(name);
-  }
-
   public boolean equals(Object o) {
     ensureInflated();
     if (!(o instanceof Content)){
Index: src/java/org/apache/nutch/parse/ParseData.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseData.java	(revision 375984)
+++ src/java/org/apache/nutch/parse/ParseData.java	(working copy)
@@ -21,23 +21,25 @@
 
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.fs.*;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.NutchConfiguration;
 
+
 /** Data extracted from a page's content.
  * @see Parse#getData()
  */
 public final class ParseData extends VersionedWritable implements Configurable {
   public static final String DIR_NAME = "parse_data";
 
-  private final static byte VERSION = 3;
+  private final static byte VERSION = 4;
 
   private String title;
   private Outlink[] outlinks;
-  private ContentProperties metadata;
+  private Metadata contentMeta;
+  private Metadata parseMeta;
   private ParseStatus status;
   private Configuration conf;
   
@@ -47,11 +49,18 @@
   // constructor is called -> conf is null. The programmer which use this object may not forget to set the conf.
   public ParseData() {}
 
-  public ParseData(ParseStatus status, String title, Outlink[] outlinks, ContentProperties metadata) {
+  public ParseData(ParseStatus status, String title, Outlink[] outlinks,
+                   Metadata contentMeta) {
+    this(status, title, outlinks, contentMeta, new Metadata());
+  }
+  
+  public ParseData(ParseStatus status, String title, Outlink[] outlinks,
+                   Metadata contentMeta, Metadata parseMeta) {
     this.status = status;
     this.title = title;
     this.outlinks = outlinks;
-    this.metadata = metadata;
+    this.contentMeta = contentMeta;
+    this.parseMeta = parseMeta;
   }
 
   //
@@ -67,14 +76,32 @@
   /** The outlinks of the page. */
   public Outlink[] getOutlinks() { return outlinks; }
 
-  /** Other page properties.  This is the place to find format-specific
-   * properties.  Different parser implementations for different content types
-   * will populate this differently. */
-  public ContentProperties getMetadata() { return metadata; }
+  /** The original Metadata retrieved from content */
+  public Metadata getContentMeta() { return contentMeta; }
 
-  /** Return the value of a metadata property. */
-  public String get(String name) { return getMetadata().getProperty(name); }
-
+  /**
+   * Other content properties.
+   * This is the place to find format-specific properties.
+   * Different parser implementations for different content types will populate
+   * this differently.
+   */
+  public Metadata getParseMeta() { return parseMeta; }
+  
+  /**
+   * Returns a single metadata value, preferring the parse metadata.
+   * The parse metadata is consulted first; only when it holds no value for
+   * <code>name</code> is the content metadata consulted instead.
+   * @see #getContentMeta()
+   * @see #getParseMeta()
+   */
+  public String getMeta(String name) {
+    String fromParse = parseMeta.get(name);
+    if (fromParse != null) {
+      return fromParse;
+    }
+    return contentMeta.get(name);
+  }
+  
   //
   // Writable methods
   //
@@ -103,27 +130,31 @@
     
     if (version < 3) {
       int propertyCount = in.readInt();             // read metadata
-      metadata = new ContentProperties();
+      contentMeta = new Metadata();
       for (int i = 0; i < propertyCount; i++) {
-        metadata.put(UTF8.readString(in), UTF8.readString(in));
+        contentMeta.add(UTF8.readString(in), UTF8.readString(in));
       }
     } else {
-      metadata = new ContentProperties();
-      metadata.readFields(in);
+      contentMeta = new Metadata();
+      contentMeta.readFields(in);
     }
-    
+    parseMeta = new Metadata();     // never null, even for pre-v4 records
+    if (version > 3) {              // parse metadata only exists since v4
+      parseMeta.readFields(in);
+    }
   }
 
   public final void write(DataOutput out) throws IOException {
-    out.writeByte(VERSION);                             // write version
-    status.write(out);                       // write status
+    out.writeByte(VERSION);                       // write version
+    status.write(out);                            // write status
     UTF8.writeString(out, title);                 // write title
 
     out.writeInt(outlinks.length);                // write outlinks
     for (int i = 0; i < outlinks.length; i++) {
       outlinks[i].write(out);
     }
-    metadata.write(out);
+    contentMeta.write(out);                      // write content metadata
+    parseMeta.write(out);
   }
 
   public static ParseData read(DataInput in) throws IOException {
@@ -144,7 +175,8 @@
       this.status.equals(other.status) &&
       this.title.equals(other.title) &&
       Arrays.equals(this.outlinks, other.outlinks) &&
-      this.metadata.equals(other.metadata);
+      this.contentMeta.equals(other.contentMeta) &&
+      this.parseMeta.equals(other.parseMeta);
   }
 
   public String toString() {
@@ -160,7 +192,8 @@
       }
     }
 
-    buffer.append("Metadata: " + metadata + "\n" );
+    buffer.append("Content Metadata: " + contentMeta + "\n" );
+    buffer.append("Parse Metadata: " + parseMeta + "\n" );
 
     return buffer.toString();
   }
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 375984)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -72,7 +72,8 @@
           dataOut.append(key, parse.getData());
           
           // recover the signature prepared by Fetcher or ParseSegment
-          String sig = parse.getData().getMetadata().getProperty(Fetcher.SIGNATURE_KEY);
+          String sig = parse.getData()
+                            .getContentMeta().get(Fetcher.SIGNATURE_KEY);
           if (sig != null) {
             byte[] signature = StringUtil.fromHexString(sig);
             if (signature != null) {
@@ -87,7 +88,8 @@
           Outlink[] links = parse.getData().getOutlinks();
 
           // compute OPIC score contribution
-          String scoreString = parse.getData().get(Fetcher.SCORE_KEY);
+          String scoreString = parse.getData()
+                                    .getContentMeta().get(Fetcher.SCORE_KEY);
           float score = extscore;
           // this may happen if there was a fetch error.
          if (scoreString != null) score = Float.parseFloat(scoreString);
Index: src/java/org/apache/nutch/parse/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatus.java	(revision 375984)
+++ src/java/org/apache/nutch/parse/ParseStatus.java	(working copy)
@@ -11,9 +11,11 @@
 
 import org.apache.hadoop.io.VersionedWritable;
 import org.apache.hadoop.io.WritableUtils;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.conf.Configuration;
 
+import org.apache.nutch.metadata.Metadata;
+
+
 /**
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
@@ -231,7 +233,8 @@
     private ParseData data = null;
     
     public EmptyParseImpl(ParseStatus status, Configuration conf) {
-      data = new ParseData(status, "", new Outlink[0], new ContentProperties());
+      data = new ParseData(status, "", new Outlink[0],
+                           new Metadata(), new Metadata());
       data.setConf(conf);
     }
     
Index: src/java/org/apache/nutch/parse/ParseSegment.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseSegment.java	(revision 375984)
+++ src/java/org/apache/nutch/parse/ParseSegment.java	(working copy)
@@ -19,11 +19,11 @@
 import org.apache.nutch.crawl.SignatureFactory;
 import org.apache.nutch.fetcher.Fetcher;
 import org.apache.hadoop.io.*;
-import org.apache.nutch.parse.ParseOutputFormat;
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.protocol.*;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.*;
 
 import java.io.*;
@@ -58,15 +58,9 @@
       status = new ParseStatus(e);
     }
 
-    ContentProperties metadata = parse.getData().getMetadata();
     // compute the new signature
     byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
-    metadata.setProperty(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
-    // copy segment name and score
-    String segmentName = content.getMetadata().getProperty(Fetcher.SEGMENT_NAME_KEY);
-    String score = content.getMetadata().getProperty(Fetcher.SCORE_KEY);
-    metadata.setProperty(Fetcher.SEGMENT_NAME_KEY, segmentName);
-    metadata.setProperty(Fetcher.SCORE_KEY, score);
+    content.getMetadata().set(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
     
     if (status.isSuccess()) {
       output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
Index: src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
===================================================================
--- src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java	(revision 375984)
+++ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java	(working copy)
@@ -20,6 +20,7 @@
 import org.apache.poi.poifs.eventfilesystem.*;
 import org.apache.poi.poifs.filesystem.*;
 import org.apache.poi.util.LittleEndian;
+import org.apache.nutch.metadata.Metadata;
 
 import java.util.*;
 import java.io.*;
@@ -33,8 +34,7 @@
  * code to extract all msword properties.
  *
  */
-public class WordExtractor
-{
+public class WordExtractor {
 
   /**
    * Constructor
@@ -276,39 +276,40 @@
       /*Dates are being stored in millis since the epoch to aid
       localization*/
       if(title != null)
-        properties.setProperty("Title", title);
+        properties.setProperty(Metadata.TITLE, title);
       if(applicationName != null)
-        properties.setProperty("Application-Name", applicationName);
+        properties.setProperty(Metadata.APPLICATION_NAME, applicationName);
       if(author != null)
-        properties.setProperty("Author", author);
+        properties.setProperty(Metadata.AUTHOR, author);
       if(charCount != 0)
-        properties.setProperty("Character Count", charCount + "");
+        properties.setProperty(Metadata.CHARACTER_COUNT, charCount + "");
       if(comments != null)
-        properties.setProperty("Comments", comments);
+        properties.setProperty(Metadata.COMMENTS, comments);
       if(createDateTime != null)
-        properties.setProperty("Creation-Date", createDateTime.getTime() + "");
+        properties.setProperty(Metadata.DATE,
+                               Metadata.DATE_FORMAT.format(createDateTime));
       if(editTime != 0)
-        properties.setProperty("Edit-Time", editTime + "");
+        properties.setProperty("Edit-Time", editTime + "");
       if(keywords != null)
-        properties.setProperty("Keywords", keywords);
+        properties.setProperty(Metadata.KEYWORDS, keywords);
       if(lastAuthor != null)
-        properties.setProperty("Last-Author", lastAuthor);
+        properties.setProperty(Metadata.LAST_AUTHOR, lastAuthor);
       if(lastPrinted != null)
-        properties.setProperty("Last-Printed", lastPrinted.getTime() + "");
+        properties.setProperty(Metadata.LAST_PRINTED, lastPrinted.getTime() + "");
       if(lastSaveDateTime != null)
-        properties.setProperty("Last-Save-Date", lastSaveDateTime.getTime() + "");
+        properties.setProperty(Metadata.LAST_SAVED, lastSaveDateTime.getTime() + "");
       if(pageCount != 0)
-        properties.setProperty("Page-Count", pageCount + "");
+        properties.setProperty(Metadata.PAGE_COUNT, pageCount + "");
       if(revNumber != null)
-        properties.setProperty("Revision-Number", revNumber);
+        properties.setProperty(Metadata.REVISION_NUMBER, revNumber);
       if(security != 0)
-        properties.setProperty("Security", security + "");
+        properties.setProperty(Metadata.RIGHTS, security + "");
       if(subject != null)
-        properties.setProperty("Subject", subject);
+        properties.setProperty(Metadata.SUBJECT, subject);
       if(template != null)
-        properties.setProperty("Template", template);
+        properties.setProperty(Metadata.TEMPLATE, template);
       if(wordCount != 0)
-        properties.setProperty("Word-Count", wordCount + "");
+        properties.setProperty(Metadata.WORD_COUNT, wordCount + "");
       propertiesBroker.setProperties(properties);
 
       //si.getThumbnail(); // can't think of a sensible way of turning this into a string.
Index: src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
===================================================================
--- src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(revision 375984)
+++ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(working copy)
@@ -16,8 +16,10 @@
 
 package org.apache.nutch.parse.msword;
 
+import org.apache.nutch.metadata.DublinCore;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
@@ -29,8 +31,6 @@
 import org.apache.nutch.parse.ParseException;
 
 import java.util.Properties;
-//import java.util.logging.Logger;
-
 import java.io.ByteArrayInputStream;
 
 /**
@@ -67,7 +67,7 @@
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -99,25 +99,20 @@
     }
 
     // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
+    Metadata metadata = new Metadata();
+    title = properties.getProperty(DublinCore.TITLE);
+    properties.remove(DublinCore.TITLE);
+    metadata.setAll(properties);
 
-    if(properties != null) {
-      title = properties.getProperty("Title");
-      properties.remove("Title");
-      metadata.putAll(properties);
-    }
+    if (text == null) { text = ""; }
+    if (title == null) { title = ""; }
 
-    if (text == null)
-      text = "";
-
-    if (title == null)
-      title = "";
-
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, this.conf);
 
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
+                                        outlinks, content.getMetadata(),
+                                        metadata);
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     // any filter?
Index: src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
===================================================================
--- src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(revision 375984)
+++ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(working copy)
@@ -19,7 +19,6 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -58,14 +57,15 @@
     walk(doc, parse, metaTags, url, outlinks);
     if (outlinks.size() > 0) {
       Outlink[] old = parse.getData().getOutlinks();
-      ContentProperties metadata = parse.getData().getMetadata();
       String title = parse.getData().getTitle();
       List list = Arrays.asList(old);
       outlinks.addAll(list);
       ParseStatus status = parse.getData().getStatus();
       String text = parse.getText();
       Outlink[] newlinks = (Outlink[])outlinks.toArray(new Outlink[outlinks.size()]);
-      ParseData parseData = new ParseData(status, title, newlinks, metadata);
+      ParseData parseData = new ParseData(status, title, newlinks,
+                                          parse.getData().getContentMeta(),
+                                          parse.getData().getParseMeta());
       parseData.setConf(this.conf);
       parse = new ParseImpl(text, parseData);
     }
@@ -140,10 +140,8 @@
       idx = Math.min(MAX_TITLE_LEN, script.length());
       title = script.substring(0, idx);
     }
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(c.getMetadata());
-    ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, title,
-            outlinks, metadata);
+    ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks,
+                                 c.getMetadata());
     pd.setConf(this.conf);
     Parse parse = new ParseImpl(script, pd);
     return parse;
Index: src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
===================================================================
--- src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java	(revision 375984)
+++ src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java	(working copy)
@@ -21,11 +21,11 @@
 import junit.framework.TestCase;
 
 // Nutch imports
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.NutchConfiguration;
 
 
@@ -56,8 +56,7 @@
         Parser parser = new ParserFactory(NutchConfiguration.create()).getParser("text/html", URL);
         Parse parse = parser.getParse(content);
 
-        assertEquals(metalanguages[t], (String) parse.getData().get(
-            HTMLLanguageParser.META_LANG_NAME));
+        assertEquals(metalanguages[t], (String) parse.getData().getParseMeta().get(Metadata.LANGUAGE));
 
       }
     } catch (Exception e) {
@@ -123,11 +122,9 @@
   
   
   private Content getContent(String text) {
-    ContentProperties p = new ContentProperties();
-    p.put("Content-Type", "text/html");
-
-    Content content = new Content(URL, BASE, text.getBytes(), "text/html", p, NutchConfiguration.create());
-    return content;
+    Metadata meta = new Metadata();
+    meta.add("Content-Type", "text/html");
+    return new Content(URL, BASE, text.getBytes(), "text/html", meta, NutchConfiguration.create());
   }
 
 }
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java	(revision 375984)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java	(working copy)
@@ -23,6 +23,10 @@
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
+// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 // Lucene imports
@@ -63,12 +67,12 @@
   public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
 
-    //check if X-meta-lang found, possibly put there by HTMLLanguageParser
-    String lang = parse.getData().get(HTMLLanguageParser.META_LANG_NAME);
+    // check if LANGUAGE found, possibly put there by HTMLLanguageParser
+    String lang = parse.getData().getParseMeta().get(Metadata.LANGUAGE);
 
-    //check if HTTP-header tels us the language
+    // check if HTTP-header tells us the language
     if (lang == null) {
-        lang = parse.getData().get("Content-Language");
+        lang = parse.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
     }
     
     if (lang == null) {
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(revision 375984)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(working copy)
@@ -23,10 +23,13 @@
 import java.util.logging.Logger;
 
 // Nutch imports
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.HTMLMetaTags;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.HtmlParseFilter;
 import org.apache.nutch.protocol.Content;
+
+// Hadoop imports
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 
@@ -44,7 +47,6 @@
  */
 public class HTMLLanguageParser implements HtmlParseFilter {
   
-  public static final String META_LANG_NAME="X-meta-lang";
   public static final Logger LOG = LogFormatter
     .getLogger(HTMLLanguageParser.class.getName());
 
@@ -87,7 +89,7 @@
     String lang = parser.getLanguage();
 
     if (lang != null) {
-      parse.getData().getMetadata().put(META_LANG_NAME, lang);
+      parse.getData().getParseMeta().set(Metadata.LANGUAGE, lang);
     }
     return parse;
   }
Index: src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
===================================================================
--- src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java	(revision 375984)
+++ src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java	(working copy)
@@ -21,9 +21,10 @@
 import java.util.*;
 import java.util.logging.Logger;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -60,19 +61,17 @@
   public Parse getParse(Content content) {
 
     String text = null;
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
     Vector outlinks = new Vector();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null && raw.length != Integer.parseInt(contentLength)) {
-        return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED, "Content truncated at " + raw.length
-                + " bytes. Parser can't handle incomplete files.").getEmptyParse(conf);
+        return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
+                               "Content truncated at " + raw.length +
+                               " bytes. Parser can't handle incomplete files.").getEmptyParse(conf);
       }
       ExtractText extractor = new ExtractText();
 
@@ -106,7 +105,8 @@
     if (text == null) text = "";
 
     Outlink[] links = (Outlink[]) outlinks.toArray(new Outlink[outlinks.size()]);
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", links,
+                                        content.getMetadata());
     return new ParseImpl(text, parseData);
   }
 
@@ -119,8 +119,10 @@
     byte[] buf = new byte[in.available()];
     in.read(buf);
     SWFParser parser = new SWFParser();
-    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0], buf, "application/x-shockwave-flash",
-            new ContentProperties(), NutchConfiguration.create()));
+    Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
+                                          buf, "application/x-shockwave-flash",
+                                          new Metadata(),
+                                          NutchConfiguration.create()));
     System.out.println("Parse Text:");
     System.out.println(p.getText());
     System.out.println("Parse Data:");
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(revision 375984)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(working copy)
@@ -23,6 +23,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
@@ -30,7 +32,6 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 
@@ -59,7 +60,7 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.get("Content-Length");
+      final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
       final int len = Integer.parseInt(contentLen);
       System.out.println("ziplen: " + len);
       final byte[] contentInBytes = content.getContent();
@@ -86,10 +87,6 @@
           "Can't be handled as Zip document. " + e).getEmptyParse(getConf());
     }
 
-    // collect meta data
-    final ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
     if (resultText == null) {
       resultText = "";
     }
@@ -100,7 +97,8 @@
 
     outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
     final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
-        resultTitle, outlinks, metadata);
+                                              resultTitle, outlinks,
+                                              content.getMetadata());
     parseData.setConf(this.conf);
 
     LOG.finest("Zip file parsed sucessfully !!");
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(revision 375984)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(working copy)
@@ -26,13 +26,14 @@
 import java.net.URL;
 
 // Nutch imports
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseUtil;
-import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.mime.MimeTypes;
@@ -87,9 +88,9 @@
           // Trying to resolve the Mime-Type
           String contentType = MIME.getMimeType(fname).getName();
           try {
-            ContentProperties metadata = new ContentProperties();
-            metadata.setProperty("Content-Length", Long.toString(entry.getSize()));
-            metadata.setProperty("Content-Type", contentType);
+            Metadata metadata = new Metadata();
+            metadata.set(Response.CONTENT_LENGTH, Long.toString(entry.getSize()));
+            metadata.set(Response.CONTENT_TYPE, contentType);
             Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
             Parse parse = new ParseUtil(this.conf).parse(content);
             ParseData theParseData = parse.getData();
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(revision 375984)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java	(working copy)
@@ -19,7 +19,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -144,8 +146,10 @@
     Content content = file.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                       content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(revision 375984)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(working copy)
@@ -18,6 +18,7 @@
 
 // JDK imports
 import java.net.URL;
+import java.util.Date;
 import java.util.TreeMap;
 import java.util.logging.Level;
 import java.io.IOException;
@@ -25,7 +26,10 @@
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
+// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
 
@@ -61,7 +65,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final File file;
   private Configuration conf;
@@ -71,17 +75,17 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
-
+  
   public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf)
     throws FileException, IOException {
 
@@ -124,10 +128,8 @@
       // where case is insensitive
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
-        TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
         //hdrs.put("Location", f.getCanonicalFile().toURI());
-        hdrs.put("Location", f.getCanonicalFile().toURL().toString());
-        this.headers.putAll(hdrs);
+        headers.set(Response.LOCATION, f.getCanonicalFile().toURL().toString());
 
         this.code = 300;  // http redirect
         return;
@@ -181,17 +183,11 @@
     is.close(); 
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length", new Long(size).toString());
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
+    headers.set(Response.CONTENT_TYPE, "");   // No Content-Type at file protocol level
 
-    hdrs.put("Content-Type", "");   // No Content-Type at file protocol level
-
-    this.headers.putAll(hdrs);
-
     // response code
     this.code = 200; // http OK
   }
@@ -204,18 +200,12 @@
     this.content = list2html(f.listFiles(), path, "/".equals(path) ? false : true);
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length",
+    headers.set(Response.CONTENT_LENGTH,
       new Integer(this.content.length).toString());
-
-    hdrs.put("Content-Type", "text/html");
-
-    hdrs.put("Last-Modified",
+    headers.set(Response.CONTENT_TYPE, "text/html");
+    headers.set(Response.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
 
-    this.headers.putAll(hdrs);
-
     // response code
     this.code = 200; // http OK
   }
Index: src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
===================================================================
--- src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(revision 375984)
+++ src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(working copy)
@@ -26,7 +26,8 @@
 import org.pdfbox.exceptions.InvalidPasswordException;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.parse.ParseStatus;
@@ -89,12 +90,13 @@
 
     String text = null;
     String title = null;
+    Metadata metadata = new Metadata();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -102,8 +104,7 @@
             +" bytes. Parser can't handle incomplete pdf file.").getEmptyParse(getConf());
       }
 
-      PDFParser parser = new PDFParser(
-        new ByteArrayInputStream(raw));
+      PDFParser parser = new PDFParser(new ByteArrayInputStream(raw));
       parser.parse();
 
       pdf = parser.getPDDocument();
@@ -122,15 +123,18 @@
       PDDocumentInformation info = pdf.getDocumentInformation();
       title = info.getTitle();
       // more useful info, currently not used. please keep them for future use.
-      // pdf.getPageCount();
-      // info.getAuthor()
-      // info.getSubject()
-      // info.getKeywords()
-      // info.getCreator()
-      // info.getProducer()
-      // info.getTrapped()
-      // formatDate(info.getCreationDate())
-      // formatDate(info.getModificationDate())
+      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
+      metadata.add(Metadata.AUTHOR, info.getAuthor());
+      metadata.add(Metadata.SUBJECT, info.getSubject());
+      metadata.add(Metadata.KEYWORDS, info.getKeywords());
+      metadata.add(Metadata.CREATOR, info.getCreator());
+      metadata.add(Metadata.PUBLISHER, info.getProducer());
+      
+      // TODO: figure out why the date handling below fails with
+      // "java.io.IOException: Error converting date:1-Jan-3 18:15PM"
+      
+      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
+      //metadata.put(LAST_MODIFIED, dcDateFormatter.format(info.getModificationDate().getTime()));
 
     } catch (CryptographyException e) {
       return new ParseStatus(ParseStatus.FAILED,
@@ -139,6 +143,8 @@
       return new ParseStatus(ParseStatus.FAILED,
               "Can't decrypt document - invalid password. " + e).getEmptyParse(getConf());
     } catch (Exception e) { // run time exception
+        LOG.warning("General exception in PDF parser: "+e.getMessage());
+        e.printStackTrace();        
       return new ParseStatus(ParseStatus.FAILED,
               "Can't be handled as pdf document. " + e).getEmptyParse(getConf());
     } finally {
@@ -159,11 +165,9 @@
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, getConf());
 
-    // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
-
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
+                                        outlinks, content.getMetadata(),
+                                        metadata);
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     // any filter?
Index: src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
===================================================================
--- src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(revision 375984)
+++ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java	(working copy)
@@ -25,15 +25,19 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+// Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.hadoop.io.UTF8;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.protocol.ProtocolStatus;
+
+// Hadoop imports
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
 
 
 /**
@@ -443,7 +447,8 @@
     System.out.println("Status: " + out.getStatus());
     if (content != null) {
       System.out.println("Content Type: " + content.getContentType());
-      System.out.println("Content Length: " + content.get("Content-Length"));
+      System.out.println("Content Length: " +
+                         content.getMetadata().get(Response.CONTENT_LENGTH));
       System.out.println("Content:");
       String text = new String(content.getContent());
       System.out.println(text);
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(revision 375984)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(working copy)
@@ -26,7 +26,9 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
 import org.apache.hadoop.conf.Configuration;
 
 import java.net.InetAddress;
@@ -34,11 +36,9 @@
 
 import java.util.List;
 import java.util.LinkedList;
-
 import java.util.logging.Level;
 
 import java.io.ByteArrayOutputStream;
-//import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -61,7 +61,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   private final Ftp ftp;
   private Configuration conf;
@@ -71,14 +71,14 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.get(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(Response.CONTENT_TYPE),
                        headers, this.conf);
   }
 
@@ -294,11 +294,11 @@
       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                       ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -330,11 +330,11 @@
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Long(ftpFile.getSize()).toString());
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.set(Response.LAST_MODIFIED,
+                      ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -349,7 +349,7 @@
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
       // it is not a file, but dir, so redirect as a dir
-        this.headers.put("Location", path + "/");
+        this.headers.set(Response.LOCATION, path + "/");
         this.code = 300;  // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
@@ -386,9 +386,9 @@
 
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
@@ -408,9 +408,9 @@
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.set(Response.CONTENT_LENGTH,
+                       new Integer(this.content.length).toString());
+      this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java	(revision 375984)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java	(working copy)
@@ -22,6 +22,7 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
@@ -212,8 +213,10 @@
     Content content = ftp.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: " + content.get("Content-Length"));
-    System.err.println("Last-Modified: " + content.get("Last-Modified"));
+    System.err.println("Content-Length: " +
+                       content.getMetadata().get(Response.CONTENT_LENGTH));
+    System.err.println("Last-Modified: " +
+                      content.getMetadata().get(Response.LAST_MODIFIED));
     if (dumpContent) {
       System.out.print(new String(content.getContent()));
     }
Index: src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
===================================================================
--- src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(revision 375984)
+++ src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(working copy)
@@ -17,7 +17,6 @@
 package org.apache.nutch.parse.text;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.util.*;
 
@@ -27,9 +26,6 @@
   private Configuration conf;
 
   public Parse getParse(Content content) {
-    // copy content meta data through
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata());
 
     // ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", new
     // Outlink[0], metadata);
@@ -49,7 +45,7 @@
       text = new String(content.getContent()); // use default encoding
     }
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "",
-        OutlinkExtractor.getOutlinks(text, getConf()), metadata);
+        OutlinkExtractor.getOutlinks(text, getConf()), content.getMetadata());
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
     
Index: src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(revision 375984)
+++ src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(working copy)
@@ -16,10 +16,10 @@
 
 package org.creativecommons.nutch;
 
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
 
@@ -60,12 +60,13 @@
     Configuration conf = NutchConfiguration.create();
 
     Content content =
-      new Content(url, url, bytes, contentType, new ContentProperties(), conf);
+      new Content(url, url, bytes, contentType, new Metadata(), conf);
     Parse parse = new ParseUtil(conf).parseByParserId("parse-html",content);
 
-    ContentProperties metadata = parse.getData().getMetadata();
+    Metadata metadata = parse.getData().getParseMeta();
     assertEquals(license, metadata.get("License-Url"));
     assertEquals(location, metadata.get("License-Location"));
     assertEquals(type, metadata.get("Work-Type"));
   }
 }
+
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(revision 375984)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java	(working copy)
@@ -18,6 +18,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.nutch.metadata.CreativeCommons;
 
 import org.apache.nutch.parse.Parse;
 
@@ -27,6 +28,7 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.metadata.Metadata;
 
 import java.util.logging.Logger;
 import org.apache.hadoop.util.LogFormatter;
@@ -49,8 +51,9 @@
   public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
     
+    Metadata metadata = parse.getData().getParseMeta();
     // index the license
-    String licenseUrl = parse.getData().get("License-Url");
+    String licenseUrl = metadata.get(CreativeCommons.LICENSE_URL);
     if (licenseUrl != null) {
       LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
 
@@ -62,13 +65,13 @@
     }
 
     // index the license location as cc:meta=xxx
-    String licenseLocation = parse.getData().get("License-Location");
+    String licenseLocation = metadata.get(CreativeCommons.LICENSE_LOCATION);
     if (licenseLocation != null) {
       addFeature(doc, "meta=" + licenseLocation);
     }
 
     // index the work type cc:type=xxx
-    String workType = parse.getData().get("Work-Type");
+    String workType = metadata.get(CreativeCommons.WORK_TYPE);
     if (workType != null) {
       addFeature(doc, workType);
     }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 375984)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -16,9 +16,10 @@
 
 package org.creativecommons.nutch;
 
+import org.apache.nutch.metadata.CreativeCommons;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.hadoop.conf.Configuration;
 
 import java.util.*;
@@ -50,7 +51,7 @@
     }
 
     /** Scan the document adding attributes to metadata.*/
-    public static void walk(Node doc, URL base, ContentProperties metadata, Configuration conf)
+    public static void walk(Node doc, URL base, Metadata metadata, Configuration conf)
       throws ParseException {
 
       // walk the DOM tree, scanning for license data
@@ -76,13 +77,13 @@
       // add license to metadata
       if (licenseUrl != null) {
         LOG.info("CC: found "+licenseUrl+" in "+licenseLocation+" of "+base);
-        metadata.put("License-Url", licenseUrl);
-        metadata.put("License-Location", licenseLocation);
+        metadata.add(CreativeCommons.LICENSE_URL, licenseUrl);
+        metadata.add(CreativeCommons.LICENSE_LOCATION, licenseLocation);
       }
 
       if (walker.workType != null) {
         LOG.info("CC: found "+walker.workType+" in "+base);
-        metadata.put("Work-Type", walker.workType);
+        metadata.add(CreativeCommons.WORK_TYPE, walker.workType);
       }
 
     }
@@ -265,7 +266,7 @@
 
     try {
       // extract license metadata
-      Walker.walk(doc, base, parse.getData().getMetadata(), getConf());
+      Walker.walk(doc, base, parse.getData().getParseMeta(), getConf());
     } catch (ParseException e) {
       return new ParseStatus(e).getEmptyParse(getConf());
     }
Index: src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
===================================================================
--- src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(revision 375984)
+++ src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(working copy)
@@ -25,7 +25,8 @@
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.util.MetadataNames;
 import org.apache.hadoop.conf.Configuration;
 
 import java.util.Properties;
 
@@ -34,7 +35,7 @@
  *
  * @author Andy Hedges
  */
-public class TestRTFParser extends TestCase {
+public class TestRTFParser extends TestCase implements MetadataNames {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
@@ -73,7 +74,7 @@
     String title = parse.getData().getTitle();
     Properties meta = parse.getData().getMetadata();
     assertEquals("test rft document", title);
-    assertEquals("tests", meta.getProperty("subject"));
+    assertEquals("tests", meta.getProperty(SUBJECT));
 
 
 
Index: src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
===================================================================
--- src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java	(revision 375984)
+++ src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java	(working copy)
@@ -22,18 +22,20 @@
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.nutch.util.MetadataNames;
+
 /**
  * A parser delegate for handling rtf events.
  * @author Andy Hedges
  */
-public class RTFParserDelegateImpl implements RTFParserDelegate {
+public class RTFParserDelegateImpl implements RTFParserDelegate, MetadataNames {
 
   String tabs = "";
   Properties metadata = new Properties();
 
-  String[] META_NAMES_TEXT = {"title", "subject", "author", "manager",
-                              "company", "operator", "category", "keywords",
-                              "comment", "doccomm", "hlinkbase"};
+  String[] META_NAMES_TEXT = {TITLE, SUBJECT, AUTHOR, "manager",
+                              "company", "operator", "category", KEYWORDS,
+                              COMMENTS, "doccomm", "hlinkbase"};
   String[] META_NAMES_DATE = {"creatim", "creatim", "printim", "buptim"};
 
   String metaName = "";
Index: src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
===================================================================
--- src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(revision 375984)
+++ src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(working copy)
@@ -18,7 +18,8 @@
 
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.MetadataNames;
 import org.apache.hadoop.conf.Configuration;
 import java.io.ByteArrayInputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -31,7 +32,7 @@
  * 
  * @author Andy Hedges
  */
-public class RTFParseFactory implements Parser {
+public class RTFParseFactory implements Parser, MetadataNames {
 
   private Configuration conf;
 
@@ -53,10 +54,13 @@
     Properties metadata = new Properties();
     metadata.putAll(content.getMetadata());
     metadata.putAll(delegate.getMetaData());
-    String title = metadata.getProperty("title");
+    String title = metadata.getProperty(TITLE);
 
     if (title != null) {
-      metadata.remove(title);
+        //(CM): Why remove the title metadata property here? Even 
+        //though it's stored in the ParseData, it still might be useful
+        //to have via this properties object?
+        //metadata.remove(title);
     } else {
       title = "";
     }
Index: src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
===================================================================
--- src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(revision 375984)
+++ src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(working copy)
@@ -13,9 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.nutch.indexer.more;
 
+
 import org.apache.oro.text.regex.Perl5Compiler;
 import org.apache.oro.text.regex.Perl5Matcher;
 import org.apache.oro.text.regex.Perl5Pattern;
@@ -26,24 +26,28 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
+import org.apache.nutch.metadata.Metadata;
+
 import org.apache.nutch.net.protocols.HttpDateFormat;
+import org.apache.nutch.net.protocols.Response;
 
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.protocol.ContentProperties;
 
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
-import org.apache.hadoop.io.UTF8;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.parse.ParseData;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.mime.MimeType;
 import org.apache.nutch.util.mime.MimeTypes;
 import org.apache.nutch.util.mime.MimeTypeException;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.util.LogFormatter;
+
 import java.util.logging.Logger;
 
 import java.text.ParseException;
@@ -51,9 +55,10 @@
 
 import java.util.Date;
 import java.util.TimeZone;
-import java.util.Enumeration;
 
 import org.apache.commons.lang.time.DateUtils;
+
+
 /**
  * Add (or reset) a few metaData properties as respective fields
  * (if they are available), so that they can be displayed by more.jsp
@@ -81,27 +86,22 @@
     throws IndexingException {
 
     String url_s = url.toString();
-    // normalize metaData (see note in the method below).
-    ContentProperties metaData = normalizeMeta(parse.getData().getMetadata());
 
-    addTime(doc, metaData, url_s, datum);
+    addTime(doc, parse.getData(), url_s, datum);
+    addLength(doc, parse.getData(), url_s);
+    addType(doc, parse.getData(), url_s);
+    resetTitle(doc, parse.getData(), url_s);
 
-    addLength(doc, metaData, url_s);
-
-    addType(doc, metaData, url_s);
-
-    resetTitle(doc, metaData, url_s);
-
     return doc;
   }
     
   // Add time related meta info.  Add last-modified if present.  Index date as
   // last-modified, or, if that's not present, use fetch time.
-  private Document addTime(Document doc, ContentProperties metaData, String url,
-                           CrawlDatum datum) {
+  private Document addTime(Document doc, ParseData data,
+                           String url, CrawlDatum datum) {
     long time = -1;
 
-    String lastModified = metaData.getProperty("last-modified");
+    String lastModified = data.getMeta(Metadata.LAST_MODIFIED);
     if (lastModified != null) {                   // try parse last-modified
       time = getTime(lastModified,url);           // use as time
                                                   // store as string
@@ -109,7 +109,7 @@
     }
 
     if (time == -1) {                             // if no last-modified
-      time = datum.getFetchTime();                   // use fetch time
+      time = datum.getFetchTime();                // use fetch time
     }
 
     // add support for query syntax date:
@@ -165,8 +165,8 @@
   }
 
   // Add Content-Length
-  private Document addLength(Document doc, ContentProperties metaData, String url) {
-    String contentLength = metaData.getProperty("content-length");
+  private Document addLength(Document doc, ParseData data, String url) {
+    String contentLength = data.getMeta(Response.CONTENT_LENGTH);
 
     if (contentLength != null)
       doc.add(Field.UnIndexed("contentLength", contentLength));
@@ -175,9 +175,9 @@
   }
 
   // Add Content-Type and its primaryType and subType
-  private Document addType(Document doc, ContentProperties metaData, String url) {
+  private Document addType(Document doc, ParseData data, String url) {
     MimeType mimeType = null;
-    String contentType = metaData.getProperty("content-type");
+    String contentType = data.getMeta(Response.CONTENT_TYPE);
     if (contentType == null) {
 	// Note by Jerome Charron on 20050415:
         // Content Type not solved by a previous plugin
@@ -257,8 +257,8 @@
     }
   }
 
-  private Document resetTitle(Document doc, ContentProperties metaData, String url) {
-    String contentDisposition = metaData.getProperty("content-disposition");
+  private Document resetTitle(Document doc, ParseData data, String url) {
+    String contentDisposition = data.getMeta(Metadata.CONTENT_DISPOSITION);
     if (contentDisposition == null)
       return doc;
 
@@ -274,30 +274,6 @@
     return doc;
   }
 
-  // Meta info in nutch metaData are saved in raw form, i.e.,
-  // whatever the fetcher sees. To facilitate further processing,
-  // a "normalization" is necessary.
-  // This includes fixing http server oddities, such as:
-  // (*) non-uniform casing of header names
-  // (*) empty header value
-  // Note: the original metaData should be kept intact,
-  // because there is a benefit to preserve whatever comes from server.
-  private ContentProperties normalizeMeta(ContentProperties old) {
-      ContentProperties normalized = new ContentProperties();
-
-    for (Enumeration e = old.propertyNames(); e.hasMoreElements();) {
-      String key = (String) e.nextElement();
-      String value = old.getProperty(key).trim();
-      // some http server sends out header with empty value! if so, skip it
-      if (value == null || value.equals(""))
-        continue;
-      // convert key (but, not value) to lower-case
-      normalized.setProperty(key.toLowerCase(),value);
-    }
-
-    return normalized;
-  }
-
   public void setConf(Configuration conf) {
     this.conf = conf;
     MAGIC = conf.getBoolean("mime.type.magic", true);
Index: src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
===================================================================
--- src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(revision 375984)
+++ src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(working copy)
@@ -17,7 +17,6 @@
 package org.apache.nutch.parse.ext;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
@@ -28,6 +27,8 @@
 
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.util.CommandRunner;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.hadoop.conf.Configuration;
 
 import org.apache.nutch.plugin.Extension;
@@ -47,6 +48,7 @@
  */
 
 public class ExtParser implements Parser {
+
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.parse.ext");
 
@@ -85,8 +87,7 @@
 
       byte[] raw = content.getContent();
 
-      String contentLength =
-        (String)content.getMetadata().get("Content-Length");
+      String contentLength = content.getMetadata().get(Response.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -129,11 +130,8 @@
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, getConf());
 
-    // collect meta data
-    ContentProperties metaData = new ContentProperties();
-    metaData.putAll(content.getMetadata()); // copy through
-
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
+                                        outlinks, content.getMetadata());
     parseData.setConf(this.conf);
     return new ParseImpl(text, parseData);
   }
Index: src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
===================================================================
--- src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(revision 375984)
+++ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(working copy)
@@ -29,8 +29,9 @@
 import org.w3c.dom.*;
 import org.apache.html.dom.*;
 
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.parse.*;
@@ -110,19 +111,19 @@
     String text = "";
     String title = "";
     Outlink[] outlinks = new Outlink[0];
-    ContentProperties metadata = new ContentProperties();
+    Metadata metadata = new Metadata();
 
     // parse the content
     DocumentFragment root;
     try {
       byte[] contentInOctets = content.getContent();
       InputSource input = new InputSource(new ByteArrayInputStream(contentInOctets));
-      String contentType = content.getMetadata().getProperty("Content-Type");
+      String contentType = content.getMetadata().get(Response.CONTENT_TYPE);
       String encoding = StringUtil.parseCharacterEncoding(contentType);
       if (encoding!=null) {
-        metadata.put("OriginalCharEncoding", encoding);
+        metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
         if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
-          metadata.put("CharEncodingForConversion", encoding);
+          metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
           LOG.fine(base + ": setting encoding to " + encoding);
         }
       }
@@ -131,9 +132,9 @@
       if (encoding == null) {
         encoding = sniffCharacterEncoding(contentInOctets);
         if (encoding!=null) {
-          metadata.put("OriginalCharEncoding", encoding);
+          metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
           if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
-            metadata.put("CharEncodingForConversion", encoding);
+            metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
             LOG.fine(base + ": setting encoding to " + encoding);
           }
         }
@@ -147,7 +148,7 @@
         // doesn't work for jp because euc-jp and shift_jis have about the
         // same share)
         encoding = defaultCharEncoding;
-        metadata.put("CharEncodingForConversion", defaultCharEncoding);
+        metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, defaultCharEncoding);
         LOG.fine(base + ": falling back to " + defaultCharEncoding);
       }
       input.setEncoding(encoding);
@@ -192,14 +193,13 @@
       // ??? FIXME ???
     }
     
-    // copy content metadata through
-    metadata.putAll(content.getMetadata());
     ParseStatus status = new ParseStatus(ParseStatus.SUCCESS);
     if (metaTags.getRefresh()) {
       status.setMinorCode(ParseStatus.SUCCESS_REDIRECT);
       status.setMessage(metaTags.getRefreshHref().toString());
     }
-    ParseData parseData = new ParseData(status, title, outlinks, metadata);
+    ParseData parseData = new ParseData(status, title, outlinks,
+                                        content.getMetadata(), metadata);
     parseData.setConf(this.conf);
     Parse parse = new ParseImpl(text, parseData);
 
@@ -269,9 +269,9 @@
     byte[] bytes = new byte[(int)file.length()];
     DataInputStream in = new DataInputStream(new FileInputStream(file));
     in.readFully(bytes);
-    Parse parse = new HtmlParser().getParse(new Content(url,url,
-                                                        bytes,"text/html",
-                                                        new ContentProperties(), NutchConfiguration.create()));
+    Parse parse = new HtmlParser().getParse(
+            new Content(url, url, bytes, "text/html", new Metadata(),
+                        NutchConfiguration.create()));
     System.out.println("data: "+parse.getData());
 
     System.out.println("text: "+parse.getText());
Index: src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
===================================================================
--- src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java	(revision 375984)
+++ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java	(working copy)
@@ -11,7 +11,8 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configurable;
@@ -35,7 +36,7 @@
      * The HTTP Authentication (WWW-Authenticate) header which is returned 
      * by a webserver requiring authentication.
      */
-    public static final String AUTH_HEADER = "WWW-Authenticate";
+    public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
 	
     public static final Logger LOG =
 		LogFormatter.getLogger(HttpAuthenticationFactory.class.getName());
@@ -72,13 +73,14 @@
      * ---------------------------------- */
 
 
-    public HttpAuthentication findAuthentication(ContentProperties header) {
+    public HttpAuthentication findAuthentication(Metadata header) {
+
         if (header == null) return null;
         
     	try {
 			Collection challenge = null;
-			if (header instanceof ContentProperties) {
-				Object o = header.get(AUTH_HEADER);
+			if (header instanceof Metadata) {
+				Object o = header.get(WWW_AUTHENTICATE);
 				if (o instanceof Collection) {
 					challenge = (Collection) o;
 				} else {
@@ -86,7 +88,7 @@
 					challenge.add(o.toString());
 				}
 			} else {
-				String challengeString = header.getProperty(AUTH_HEADER); 
+				String challengeString = header.get(WWW_AUTHENTICATE); 
 				if (challengeString != null) {
 					challenge = new ArrayList();
 					challenge.add(challengeString);
Index: src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
===================================================================
--- src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java	(revision 375984)
+++ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java	(working copy)
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
+import java.util.Date;
 
 // HTTP Client imports
 import org.apache.commons.httpclient.Header;
@@ -30,8 +31,8 @@
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.http.api.HttpBase;
 
 
@@ -52,7 +53,7 @@
 
   private int code;
 
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
   
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum) throws IOException {
@@ -85,8 +86,9 @@
       Header[] heads = get.getResponseHeaders();
 
       for (int i = 0; i < heads.length; i++) {
-        headers.setProperty(heads[i].getName(), heads[i].getValue());
+        headers.set(heads[i].getName(), heads[i].getValue());
       }
+      
       // always read content. Sometimes content is useful to find a cause
       // for error.
       try {
@@ -131,10 +133,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 
Index: src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
===================================================================
--- src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(revision 375984)
+++ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(working copy)
@@ -28,12 +28,13 @@
 import java.net.URL;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.Date;
 import java.util.logging.Level;
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.http.api.HttpBase;
 import org.apache.nutch.protocol.http.api.HttpException;
@@ -49,7 +50,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private Metadata headers = new Metadata();
 
 
   public HttpResponse(HttpBase http, URL url, CrawlDatum datum)
@@ -141,13 +142,13 @@
         // parse status code line
         this.code = parseStatusLine(in, line); 
         // parse headers
-        headers.putAll(parseHeaders(in, line));
+        parseHeaders(in, line);
         haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
       }
 
       readPlainContent(in);
 
-      String contentEncoding= getHeader("Content-Encoding");
+      String contentEncoding = getHeader(Response.CONTENT_ENCODING);
       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
         Http.LOG.fine("uncompressing....");
         byte[] compressed = content;
@@ -187,10 +188,10 @@
   }
 
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.get(name);
   }
   
-  public ContentProperties getHeaders() {
+  public Metadata getHeaders() {
     return headers;
   }
 
@@ -207,7 +208,7 @@
     throws HttpException, IOException {
 
     int contentLength = Integer.MAX_VALUE;    // get content length
-    String contentLengthString = (String)headers.get("Content-Length");
+    String contentLengthString = headers.get(Response.CONTENT_LENGTH);
     if (contentLengthString != null) {
       contentLengthString = contentLengthString.trim();
       try {
@@ -333,8 +334,9 @@
   }
 
 
-  private void processHeaderLine(StringBuffer line, TreeMap headers)
+  private void processHeaderLine(StringBuffer line)
     throws IOException, HttpException {
+
     int colonIndex = line.indexOf(":");       // key is up to colon
     if (colonIndex == -1) {
       int i;
@@ -355,20 +357,14 @@
       valueStart++;
     }
     String value = line.substring(valueStart);
-
-    headers.put(key, value);
+    headers.set(key, value);
   }
 
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line)
-    throws IOException, HttpException {
-    TreeMap headers = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-    return parseHeaders(in, line, headers);
-  }
 
-  // Adds headers to an existing TreeMap
-  private Map parseHeaders(PushbackInputStream in, StringBuffer line,
-                           TreeMap headers)
+  // Adds headers to our headers Metadata
+  private void parseHeaders(PushbackInputStream in, StringBuffer line)
     throws IOException, HttpException {
+
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
@@ -381,18 +377,21 @@
         line.setLength(pos);
 
         try {
-          processHeaderLine(line, headers);
+            //TODO: (CM) We don't know the header names here
+            //since we're just handling them generically. It would
+            //be nice to provide some sort of mapping function here
+            //for the returned header names to the standard metadata
+            //names in the ParseData class
+          processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
           e.printStackTrace();
         }
-
-        return headers;
+        return;
       }
 
-      processHeaderLine(line, headers);
+      processHeaderLine(line);
     }
-    return headers;
   }
 
   private static int readLine(PushbackInputStream in, StringBuffer line,
Index: src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java
===================================================================
--- src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java	(revision 375984)
+++ src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java	(working copy)
@@ -30,11 +30,14 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.net.protocols.Response;
+
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
 
+
 /**
  * Nutch-Parser for parsing MS PowerPoint slides ( mime type:
  * application/vnd.ms-powerpoint).
@@ -78,11 +81,10 @@
 
     byte[] raw = getRawBytes(new File(file));
 
-    ContentProperties prop = new ContentProperties();
-    prop.setProperty("Content-Length", "" + raw.length);
+    Metadata meta = new Metadata();
+    meta.set(Response.CONTENT_LENGTH, "" + raw.length);
+    Content content = new Content(file, file, raw, MIME_TYPE, meta, NutchConfiguration.create());
 
-    Content content = new Content(file, file, raw, MIME_TYPE, prop, NutchConfiguration.create());
-
     System.out.println(ppe.getParse(content).getText());
   }
 
@@ -99,7 +101,7 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.get("Content-Length");
+      final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
       final byte[] raw = content.getContent();
 
       if (contentLen != null && raw.length != Integer.parseInt(contentLen)) {
@@ -125,14 +127,12 @@
       return new ParseStatus(e).getEmptyParse(getConf());
     }
 
-    // collect meta data
-    final ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
+    Metadata metadata = new Metadata();
 
     if (properties != null) {
-      title = properties.getProperty("Title");
-      properties.remove("Title");
-      metadata.putAll(properties);
+      title = properties.getProperty(Metadata.TITLE);
+      properties.remove(Metadata.TITLE);
+      metadata.setAll(properties);
     }
 
     if (plainText == null) {
Index: src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java
===================================================================
--- src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java	(revision 375984)
+++ src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java	(working copy)
@@ -23,6 +23,7 @@
 
 import org.apache.nutch.parse.mspowerpoint.PPTExtractor.PropertiesBroker;
 import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.poi.hpsf.PropertySetFactory;
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
@@ -74,30 +75,12 @@
         final SummaryInformation sInfo = (SummaryInformation) PropertySetFactory
             .create(event.getStream());
 
-        addProperty("Title", sInfo.getTitle());
-        addProperty("Subject", sInfo.getSubject());
-        addProperty("Keywords", sInfo.getKeywords());
-        addProperty("Comments", sInfo.getComments());
-        addProperty("Author", sInfo.getAuthor());
-        addProperty("Last-Author", sInfo.getLastAuthor());
-
-        /*
-         * already provided by nutch
-         */
-        // addProperty("Saved-Date", si.getLastSaveDateTime());
-        /*
-         * following properties are not required for indexing/searching
-         */
-        // addProperty("Word-Count", si.getWordCount());
-        // addProperty("Page-Count", si.getPageCount());
-        // addProperty("Character Count", si.getCharCount());
-        // addProperty("Revision-Number", si.getRevNumber());
-        // addProperty("Creation-Date", si.getEditTime());
-        // addProperty("Edit-Time", si.getEditTime());
-        // addProperty("Last-Printed", si.getLastPrinted());
-        // addProperty("Template", si.getTemplate());
-        // addProperty("Security", si.getSecurity());
-        // addProperty("Application-Name", si.getApplicationName());
+        addProperty(Metadata.TITLE, sInfo.getTitle());
+        addProperty(Metadata.SUBJECT, sInfo.getSubject());
+        addProperty(Metadata.KEYWORDS, sInfo.getKeywords());
+        addProperty(Metadata.COMMENTS, sInfo.getComments());
+        addProperty(Metadata.CREATOR, sInfo.getAuthor());
+        addProperty(Metadata.LAST_AUTHOR, sInfo.getLastAuthor());
       } catch (Exception ex) {
         LOG.throwing(this.getClass().getName(), "processPOIFSReaderEvent", ex);
       }