Index: src/test/org/apache/nutch/protocol/TestContentProperties.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContentProperties.java	(revision 366307)
+++ src/test/org/apache/nutch/protocol/TestContentProperties.java	(working copy)
@@ -1,64 +0,0 @@
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol;
-
-import org.apache.nutch.io.TestWritable;
-
-import junit.framework.TestCase;
-
-public class TestContentProperties extends TestCase {
-
-    public void testOneValue() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        String value = "aValue";
-        properties.setProperty("aKey", value);
-        assertEquals(value, properties.get("aKey"));
-        assertEquals(value, properties.get("akey"));
-    }
-
-    public void testMultiValue() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        String value = "aValue";
-        for (int i = 0; i < 100; i++) {
-            properties.setProperty("aKey", value + i);
-
-        }
-        assertEquals(value + 99, properties.get("aKey"));
-        assertEquals(value + 99, properties.getProperty("aKey"));
-        String[] propertie = properties.getProperties("aKey");
-        for (int i = 0; i < 100; i++) {
-            assertEquals(value + i, propertie[i]);
-
-        }
-    }
-
-    public void testSerialization() throws Exception {
-        ContentProperties properties = new ContentProperties();
-        for (int i = 0; i < 10; i++) {
-            properties.setProperty("key", "" + i);
-        }
-        TestWritable.testWritable(properties);
-        Content content = new Content("url", "url", new byte[0], "text/html",
-                new ContentProperties());
-        ContentProperties metadata = content.getMetadata();
-        for (int i = 0; i < 100; i++) {
-            metadata.setProperty("aKey", "" + i);
-        }
-        TestWritable.testWritable(content);
-    }
-
-}
Index: src/test/org/apache/nutch/protocol/TestContent.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContent.java	(revision 366307)
+++ src/test/org/apache/nutch/protocol/TestContent.java	(working copy)
@@ -17,6 +17,7 @@
 package org.apache.nutch.protocol;
 
 import org.apache.nutch.io.*;
+import org.apache.nutch.util.MetaData;
 import junit.framework.TestCase;
 
 /** Unit tests for Content. */
@@ -30,22 +31,23 @@
 
     String url = "http://www.foo.com/";
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Host", "www.foo.com");
-    metaData.put("Content-Type", "text/html");
+    MetaData metaData = new MetaData();
+    metaData.add("Host", "www.foo.com");
+    metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
                             metaData);
                         
     TestWritable.testWritable(r);
-    assertEquals("text/html", r.getMetadata().get("Content-Type"));
-    assertEquals("text/html", r.getMetadata().get("content-type"));
+    assertEquals("text/html", r.getMetadata().last("Content-Type"));
+    assertEquals("text/html", r.getMetadata().last("content-type"));
+    assertEquals("text/html", r.getMetadata().last("CONTENTYPE"));
   }
 
   /** Unit tests for getContentType(String, String, byte[]) method. */
   public void testGetContentType() throws Exception {
     Content c = null;
-    ContentProperties p = new ContentProperties();
+    MetaData p = new MetaData();
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",
Index: src/test/org/apache/nutch/parse/TestParseData.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseData.java	(revision 366307)
+++ src/test/org/apache/nutch/parse/TestParseData.java	(working copy)
@@ -17,7 +17,7 @@
 package org.apache.nutch.parse;
 
 import org.apache.nutch.io.*;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 import junit.framework.TestCase;
 
 /** Unit tests for ParseData. */
@@ -34,9 +34,9 @@
       new Outlink("http://bar.com/", "Bar")
     };
 
-    ContentProperties metaData = new ContentProperties();
-    metaData.put("Language", "en/us");
-    metaData.put("Charset", "UTF-8");
+    MetaData metaData = new MetaData();
+    metaData.add("Language", "en/us");
+    metaData.add("Charset", "UTF-8");
 
     ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
                         
Index: src/test/org/apache/nutch/util/TestMetaData.java
===================================================================
--- src/test/org/apache/nutch/util/TestMetaData.java	(revision 0)
+++ src/test/org/apache/nutch/util/TestMetaData.java	(revision 0)
@@ -0,0 +1,437 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.util;
+
+// JDK imports
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+// JUnit imports
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+
+/**
+ * JUnit based test of class <code>MetaData</code>.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class TestMetaData extends TestCase {
+  
+  public TestMetaData(String testName) {
+    super(testName);
+  }
+  
+  public static Test suite() {
+    return new TestSuite(TestMetaData.class);
+  }
+  
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+  
+
+  /** Test for the <code>getNormalizedName(String)</code> method. */
+  public void testGetNormalizedName() {
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("Content-Type"));
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("ContentType"));
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("Content-type"));
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("contenttype"));
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("contentype"));
+    assertEquals(MetaData.CONTENT_TYPE, MetaData.getNormalizedName("contntype"));
+  }
+
+  /** Test for the <code>add(String, String)</code> method. */
+  public void testAdd() {
+    String[] values = null;
+    MetaData meta = new MetaData();
+
+    values = meta.get("contentype");
+    assertEquals(0, values.length);
+
+    meta.add("contentype", "value1");
+    values = meta.get("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.add("Content-Type", "value2");
+    values = meta.get("contentype");
+    assertEquals(2, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+
+    // NOTE : For now, the same value can be added many times.
+    //        Should it be changed?
+    meta.add("ContentType", "value1");
+    values = meta.get(MetaData.CONTENT_TYPE);
+    assertEquals(3, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+    assertEquals("value1", values[2]);
+  }
+
+  /** Test for the <code>set(String, String)</code> method. */
+  public void testSet() {
+    String[] values = null;
+    MetaData meta = new MetaData();
+
+    values = meta.get("contentype");
+    assertEquals(0, values.length);
+
+    meta.set("contentype", "value1");
+    values = meta.get("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+    
+    meta.set("Content-Type", "value2");
+    values = meta.get("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value2", values[0]);
+    
+    meta.set("contenttype", "new value 1");
+    meta.add("contenttype", "new value 2");
+    values = meta.get("contentype");
+    assertEquals(2, values.length);
+    assertEquals("new value 1", values[0]);
+    assertEquals("new value 2", values[1]);
+  }
+  
+  /** Test for <code>addAll(MetaData)</code> method */
+  public void testAddAllMetaData() {
+    String[] values = null;
+    MetaData meta1 = new MetaData();
+    MetaData meta2 = new MetaData();
+    
+    meta1.addAll(meta2);
+    assertEquals(0, meta1.size());
+    
+    meta2.add("name-one", "value1.1");
+    meta2.add("name-one", "value1.2");
+    meta1.addAll(meta2);
+    assertEquals(1, meta1.size());
+    values = meta1.get("name-one");
+    assertEquals(2, values.length);
+    assertEquals("value1.1", values[0]);
+    assertEquals("value1.2", values[1]);
+    
+    meta2.add("name-two", "value2.1");
+    meta1.addAll(meta2);
+    assertEquals(2, meta1.size());
+    values = meta1.get("name-one");
+    assertEquals(4, values.length);
+    assertEquals("value1.1", values[0]);
+    assertEquals("value1.2", values[1]);
+    assertEquals("value1.1", values[2]);
+    assertEquals("value1.2", values[3]);
+    values = meta1.get("name-two");
+    assertEquals(1, values.length);
+    assertEquals("value2.1", values[0]);
+  }
+
+  /** Test for <code>setAll(MetaData)</code> method */
+  public void testSetAllMetaData() {
+    String[] values = null;
+    MetaData meta1 = new MetaData();
+    MetaData meta2 = new MetaData();
+    
+    meta1.setAll(meta2);
+    assertEquals(0, meta1.size());
+    
+    meta2.add("name-one", "value1.1");
+    meta2.add("name-one", "value1.2");
+    meta1.setAll(meta2);
+    assertEquals(1, meta1.size());
+    values = meta1.get("name-one");
+    assertEquals(2, values.length);
+    assertEquals("value1.1", values[0]);
+    assertEquals("value1.2", values[1]);
+    
+    meta2.add("name-two", "value2.1");
+    meta1.setAll(meta2);
+    assertEquals(2, meta1.size());
+    values = meta1.get("name-one");
+    assertEquals(2, values.length);
+    assertEquals("value1.1", values[0]);
+    assertEquals("value1.2", values[1]);
+    values = meta1.get("name-two");
+    assertEquals(1, values.length);
+    assertEquals("value2.1", values[0]);
+  }
+  
+  /** Test for <code>addAll(Map)</code> method */
+  public void testAddAllMap() {
+    String[] values = null;
+    HashMap map = new HashMap();
+    MetaData meta = new MetaData();
+    meta.addAll(map);
+    assertEquals(0, meta.size());
+    
+    map.put("name-one", "value-1");
+    meta.addAll(map);
+    assertEquals(1, meta.size());
+    values = meta.get("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value-1", values[0]);
+    
+    map.clear();
+    map.put("name-two", new Integer(1));
+    meta.addAll(map);
+    assertEquals(1, meta.size());
+    
+    ArrayList list = new ArrayList();
+    list.add("value-2");
+    list.add("value-3");
+    list.add(new Integer(10));
+    list.add("value-4");
+    map.put("name-three", list);
+    meta.addAll(map);
+    assertEquals(2, meta.size());
+    values = meta.get("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value-1", values[0]);
+    values = meta.get("name-two");
+    assertEquals(0, values.length);
+    values = meta.get("name-three");
+    assertEquals(3, values.length);
+    assertEquals("value-2", values[0]);
+    assertEquals("value-3", values[1]);
+    assertEquals("value-4", values[2]);
+  }
+
+  /** Test for <code>setAll(Map)</code> method */
+  public void testSetAllMap() {
+    String[] values = null;
+    HashMap map = new HashMap();
+    MetaData meta = new MetaData();
+    meta.setAll(map);
+    assertEquals(0, meta.size());
+    
+    map.put("name-one", "value-1");
+    meta.setAll(map);
+    assertEquals(1, meta.size());
+    values = meta.get("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value-1", values[0]);
+    
+    map.put("name-two", new Integer(1));
+    meta.setAll(map);
+    assertEquals(1, meta.size());
+    
+    ArrayList list = new ArrayList();
+    list.add("value-2");
+    list.add("value-3");
+    list.add(new Integer(10));
+    list.add("value-4");
+    map.put("name-three", list);
+    meta.setAll(map);
+    assertEquals(2, meta.size());
+    values = meta.get("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value-1", values[0]);
+    values = meta.get("name-two");
+    assertEquals(0, values.length);
+    values = meta.get("name-three");
+    assertEquals(3, values.length);
+    assertEquals("value-2", values[0]);
+    assertEquals("value-3", values[1]);
+    assertEquals("value-4", values[2]);
+    
+    map.clear();
+    map.put("name-three", "value-5");
+    meta.setAll(map);
+    assertEquals(2, meta.size());
+    values = meta.get("name-three");
+    assertEquals(1, values.length);
+    assertEquals("value-5", values[0]);
+  }
+  
+  /** Test for <code>first(String)</code> method */
+  public void testFirst() {
+    MetaData meta = new MetaData();
+    // A name that was never added has no first value.
+    assertNull(meta.first("a-name"));
+
+    meta.add("a-name", "value-1");
+    assertEquals("value-1", meta.first("a-name"));
+    meta.add("a-name", "value-2");  // a later value must not displace the first
+    assertEquals("value-1", meta.first("a-name"));
+  }
+  
+  /** Test for <code>last(String)</code> method */
+  public void testLast() {
+    MetaData meta = new MetaData();
+    // A name that was never added has no last value.
+    assertNull(meta.last("a-name"));
+
+    meta.add("a-name", "value-1");
+    assertEquals("value-1", meta.last("a-name"));
+    meta.add("a-name", "value-2");  // the most recently added value becomes the last
+    assertEquals("value-2", meta.last("a-name"));
+  }
+
+  /** Test for <code>contains(String)</code> method */
+  public void testContains() {
+    MetaData meta = new MetaData();
+    assertFalse(meta.contains("a-name"));
+    
+    meta.add("a-name", "value-1");
+    assertTrue(meta.contains("a-name"));
+    assertFalse(meta.contains("another-name"));
+  }
+  
+  /** Test for <code>isEmpty()</code> method */
+  public void testIsEmpty() {
+    MetaData meta = new MetaData();
+    assertTrue(meta.isEmpty());
+    meta.add("a-name", "value-1");
+    assertFalse(meta.isEmpty());
+  }
+
+  /** Test for <code>names</code> method */
+  public void testNames() {
+    String[] names = null;
+    MetaData meta = new MetaData();
+    names = meta.names();
+    assertEquals(0, names.length);
+    
+    meta.add("name-one", "value");
+    names = meta.names();
+    assertEquals(1, names.length);
+    assertEquals("name-one", names[0]);
+    meta.add("name-two", "value");
+    names = meta.names();
+    assertEquals(2, names.length);
+  }
+
+  /** Test for <code>get(String, int)</code> method */
+  public void testGetStringInt() {
+    MetaData meta = new MetaData();
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    assertEquals("value-1.1", meta.get("name-one", 0));
+    assertEquals("value-1.2", meta.get("name-one", 1));
+    try {
+      meta.get("name-one", 2);
+      fail("Should raise an IndexOutOfBoundsException");
+    } catch (IndexOutOfBoundsException expected) {
+      // Expected: index 2 is one past the two values stored above.
+      // Any other exception is deliberately not caught, so JUnit reports
+      // it as an error together with its stack trace.
+    }
+  }
+  
+  /** Test for <code>get(String)</code> method */
+  public void testGetString() {
+    String[] values = null;
+    MetaData meta = new MetaData();
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    values = meta.get("name-one");
+    assertEquals("value-1.1", values[0]);
+    assertEquals("value-1.2", values[1]);
+    values = meta.get("name-two");
+    assertNotNull(values);
+    assertEquals(0, values.length);
+  }
+
+  /** Test for <code>remove(String)</code> method */
+  public void testRemove() {
+    MetaData meta = new MetaData();
+    meta.remove("name-one");
+    assertEquals(0, meta.size());
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    meta.add("name-two", "value-2.2");
+    assertEquals(2, meta.size());
+    assertTrue(meta.contains("name-one"));
+    assertTrue(meta.contains("name-two"));
+    meta.remove("name-one");
+    assertEquals(1, meta.size());
+    assertFalse(meta.contains("name-one"));
+    assertTrue(meta.contains("name-two"));
+    meta.remove("name-two");
+    assertEquals(0, meta.size());
+    assertFalse(meta.contains("name-one"));
+    assertFalse(meta.contains("name-two"));
+  }
+
+  /** Test for <code>equals(Object)</code> method */
+  public void testObject() {
+    MetaData meta1 = new MetaData();
+    MetaData meta2 = new MetaData();
+    assertFalse(meta1.equals(null));
+    assertFalse(meta1.equals("String"));
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.2");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.x");
+    assertFalse(meta1.equals(meta2));
+  }
+  
+  /** Test for <code>Writable</code> implementation */
+  public void testWritable() {
+    MetaData result = null;
+    MetaData meta = new MetaData();
+    result = writeRead(meta);
+    assertTrue(result.isEmpty());
+    meta.add("name-one", "value-1.1");
+    result = writeRead(meta);
+    assertEquals(1, result.size());
+    assertEquals(1, result.size("name-one"));
+    assertEquals("value-1.1", result.get("name-one", 0));
+    meta.add("name-two", "value-2.1");
+    meta.add("name-two", "value-2.2");
+    result = writeRead(meta);
+    assertEquals(2, result.size());
+    assertEquals(1, result.size("name-one"));
+    assertEquals("value-1.1", result.get("name-one", 0));
+    assertEquals(2, result.size("name-two"));
+    assertEquals("value-2.1", result.get("name-two", 0));
+    assertEquals("value-2.2", result.get("name-two", 1));
+  }
+  
+  private MetaData writeRead(MetaData meta) {
+    MetaData copy = new MetaData();  // receives the round-tripped data
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    try {
+      meta.write(new DataOutputStream(buffer));
+      copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
+    } catch (IOException ioe) {
+      fail(ioe.toString());  // turn an I/O problem into a test failure
+    }
+    return copy;
+  }
+}

Property changes on: src/test/org/apache/nutch/util/TestMetaData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/java/org/apache/nutch/fetcher/Fetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/Fetcher.java	(revision 366307)
+++ src/java/org/apache/nutch/fetcher/Fetcher.java	(working copy)
@@ -196,13 +196,13 @@
 
       if (content == null) {
         String url = key.toString();
-        content = new Content(url, url, new byte[0], "", new ContentProperties());
+        content = new Content(url, url, new byte[0], "", new MetaData());
       }
 
-      content.getMetadata().setProperty           // add segment to metadata
+      content.getMetadata().add                   // add segment to metadata
         (SEGMENT_NAME_KEY, segmentName);
-      content.getMetadata().setProperty           // add score to metadata
+      content.getMetadata().add                   // add score to metadata
         (SCORE_KEY, Float.toString(datum.getScore()));
 
       Parse parse = null;
       if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
@@ -220,7 +220,7 @@
         // Calculate page signature. For non-parsing fetchers this will
         // be done in ParseSegment
         byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
-        parse.getData().getMetadata().setProperty(SIGNATURE_KEY, StringUtil.toHexString(signature));
+        parse.getData().getMetadata().add(SIGNATURE_KEY, StringUtil.toHexString(signature));
         datum.setSignature(signature);
       }
 
Index: src/java/org/apache/nutch/servlet/Cached.java
===================================================================
--- src/java/org/apache/nutch/servlet/Cached.java	(revision 366307)
+++ src/java/org/apache/nutch/servlet/Cached.java	(working copy)
@@ -16,11 +16,11 @@
 
 package org.apache.nutch.servlet;
 
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.searcher.NutchBean;
 import org.apache.nutch.searcher.Hit;
 import org.apache.nutch.searcher.HitDetails;
 
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.NutchConf;
 
 import javax.servlet.ServletContext;
@@ -77,8 +77,8 @@
     byte[] bytes = bean.getContent(details);
 
     // pass all original headers? only these for now.
-    ContentProperties metaData = bean.getParseData(details).getMetadata();
-    String contentType = (String) metaData.get("Content-Type");
+    MetaData metaData = bean.getParseData(details).getMetadata();
+    String contentType = metaData.last(MetaData.CONTENT_TYPE); // last value, as the replaced get() returned
     //String lastModified = (String) metaData.get("Last-Modified");
     //String contentLength = (String) metaData.get("Content-Length");
     // better use this, since it may have been truncated during fetch
Index: src/java/org/apache/nutch/indexer/Indexer.java
===================================================================
--- src/java/org/apache/nutch/indexer/Indexer.java	(revision 366307)
+++ src/java/org/apache/nutch/indexer/Indexer.java	(working copy)
@@ -191,15 +191,15 @@
     }
 
     Document doc = new Document();
-    ContentProperties meta = parseData.getMetadata();
+    MetaData meta = parseData.getMetadata();
     String[] anchors = inlinks!=null ? inlinks.getAnchors() : new String[0];
 
     // add segment, used to map from merged index back to segment files
     doc.add(Field.UnIndexed("segment",
-                            meta.getProperty(Fetcher.SEGMENT_NAME_KEY)));
+                            meta.last(Fetcher.SEGMENT_NAME_KEY)));
 
     // add digest, used by dedup
-    doc.add(Field.UnIndexed("digest", meta.getProperty(Fetcher.SIGNATURE_KEY)));
+    doc.add(Field.UnIndexed("digest", meta.last(Fetcher.SIGNATURE_KEY)));
 
     // boost is opic
     float boost = (float)Math.pow(dbDatum.getScore(), scorePower);
Index: src/java/org/apache/nutch/protocol/ContentProperties.java
===================================================================
--- src/java/org/apache/nutch/protocol/ContentProperties.java	(revision 366307)
+++ src/java/org/apache/nutch/protocol/ContentProperties.java	(working copy)
@@ -1,203 +0,0 @@
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Enumeration;
-import java.util.Iterator;
-import java.util.Properties;
-import java.util.TreeMap;
-
-import org.apache.nutch.io.UTF8;
-import org.apache.nutch.io.Writable;
-
-/**
- * writable case insensitive properties
- */
-public class ContentProperties extends TreeMap implements Writable {
-
-    /**
-     * construct the TreeMap with a case insensitive comparator
-     */
-    public ContentProperties() {
-        super(String.CASE_INSENSITIVE_ORDER);
-    }
-
-    /**
-     * initialize with default values
-     * 
-     * @param defaults
-     */
-    public ContentProperties(Properties defaults) {
-        super(String.CASE_INSENSITIVE_ORDER);
-        putAll(defaults);
-    }
-
-    /**
-     * @param key
-     * @return the property value or null
-     */
-    public String getProperty(String key) {
-        return (String) get(key);
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see java.util.Map#get(java.lang.Object)
-     */
-    public Object get(Object arg0) {
-        Object object = super.get(arg0);
-        if (object != null && object instanceof ArrayList) {
-            ArrayList list = (ArrayList) object;
-            return list.get(list.size() - 1);
-        }
-        return object;
-    }
-
-    /**
-     * @param key
-     * @return the properties as a string array if there is no such property we
-     *         retunr a array with 0 entries
-     */
-    public String[] getProperties(String key) {
-        Object object = super.get(key);
-        if (object != null && !(object instanceof ArrayList)) {
-            return new String[] { (String) object };
-        } else if (object != null && object instanceof ArrayList) {
-            ArrayList list = (ArrayList) object;
-            return (String[]) list.toArray(new String[list.size()]);
-        }
-        return new String[0];
-    }
-
-    /**
-     * sets the key value tuple
-     * 
-     * @param key
-     * @param value
-     */
-    public void setProperty(String key, String value) {
-        Object object = super.get(key);
-        if (object != null && !(object instanceof ArrayList)) {
-            ArrayList arrayList = new ArrayList();
-            arrayList.add(object);
-            arrayList.add(value);
-            put(key, arrayList);
-        } else if (object instanceof ArrayList) {
-            ((ArrayList) object).add(value);
-        } else {
-            put(key, value);
-        }
-
-    }
-
-    public Enumeration propertyNames() {
-        return new KeyEnumeration(keySet().iterator());
-    }
-
-    class KeyEnumeration implements Enumeration {
-
-        private Iterator fIterator;
-
-        public KeyEnumeration(Iterator iterator) {
-            fIterator = iterator;
-        }
-
-        public boolean hasMoreElements() {
-            return fIterator.hasNext();
-
-        }
-
-        public Object nextElement() {
-            return fIterator.next();
-        }
-
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.nutch.io.Writable#write(java.io.DataOutput)
-     */
-    public final void write(DataOutput out) throws IOException {
-        out.writeInt(keySet().size());
-        Iterator iterator = keySet().iterator();
-        String key;
-        String[] properties;
-        while (iterator.hasNext()) {
-            key = (String) iterator.next();
-            UTF8.writeString(out, key);
-            properties = getProperties(key);
-            out.writeInt(properties.length);
-            for (int i = 0; i < properties.length; i++) {
-                UTF8.writeString(out, properties[i]);
-            }
-        }
-
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.nutch.io.Writable#readFields(java.io.DataInput)
-     */
-    public final void readFields(DataInput in) throws IOException {
-        int keySize = in.readInt();
-        String key;
-        for (int i = 0; i < keySize; i++) {
-            key = UTF8.readString(in);
-            int valueSize = in.readInt();
-            for (int j = 0; j < valueSize; j++) {
-                setProperty(key, UTF8.readString(in));
-            }
-        }
-    }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see java.lang.Object#equals(java.lang.Object)
-     */
-    public boolean equals(Object obj) {
-        if (!(obj instanceof ContentProperties)) {
-            return false;
-        }
-        ContentProperties properties = (ContentProperties) obj;
-        Enumeration enumeration = properties.propertyNames();
-        while (enumeration.hasMoreElements()) {
-            String key = (String) enumeration.nextElement();
-            String[] values = properties.getProperties(key);
-            String[] myValues = getProperties(key);
-            if (values.length != myValues.length) {
-                return false;
-            }
-            for (int i = 0; i < values.length; i++) {
-                if (!values[i].equals(myValues[i])) {
-                    return false;
-                }
-
-            }
-        }
-
-        return true;
-    }
-
-}
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 366307)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -45,12 +45,12 @@
   private String base;
   private byte[] content;
   private String contentType;
-  private ContentProperties metadata;
+  private MetaData metadata;
 
   public Content() {}
     
   public Content(String url, String base, byte[] content, String contentType,
-                 ContentProperties metadata){
+                 MetaData metadata){
 
     if (url == null) throw new IllegalArgumentException("null url");
     if (base == null) throw new IllegalArgumentException("null base");
@@ -77,7 +77,7 @@
 
     contentType = UTF8.readString(in);            // read contentType
 
-    metadata = new ContentProperties();
+    metadata = new MetaData();
     metadata.readFields(in);                    // read meta data
   }
 
@@ -143,7 +143,7 @@
   }
 
   /** Other protocol-specific data. */
-  public ContentProperties getMetadata() {
+  public MetaData getMetadata() {
     ensureInflated();
     return metadata;
   }
@@ -151,7 +151,7 @@
   /** Return the value of a metadata property. */
   public String get(String name) {
     ensureInflated();
-    return getMetadata().getProperty(name);
+    return getMetadata().last(name);
   }
 
   public boolean equals(Object o) {
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 366307)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -67,7 +67,7 @@
           dataOut.append(key, parse.getData());
           
           // recover the signature prepared by Fetcher or ParseSegment
-          String sig = parse.getData().getMetadata().getProperty(Fetcher.SIGNATURE_KEY);
+          String sig = parse.getData().getMetadata().last(Fetcher.SIGNATURE_KEY);
           if (sig != null) {
             byte[] signature = StringUtil.fromHexString(sig);
             if (signature != null) {
Index: src/java/org/apache/nutch/parse/ParseData.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseData.java	(revision 366307)
+++ src/java/org/apache/nutch/parse/ParseData.java	(working copy)
@@ -21,7 +21,7 @@
 
 import org.apache.nutch.io.*;
 import org.apache.nutch.fs.*;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.NutchConf;
 
 
@@ -35,12 +35,12 @@
 
   private String title;
   private Outlink[] outlinks;
-  private ContentProperties metadata;
+  private MetaData metadata;
   private ParseStatus status;
 
   public ParseData() {}
 
-  public ParseData(ParseStatus status, String title, Outlink[] outlinks, ContentProperties metadata) {
+  public ParseData(ParseStatus status, String title, Outlink[] outlinks, MetaData metadata) {
     this.status = status;
     this.title = title;
     this.outlinks = outlinks;
@@ -63,10 +63,12 @@
   /** Other page properties.  This is the place to find format-specific
    * properties.  Different parser implementations for different content types
    * will populate this differently. */
-  public ContentProperties getMetadata() { return metadata; }
+  public MetaData getMetadata() { return metadata; }
 
   /** Return the value of a metadata property. */
-  public String get(String name) { return getMetadata().getProperty(name); }
+  public String get(String name) {
+    return getMetadata().last(name);
+  }
 
   //
   // Writable methods
@@ -95,13 +97,9 @@
     for (int i = outlinksToRead; i < totalOutlinks; i++) {
       Outlink.skip(in);
     }
-    
-    int propertyCount = in.readInt();             // read metadata
-    metadata = new ContentProperties();
-    for (int i = 0; i < propertyCount; i++) {
-      metadata.put(UTF8.readString(in), UTF8.readString(in));
-    }
-    
+
+    metadata = new MetaData();                    // read metadata
+    metadata.readFields(in);
   }
 
   public final void write(DataOutput out) throws IOException {
@@ -113,14 +111,7 @@
     for (int i = 0; i < outlinks.length; i++) {
       outlinks[i].write(out);
     }
-
-    out.writeInt(metadata.size());                // write metadata
-    Iterator i = metadata.entrySet().iterator();
-    while (i.hasNext()) {
-      Map.Entry e = (Map.Entry)i.next();
-      UTF8.writeString(out, (String)e.getKey());
-      UTF8.writeString(out, (String)e.getValue());
-    }
+    metadata.write(out);                          // write metadata
   }
 
   public static ParseData read(DataInput in) throws IOException {
Index: src/java/org/apache/nutch/parse/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatus.java	(revision 366307)
+++ src/java/org/apache/nutch/parse/ParseStatus.java	(working copy)
@@ -12,8 +12,9 @@
 
 import org.apache.nutch.io.VersionedWritable;
 import org.apache.nutch.io.WritableUtils;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
+
 /**
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
@@ -231,7 +232,7 @@
     private ParseData data = null;
     
     public EmptyParseImpl(ParseStatus status) {
-      data = new ParseData(status, "", new Outlink[0], new ContentProperties());
+      data = new ParseData(status, "", new Outlink[0], new MetaData());
     }
     
     public ParseData getData() {
Index: src/java/org/apache/nutch/parse/ParseSegment.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseSegment.java	(revision 366307)
+++ src/java/org/apache/nutch/parse/ParseSegment.java	(working copy)
@@ -60,7 +60,7 @@
 
     // compute the new signature
     byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse);
-    parse.getData().getMetadata().setProperty(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
+    parse.getData().getMetadata().add(Fetcher.SIGNATURE_KEY, StringUtil.toHexString(signature));
     if (status.isSuccess()) {
       output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
     } else {
Index: src/java/org/apache/nutch/util/MetaData.java
===================================================================
--- src/java/org/apache/nutch/util/MetaData.java	(revision 0)
+++ src/java/org/apache/nutch/util/MetaData.java	(revision 0)
@@ -0,0 +1,631 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.util;
+
+// JDK imports
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+// Commons Lang imports
+import org.apache.commons.lang.StringUtils;
+
+// Nutch imports
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.io.Writable;
+
+
+/**
+ * A syntax tolerant and multi-valued metadata container.
+ *
+ * This class defines a Collection of Standard Metadata Property names.
+ * It includes the <a href="http://dublincore.org">Dublin Core</a> metadata
+ * properties, along with Nutch specific properties.
+ * It also provides syntax tolerant properties naming and support for
+ * multi-valued properties.
+ *
+ * @author Chris Mattmann
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public final class MetaData implements Writable {
+  
+
+  /** Used to format DC dates for the DATE metadata field (SimpleDateFormat is not thread-safe). */
+  public final static SimpleDateFormat DATE_FORMAT = 
+          new SimpleDateFormat("yyyy-MM-dd");
+  
+  // Dublin Core Standard Metadata Elements
+  // @see http://www.dublincore.org/documents/dcmi-terms/
+  
+  /**
+   * Typically, Format may include the media-type or dimensions of the
+   * resource. Format may be used to determine the software, hardware or other
+   * equipment needed to display or operate the resource. Examples of
+   * dimensions include size and duration. Recommended best practice is to
+   * select a value from a controlled vocabulary (for example, the list of
+   * Internet Media Types [MIME] defining computer media formats).
+   */
+  public static final String FORMAT = "format";
+  
+  /**
+   * Recommended best practice is to identify the resource by means of a
+   * string or number conforming to a formal identification system. Example
+   * formal identification systems include the Uniform Resource Identifier
+   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Identifier (DOI) and the International Standard Book Number (ISBN).
+   */
+  public static final String IDENTIFIER = "identifier";
+  
+  /**
+   * Date on which the resource was changed.
+   */
+  public static final String LAST_MODIFIED = "modified";
+  
+  /**
+   * An entity responsible for making contributions to the content of the
+   * resource. Examples of a Contributor include a person, an organisation, or
+   * a service. Typically, the name of a Contributor should be used to
+   * indicate the entity.
+   */
+  public static final String CONTRIBUTOR = "contributor";
+  
+  /**
+   * The extent or scope of the content of the resource. Coverage will
+   * typically include spatial location (a place name or geographic
+   * coordinates), temporal period (a period label, date, or date range) or
+   * jurisdiction (such as a named administrative entity). Recommended best
+   * practice is to select a value from a controlled vocabulary (for example,
+   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
+   * named places or time periods be used in preference to numeric identifiers
+   * such as sets of coordinates or date ranges.
+   */
+  public static final String COVERAGE = "coverage";
+  
+  /**
+   * An entity primarily responsible for making the content of the resource.
+   * Examples of a Creator include a person, an organisation, or a service.
+   * Typically, the name of a Creator should be used to indicate the entity.
+   */
+  public static final String CREATOR = "creator";
+  
+  /**
+   * A date associated with an event in the life cycle of the resource.
+   * Typically, Date will be associated with the creation or availability of
+   * the resource. Recommended best practice for encoding the date value is
+   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
+   * format.
+   */
+  public static final String DATE = "date";
+  
+  /**
+   * An account of the content of the resource. Description may include but is
+   * not limited to: an abstract, table of contents, reference to a graphical
+   * representation of content or a free-text account of the content.
+   */
+  public static final String DESCRIPTION = "description";
+  
+  /**
+   * A language of the intellectual content of the resource. Recommended best
+   * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
+   * [ISO639], defines two- and three-letter primary language tags with
+   * optional subtags. Examples include "en" or "eng" for English, "akk" for
+   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   */
+  public static final String LANGUAGE = "language";
+  
+  /**
+   * An entity responsible for making the resource available. Examples of a
+   * Publisher include a person, an organisation, or a service. Typically, the
+   * name of a Publisher should be used to indicate the entity.
+   */
+  public static final String PUBLISHER = "publisher";
+  
+  /**
+   * A reference to a related resource. Recommended best practice is to
+   * reference the resource by means of a string or number conforming to a
+   * formal identification system.
+   */
+  public static final String RELATION = "relation";
+  
+  /**
+   * Information about rights held in and over the resource. Typically, a
+   * Rights element will contain a rights management statement for the
+   * resource, or reference a service providing such information. Rights
+   * information often encompasses Intellectual Property Rights (IPR),
+   * Copyright, and various Property Rights. If the Rights element is absent,
+   * no assumptions can be made about the status of these and other rights
+   * with respect to the resource.
+   */
+  public static final String RIGHTS = "rights";
+  
+  /**
+   * A reference to a resource from which the present resource is derived. The
+   * present resource may be derived from the Source resource in whole or in
+   * part. Recommended best practice is to reference the resource by means of
+   * a string or number conforming to a formal identification system.
+   */
+  public static final String SOURCE = "source";
+  
+  /**
+   * The topic of the content of the resource. Typically, a Subject will be
+   * expressed as keywords, key phrases or classification codes that describe
+   * a topic of the resource. Recommended best practice is to select a value
+   * from a controlled vocabulary or formal classification scheme.
+   */
+  public static final String SUBJECT = "subject";
+  
+  /**
+   * A name given to the resource. Typically, a Title will be a name by which
+   * the resource is formally known.
+   */
+  public static final String TITLE = "title";
+  
+  /**
+   * The nature or genre of the content of the resource. Type includes terms
+   * describing general categories, functions, genres, or aggregation levels
+   * for content. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
+   * To describe the physical or digital manifestation of the resource, use
+   * the Format element.
+   */
+  public static final String TYPE = "type";
+  
+  
+  // Other Metadata Elements used at the protocol, and parsing layers
+  
+  public static final String CONTENT_TYPE =
+          "Content-Type";
+  
+  public static final String CONTENT_LENGTH =
+          "Content-Length";
+  
+  public static final String CONTENT_ENCODING =
+          "Content-Encoding";
+  
+  public static final String ORIGINAL_CHAR_ENCODING =
+          "OriginalCharEncoding";
+  
+  public static final String CHAR_ENCODING_FOR_CONVERSION =
+          "CharEncodingForConversion";
+  
+  public static final String KEYWORDS =
+          "Keywords";
+  
+  public static final String COMMENTS =
+          "Comments";
+  
+  public static final String LAST_AUTHOR =
+          "Last-Author";
+  
+  public static final String APPLICATION_NAME =
+          "Application-Name";
+  
+  public static final String CHARACTER_COUNT =
+          "Character Count";
+  
+  public static final String LAST_PRINTED =
+          "Last-Printed";
+  
+  public static final String LAST_SAVED =
+          "Last-Save-Date";
+  
+  public static final String PAGE_COUNT =
+          "Page-Count";
+  
+  public static final String REVISION_NUMBER =
+          "Revision-Number";
+  
+  public static final String WORD_COUNT =
+          "Word-Count";
+  
+  public static final String TEMPLATE =
+          "Template";
+  
+  public static final String AUTHOR =
+          "Author";
+  
+  public static final String CONTENT_DISPOSITION =
+          "content-disposition";
+  
+  public static final String LOCATION =
+          "Location";
+  
+
+  
+  private final static Map NAMES_IDX = new HashMap();
+  private static String[] normalized = null;
+  
+  static {
+    // Uses self introspection to fill the metanames index and the
+    // metanames list.
+    Field[] fields = MetaData.class.getDeclaredFields();
+    for (int i=0; i<fields.length; i++) {
+      int mods = fields[i].getModifiers();
+      if (Modifier.isFinal(mods) &&
+          Modifier.isPublic(mods) &&
+          Modifier.isStatic(mods) &&
+          fields[i].getType().equals(String.class)) {
+        try {
+          String val = (String) fields[i].get(null);
+          NAMES_IDX.put(normalize(val), val);
+        } catch (Exception e) {
+          // Simply ignore...
+        }
+      }
+    }
+    normalized = (String[]) NAMES_IDX.keySet().toArray(new String[NAMES_IDX.size()]);
+  }
+  
+  
+  /** A map of all metadata attributes */
+  private Map metadata = null;
+
+  
+  /** Constructs a new, empty metadata. */
+  public MetaData() {
+    metadata = new HashMap();
+  }
+  
+
+  /** Removes all the name-values mappings from this metadata */
+  public void clear() {
+    metadata.clear();
+  }
+
+  /** 
+   * Checks if a mapping exists for a metadata name.
+   *
+   * @return <code>true</code> if this metadata contains at least one value
+   *         for the specified metadata name
+   */
+  public boolean contains(String name) {
+    return metadata.containsKey(getNormalizedName(name));
+  }
+
+  /**
+   * Returns <code>true</code> if this metadata contains no
+   * name-values mappings.
+   */
+  public boolean isEmpty() {
+    return metadata.isEmpty();
+  }
+
+  /**
+   * Returns an array of the metadata names contained in the metadata.
+   */
+  public String[] names() {
+    Iterator iter = metadata.keySet().iterator();
+    List names = new ArrayList();
+    while(iter.hasNext()) {
+      names.add(getNormalizedName((String) iter.next()));
+    }
+    return (String[]) names.toArray(new String[names.size()]);
+  }
+
+  /**
+   * Get the last value associated to a metadata name.
+   * @return the last added value to the specified metadata name.
+   */
+  public String last(String name) {
+    String[] values = get(name);
+    return (values.length != 0) ? values[values.length - 1] : null;
+  }
+
+  /**
+   * Get the first value associated to a metadata name.
+   * @return the first added value to the specified metadata name.
+   */
+  public String first(String name) {
+    String[] values = get(name);
+    return (values.length > 0) ? values[0] : null;
+  }
+
+  /**
+   * Get the number of values associated to a metadata name.
+   * @return the number of values associated to the specified metadata name.
+   */
+  public int size(String name) {
+    return get(name).length;
+  }
+
+  /**
+   * Get the idx<sup>th</sup> value associated to a metadata name.
+   * @return the idx<sup>th</sup> value associated to the specified metadata
+   *         name.
+   */
+  public String get(String name, int idx) {
+    return get(name)[idx];
+  }
+
+  /**
+   * Get all the values associated to a metadata name.
+   * @return the values associated to the specified metadata name.
+   */
+  public String[] get(String name) {
+    Object object = metadata.get(getNormalizedName(name));
+    if (object == null) {
+      return new String[0];
+    } else {
+      List list = (List) object;
+      return (String[]) list.toArray(new String[list.size()]);
+    }
+  }
+
+  /**
+   * Add metadata name/value.
+   * Add the specified value to the list of values associated to the
+   * specified metadata name.
+   *
+   * @param name the metadata name.
+   * @param value the metadata value.
+   */
+  public void add(String name, String value) {
+    String theName = getNormalizedName(name);
+    List list = null;
+    Object object = metadata.get(theName);
+    if (object == null) {
+      list = new ArrayList();
+      metadata.put(theName, list);
+    } else {
+      list = (List) object;
+    }
+    list.add(value);
+  }
+
+  /**
+   * Set metadata name/value.
+   * Associate the specified value to the specified metadata name. If some
+   * previous values were associated to this name, they are removed.
+   *
+   * @param name the metadata name.
+   * @param value the metadata value.
+   */
+  public void set(String name, String value) {
+    List list = new ArrayList();
+    list.add(value);
+    metadata.put(getNormalizedName(name), list);
+  }
+
+  /**
+   * Remove a metadata and all its associated values.
+   */
+  public void remove(String name) {
+    metadata.remove(getNormalizedName(name));
+  }
+  
+  /**
+   * Add all the name/value pairs to this metadata.
+   * Add all the name/value pairs of the specified map to this metadata.
+   * Some restrictions apply on the name/value that will be added:
+   * <ul>
+   *   <li>the entry's key can be cast to a String</li>
+   *   <li>the entry's value can be cast to a String or to a List:
+   *     <ul>
+   *       <li>If it is a String, then the (String)key/(String)value is added.</li>
+   *       <li>If it is a List, then each element of the List that can be cast
+   *           to a String is added</li>
+   *     </ul></li>
+   * </ul>
+   * In any other cases, the key/value is simply ignored.
+   */
+  public void addAll(Map map) {
+    putAll(map, false);
+  }
+    
+  /**
+   * Add all the name/value pairs to this metadata.
+   * Add all the name/value pairs of the specified metadata to this metadata.
+   */
+  public void addAll(MetaData meta) {
+    addAll(meta.asMap());
+  }
+
+  /**
+   * Set all the name/value pairs to this metadata.
+   * Set all the name/value pairs of the specified map to this metadata.
+   * If some previous values were associated to this name, they are removed.
+   * Some restrictions apply on the name/value that will be set:
+   * <ul>
+   *   <li>the entry's key can be cast to a String</li>
+   *   <li>the entry's value can be cast to a String or to a List:
+   *     <ul>
+   *       <li>If it is a String, then the (String)key/(String)value is added.</li>
+   *       <li>If it is a List, then each element of the List that can be cast
+   *           to a String is added</li>
+   *     </ul></li>
+   * </ul>
+   * In any other cases, the key/value is simply ignored.
+   */
+  public void setAll(Map map) {
+    putAll(map, true);
+  }
+  
+  /**
+   * Set all the name/value pairs to this metadata.
+   * Set all the name/value pairs of the specified metadata to this metadata.
+   * If some previous values were associated to this name, they are removed.
+   */
+  public void setAll(MetaData meta) {
+    setAll(meta.asMap());
+  }
+
+  /**
+   * Returns the number of metadata names in this metadata.
+   */
+  public int size() {
+    return metadata.size();
+  }
+  
+  /**
+   * Compares this metadata with another object.
+   * Two metadata are equal when they hold the same names and, for each
+   * name, the same values in the same order.
+   *
+   * @param o the object to compare with
+   * @return <code>true</code> if <code>o</code> is a MetaData with the same
+   *         name/values mappings, <code>false</code> otherwise
+   */
+  public boolean equals(Object o) {
+    if (this == o) { return true; }
+    // instanceof is false for null, and avoids using an exception for
+    // a plain type check.
+    if (!(o instanceof MetaData)) { return false; }
+    // Map/List equality compares names and ordered values, and copes
+    // with null values.
+    return metadata.equals(((MetaData) o).metadata);
+  }
+
+  /**
+   * Returns a hash code consistent with {@link #equals(Object)}, so that
+   * equal metadata can be used as keys of hash-based collections.
+   * The value changes whenever the metadata content is modified.
+   *
+   * @return the hash code of the underlying name/values map
+   */
+  public int hashCode() {
+    return metadata.hashCode();
+  }
+  
+  
+  /**
+   * Get the normalized name of metadata attribute name.
+   * This method tries to find a well-known metadata name (one of the
+   * metadata names defined in this class) that matches the specified name.
+   * The matching is error tolerant. For instance,
+   * <ul>
+   *  <li>content-type gives Content-Type</li>
+   *  <li>CoNtEntType  gives Content-Type</li>
+   *  <li>ConTnTtYpe   gives Content-Type</li>
+   * </ul>
+   * If no matching with a well-known metadata name is found, then the original
+   * name is returned.
+   */
+  public static String getNormalizedName(String name) {
+    String searched = normalize(name);
+    String value = (String) NAMES_IDX.get(searched);
+
+    if (value == null) {
+      int threshold = searched.length() / 3;
+      for (int i=0; i<normalized.length && value == null; i++) {
+        if (StringUtils.getLevenshteinDistance(searched, normalized[i]) < threshold) {
+          value = (String) NAMES_IDX.get(normalized[i]);
+        }
+      }
+    }
+    return (value != null) ? value : name;
+  }
+
+  private Map asMap() {
+    return metadata;
+  }
+  
+  private void putAll(Map map, boolean replace) {
+    Iterator iter = map.entrySet().iterator();
+    Map.Entry entry = null;
+    while (iter.hasNext()) {
+      entry = (Map.Entry) iter.next();
+      try {
+        putAll((String) entry.getKey(), getValues(entry.getValue()), replace);
+      } catch (ClassCastException cce) {
+          // Simply ignore if key cannot be cast to String
+      }
+    }
+  }
+  
+  private void putAll(String name, List values, boolean replace) {
+
+    if (replace) { remove(name); }
+    
+    for (int i=0; i<values.size(); i++) {
+      try {
+        add(name, (String) values.get(i));
+      } catch (ClassCastException cce) {
+        // Simply ignore a value that cannot be cast to String
+      }
+    }
+  }
+  
+  private static List getValues(Object value) {
+    List values = new ArrayList();
+    if (value instanceof List) {
+      values.addAll((List) value);
+    } else if (value instanceof String) {
+      values.add((String) value);
+    } else {
+      // Simply ignore...
+    }
+    return values;
+  }
+  
+  
+  private final static String normalize(String str) {
+    char c;
+    StringBuffer buf = new StringBuffer();
+    for (int i=0; i<str.length(); i++) {
+      c = str.charAt(i);
+      if (Character.isLetter(c)) {
+        buf.append(Character.toLowerCase(c));
+      }
+    }
+    return buf.toString();
+  }
+
+  
+  /* ------------------------- *
+   * <implementation:Writable> *
+   * ------------------------- */
+  
+  // Inherited Javadoc
+  public final void write(DataOutput out) throws IOException {
+    out.writeInt(size());
+    String[] values = null;
+    String[] names = names();
+    for (int i=0; i<names.length; i++) {
+      UTF8.writeString(out, names[i]);
+      values = get(names[i]);
+      out.writeInt(values.length);
+      for (int j=0; j<values.length; j++) {
+        UTF8.writeString(out, values[j]);
+      }
+    }
+  }
+
+  // Inherited Javadoc: replaces the current content with the one read from in
+  public final void readFields(DataInput in) throws IOException {
+    metadata.clear();  // a reused instance must not keep stale values
+    int keySize = in.readInt();
+    for (int i=0; i<keySize; i++) {
+      String key = UTF8.readString(in);
+      int valueSize = in.readInt();
+      for (int j=0; j<valueSize; j++) {
+        add(key, UTF8.readString(in));
+      }
+    }
+  }
+
+  /* -------------------------- *
+   * </implementation:Writable> *
+   * -------------------------- */
+   
+}

Property changes on: src/java/org/apache/nutch/util/MetaData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java
===================================================================
--- src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java	(revision 366307)
+++ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java	(working copy)
@@ -20,6 +20,7 @@
 import org.apache.poi.poifs.eventfilesystem.*;
 import org.apache.poi.poifs.filesystem.*;
 import org.apache.poi.util.LittleEndian;
+import org.apache.nutch.util.MetaData;
 
 import java.util.*;
 import java.io.*;
@@ -33,8 +34,7 @@
  * code to extract all msword properties.
  *
  */
-public class WordExtractor
-{
+public class WordExtractor {
 
   /**
    * Constructor
@@ -276,39 +276,40 @@
       /*Dates are being stored in millis since the epoch to aid
       localization*/
       if(title != null)
-        properties.setProperty("Title", title);
+        properties.setProperty(MetaData.TITLE, title);
       if(applicationName != null)
-        properties.setProperty("Application-Name", applicationName);
+        properties.setProperty(MetaData.APPLICATION_NAME, applicationName);
       if(author != null)
-        properties.setProperty("Author", author);
+        properties.setProperty(MetaData.AUTHOR, author);
       if(charCount != 0)
-        properties.setProperty("Character Count", charCount + "");
+        properties.setProperty(MetaData.CHARACTER_COUNT, charCount + "");
       if(comments != null)
-        properties.setProperty("Comments", comments);
+        properties.setProperty(MetaData.COMMENTS, comments);
       if(createDateTime != null)
-        properties.setProperty("Creation-Date", createDateTime.getTime() + "");
+        properties.setProperty(MetaData.DATE,
+                               MetaData.DATE_FORMAT.format(createDateTime));
       if(editTime != 0)
-        properties.setProperty("Edit-Time", editTime + "");
+        properties.setProperty(MetaData.LAST_MODIFIED, editTime + "");
       if(keywords != null)
-        properties.setProperty("Keywords", keywords);
+        properties.setProperty(MetaData.KEYWORDS, keywords);
       if(lastAuthor != null)
-        properties.setProperty("Last-Author", lastAuthor);
+        properties.setProperty(MetaData.LAST_AUTHOR, lastAuthor);
       if(lastPrinted != null)
-        properties.setProperty("Last-Printed", lastPrinted.getTime() + "");
+        properties.setProperty(MetaData.LAST_PRINTED, lastPrinted.getTime() + "");
       if(lastSaveDateTime != null)
-        properties.setProperty("Last-Save-Date", lastSaveDateTime.getTime() + "");
+        properties.setProperty(MetaData.LAST_SAVED, lastSaveDateTime.getTime() + "");
       if(pageCount != 0)
-        properties.setProperty("Page-Count", pageCount + "");
+        properties.setProperty(MetaData.PAGE_COUNT, pageCount + "");
       if(revNumber != null)
-        properties.setProperty("Revision-Number", revNumber);
+        properties.setProperty(MetaData.REVISION_NUMBER, revNumber);
       if(security != 0)
-        properties.setProperty("Security", security + "");
+        properties.setProperty(MetaData.RIGHTS, security + "");
       if(subject != null)
-        properties.setProperty("Subject", subject);
+        properties.setProperty(MetaData.SUBJECT, subject);
       if(template != null)
-        properties.setProperty("Template", template);
+        properties.setProperty(MetaData.TEMPLATE, template);
       if(wordCount != 0)
-        properties.setProperty("Word-Count", wordCount + "");
+        properties.setProperty(MetaData.WORD_COUNT, wordCount + "");
       propertiesBroker.setProperties(properties);
 
       //si.getThumbnail(); // can't think of a sensible way of turning this into a string.
Index: src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
===================================================================
--- src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(revision 366307)
+++ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java	(working copy)
@@ -17,8 +17,6 @@
 package org.apache.nutch.parse.msword;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
@@ -27,9 +25,9 @@
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.OutlinkExtractor;
 import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.util.MetaData;
 
 import java.util.Properties;
-//import java.util.logging.Logger;
 
 import java.io.ByteArrayInputStream;
 
@@ -97,21 +95,14 @@
     }
 
     // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
+    MetaData metadata = new MetaData();
+    metadata.setAll(content.getMetadata()); // copy through
+    if (properties != null) { metadata.addAll(properties); } // was null-guarded before
+    title = metadata.last(MetaData.TITLE);
 
-    if(properties != null) {
-      title = properties.getProperty("Title");
-      properties.remove("Title");
-      metadata.putAll(properties);
-    }
+    if (text == null) { text = ""; }
+    if (title == null) { title = ""; }
 
-    if (text == null)
-      text = "";
-
-    if (title == null)
-      title = "";
-
     // collect outlink
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text);
 
Index: src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
===================================================================
--- src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(revision 366307)
+++ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(working copy)
@@ -22,8 +22,8 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 import org.apache.oro.text.regex.MatchResult;
 import org.apache.oro.text.regex.Pattern;
 import org.apache.oro.text.regex.PatternCompiler;
@@ -57,7 +57,7 @@
     walk(doc, parse, metaTags, url, outlinks);
     if (outlinks.size() > 0) {
       Outlink[] old = parse.getData().getOutlinks();
-      ContentProperties metadata = parse.getData().getMetadata();
+      MetaData metadata = parse.getData().getMetadata();
       String title = parse.getData().getTitle();
       List list = Arrays.asList(old);
       outlinks.addAll(list);
@@ -137,8 +137,8 @@
       idx = Math.min(MAX_TITLE_LEN, script.length());
       title = script.substring(0, idx);
     }
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(c.getMetadata());
+    MetaData metadata = new MetaData();
+    metadata.addAll(c.getMetadata());
     ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, title,
             outlinks, metadata);
     Parse parse = new ParseImpl(script, pd);
Index: src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
===================================================================
--- src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java	(revision 366307)
+++ src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java	(working copy)
@@ -25,7 +25,7 @@
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
 
 public class TestHTMLLanguageParser extends TestCase {
@@ -55,8 +55,7 @@
         Parser parser = ParserFactory.getParser("text/html", URL);
         Parse parse = parser.getParse(content);
 
-        assertEquals(metalanguages[t], (String) parse.getData().get(
-            HTMLLanguageParser.META_LANG_NAME));
+        assertEquals(metalanguages[t], (String) parse.getData().get(MetaData.LANGUAGE));
 
       }
     } catch (Exception e) {
@@ -122,10 +121,10 @@
   
   
   private Content getContent(String text) {
-    ContentProperties p = new ContentProperties();
-    p.put("Content-Type", "text/html");
+    MetaData meta = new MetaData();
+    meta.add("Content-Type", "text/html");
 
-    Content content = new Content(URL, BASE, text.getBytes(), "text/html", p);
+    Content content = new Content(URL, BASE, text.getBytes(), "text/html", meta);
     return content;
   }
 
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java	(revision 366307)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.io.UTF8;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.util.MetaData;
 
 // Lucene imports
 import org.apache.lucene.document.Field;
@@ -61,7 +62,7 @@
     throws IndexingException {
 
     //check if X-meta-lang found, possibly put there by HTMLLanguageParser
-    String lang = parse.getData().get(HTMLLanguageParser.META_LANG_NAME);
+    String lang = parse.getData().get(MetaData.LANGUAGE);
 
     //check if HTTP-header tels us the language
     if (lang == null) {
Index: src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
===================================================================
--- src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(revision 366307)
+++ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java	(working copy)
@@ -28,6 +28,7 @@
 import org.apache.nutch.parse.HtmlParseFilter;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 
 // DOM imports
 import org.w3c.dom.DocumentFragment;
@@ -43,7 +44,6 @@
  */
 public class HTMLLanguageParser implements HtmlParseFilter {
   
-  public static final String META_LANG_NAME="X-meta-lang";
   public static final Logger LOG = LogFormatter
     .getLogger(HTMLLanguageParser.class.getName());
 
@@ -84,7 +84,7 @@
     String lang = parser.getLanguage();
 
     if (lang != null) {
-      parse.getData().getMetadata().put(META_LANG_NAME, lang);
+      parse.getData().getMetadata().add(MetaData.LANGUAGE, lang);
     }
     return parse;
   }
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(revision 366307)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java	(working copy)
@@ -26,20 +26,19 @@
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 
 /**
  * ZipParser class based on MSPowerPointParser class by Stephan Strittmatter.
  * Nutch parse plugin for zip files - Content Type : application/zip
  * @author Rohit Kulkarni & Ashish Vaidya
  */
-public class ZipParser implements Parser{
+public class ZipParser implements Parser {
     
     private static final Logger LOG = LogFormatter.getLogger(ZipParser.class.getName());
     /** Creates a new instance of ZipParser */
@@ -55,7 +54,7 @@
 	Properties properties = null;
         
         try {
-            final String contentLen = content.get("Content-Length");
+            final String contentLen = content.get(MetaData.CONTENT_LENGTH);
             final int len = Integer.parseInt(contentLen);
             System.out.println("ziplen: " + len);
             final byte[] contentInBytes = content.getContent();
@@ -81,8 +80,8 @@
         }
         
         // collect meta data
-        final ContentProperties metadata = new ContentProperties();
-        metadata.putAll(content.getMetadata()); // copy through
+        final MetaData metadata = new MetaData();
+        metadata.addAll(content.getMetadata()); // copy through
         
         if (resultText == null) {
             resultText = "";
Index: src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
===================================================================
--- src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(revision 366307)
+++ src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java	(working copy)
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.List;
-import java.util.Properties;
 import java.util.logging.Logger;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
@@ -28,14 +27,14 @@
 
 // Nutch imports
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.util.NutchConf;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.mime.MimeTypes;
 
 
@@ -85,9 +84,9 @@
           // Trying to resolve the Mime-Type
           String contentType = MIME.getMimeType(fname).getName();
           try {
-            ContentProperties metadata = new ContentProperties();
-            metadata.setProperty("Content-Length", Long.toString(entry.getSize()));
-            metadata.setProperty("Content-Type", contentType);
+            MetaData metadata = new MetaData();
+            metadata.add(MetaData.CONTENT_LENGTH, Long.toString(entry.getSize()));
+            metadata.add(MetaData.CONTENT_TYPE, contentType);
             Content content = new Content(newurl, base, b, contentType, metadata);
             Parse parse = ParseUtil.parse(content);
             ParseData theParseData = parse.getData();
Index: src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
===================================================================
--- src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(revision 366307)
+++ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java	(working copy)
@@ -18,6 +18,7 @@
 
 // JDK imports
 import java.net.URL;
+import java.util.Date;
 import java.util.TreeMap;
 import java.util.logging.Level;
 import java.io.IOException;
@@ -25,7 +26,7 @@
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
 
 /************************************
@@ -60,7 +61,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private MetaData headers = new MetaData();
 
   private final File file;
 
@@ -69,14 +70,14 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.last(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(MetaData.CONTENT_TYPE),
                        headers);
   }
 
@@ -121,10 +122,9 @@
       // where case is insensitive
       if (!f.equals(f.getCanonicalFile())) {
         // set headers
-        TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
         //hdrs.put("Location", f.getCanonicalFile().toURI());
-        hdrs.put("Location", f.getCanonicalFile().toURL().toString());
-        this.headers.putAll(hdrs);
+        headers.add(MetaData.LOCATION, f.getCanonicalFile().toURL().toString());
+        headers.add(MetaData.IDENTIFIER, f.getCanonicalFile().toURL().toString());
 
         this.code = 300;  // http redirect
         return;
@@ -178,17 +178,14 @@
     is.close(); 
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length", new Long(size).toString());
-
-    hdrs.put("Last-Modified",
+    headers.add(MetaData.CONTENT_LENGTH, Long.toString(size));
+    headers.add(MetaData.CONTRIBUTOR, "protocol-file");
+    headers.add(MetaData.DATE, MetaData.DATE_FORMAT.format(new Date()));
+    headers.add(MetaData.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
+    headers.add(MetaData.CONTENT_TYPE, "");   // No Content-Type at file protocol level
+    headers.add(MetaData.FORMAT, "");
 
-    hdrs.put("Content-Type", "");   // No Content-Type at file protocol level
-
-    this.headers.putAll(hdrs);
-
     // response code
     this.code = 200; // http OK
   }
@@ -201,18 +198,13 @@
     this.content = list2html(f.listFiles(), path, "/".equals(path) ? false : true);
 
     // set headers
-    TreeMap hdrs = new TreeMap(String.CASE_INSENSITIVE_ORDER);
-
-    hdrs.put("Content-Length",
+    headers.add(MetaData.CONTENT_LENGTH,
       new Integer(this.content.length).toString());
-
-    hdrs.put("Content-Type", "text/html");
-
-    hdrs.put("Last-Modified",
+    headers.add(MetaData.CONTENT_TYPE, "text/html");
+    headers.add(MetaData.FORMAT, "text/html");
+    headers.add(MetaData.LAST_MODIFIED,
       this.file.httpDateFormat.toString(f.lastModified()));
 
-    this.headers.putAll(hdrs);
-
     // response code
     this.code = 200; // http OK
   }
Index: src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
===================================================================
--- src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(revision 366307)
+++ src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java	(working copy)
@@ -26,8 +26,8 @@
 import org.pdfbox.exceptions.InvalidPasswordException;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
@@ -35,12 +35,10 @@
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.OutlinkExtractor;
-import org.apache.nutch.parse.ParseException;
 
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 
-import java.util.Properties;
 import java.util.logging.Logger;
 
 import java.io.ByteArrayInputStream;
@@ -89,12 +87,13 @@
 
     String text = null;
     String title = null;
+    MetaData metadata = new MetaData();
 
     try {
 
       byte[] raw = content.getContent();
 
-      String contentLength = content.get("Content-Length");
+      String contentLength = content.get(MetaData.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -122,15 +121,19 @@
       PDDocumentInformation info = pdf.getDocumentInformation();
       title = info.getTitle();
       // more useful info, currently not used. please keep them for future use.
-      // pdf.getPageCount();
-      // info.getAuthor()
-      // info.getSubject()
-      // info.getKeywords()
-      // info.getCreator()
-      // info.getProducer()
-      // info.getTrapped()
-      // formatDate(info.getCreationDate())
-      // formatDate(info.getModificationDate())
+      metadata.add(MetaData.TITLE, title);
+      metadata.add(MetaData.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
+      metadata.add(MetaData.AUTHOR, info.getAuthor());
+      metadata.add(MetaData.SUBJECT, info.getSubject());
+      metadata.add(MetaData.KEYWORDS, info.getKeywords());
+      metadata.add(MetaData.CREATOR, info.getCreator());
+      metadata.add(MetaData.PUBLISHER, info.getProducer());
+      
+      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
+      //error here
+      
+      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
+      //metadata.put(LAST_MODIFIED, dcDateFormatter.format(info.getModificationDate().getTime()));
 
     } catch (CryptographyException e) {
       return new ParseStatus(ParseStatus.FAILED,
@@ -139,6 +142,8 @@
       return new ParseStatus(ParseStatus.FAILED,
               "Can't decrypt document - invalid password. " + e).getEmptyParse();
     } catch (Exception e) { // run time exception
+      LOG.log(java.util.logging.Level.WARNING,
+              "General exception in PDF parser: " + e.getMessage(), e);
       return new ParseStatus(ParseStatus.FAILED,
               "Can't be handled as pdf document. " + e).getEmptyParse();
     } finally {
@@ -160,8 +165,7 @@
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text);
 
     // collect meta data
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
+    metadata.addAll(content.getMetadata()); // copy through
 
     ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
     return new ParseImpl(text, parseData);
Index: src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
===================================================================
--- src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(revision 366307)
+++ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java	(working copy)
@@ -26,19 +26,17 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
 import java.net.InetAddress;
 import java.net.URL;
 
 import java.util.List;
 import java.util.LinkedList;
-import java.util.Properties;
-
+import java.util.Date;
 import java.util.logging.Level;
 
 import java.io.ByteArrayOutputStream;
-//import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -61,7 +59,7 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private MetaData headers = new MetaData();
 
   private final Ftp ftp;
 
@@ -70,14 +68,14 @@
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.last(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(MetaData.CONTENT_TYPE),
                        headers);
   }
 
@@ -292,11 +290,14 @@
       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.add(MetaData.CONTENT_LENGTH,
+                       Long.toString(ftpFile.getSize()));
+      this.headers.add(MetaData.CONTRIBUTOR, "protocol-ftp");
+      this.headers.add(MetaData.DATE, 
+                       MetaData.DATE_FORMAT.format(new Date()));
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.add(MetaData.LAST_MODIFIED,
+                       ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -328,11 +329,11 @@
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
-      this.headers.put("Content-Length",
-        new Long(ftpFile.getSize()).toString());
+      this.headers.add(MetaData.CONTENT_LENGTH,
+                       Long.toString(ftpFile.getSize()));
       //this.headers.put("content-type", "text/html");
-      this.headers.put("Last-Modified",
-        ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
+      this.headers.add(MetaData.LAST_MODIFIED,
+                      ftp.httpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
 
 //      // approximate bytes sent and read
@@ -347,7 +348,8 @@
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
       // it is not a file, but dir, so redirect as a dir
-        this.headers.put("Location", path + "/");
+        this.headers.add(MetaData.LOCATION, path + "/");
+        this.headers.add(MetaData.IDENTIFIER, path + "/");
         this.code = 300;  // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
@@ -384,9 +386,9 @@
 
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.add(MetaData.CONTENT_LENGTH,
+                       Integer.toString(this.content.length));
+      this.headers.add(MetaData.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
@@ -406,9 +408,9 @@
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
-      this.headers.put("Content-Length",
-        new Integer(this.content.length).toString());
-      this.headers.put("Content-Type", "text/html");
+      this.headers.add(MetaData.CONTENT_LENGTH,
+                       Integer.toString(this.content.length));
+      this.headers.add(MetaData.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
 //      // approximate bytes sent and read
Index: src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
===================================================================
--- src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(revision 366307)
+++ src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java	(working copy)
@@ -16,18 +16,15 @@
 
 package org.apache.nutch.parse.text;
 
-import java.util.Properties;
-
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.util.*;
 
 public class TextParser implements Parser {
   public Parse getParse(Content content) {
     // copy content meta data through
-    ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata());
+    MetaData metadata = new MetaData();
+    metadata.addAll(content.getMetadata());
 
     //ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", new Outlink[0], metadata);
 
Index: src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(revision 366307)
+++ src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java	(working copy)
@@ -19,7 +19,7 @@
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
 import java.util.Properties;
 import java.io.*;
@@ -57,12 +57,12 @@
     byte[] bytes = out.toByteArray();
 
     Content content =
-      new Content(url, url, bytes, contentType, new ContentProperties());
+      new Content(url, url, bytes, contentType, new MetaData());
     Parse parse = ParseUtil.parseByParserId("parse-html",content);
 
-    ContentProperties metadata = parse.getData().getMetadata();
-    assertEquals(license, metadata.get("License-Url"));
-    assertEquals(location, metadata.get("License-Location"));
-    assertEquals(type, metadata.get("Work-Type"));
+    MetaData metadata = parse.getData().getMetadata();
+    assertEquals(license, metadata.last("License-Url"));
+    assertEquals(location, metadata.last("License-Location"));
+    assertEquals(type, metadata.last("Work-Type"));
   }
 }
Index: src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
===================================================================
--- src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(revision 366307)
+++ src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java	(working copy)
@@ -18,7 +18,7 @@
 
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.NutchConf;
 
 import java.util.*;
@@ -52,7 +52,7 @@
     }
 
     /** Scan the document adding attributes to metadata.*/
-    public static void walk(Node doc, URL base, ContentProperties metadata)
+    public static void walk(Node doc, URL base, MetaData metadata)
       throws ParseException {
 
       // walk the DOM tree, scanning for license data
@@ -78,13 +78,13 @@
       // add license to metadata
       if (licenseUrl != null) {
         LOG.info("CC: found "+licenseUrl+" in "+licenseLocation+" of "+base);
-        metadata.put("License-Url", licenseUrl);
-        metadata.put("License-Location", licenseLocation);
+        metadata.add("License-Url", licenseUrl);
+        metadata.add("License-Location", licenseLocation);
       }
 
       if (walker.workType != null) {
         LOG.info("CC: found "+walker.workType+" in "+base);
-        metadata.put("Work-Type", walker.workType);
+        metadata.add("Work-Type", walker.workType);
       }
 
     }
Index: src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
===================================================================
--- src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(revision 366307)
+++ src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java	(working copy)
@@ -25,6 +25,7 @@
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.util.MetadataNames;
 
 import java.util.Properties;
 
@@ -33,7 +34,7 @@
  *
  * @author Andy Hedges
  */
-public class TestRTFParser extends TestCase {
+public class TestRTFParser extends TestCase implements MetadataNames {
 
   private String fileSeparator = System.getProperty("file.separator");
   // This system property is defined in ./src/plugin/build-plugin.xml
@@ -71,7 +72,7 @@
     String title = parse.getData().getTitle();
     Properties meta = parse.getData().getMetadata();
     assertEquals("test rft document", title);
-    assertEquals("tests", meta.getProperty("subject"));
+    assertEquals("tests", meta.getProperty(SUBJECT));
 
 
 
Index: src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java
===================================================================
--- src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java	(revision 366307)
+++ src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParserDelegateImpl.java	(working copy)
@@ -22,18 +22,20 @@
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.nutch.util.MetadataNames;
+
 /**
  * A parser delegate for handling rtf events.
  * @author Andy Hedges
  */
-public class RTFParserDelegateImpl implements RTFParserDelegate {
+public class RTFParserDelegateImpl implements RTFParserDelegate, MetadataNames {
 
   String tabs = "";
   Properties metadata = new Properties();
 
-  String[] META_NAMES_TEXT = {"title", "subject", "author", "manager",
-                              "company", "operator", "category", "keywords",
-                              "comment", "doccomm", "hlinkbase"};
+  String[] META_NAMES_TEXT = {TITLE, SUBJECT, AUTHOR, "manager",
+                              "company", "operator", "category", KEYWORDS,
+                              COMMENTS, "doccomm", "hlinkbase"};
   String[] META_NAMES_DATE = {"creatim", "creatim", "printim", "buptim"};
 
   String metaName = "";
Index: src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
===================================================================
--- src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(revision 366307)
+++ src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java	(working copy)
@@ -19,6 +19,7 @@
 import org.apache.nutch.parse.*;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.MetadataNames;
 
 import java.io.ByteArrayInputStream;
 import java.io.InputStreamReader;
@@ -31,7 +32,7 @@
  * A parser for RTF documents
  * @author Andy Hedges
  */
-public class RTFParseFactory implements Parser {
+public class RTFParseFactory implements Parser, MetadataNames {
 
   public Parse getParse(Content content) throws ParseException {
     byte[] raw = content.getContent();
@@ -51,10 +52,13 @@
     Properties metadata = new Properties();
     metadata.putAll(content.getMetadata());
     metadata.putAll(delegate.getMetaData());
-    String title = metadata.getProperty("title");
+    String title = metadata.getProperty(TITLE);
 
     if(title != null){
-      metadata.remove(title);
+      //(CM): The old call here, metadata.remove(title), passed the title's
+      //value where a property key is expected, so it did not remove the
+      //title entry. The title is deliberately left in the metadata: it may
+      //still be useful there even though ParseData stores it as well.
     } else {
       title = "";
     }
Index: src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
===================================================================
--- src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(revision 366307)
+++ src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java	(working copy)
@@ -29,7 +29,6 @@
 import org.apache.nutch.net.protocols.HttpDateFormat;
 
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.protocol.ContentProperties;
 
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
@@ -39,6 +38,7 @@
 import org.apache.nutch.crawl.Inlinks;
 
 import org.apache.nutch.util.NutchConf;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.mime.MimeType;
 import org.apache.nutch.util.mime.MimeTypes;
 import org.apache.nutch.util.mime.MimeTypeException;
@@ -46,15 +46,12 @@
 import org.apache.nutch.util.LogFormatter;
 import java.util.logging.Logger;
 
-import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 
 import java.util.Date;
-import java.util.Locale;
 import java.util.TimeZone;
 import java.util.Enumeration;
-import java.util.Properties;
 
 import org.apache.commons.lang.time.DateUtils;
 /**
@@ -87,9 +84,9 @@
     throws IndexingException {
 
     String url_s = url.toString();
-    // normalize metaData (see note in the method below).
-    ContentProperties metaData = normalizeMeta(parse.getData().getMetadata());
 
+    MetaData metaData = parse.getData().getMetadata();
+
     addTime(doc, metaData, url_s, datum);
 
     addLength(doc, metaData, url_s);
@@ -103,11 +100,11 @@
     
   // Add time related meta info.  Add last-modified if present.  Index date as
   // last-modified, or, if that's not present, use fetch time.
-  private Document addTime(Document doc, ContentProperties metaData, String url,
+  private Document addTime(Document doc, MetaData metaData, String url,
                            CrawlDatum datum) {
     long time = -1;
 
-    String lastModified = metaData.getProperty("last-modified");
+    String lastModified = metaData.last(MetaData.LAST_MODIFIED);
     if (lastModified != null) {                   // try parse last-modified
       time = getTime(lastModified,url);           // use as time
                                                   // store as string
@@ -171,8 +168,8 @@
   }
 
   // Add Content-Length
-  private Document addLength(Document doc, ContentProperties metaData, String url) {
-    String contentLength = metaData.getProperty("content-length");
+  private Document addLength(Document doc, MetaData metaData, String url) {
+    String contentLength = metaData.last(MetaData.CONTENT_LENGTH);
 
     if (contentLength != null)
       doc.add(Field.UnIndexed("contentLength", contentLength));
@@ -181,9 +178,9 @@
   }
 
   // Add Content-Type and its primaryType and subType
-  private Document addType(Document doc, ContentProperties metaData, String url) {
+  private Document addType(Document doc, MetaData metaData, String url) {
     MimeType mimeType = null;
-    String contentType = metaData.getProperty("content-type");
+    String contentType = metaData.last(MetaData.CONTENT_TYPE);
     if (contentType == null) {
 	// Note by Jerome Charron on 20050415:
         // Content Type not solved by a previous plugin
@@ -261,8 +258,8 @@
     }
   }
 
-  private Document resetTitle(Document doc, ContentProperties metaData, String url) {
-    String contentDisposition = metaData.getProperty("content-disposition");
+  private Document resetTitle(Document doc, MetaData metaData, String url) {
+    String contentDisposition = metaData.last(MetaData.CONTENT_DISPOSITION);
     if (contentDisposition == null)
       return doc;
 
@@ -278,28 +275,4 @@
     return doc;
   }
 
-  // Meta info in nutch metaData are saved in raw form, i.e.,
-  // whatever the fetcher sees. To facilitate further processing,
-  // a "normalization" is necessary.
-  // This includes fixing http server oddities, such as:
-  // (*) non-uniform casing of header names
-  // (*) empty header value
-  // Note: the original metaData should be kept intact,
-  // because there is a benefit to preserve whatever comes from server.
-  private ContentProperties normalizeMeta(ContentProperties old) {
-      ContentProperties normalized = new ContentProperties();
-
-    for (Enumeration e = old.propertyNames(); e.hasMoreElements();) {
-      String key = (String) e.nextElement();
-      String value = old.getProperty(key).trim();
-      // some http server sends out header with empty value! if so, skip it
-      if (value == null || value.equals(""))
-        continue;
-      // convert key (but, not value) to lower-case
-      normalized.setProperty(key.toLowerCase(),value);
-    }
-
-    return normalized;
-  }
-
 }
Index: src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
===================================================================
--- src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(revision 366307)
+++ src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java	(working copy)
@@ -17,7 +17,6 @@
 package org.apache.nutch.parse.ext;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
@@ -28,12 +27,12 @@
 
 import org.apache.nutch.util.LogFormatter;
 import org.apache.nutch.util.CommandRunner;
+import org.apache.nutch.util.MetaData;
 
 import org.apache.nutch.plugin.Extension;
 import org.apache.nutch.plugin.PluginRepository;
 
 import java.util.Hashtable;
-import java.util.Properties;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -47,6 +46,7 @@
  */
 
 public class ExtParser implements Parser {
+
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.parse.ext");
 
@@ -112,7 +112,7 @@
       byte[] raw = content.getContent();
 
       String contentLength =
-        (String)content.getMetadata().get("Content-Length");
+        content.getMetadata().last(MetaData.CONTENT_LENGTH);
       if (contentLength != null
             && raw.length != Integer.parseInt(contentLength)) {
           return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
@@ -156,10 +156,10 @@
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(text);
 
     // collect meta data
-    ContentProperties metaData = new ContentProperties();
-    metaData.putAll(content.getMetadata()); // copy through
+    MetaData meta = new MetaData();
+    meta.addAll(content.getMetadata()); // copy through
 
-    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, meta);
     return new ParseImpl(text, parseData);
   }
 
Index: src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
===================================================================
--- src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(revision 366307)
+++ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java	(working copy)
@@ -16,7 +16,6 @@
 
 package org.apache.nutch.parse.html;
 
-import java.util.Properties;
 import java.util.ArrayList;
 import java.util.logging.*;
 import java.net.URL;
@@ -31,7 +30,6 @@
 import org.apache.html.dom.*;
 
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.*;
 import org.apache.nutch.parse.*;
 
@@ -107,19 +105,19 @@
     String text = "";
     String title = "";
     Outlink[] outlinks = new Outlink[0];
-    ContentProperties metadata = new ContentProperties();
+    MetaData metadata = new MetaData();
 
     // parse the content
     DocumentFragment root;
     try {
       byte[] contentInOctets = content.getContent();
       InputSource input = new InputSource(new ByteArrayInputStream(contentInOctets));
-      String contentType = content.getMetadata().getProperty("Content-Type");
+      String contentType = content.getMetadata().last(MetaData.CONTENT_TYPE);
       String encoding = StringUtil.parseCharacterEncoding(contentType);
       if (encoding!=null) {
-        metadata.put("OriginalCharEncoding", encoding);
+        metadata.add(MetaData.ORIGINAL_CHAR_ENCODING, encoding);
         if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
-          metadata.put("CharEncodingForConversion", encoding);
+          metadata.add(MetaData.CHAR_ENCODING_FOR_CONVERSION, encoding);
           LOG.fine(base + ": setting encoding to " + encoding);
         }
       }
@@ -128,9 +126,9 @@
       if (encoding == null) {
         encoding = sniffCharacterEncoding(contentInOctets);
         if (encoding!=null) {
-          metadata.put("OriginalCharEncoding", encoding);
+          metadata.add(MetaData.ORIGINAL_CHAR_ENCODING, encoding);
           if ((encoding = StringUtil.resolveEncodingAlias(encoding)) != null) {
-            metadata.put("CharEncodingForConversion", encoding);
+            metadata.add(MetaData.CHAR_ENCODING_FOR_CONVERSION, encoding);
             LOG.fine(base + ": setting encoding to " + encoding);
           }
         }
@@ -144,7 +142,7 @@
         // doesn't work for jp because euc-jp and shift_jis have about the
         // same share)
         encoding = defaultCharEncoding;
-        metadata.put("CharEncodingForConversion", defaultCharEncoding);
+        metadata.add(MetaData.CHAR_ENCODING_FOR_CONVERSION, defaultCharEncoding);
         LOG.fine(base + ": falling back to " + defaultCharEncoding);
       }
       input.setEncoding(encoding);
@@ -190,7 +188,7 @@
     }
     
     // copy content metadata through
-    metadata.putAll(content.getMetadata());
+    metadata.addAll(content.getMetadata());
     ParseStatus status = new ParseStatus(ParseStatus.SUCCESS);
     if (metaTags.getRefresh()) {
       status.setMinorCode(ParseStatus.SUCCESS_REDIRECT);
@@ -267,7 +265,7 @@
     in.readFully(bytes);
     Parse parse = new HtmlParser().getParse(new Content(url,url,
                                                         bytes,"text/html",
-                                                        new ContentProperties()));
+                                                        new MetaData()));
     System.out.println("data: "+parse.getData());
 
     System.out.println("text: "+parse.getText());
Index: src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
===================================================================
--- src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java	(revision 366307)
+++ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java	(working copy)
@@ -12,8 +12,8 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 import org.apache.nutch.util.NutchConf;
 
 /**
@@ -46,12 +46,12 @@
     
     private HttpAuthenticationFactory() { }
     
-    public static HttpAuthentication findAuthentication(ContentProperties header) {
+    public static HttpAuthentication findAuthentication(MetaData header) {
         if (header == null) return null;
         
     	try {
 			Collection challenge = null;
-			if (header instanceof ContentProperties) {
+			if (header instanceof MetaData) {
 				Object o = header.get(AUTH_HEADER);
 				if (o instanceof Collection) {
 					challenge = (Collection) o;
@@ -60,7 +60,7 @@
 					challenge.add(o.toString());
 				}
 			} else {
-				String challengeString = header.getProperty(AUTH_HEADER); 
+				String challengeString = header.last(AUTH_HEADER); 
 				if (challengeString != null) {
 					challenge = new ArrayList();
 					challenge.add(challengeString);
Index: src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
===================================================================
--- src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java	(revision 366307)
+++ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java	(working copy)
@@ -5,7 +5,7 @@
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
+import org.apache.nutch.util.MetaData;
 
 import org.apache.commons.httpclient.Header;
 import org.apache.commons.httpclient.HttpVersion;
@@ -18,6 +18,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
+import java.util.Date;
 
 /**
  * An HTTP response.
@@ -34,7 +35,7 @@
 
   private int code;
 
-  private ContentProperties headers = new ContentProperties();
+  private MetaData headers = new MetaData();
 
   /**
    * Returns the response code.
@@ -47,7 +48,7 @@
    * Returns the value of a named header.
    */
   public String getHeader(String name) {
-    return (String) headers.get(name);
+    return headers.last(name);
   }
 
   public byte[] getContent() {
@@ -57,7 +58,7 @@
   public Content toContent() {
     return new Content(orig, base,
                        (content == null ? EMPTY_CONTENT : content),
-                       getHeader("Content-Type"),
+                       getHeader(MetaData.CONTENT_TYPE),
                        headers);
   }
 
@@ -88,8 +89,12 @@
       Header[] heads = get.getResponseHeaders();
 
       for (int i = 0; i < heads.length; i++) {
-        headers.setProperty(heads[i].getName(), heads[i].getValue());
+        headers.add(heads[i].getName(), heads[i].getValue());
       }
+      
+      headers.add(MetaData.CONTRIBUTOR, "protocol-httpclient");
+      headers.add(MetaData.DATE, MetaData.DATE_FORMAT.format(new Date()));
+      
       // always read content. Sometimes content is useful to find a cause
       // for error.
       try {
Index: src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
===================================================================
--- src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(revision 366307)
+++ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java	(working copy)
@@ -28,13 +28,13 @@
 import java.net.URL;
 import java.util.Map;
 import java.util.TreeMap;
-import java.util.Properties;
+import java.util.Date;
 import java.util.logging.Level;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.protocol.ProtocolException;
+import org.apache.nutch.util.MetaData;
 
 import org.apache.nutch.util.GZIPUtils;
 
@@ -46,21 +46,21 @@
   private String base;
   private byte[] content;
   private int code;
-  private ContentProperties headers = new ContentProperties();
+  private MetaData headers = new MetaData();
 
   /** Returns the response code. */
   public int getCode() { return code; }
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
-    return (String)headers.get(name);
+    return headers.last(name);
   }
 
   public byte[] getContent() { return content; }
 
   public Content toContent() {
     return new Content(orig, base, content,
-                       getHeader("Content-Type"),
+                       getHeader(MetaData.CONTENT_TYPE),
                        headers);
   }
 
@@ -150,13 +150,17 @@
         // parse status code line
         this.code = parseStatusLine(in, line); 
         // parse headers
-        headers.putAll(parseHeaders(in, line));
+        headers.addAll(parseHeaders(in, line));
+        headers.add(MetaData.CONTRIBUTOR, "protocol-http");
+        headers.add(MetaData.DATE, 
+                    MetaData.DATE_FORMAT.format(new Date()));
+        
         haveSeenNonContinueStatus= code != 100; // 100 is "Continue"
       }
 
       readPlainContent(in);
 
-      String contentEncoding= getHeader("Content-Encoding");
+      String contentEncoding = getHeader(MetaData.CONTENT_ENCODING);
       if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
         Http.LOG.fine("uncompressing....");
         byte[] compressed = content;
@@ -186,7 +190,7 @@
     throws HttpException, IOException {
 
     int contentLength = Integer.MAX_VALUE;    // get content length
-    String contentLengthString = (String)headers.get("Content-Length");
+    String contentLengthString = headers.last(MetaData.CONTENT_LENGTH);
     if (contentLengthString != null) {
       contentLengthString = contentLengthString.trim();
       try {
@@ -334,7 +338,6 @@
       valueStart++;
     }
     String value = line.substring(valueStart);
-
     headers.put(key, value);
   }
 
@@ -360,6 +363,11 @@
         line.setLength(pos);
 
         try {
+          // TODO (CM): header names are handled generically here, so
+          // they are not known in advance. It would be nice to provide
+          // a mapping function from the returned header names to the
+          // standard metadata names (e.g. the MetaData constants such
+          // as MetaData.CONTENT_TYPE).
           processHeaderLine(line, headers);
         } catch (Exception e) {
           // fixme:
Index: src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java
===================================================================
--- src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java	(revision 366307)
+++ src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java	(working copy)
@@ -30,8 +30,8 @@
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.ContentProperties;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 
 /**
  * Nutch-Parser for parsing MS PowerPoint slides ( mime type:
@@ -74,10 +74,10 @@
 
     byte[] raw = getRawBytes(new File(file));
 
-    ContentProperties prop = new ContentProperties();
-    prop.setProperty("Content-Length", "" + raw.length);
+    MetaData meta = new MetaData();
+    meta.set(MetaData.CONTENT_LENGTH, "" + raw.length);
 
-    Content content = new Content(file, file, raw, MIME_TYPE, prop);
+    Content content = new Content(file, file, raw, MIME_TYPE, meta);
 
     System.out.println(ppe.getParse(content).getText());
   }
@@ -95,7 +95,7 @@
     Properties properties = null;
 
     try {
-      final String contentLen = content.get("Content-Length");
+      final String contentLen = content.get(MetaData.CONTENT_LENGTH);
       final byte[] raw = content.getContent();
 
       if (contentLen != null && raw.length != Integer.parseInt(contentLen)) {
@@ -122,13 +122,16 @@
     }
 
     // collect meta data
-    final ContentProperties metadata = new ContentProperties();
-    metadata.putAll(content.getMetadata()); // copy through
+    final MetaData metadata = new MetaData();
+    metadata.setAll(content.getMetadata()); // copy through
 
     if (properties != null) {
-      title = properties.getProperty("Title");
-      properties.remove("Title");
-      metadata.putAll(properties);
+      title = properties.getProperty(MetaData.TITLE);
+      // (CM): The title is intentionally no longer removed from the
+      // properties here. Although it is also stored in the ParseData,
+      // it may still be useful to have it available in the metadata.
+      //properties.remove("Title");
+      metadata.addAll(properties);
     }
 
     if (plainText == null) {
Index: src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java
===================================================================
--- src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java	(revision 366307)
+++ src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java	(working copy)
@@ -23,6 +23,7 @@
 
 import org.apache.nutch.parse.mspowerpoint.PPTExtractor.PropertiesBroker;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.MetaData;
 import org.apache.poi.hpsf.PropertySetFactory;
 import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
@@ -74,12 +75,14 @@
         final SummaryInformation sInfo = (SummaryInformation) PropertySetFactory
             .create(event.getStream());
 
-        addProperty("Title", sInfo.getTitle());
-        addProperty("Subject", sInfo.getSubject());
-        addProperty("Keywords", sInfo.getKeywords());
-        addProperty("Comments", sInfo.getComments());
-        addProperty("Author", sInfo.getAuthor());
-        addProperty("Last-Author", sInfo.getLastAuthor());
+        addProperty(MetaData.TITLE, sInfo.getTitle());
+        addProperty(MetaData.SUBJECT, sInfo.getSubject());
+        // (CM): Dublin Core (DC) treats "Subject" as equivalent to "Keywords",
+        // so the keywords are stored under a Nutch-specific property instead.
+        addProperty(MetaData.KEYWORDS, sInfo.getKeywords());
+        addProperty(MetaData.COMMENTS, sInfo.getComments());
+        addProperty(MetaData.CREATOR, sInfo.getAuthor());
+        addProperty(MetaData.LAST_AUTHOR, sInfo.getLastAuthor());
 
         /*
          * already provided by nutch
@@ -126,4 +129,4 @@
       this.properties.setProperty(name, this.dateFormatter.format(value));
     }
   }
-}
\ No newline at end of file
+}
