Index: tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- tika-core/src/main/java/org/apache/tika/metadata/Metadata.java	(date 1455626693000)
+++ tika-core/src/main/java/org/apache/tika/metadata/Metadata.java	(revision )
@@ -20,21 +20,31 @@
 import static org.apache.tika.utils.DateUtils.UTC;
 import static org.apache.tika.utils.DateUtils.formatDate;
 
+import javax.xml.bind.DatatypeConverter;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.io.Serializable;
 import java.text.DateFormat;
 import java.text.DateFormatSymbols;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.TimeZone;
 
 import org.apache.tika.metadata.Property.PropertyType;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
 
 /**
  * A multi-valued metadata container.
@@ -261,8 +271,142 @@
             return null;
         }
     }
-    
+
     /**
+     * Returns the value of the identified DOM based metadata property.
+     * @since Apache Tika 1.13
+     * @param property DOM property definition, must not be <code>null</code>
+     * @return the DOM, or <code>null</code> if unset, not valid Base64 or not parseable XML
+     */
+    public Document getDOM(Property property) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        final boolean domTyped = property.getValueType().equals(Property.ValueType.DOM);
+        if (! domTyped) {
+            final String detail = property.getName() + " : " + property.getPropertyType();
+            throw new PropertyTypeException(detail);
+        }
+        final String encoded = get(property);
+        return convertToDOM(encoded);
+    }
+
+    /**
+     * Returns the values of the identified DOM based metadata property.
+     * <p/>
+     * Each stored value is Base64 decoded and parsed independently. No
+     * exception is thrown for a value that cannot be decoded or parsed;
+     * the corresponding array element is <code>null</code> instead.
+     * Consider {@link #getByteArrays(Property)}, to get the raw bytes.
+     *
+     * @since Apache Tika 1.13
+     * @param property DOM property definition, must not be <code>null</code>
+     * @return one DOM per stored value, in the order the values are returned;
+     * an element is <code>null</code> if it is not valid Base64 or not parseable XML
+     * @throws PropertyTypeException if the property's value type is not DOM
+     */
+    public Document[] getDOMs(Property property) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        if (! property.getValueType().equals(Property.ValueType.DOM)) {
+            throw new PropertyTypeException(property.getName() +
+                    " : " + property.getPropertyType());
+        }
+        String[] values = getValues(property);
+        Document[] doms = new Document[values.length];
+        for (int i = 0; i < values.length; i++) {
+            //decode/parse failures are swallowed by convertToDOM and surface as null
+            doms[i] = convertToDOM(values[i]);
+        }
+        return doms;
+    }
+
+    /**
+     * Returns the value of the identified <code>byte[]</code> based metadata property.
+     *
+     * @since Apache Tika 1.13
+     * @param property BYTES or DOM property definition, must not be <code>null</code>
+     * @return the Base64 decoded bytes, or <code>null</code> if the property
+     * is not set or its value is not valid Base64
+     * @throws PropertyTypeException if the value type is neither BYTES nor DOM
+     */
+    public byte[] getByteArray(Property property) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        final boolean binaryTyped = property.getValueType().equals(Property.ValueType.BYTES)
+                || property.getValueType().equals(Property.ValueType.DOM);
+        if (! binaryTyped) {
+            final String detail = property.getName() + " : " + property.getPropertyType();
+            throw new PropertyTypeException(detail);
+        }
+        return convertToBytes(get(property));
+    }
+
+    /**
+     * Returns the values of the identified <code>byte[]</code> based metadata property.
+     * <p/>
+     * No exception is thrown for a stored value that cannot be decoded as
+     * Base64; the corresponding list element is <code>null</code> instead.
+     *
+     * @since Apache Tika 1.13
+     * @param property BYTES or DOM property definition, must not be <code>null</code>
+     * @return one <code>byte[]</code> per stored value; an element is
+     * <code>null</code> if that value is not valid Base64
+     */
+    public List<byte[]> getByteArrays(Property property) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        if (! property.getValueType().equals(Property.ValueType.BYTES)
+                && ! property.getValueType().equals(Property.ValueType.DOM)) {
+            throw new PropertyTypeException(property.getName() +
+                    " : " + property.getPropertyType());
+        }
+        String[] vals = getValues(property);
+        List<byte[]> bytes = new ArrayList<>(vals.length);
+        for (String val : vals) {
+            bytes.add(convertToBytes(val));
+        }
+        return bytes;
+    }
+
+    //decode and parse as namespace-aware XML, return null if there's a problem
+    private Document convertToDOM(String base64Encoded) {
+        byte[] bytes = convertToBytes(base64Encoded);
+        if (bytes == null) {
+            return null;
+        }
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        factory.setNamespaceAware(true);
+        try {
+            //stored XML may come from untrusted files: reject DOCTYPEs to block XXE/entity bombs
+            factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
+            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            builder.setErrorHandler(null); //turn off writing to STDERR on parse exception
+            return builder.parse(new ByteArrayInputStream(bytes));
+        } catch (ParserConfigurationException|SAXException|IOException e) {
+            return null; //swallow: unparseable XML or an unsupported parser feature yields null
+        }
+    }
+
+    //Base64-decodes the stored string; yields null for null or undecodable input
+    private byte[] convertToBytes(String base64Encoded) {
+        byte[] decoded = null;
+        if (base64Encoded != null) {
+            try {
+                //TODO: when we move to Java 1.8 use java.util.BASE64
+                decoded = DatatypeConverter.parseBase64Binary(base64Encoded);
+            } catch (IllegalArgumentException e) {
+                //swallow: decoded stays null
+            }
+        }
+        return decoded;
+    }
+    
+    /**
      * Get the values associated to a metadata name.
      * 
      * @param property
@@ -316,8 +460,31 @@
             metadata.put(name, appendedValues(values, value));
         }
     }
-    
+
+
     /**
+     * Add a metadata property/value mapping. The specified bytes are Base64
+     * encoded and added to the list of values associated with the specified
+     * property, whose value type must be BYTES or DOM (see
+     * {@link org.apache.tika.metadata.Property.ValueType}).
+     * @param property property definition, must not be <code>null</code>
+     * @param bytes raw bytes to store
+     */
+    public void add(final Property property, final byte[] bytes) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        final boolean binaryTyped = property.getValueType().equals(Property.ValueType.BYTES)
+            || property.getValueType().equals(Property.ValueType.DOM);
+        if (! binaryTyped) {
+            final String detail = property.getName() + " : " + property.getPropertyType();
+            throw new PropertyTypeException(detail);
+        }
+        //TODO: when we move to Java 1.8 use java.util.BASE64
+        add(property, DatatypeConverter.printBase64Binary(bytes));
+    }
+
+    /**
      * Add a metadata property/value mapping. Add the specified value to the list of
      * values associated to the specified metadata property.
      * 
@@ -395,7 +562,7 @@
             set(property.getName(), value);
         }
     }
-    
+
     /**
      * Sets the values of the identified metadata property.
      *
@@ -494,6 +661,24 @@
             dateString = formatDate(date);
         }
         set(property, dateString);
+    }
+
+    /**
+     * Sets the value of the identified metadata property as a Base64 encoded String.
+     * @since Apache Tika 1.13
+     * @param property property definition of value type BYTES or DOM, must not be <code>null</code>
+     * @param bytes   property value to be stored as a Base64 encoded String
+     */
+    public void set(Property property, byte[] bytes) {
+        if (property == null) {
+            throw new NullPointerException("property must not be null");
+        }
+        if (!property.getValueType().equals(Property.ValueType.BYTES)
+                && !property.getValueType().equals(Property.ValueType.DOM)) {
+            throw new PropertyTypeException(property.getName() +
+                    " : " + property.getPropertyType());
+        }
+        set(property, DatatypeConverter.printBase64Binary(bytes)); //TODO: java.util.Base64 on Java 1.8
     }
 
     /**
Index: tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java	(date 1455626693000)
+++ tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java	(revision )
@@ -17,14 +17,8 @@
 package org.apache.tika.metadata;
 
 //JDK imports
-import java.util.Date;
-import java.util.Properties;
 
-import org.apache.tika.utils.DateUtils;
-import org.junit.Test;
-
-
-//Junit imports
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -32,6 +26,18 @@
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.tika.utils.DateUtils;
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+
+//Junit imports
+
 /**
  * JUnit based tests of class {@link org.apache.tika.metadata.Metadata}.
  */
@@ -347,8 +353,134 @@
         meta.set(TikaCoreProperties.CREATED, DateUtils.formatDateUnknownTimezone(new Date(1000)));
         assertEquals("should return string without time zone specifier because zone is not known",
                          "1970-01-01T00:00:01", meta.get(TikaCoreProperties.CREATED));
+    }
+
+    @Test
+    public void testGetSetBinary() throws Exception {
+        final String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">普林斯顿大学</text>" +
+                "</xdp>";
+        final byte[] expected = xml.getBytes(StandardCharsets.UTF_8);
+        final Property bytesProp = Property.internalByteArray("testBytes");
+
+        final Metadata metadata = new Metadata();
+        metadata.set(bytesProp, expected);
+        //the raw bytes must survive the Base64 round trip unchanged
+        assertArrayEquals(expected, metadata.getByteArray(bytesProp));
+
+        //a BYTES property must not be readable as a DOM
+        try {
+            metadata.getDOM(bytesProp);
+            fail("Should have thrown PropertyTypeException");
+        } catch (PropertyTypeException e) {
+            //expected
+        }
+    }
+
+    @Test
+    public void testGetSetDOM() throws Exception {
+        final String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">普林斯顿大学</text>" +
+                "</xdp>";
+        final byte[] raw = xml.getBytes(StandardCharsets.UTF_8);
+        final Property domProp = Property.internalDOM("testDOM");
+
+        final Metadata metadata = new Metadata();
+        metadata.set(domProp, raw);
+        //can get raw bytes for a DOM
+        assertArrayEquals(raw, metadata.getByteArray(domProp));
+
+        final Document document = metadata.getDOM(domProp);
+        final Node root = document.getFirstChild();
+        assertEquals("xdp", root.getNodeName());
+
+        //walk the root's children looking for the <text> element
+        boolean sawText = false;
+        for (Node child = root.getFirstChild(); child != null; child = child.getNextSibling()) {
+            if (child.getNodeName().equals("text")) {
+                assertEquals("普林斯顿大学",
+                        child.getTextContent());
+                sawText = true;
+            }
+        }
+        assertTrue("failed to find child text", sawText);
+    }
+
+    @Test
+    public void testGetInvalidDOM() throws Exception {
+        //deliberately malformed: the <xdp> element is never closed
+        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">普林斯顿大学</text>";
+        byte[] bytes = xml.getBytes(StandardCharsets.UTF_8);
+        Metadata m = new Metadata();
+        Property domProp = Property.internalDOM("testDOM");
+        m.set(domProp, bytes);
+        //can get raw bytes for a DOM
+        assertArrayEquals(bytes, m.getByteArray(domProp));
+        assertNull("should return null for invalid document", m.getDOM(domProp));
+        try {
+            m.getDOM(TikaCoreProperties.CREATOR);
+            fail("should have thrown PropertyTypeException");
+        } catch (PropertyTypeException e) {
+            //expected: this test relies on CREATOR not being a DOM typed property
+        }
+    }
+
+    @Test
+    public void testAddGetByteArrays() throws Exception {
+        final String first = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">普林斯顿大学</text>" +
+                "</xdp>";
+        final String second = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">新泽西州</text>" +
+                "</xdp>";
+        final Property seqProp = Property.internalByteArraySequence("bytes");
+        final Metadata metadata = new Metadata();
+        metadata.add(seqProp, first.getBytes(StandardCharsets.UTF_8));
+        metadata.add(seqProp, second.getBytes(StandardCharsets.UTF_8));
+        final List<byte[]> stored = metadata.getByteArrays(seqProp);
+        assertEquals(2, stored.size());
+        final String decodedSecond = new String(stored.get(1), StandardCharsets.UTF_8);
+        assertTrue(decodedSecond.contains("<text name=\"producer\">新泽西州</text>"));
+    }
+
+    @Test
+    public void testAddGetDoms() throws Exception {
+        final String first = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">普林斯顿大学</text>" +
+                "</xdp>";
+        final String second = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+                "<xdp>" +
+                "  <text name=\"producer\">新泽西州</text>" +
+                "</xdp>";
+        final Property seqProp = Property.internalDOMSequence("dom");
+        final Metadata metadata = new Metadata();
+        metadata.add(seqProp, first.getBytes(StandardCharsets.UTF_8));
+        metadata.add(seqProp, second.getBytes(StandardCharsets.UTF_8));
+
+        final Document[] parsed = metadata.getDOMs(seqProp);
+        assertEquals(2, parsed.length);
+        final Node root = parsed[1].getFirstChild();
+        assertEquals("xdp", root.getNodeName());
+
+        //walk the second document's root looking for the <text> element
+        boolean sawText = false;
+        for (Node child = root.getFirstChild(); child != null; child = child.getNextSibling()) {
+            if (child.getNodeName().equals("text")) {
+                assertEquals("新泽西州",
+                        child.getTextContent());
+                sawText = true;
+            }
+        }
+        assertTrue("failed to find child text", sawText);
     }
-    
+
     /**
      * Defines a composite property, then checks that when set as the
      *  composite the value can be retrieved with the property or the aliases
Index: tika-core/src/main/java/org/apache/tika/metadata/Property.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- tika-core/src/main/java/org/apache/tika/metadata/Property.java	(date 1455626693000)
+++ tika-core/src/main/java/org/apache/tika/metadata/Property.java	(revision )
@@ -52,7 +52,8 @@
 
     public static enum ValueType {
         BOOLEAN, OPEN_CHOICE, CLOSED_CHOICE, DATE, INTEGER, LOCALE,
-        MIME_TYPE, PROPER_NAME, RATIONAL, REAL, TEXT, URI, URL, XPATH, PROPERTY
+        MIME_TYPE, PROPER_NAME, RATIONAL, REAL, TEXT, URI, URL, XPATH, PROPERTY,
+        BYTES, DOM
     }
 
     private static final Map<String, Property> properties =
@@ -269,6 +270,38 @@
         return new Property(name, true, ValueType.URI);
     }
 
+    /**
+     * Creates an internal property of value type {@link ValueType#BYTES}.
+     * {@link Metadata} stores such values as Base64 encoded Strings.
+     */
+    public static Property internalByteArray(String name) {
+        return new Property(name, true, ValueType.BYTES);
+    }
+
+    /**
+     * Creates an internal property of value type {@link ValueType#DOM}, i.e.
+     * XML bytes that {@link Metadata} stores Base64 encoded and parses on read.
+     */
+    public static Property internalDOM(String name) {
+        return new Property(name, true, ValueType.DOM);
+    }
+
+    /**
+     * Creates an internal {@link PropertyType#SEQ} property whose values are
+     * of value type {@link ValueType#BYTES}.
+     */
+    public static Property internalByteArraySequence(String name) {
+        return new Property(name, true, PropertyType.SEQ, ValueType.BYTES);
+    }
+
+    /**
+     * Creates an internal {@link PropertyType#SEQ} property whose values are
+     * of value type {@link ValueType#DOM}.
+     */
+    public static Property internalDOMSequence(String name) {
+        return new Property(name, true, PropertyType.SEQ, ValueType.DOM);
+    }
+
     public static Property externalClosedChoise(
             String name, String... choices) {
         return new Property(name, false, ValueType.CLOSED_CHOICE, choices);
@@ -302,6 +335,8 @@
     public static Property externalTextBag(String name) {
         return new Property(name, false, PropertyType.BAG, ValueType.TEXT);
     }
+
+
 
     /**
      * Constructs a new composite property from the given primary and array of secondary properties.
