Index: tika-parsers/src/test/resources/test-documents/microdata.html
===================================================================
--- tika-parsers/src/test/resources/test-documents/microdata.html	(revision 0)
+++ tika-parsers/src/test/resources/test-documents/microdata.html	(revision 0)
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+        <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+        <title>Tika microdata test</title>
+</head>
+<body itemscope="itemscope" itemtype="http://schema.org/WebPage">
+        <ul itemprop="breadcrumb">
+                <li><a itemprop="url" href="http://tika.apache.org/">Apache Tika</a></li>
+                <li>Microdata</li>
+        </ul>
+
+        <div itemscope="itemscope" itemtype="http://schema.org/Event">
+                <h2 itemprop="name">ApacheCon Europe 2012\</h2>
+                <section itemprop="description">Details of the annual ApacheCon meetings held in Europe and the United States, with registration information and an archive of previous meetings.</section>
+
+                <span itemprop="location" itemscope itemtype="http://schema.org/Place">
+                        <span itemprop="name">Sinsheim, Germany</span>
+                </span>
+
+                <a href="http://apachecon.eu/" itemprop="url">apachecon.eu</a>
+
+                <time itemprop="startDate">2012-11-05</time>
+                <time itemprop="duration" datetime="PD02TH17M17S0">a few days</time>
+                <time itemprop="endDate">2012-11-08</time>
+
+                <meta content="EUR" itemprop="priceCurrency">
+                <span itemprop="price">17.50</span>
+        </div>
+
+        <div itemscope="itemscope" itemtype="http://schema.org/Event">
+                <h2 itemprop="name">ApacheCon North America 2013</h2>
+                <section itemprop="description">Details of the annual ApacheCon meetings held in Europe and the United States, with registration information and an archive of previous meetings.</section>
+
+                <span itemprop="location" itemscope itemtype="http://schema.org/Place">
+                        <span itemprop="name">Portland, Oregon</span>
+                </span>
+
+                <a href="http://na.apachecon.com/" itemprop="url">na.apachecon.com</a>
+
+                <time itemprop="startDate">2013-02-24</time>
+                <time itemprop="duration" datetime="PD02TH17M17S0">a few days</time>
+                <time itemprop="endDate">2013-03-02</time>
+
+                <meta content="USD" itemprop="priceCurrency">
+                <span itemprop="price">17.50</span>
+        </div>
+</body>
+</html>
\ No newline at end of file
Index: tika-parsers/src/test/java/org/apache/tika/parser/html/MicrodataContentHandlerTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/html/MicrodataContentHandlerTest.java	(revision 0)
+++ tika-parsers/src/test/java/org/apache/tika/parser/html/MicrodataContentHandlerTest.java	(revision 0)
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.html;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.helpers.AttributesImpl;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.html.microdata.*;
+import org.apache.tika.parser.html.MicrodataContentHandler;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+/**
+ * Test cases for the {@link MicrodataContentHandler} class.
+ */
+public class MicrodataContentHandlerTest extends TestCase {
+
+    /**
+     * Parses the microdata.html fixture and checks the collected item scopes:
+     * index 0 is the page-level scope, indexes 1 and 2 are the two Event scopes.
+     */
+    public void testMicrodataParser() throws SAXException, IOException, TikaException  {
+        String path = "/test-documents/microdata.html";
+
+        // Set up a parse context
+        ParseContext context = new ParseContext();
+
+        // ..and tell the parser not to remap HTML elements or we'll lose important other mark up
+        context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
+
+        Metadata metadata = new Metadata();
+        MicrodataContentHandler handler = new MicrodataContentHandler();
+        new HtmlParser().parse(
+                MicrodataContentHandlerTest.class.getResourceAsStream(path),
+                handler,  metadata, context);
+
+        // Get the microdata items; fail with a clear message rather than an index error
+        List<ItemScope> items = handler.getItems();
+        assertNotNull("handler returned no item list", items);
+        assertTrue("expected at least three item scopes but got " + items.size(), items.size() >= 3);
+
+        // TODO(review): the type check for items.get(0) ("http://schema.org/WebPage") was disabled in the
+        // original test; ItemScope.getType() returns a URL, so compare via toString() if it is re-enabled.
+
+        // Check for the bread crumb, it's a nested item and must not contain the item value from the attribute
+        assertEquals("Apache Tika Microdata", items.get(0).getProperties().get("breadcrumb").get(0).getValue().getContent());
+
+        // Do we have a description field
+        assertNotNull(items.get(1).getProperties().get("description"));
+
+        // Check a price
+        assertEquals("17.50", items.get(1).getProperties().get("price").get(0).getValue().getContent());
+
+        // Check a date
+        assertNotNull(items.get(1).getProperties().get("startDate"));
+
+        // Check a property in in-body meta tag (bad practice)
+        assertEquals("EUR", items.get(1).getProperties().get("priceCurrency").get(0).getValue().getContent());
+
+        // Check that the href attribute of a link element is used as the property value
+        assertEquals("http://apachecon.eu/", items.get(1).getProperties().get("url").get(0).getValue().getContent());
+
+        // Check a nested field
+        assertEquals("Portland, Oregon", items.get(2).getProperties().get("location").get(0).getValue().getAsNested().getProperties().get("name").get(0).getValue().getContent());
+    }
+}
\ No newline at end of file
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemScope.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemScope.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemScope.java	(revision 0)
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.html.microdata;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class describes a <b>Microdata <i>itemscope</i></b>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ItemScope {
+
+    /**
+     * Map of properties and multi values.
+     */
+    private Map<String, List<ItemProp>> properties = new HashMap<String, List<ItemProp>>();
+
+    /**
+     * <i>itemscope</i> references.
+     */
+    private String[] refs;
+
+    /**
+     * <i>itemscope</i> type.
+     */
+    private URL type;
+    
+    public ItemScope() {}
+
+    /**
+     * Constructor.
+     *
+     * @param itemProps list of properties bound to this <i>itemscope</i>; must not be <code>null</code>
+     *                  and must not contain <code>null</code> elements. Properties sharing a name are
+     *                  grouped under that name in array order.
+     * @param refs      list of item prop references connected to this <i>itemscope</i>. Can be <code>null</code>.
+     * @param type      <i>itemscope</i> type. Can be <code>null</code>.
+     * @throws NullPointerException     if <code>itemProps</code> is <code>null</code>.
+     * @throws IllegalArgumentException if <code>type</code> is given but is not a valid URL.
+     */
+    public ItemScope(ItemProp[] itemProps, String[] refs, String type) {
+        if (itemProps == null) {
+            throw new NullPointerException("itemProps list cannot be null.");
+        }
+        if (type != null) {
+            try {
+                this.type = new URL(type);
+            } catch (MalformedURLException murle) {
+                // Keep the original exception as the cause so the parse failure is not lost.
+                throw new IllegalArgumentException("Invalid type '" + type + "', must be a valid URL.", murle);
+            }
+        } else {
+            this.type = null;
+        }
+
+        // The array is stored as passed in, without a defensive copy.
+        this.refs = refs;
+
+        // Group the properties by name. The map and its lists stay mutable because
+        // acquireProperty() and disownProperty() modify them after construction.
+        final Map<String, List<ItemProp>> grouped = new HashMap<String, List<ItemProp>>();
+        for (ItemProp itemProp : itemProps) {
+            final String propName = itemProp.getName();
+            List<ItemProp> propList = grouped.get(propName);
+            if (propList == null) {
+                propList = new ArrayList<ItemProp>();
+                grouped.put(propName, propList);
+            }
+            propList.add(itemProp);
+        }
+
+        this.properties = grouped;
+    }
+
+    /**
+     * @return map of declared properties, every property can have more than a value.
+     */
+    public Map<String, List<ItemProp>> getProperties() {
+        return properties;
+    }
+
+    /**
+     * @return <i>itemscope</i> list of references to <i>itemprop</i>s.
+     */
+    public String[] getRefs() {
+        return refs;
+    }
+
+    /**
+     * @return <i>itemscope</i> type.
+     */
+    public URL getType() {
+        return type;
+    }
+    
+    /**
+     * Sets the <i>itemscope</i> type.
+     */
+    public void setType(URL type) {
+        this.type = type;
+    }
+    
+    /**
+     * Sets the <i>itemscope</i> type from a string; if it is not a valid URL the current type is left unchanged.
+     */
+    public void setType(String type) {
+        try {
+            this.type = new URL(type);
+        } catch (MalformedURLException e) {} // NOTE(review): swallowed; the type keeps its previous value
+    }
+    
+    /**
+     * Renders this <i>itemscope</i> as a JSON-like string with the members
+     * "refs", "type" and "properties"; absent refs or type are written as null.
+     * Value lists left empty by {@link #disownProperty(ItemProp)} contribute
+     * nothing, so no dangling separators end up in the output.
+     *
+     * @return the textual form, with all properties flattened into one array.
+     */
+    public String toJSON() {
+        final StringBuilder props = new StringBuilder();
+        for (List<ItemProp> sameName : properties.values()) {
+            for (ItemProp prop : sameName) {
+                // Separate from whatever was written before, never after the last element.
+                if (props.length() > 0) {
+                    props.append(", ");
+                }
+                props.append(prop);
+            }
+        }
+
+        final String refsJson = refs == null ? null : toJSON(refs);
+        final String typeJson = type == null ? null : "\"" + type + "\"";
+        return String.format(
+                "{ \"refs\" : %s, \"type\" : %s, \"properties\" : [ %s ] }",
+                refsJson,
+                typeJson,
+                props);
+    }
+
+    @Override
+    public String toString() {
+        return toJSON();
+    }
+
+    /**
+     * Hash consistent with {@link #equals(Object)}: refs by content, type by its text form.
+     */
+    @Override
+    public int hashCode() {
+        int result = properties == null ? 0 : properties.hashCode();
+        result = 31 * result + Arrays.hashCode(refs);
+        result = 31 * result + (type == null ? 0 : type.toExternalForm().hashCode());
+        return result;
+    }
+
+    /**
+     * Two scopes are equal when their properties, their references (element-wise)
+     * and their type (compared as text, which avoids URL host resolution) are equal.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == this) {
+            return true;
+        }
+        if (!(obj instanceof ItemScope)) {
+            return false;
+        }
+        final ItemScope other = (ItemScope) obj;
+        return (properties == null ? other.properties == null : properties.equals(other.properties))
+                && Arrays.equals(refs, other.refs)
+                && (type == null ? other.type == null : other.type != null && type.toExternalForm().equals(other.type.toExternalForm()));
+    }
+
+    public void acquireProperty(ItemProp itemProp) { // registers the property under its own name
+        List<ItemProp> itemProps = properties.get(itemProp.getName());
+        if (itemProps == null) { // first property with this name: start a new value list
+            itemProps = new ArrayList<ItemProp>();
+            properties.put(itemProp.getName(), itemProps);
+        }
+        if (!itemProps.contains(itemProp)) itemProps.add(itemProp); // an equal property is not added twice
+    }
+
+    public void disownProperty(ItemProp itemProp) { // removes the property from the list kept under its name
+        List<ItemProp> propList = properties.get(itemProp.getName());
+        if (propList != null) propList.remove(itemProp); // the list stays in the map even when it becomes empty
+    }
+
+    /**
+     * Renders the given strings as a bracketed, comma separated list of double-quoted values.
+     * The values are written verbatim; no character escaping is applied.
+     * @param in the strings to render.
+     */
+    private String toJSON(String[] in) {
+        final StringBuilder out = new StringBuilder("[");
+        String separator = "";
+        for (String value : in) {
+            out.append(separator).append("\"").append(value).append("\"");
+            separator = ", ";
+        }
+        return out.append(']').toString();
+    }
+
+}
+ 
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemPropValue.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemPropValue.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemPropValue.java	(revision 0)
@@ -0,0 +1,325 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.html.microdata;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import org.apache.commons.lang.time.DateUtils;
+
+/**
+ * Describes a possible value for a <b>Microdata item property</b>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ItemPropValue {
+
+    /**
+     * ISO-8601 datetime format as specified by schema.org; the trailing Z is a quoted literal in the pattern.
+     * @see <a href="http://schema.org/Date">schema.org/Date</a>
+     */
+    private static final SimpleDateFormat iso8601format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // NOTE(review): one shared instance; SimpleDateFormat is not synchronized
+
+    /**
+     * Patterns handed to DateUtils.parseDate by parseDateTime: a plain date and a datetime with a literal Z suffix.
+     */
+    private static final String[] iso8601Formats = new String [] {
+        "yyyy-MM-dd",
+        "yyyy-MM-dd'T'HH:mm:ss'Z'"
+    };
+
+    /**
+     * Supported types.
+     */
+    public enum Type {
+        Plain,
+        Link,
+        Date,
+        Nested
+    }
+
+    /**
+     * Attempts to parse the given string with one of the supported ISO-8601 date patterns.
+     *
+     * @param dateStr the text to parse; ISO-8601 durations (leading 'P' or 'p') are rejected.
+     * @return the parsed date.
+     * @throws ParseException if the text is null or empty, is a duration, or matches no supported pattern.
+     */
+    public static Date parseDateTime(String dateStr) throws Exception {
+        if (dateStr == null || dateStr.length() == 0) {
+            throw new ParseException("Empty date string", 0);
+        }
+        if (dateStr.charAt(0) == 'p' || dateStr.charAt(0) == 'P') {
+            throw new ParseException("Unable to handle ISO-8601 durations", 0);
+        }
+        return DateUtils.parseDate(dateStr, iso8601Formats);
+    }
+
+    /**
+     * Formats the given date with the pattern of the shared ISO-8601 formatter.
+     * A fresh formatter is created per call because SimpleDateFormat is not thread-safe.
+     * @param in the date to format.
+     * @return the formatted date string.
+     */
+    public static String formatDateTime(Date in) {
+        return new SimpleDateFormat(iso8601format.toPattern()).format(in);
+    }
+
+    /**
+     * Internal content value.
+     */
+    private final Object content;
+
+    /**
+     * Content type.
+     */
+    private final Type type;
+
+    /**
+     * Constructor.
+     *
+     * @param content content object.
+     * @param type content type.
+     */
+    public ItemPropValue(Object content, Type type) {
+        if(content == null) {
+            throw new NullPointerException("content cannot be null.");
+        }
+        if(type == null) {
+            throw new NullPointerException("type cannot be null.");
+        }
+        if(type == Type.Nested && ! (content instanceof ItemScope) ) {
+            throw new IllegalArgumentException(
+                    "content must be an " + ItemScope.class + " when type is " + Type.Nested
+            );
+        }
+        if(type == Type.Date && !(content instanceof Date) ) {
+            throw new IllegalArgumentException(
+                    "content '" + content + "' must be a " + Date.class.getName() + " when type is " + Type.Date
+            );
+        }
+        if(content instanceof String && ((String) content).trim().length() == 0) {
+            throw new IllegalArgumentException("Invalid content '" + content + "'");
+        }
+
+        this.content = content;
+        this.type = type;
+    }
+
+    /**
+     * @return the content object.
+     */
+    public Object getContent() {
+        return content;
+    }
+
+    /**
+     * @return the content type.
+     */
+    public Type getType() {
+        return type;
+    }
+
+   /**
+     * @return <code>true</code> if type is plain text.
+     */
+    public boolean isPlain() {
+        return type == Type.Plain;
+    }
+
+    /**
+     * @return <code>true</code> if type is a link.
+     */
+    public boolean isLink() {
+        return type == Type.Link;
+    }
+
+    /**
+     * @return <code>true</code> if type is a date.
+     */
+    public boolean isDate() {
+        return type == Type.Date;
+    }
+
+    /**
+     * @return <code>true</code> if type is a nested {@link ItemScope}.
+     */
+    public boolean isNested() {
+        return type == Type.Nested;
+    }
+
+    /**
+     * @return <code>true</code> if type is an integer.
+     */
+    public boolean isInteger() {
+        if(type != Type.Plain) return false;
+         try {
+             Integer.parseInt((String) content);
+             return true;
+         } catch (Exception e) {
+             return false;
+         }
+     }
+
+    /**
+     * @return <code>true</code> if type is a float.
+     */
+     public boolean isFloat() {
+         if(type != Type.Plain) return false;
+         try {
+             Float.parseFloat((String) content);
+             return true;
+         } catch (Exception e) {
+             return false;
+         }
+     }
+
+    /**
+     * @return <code>true</code> if type is a number.
+     */
+     public boolean isNumber() {
+         return isInteger() || isFloat();
+     }
+
+    /**
+     * @return the content value as integer, or raises an exception.
+     * @throws NumberFormatException if the content is not an integer.
+     * @throws ClassCastException if content is not plain.
+     */
+     public int getAsInteger() {
+         return Integer.parseInt((String) content);
+     }
+
+    /**
+     * @return the content value as float, or raises an exception.
+     * @throws NumberFormatException if the content is not an float.
+     * @throws ClassCastException if content is not plain.
+     */
+     public float getAsFloat() {
+         return Float.parseFloat((String) content);
+     }
+
+
+    /**
+     * @return the content as {@link Date}
+     *         if <code>type == Type.Date</code>.
+     * @throws ClassCastException if content is not a {@link Date} instance.
+     */
+    public Date getAsDate() {
+        return (Date) content;
+    }
+
+    /**
+     * @return the content value as URL, or raises an exception.
+     * @throws IllegalStateException if the content is not a valid URL.
+     * @throws ClassCastException if content is not a string.
+     */
+    public URL getAsLink() {
+        try {
+            return new URL((String) content);
+        } catch (MalformedURLException murle) {
+            throw new IllegalStateException("Error while parsing URI.", murle);
+        }
+    }
+
+    /**
+     * @return the content value as {@link ItemScope}.
+     * @throws ClassCastException if the content is not a valid nested item.
+     */
+    public ItemScope getAsNested() {
+        return (ItemScope) content;
+    }
+
+    /** Renders this value as a JSON-like object with a "content" and a "type" member. */
+    public String toJSON() {
+        String contentStr;
+        if(content instanceof String) {
+            // Double each backslash first (literal replace, no regex) so it survives as data.
+            contentStr = ((String) content).replace("\\", "\\\\");
+            contentStr = "\"" + escapeAsJSONString(contentStr) + "\"";
+        } else if(content instanceof Date) {
+            contentStr = "\"" + formatDateTime((Date) content) + "\"";
+        } else {
+            contentStr = content.toString();
+        }
+        return String.format( "{ \"content\" : %s, \"type\" : \"%s\" }", contentStr, type );
+    }
+
+    @Override
+    public String toString() {
+        return toJSON();
+    }
+
+    @Override
+    public int hashCode() {
+        return content.hashCode() * type.hashCode() * 2;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if(obj == null) {
+            return false;
+        }
+        if(obj == this) {
+            return true;
+        }
+        if(obj instanceof ItemPropValue) {
+            final ItemPropValue other = (ItemPropValue) obj;
+            return content.equals(other.content) && type.equals(other.type);
+        }
+        return false;
+    }
+
+    /**
+     * Prefixes every double quote that is not already escaped with a backslash.
+     * A quote counts as escaped only when it directly follows an odd run of backslashes.
+     *
+     * @param in input string.
+     * @return the string with all bare double quotes escaped.
+     */
+    public static String escapeDoubleQuotes(String in) {
+        final StringBuilder out = new StringBuilder();
+        boolean escaped = false;
+        for(int i = 0; i < in.length(); i++) {
+            final char current = in.charAt(i);
+            if(current == '"' && !escaped) {
+              out.append('\\');
+            }
+            // A backslash toggles the state; any other character ends the escape sequence.
+            escaped = current == '\\' && !escaped;
+            out.append(current);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Escapes the <code>in</code> string so it can be embedded within a <b>JSON</b> string field:
+     * bare double quotes, newlines, carriage returns and tabs are escaped; backslashes are untouched.
+     * @param in string to be escaped.
+     * @return escaped string.
+     */
+    public static String escapeAsJSONString(String in) {
+        // Quotes are handled first so the backslashes inserted afterwards cannot hide a following quote.
+        return escapeDoubleQuotes(in).replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t");
+    }
+}
+
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemProp.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemProp.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/microdata/ItemProp.java	(revision 0)
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.html.microdata;
+
+/**
+ * Describes a <b>Microdata item property</b>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ItemProp {
+
+    /**
+     * Property name.
+     */
+    private final String name;
+
+    /**
+     * Property value.
+     */
+    private final ItemPropValue value;
+
+    /**
+     * Constructor.
+     *
+     * @param name item property name; must not be <code>null</code> or blank.
+     * @param value item property value; must not be <code>null</code>.
+     * @throws IllegalArgumentException if name is blank (null arguments raise NullPointerException).
+     */
+    public ItemProp(String name, ItemPropValue value) {
+        if(name == null) {
+            throw new NullPointerException("name cannot be null.");
+        }
+        if(name.trim().length() == 0) {
+            throw new IllegalArgumentException("invalid property name '" + name + "'");
+        }
+        if(value == null) {
+            throw new NullPointerException("value cannot be null.");
+        }
+        this.name = name;
+        this.value = value;
+    }
+
+    /**
+     * @return the item property name.
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @return the item property value.
+     */
+    public ItemPropValue getValue() {
+        return value;
+    }
+
+    /** Renders the property as a JSON-like object; quotes and line breaks in the name are escaped. */
+    public String toJSON() {
+        final String safeName = ItemPropValue.escapeAsJSONString(name);
+        return String.format("{ \"name\" : \"%s\", \"value\" : %s }", safeName, value.toJSON());
+    }
+
+    /** Returns the same text as {@link #toJSON()}. */
+    @Override
+    public String toString() {
+        return toJSON();
+    }
+
+    @Override
+    public int hashCode() {
+        return name.hashCode() * value.hashCode() * 3;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if( obj == null ) {
+            return false;
+        }
+        if(obj == this) {
+            return true;
+        }
+        if(obj instanceof ItemProp) {
+            final ItemProp other = (ItemProp) obj;
+            return name.equals(other.name) && value.equals( other.value );
+        }
+        return false;
+    }
+}
+ 
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/MicrodataContentHandler.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/MicrodataContentHandler.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/MicrodataContentHandler.java	(revision 0)
@@ -0,0 +1,456 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.html;
+
+import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tika.parser.html.microdata.*;
+import org.xml.sax.Attributes;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * SAX based Microdata content handler.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-980">TIKA-980</a>
+ * @author markus@apache.org
+ */
+public class MicrodataContentHandler extends DefaultHandler {
+    private static final Log logger =
+      LogFactory.getLog(MicrodataContentHandler.class);
+
+    /** Item scopes not attached as a property value of an enclosing scope */
+    protected List<ItemScope> items = new ArrayList<ItemScope>();
+
+    /** Element depth at which each currently open item scope started */
+    protected Stack<Integer> scopeDepthStack = new Stack<Integer>();
+
+    /** Currently open item scopes, the innermost one on top */
+    protected Stack<ItemScope> currentItemScopeRef = new Stack<ItemScope>();
+
+    /** Nesting depth of the XHTML element currently being processed */
+    protected int depth = 0;
+
+    /** Content reading states; the top decides if characters are buffered */
+    protected Stack<Boolean> readContentStack = new Stack<Boolean>();
+
+    /** Names of the itemProps that are currently open */
+    protected Stack<String> propNameStack = new Stack<String>();
+
+    /** Element depth at which each currently open itemProp started */
+    protected Stack<Integer> propDepthStack = new Stack<Integer>();
+
+    /** Buffer collecting character data as the value for an itemProp */
+    protected StringBuilder contents = new StringBuilder();
+
+    /** Value read from an attribute of the itemProp element, null if none */
+    protected String contentFromAttribute = null;
+
+    /**
+     * Tags whose itemprop value is read from the <code>src</code> attribute.
+     */
+    public static final Set<String> SRC_TAGS = Collections.unmodifiableSet(
+        new HashSet<String>( Arrays.asList("audio", "embed", "iframe", "img",
+          "source", "track", "video") )
+    );
+
+    /**
+     * Tags whose itemprop value is read from the <code>href</code> attribute.
+     */
+    public static final Set<String> HREF_TAGS = Collections.unmodifiableSet(
+        new HashSet<String>( Arrays.asList("a", "area", "link") )
+    );
+
+    /**
+     * Tags whose itemprop value is read from the <code>content</code> attribute.
+     */
+    public static final Set<String> CONTENT_TAGS = Collections.unmodifiableSet(
+        new HashSet<String>( Arrays.asList("meta") )
+    );
+
+    /**
+     * Tags whose itemprop value is read from the <code>datetime</code> attribute.
+     */
+    public static final Set<String> DATETIME_TAGS =
+      Collections.unmodifiableSet(new HashSet<String>( Arrays.asList("time") )
+    );
+
+    /**
+     * Tags whose itemprop value is read from the <code>value</code> attribute.
+     */
+    public static final Set<String> VALUE_TAGS = Collections.unmodifiableSet(
+        new HashSet<String>( Arrays.asList("data") )
+    );
+
+    /**
+     * Creates a handler with no collected items and empty parsing state.
+     */
+    public MicrodataContentHandler() {
+        // The superclass constructor is invoked implicitly
+    }
+
+    /**
+     * Gives access to the item scopes gathered so far.
+     *
+     * @return the handler's own list of items, not a copy
+     */
+    public List<ItemScope> getItems() {
+        return this.items;
+    }
+
+    /**
+     * Tracks microdata attributes on an opening XHTML element: registers
+     * <code>itemprop</code> names, opens <code>itemscope</code> items and
+     * decides whether following character data is buffered as a value.
+     * Elements outside the XHTML namespace are ignored.
+     */
+    @Override
+    public void startElement(String uri, String local, String name,
+      Attributes attributes) throws SAXException {
+        if (XHTML.equals(uri)) {
+            // Counted even for attribute-less elements; endElement decrements
+            depth++;
+
+            // Without attributes there is no microdata to process
+            if (attributes == null || attributes.getLength() == 0) {
+                return;
+            }
+
+            // Get the itemProp and itemType for this element
+            String type = getAttr(attributes, "itemtype");
+            String prop = getAttr(attributes, "itemprop");
+
+            // The content reading state produced by this element; it starts
+            // as the state inherited from the enclosing element, if any
+            boolean readContents = false;
+            if (readContentStack.size() > 0) {
+              readContents = readContentStack.peek();
+            }
+
+            if (prop != null) {
+                // A property is only meaningful inside an item scope
+                if (currentItemScopeRef.size() == 0) {
+                  logger.warn("ItemProp " + local + "." + prop +
+                    " not in itemscope");
+                  return;
+                }
+
+                // Remember depth and name until the matching endElement
+                propDepthStack.push(depth);
+                propNameStack.push(prop);
+
+                // We'll attempt to read the contents for this itemprop
+                // from its attributes
+                // http://www.w3.org/TR/microdata/#values
+                String content = null;
+
+                if (SRC_TAGS.contains(local)) {
+                    // Attempt to read the src attribute for this itemprop
+                    content = getAttr(attributes, "src");
+
+                } else if (HREF_TAGS.contains(local)) {
+                    // Attempt to read the href attribute for this itemprop
+                    content = getAttr(attributes, "href");
+
+                } else if (CONTENT_TAGS.contains(local)) {
+                    // Attempt to read the content attribute for this itemprop
+                    content = getAttr(attributes, "content");
+
+                } else if (DATETIME_TAGS.contains(local)) {
+                    // Attempt to read the datetime attribute for this itemprop
+                    content = getAttr(attributes, "datetime");
+
+                } else if (VALUE_TAGS.contains(local)) {
+                    // Attempt to read the value attribute for this itemprop
+                    content = getAttr(attributes, "value");
+
+                } else {
+                    // Attempt to read the contents attribute for this itemprop
+                    // (this is non-standard)
+                    content = getAttr(attributes, "content");
+                }
+
+                // Did we get contents from an attribute?
+                if (content != null) {
+                    // Store the content read from the attribute
+                    contentFromAttribute = content;
+                }
+
+                // We're reading contents from the following textElement(s)
+                readContents = true;
+            }
+
+            // Does this element open an item scope?
+            if (isItemScope(attributes)) {
+                // Create a new ItemScope
+                ItemScope itemScope = new ItemScope();
+
+                // Are we nested and in an item property and do we have a
+                // property at all?
+                if (scopeDepthStack.size() != 0 && prop != null) {
+                    // Create a new ItemPropValue
+                    ItemPropValue itemPropValue = new ItemPropValue(itemScope,
+                      ItemPropValue.Type.Nested);
+
+                    // Create a new ItemProp
+                    ItemProp itemProp = new ItemProp(prop, itemPropValue);
+
+                    // Add this itemScope as value of this property
+                    currentItemScopeRef.peek().acquireProperty(itemProp);
+                } else {
+                    // Add scope to lowest level
+                    items.add(itemScope);
+                }
+
+                // Add current depth to the stack
+                scopeDepthStack.push(depth);
+
+                // Add this item scope to the current on the stack
+                currentItemScopeRef.push(itemScope);
+
+                // Disable reading of contents
+                readContents = false;
+
+                // The item type describes the scope opened by this element
+                if (type != null) {
+                    itemScope.setType(type);
+                }
+            } else if (type != null) {
+                // No scope opened here: never touch an enclosing or missing one
+                logger.warn("ItemType " + type + " on " + local +
+                  " without itemscope, ignored");
+            }
+
+            // Push the final reading state from this element if it changed
+            if (readContentStack.size() == 0 || readContentStack.peek() !=
+              readContents || contentFromAttribute != null) {
+                readContentStack.push(readContents);
+            }
+        }
+    }
+
+    /**
+     * Buffers the reported characters when the current reading state, the
+     * top of <code>readContentStack</code>, is enabled.
+     */
+    @Override
+    public void characters(char[] chars, int offset, int length)
+      throws SAXException {
+        final boolean reading =
+          !readContentStack.isEmpty() && readContentStack.peek();
+        if (reading) {
+            contents.append(chars, offset, length);
+        }
+    }
+
+    public void characters(String characters) throws SAXException {
+        if (!(characters == null || characters.length() == 0)) { // skip null and empty input
+            this.characters(characters.toCharArray(), 0, characters.length());
+        }
+    }
+
+    @Override
+    public void ignorableWhitespace(char[] chars, int offset, int length)
+      throws SAXException {
+        this.characters(chars, offset, length); // handled like any other character data
+    }
+
+    @Override
+    public void endElement(String uri, String local, String name)
+      throws SAXException {
+        // For XHTML elements: close the scope and/or property opened at this depth
+        if (XHTML.equals(uri)) {
+            // Do we have item scopes at all and are we leaving an item scope?
+            if (scopeDepthStack.size() > 0 &&
+              scopeDepthStack.peek() == depth) {
+                // Yes! Get rid of its depth info
+                scopeDepthStack.pop();
+
+                // And remove it from the temp stack
+                currentItemScopeRef.pop();
+            }
+
+            // Are we back at the depth of the current itemprop?
+            if (propDepthStack.size() > 0 && depth == propDepthStack.peek()) {
+                // We're leaving this property
+                propDepthStack.pop();
+
+                // This is going to be our property's value
+                String contentStr;
+
+                // Prefer content read from the attribute
+                if (contentFromAttribute != null) {
+                  contentStr = contentFromAttribute;
+                } else {
+                  contentStr = contents.toString();
+                }
+
+                // Collapse whitespace runs into single spaces and trim the ends
+                contentStr = contentStr.replaceAll("\\s+", " ").trim();
+
+                // Get the current prop name and leave it
+                String currentPropName = propNameStack.pop();
+
+                // Create an item with attribute's value or content read from
+                // text elements
+                if (contentFromAttribute != null || readContentStack.size() > 0
+                  && readContentStack.peek() == true) {
+                    // Do we have any contents?
+                    if (contentStr.length() > 0) {
+                        try {
+                            // Create a new ItemPropValue
+                            ItemPropValue itemPropValue =
+                              getItemPropValue(contentStr, local);
+
+                            // Create a new ItemProp
+                            ItemProp itemProp =
+                              new ItemProp(currentPropName, itemPropValue);
+
+                            // Add the property to the innermost open item scope
+                            currentItemScopeRef.peek().acquireProperty(itemProp);
+                        } catch (Exception e) {
+                            logger.warn("Cannot read value for ItemProp " +
+                              local + "." + currentPropName + " " +
+                              e.toString());
+                        }
+                    } else {
+                        // Should we send a warning for this?
+                        logger.warn("ItemProp " + currentPropName +
+                          " has no contents");
+                    }
+                }
+
+                // NOTE(review): pops even if startElement pushed no state here - confirm
+                if (readContentStack.size() > 0) {
+                  readContentStack.pop();
+                }
+
+                // Clear the buffer only if we've not read from an attribute
+                if (contentFromAttribute != null) {
+                  contentFromAttribute = null;
+                } else {
+                  contents.setLength(0);
+                }
+            }
+
+            // Clear the buffer in case we're not reading anyway
+            if (readContentStack.size() == 0 ||
+              readContentStack.peek() == false) {
+                contents.setLength(0);
+            }
+
+            // Decrease element depth
+            depth--;
+        }
+    }
+
+    /**
+     * Wraps the content in an ItemPropValue typed after the element. Date
+     * content that cannot be parsed is kept as the original string.
+     *
+     * @param content the property value as text
+     * @param element local name of the element carrying the property
+     * @return the value, holding the parsed date/time or the plain content
+     */
+    protected ItemPropValue getItemPropValue(String content, String element) {
+        ItemPropValue.Type itemPropType = getItemPropType(element);
+        if (itemPropType == ItemPropValue.Type.Date) {
+            try {
+                return new ItemPropValue(ItemPropValue.parseDateTime(content),
+                  itemPropType);
+            } catch (Exception e) {
+                // Deliberate fallback, but leave a trace instead of silence
+                logger.debug("Cannot parse '" + content + "' as date", e);
+            }
+        }
+
+        // Plain content, or a date that could not be parsed
+        return new ItemPropValue(content, itemPropType);
+    }
+
+    /**
+     * Maps an element name onto the type of the property value it carries.
+     * <code>meta</code> always yields Plain, elements of SRC_TAGS or
+     * HREF_TAGS and <code>object</code> yield Link, <code>time</code> yields
+     * Date and every other element yields Plain.
+     *
+     * @param element local name of the element carrying the property
+     * @return the value type for that element
+     */
+    protected ItemPropValue.Type getItemPropType(String element) {
+        // meta is tested first so that it can never be classified as a link
+        final boolean isMeta = "meta".equals(element);
+        // Elements classified as links
+        final boolean isLink = SRC_TAGS.contains(element)
+          || HREF_TAGS.contains(element)
+          || "object".equals(element);
+
+        final ItemPropValue.Type result;
+        if (isMeta) {
+            result = ItemPropValue.Type.Plain;
+        } else if (isLink) {
+            result = ItemPropValue.Type.Link;
+        } else if ("time".equals(element)) {
+            result = ItemPropValue.Type.Date;
+        } else {
+            result = ItemPropValue.Type.Plain;
+        }
+        return result;
+    }
+
+    /**
+     * Tells whether the element declares an <code>itemscope</code> attribute.
+     *
+     * @param attributes attributes of the element
+     * @return true if a value is found for the attribute, false otherwise
+     */
+    protected boolean isItemScope(Attributes attributes) {
+        return null != getAttr(attributes, "itemscope");
+    }
+
+    /**
+     * Looks up an attribute by its local name.
+     *
+     * @param attributes attributes to search
+     * @param name local name of the wanted attribute
+     * @return value of the first matching attribute, or null if none matches
+     */
+    protected String getAttr(Attributes attributes, String name) {
+        final int total = attributes.getLength();
+        for (int index = 0; index < total; index++) {
+            if (attributes.getLocalName(index).equals(name)) {
+                return attributes.getValue(index);
+            }
+        }
+        return null;
+    }
+
+}
