Index: tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java	(revision 940756)
+++ tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java	(working copy)
@@ -194,7 +194,7 @@
                     @Override
                     public void startElement(
                             String u, String l, String name, Attributes atts) {
-                        if (atts.getValue("", "href") != null) {
+                        if (name.equals("a") && atts.getValue("", "href") != null) { // anchors only: link/base elements are now emitted with their href too
                             links.add(atts.getValue("", "href"));
                         }
                     }
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java	(revision 940756)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java	(working copy)
@@ -234,6 +234,14 @@
     }
 
     /**
+     * Maps an attribute name by delegating to {@link DefaultHtmlMapper#INSTANCE}.
+     * @deprecated Use the {@link HtmlMapper} mechanism to customize
+     *             the HTML mapping. This method will be removed in Tika 1.0. */
+    public String mapSafeAttribute(String elementName, String attributeName) {
+        return DefaultHtmlMapper.INSTANCE.mapSafeAttribute(elementName, attributeName);
+    }
+    
+    /**
      * Adapter class that maintains backwards compatibility with the
      * protected HtmlParser methods. Making HtmlParser implement HtmlMapper
      * directly would require those methods to be public, which would break
@@ -249,6 +257,9 @@
         public boolean isDiscardElement(String name) {
             return HtmlParser.this.isDiscardElement(name);
         }
+        public String mapSafeAttribute(String elementName, String attributeName) {
+            return HtmlParser.this.mapSafeAttribute(elementName, attributeName); // forwards to the enclosing parser's method
+        }
     }
 
 }
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java	(revision 940756)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java	(working copy)
@@ -25,6 +25,7 @@
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
 
 class HtmlHandler extends TextContentHandler {
 
@@ -89,23 +90,40 @@
                     metadata.set(
                             atts.getValue("http-equiv"),
                             atts.getValue("content"));
+                    xhtml.startElement(uri, local, "meta", atts);
                 }
                 if (atts.getValue("name") != null) {
                     metadata.set(
                             atts.getValue("name"),
                             atts.getValue("content"));
+                    xhtml.startElement(uri, local, "meta", atts);
                 }
             } else if ("BASE".equals(name) && atts.getValue("href") != null) {
                 metadata.set(
                         Metadata.CONTENT_LOCATION,
                         resolve(atts.getValue("href").trim()));
+                xhtml.startElement(uri, local, "base", atts);
+            } else if ("LINK".equals(name) && atts.getValue("href") != null) {
+                xhtml.startElement(uri, local, "link", atts);
             }
         }
 
         if (bodyLevel > 0 && discardLevel == 0) {
             String safe = mapper.mapSafeElement(name);
             if (safe != null) {
-                xhtml.startElement(safe);
+                // Work on a mutable copy: drop the attributes the mapper rejects and
+                // rename the rest to their mapped names. Walking backwards keeps the
+                // remaining indices valid after a removal.
+                AttributesImpl newAttributes = new AttributesImpl(atts);
+                for (int att = newAttributes.getLength() - 1; att >= 0; att--) {
+                    String normAttrName = mapper.mapSafeAttribute(safe, newAttributes.getLocalName(att));
+                    if (normAttrName == null) {
+                        newAttributes.removeAttribute(att);
+                    } else {
+                        newAttributes.setLocalName(att, normAttrName);
+                    }
+                }
+                xhtml.startElement(safe, newAttributes);
             } else if ("A".equals(name)) {
                 String href = atts.getValue("href");
                 if (href != null) {
@@ -127,6 +145,15 @@
     @Override
     public void endElement(
             String uri, String local, String name) throws SAXException {
+        if (bodyLevel == 0 && discardLevel == 0) {
+            if ("LINK".equals(name)) {
+                xhtml.endElement("link");
+            } else if ("BASE".equals(name)) {
+                xhtml.endElement("base");
+            } else if ("META".equals(name)) {
+                xhtml.endElement("meta");
+            }
+        }
         if (bodyLevel > 0 && discardLevel == 0) {
             String safe = mapper.mapSafeElement(name);
             if (safe != null) {
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java	(revision 940756)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java	(working copy)
@@ -17,9 +17,9 @@
 package org.apache.tika.parser.html;
 
 /**
- * Alternative HTML mapping rules that pass the input HTML
- * as-is without any modifications.
- *
+ * Alternative HTML mapping rules that pass the input HTML as-is without any
+ * modifications.
+ * 
  * @since Apache Tika 0.8
  */
 public class IdentityHtmlMapper implements HtmlMapper {
@@ -30,6 +30,10 @@
         return false;
     }
 
+    public String mapSafeAttribute(String elementName, String attributeName) {
+        return attributeName.toLowerCase(java.util.Locale.ENGLISH); // fixed locale: result must not depend on the JVM default (e.g. Turkish I -> dotless i)
+    }
+
     public String mapSafeElement(String name) {
         return name;
     }
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlMapper.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlMapper.java	(revision 940756)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlMapper.java	(working copy)
@@ -50,5 +50,20 @@
      *         should be ignored, <code>false</code> otherwise
      */
     boolean isDiscardElement(String name);
+    
+    
+    /**
+     * Maps "safe" HTML attribute names to semantic XHTML equivalents. If the
+     * given attribute is unknown or deemed unsafe for inclusion in the parse
+     * output, then this method returns <code>null</code> and the attribute
+     * will be ignored. This method assumes that the element name
+     * is valid and normalised.
+     *
+     * @param elementName HTML element name (lower case)
+     * @param attributeName HTML attribute name (lower case)
+     * @return XHTML attribute name (lower case), or
+     *         <code>null</code> if the attribute is unknown or unsafe
+     */
+    String mapSafeAttribute(String elementName, String attributeName);
 
 }
Index: tika-parsers/src/main/java/org/apache/tika/parser/html/DefaultHtmlMapper.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/html/DefaultHtmlMapper.java	(revision 940756)
+++ tika-parsers/src/main/java/org/apache/tika/parser/html/DefaultHtmlMapper.java	(working copy)
@@ -62,6 +62,12 @@
         return null;
     }
 
+    /** Rejects every attribute: always returns <code>null</code>, whatever the
+     * arguments. Assumes that the element name is valid and normalised. */
+    public String mapSafeAttribute(String elementName, String attributeName) {
+        return null;
+    }    
+    
     public boolean isDiscardElement(String name) {
         return "STYLE".equals(name) || "SCRIPT".equals(name);
     }
Index: tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
===================================================================
--- tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java	(revision 940756)
+++ tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java	(working copy)
@@ -188,6 +188,11 @@
         startElement(XHTML, name, name, attributes);
     }
 
+    public void startElement(String name, AttributesImpl attributes)
+            throws SAXException {
+        startElement(XHTML, name, name, attributes); // name fills both name arguments of the four-argument overload
+    }
+
     public void endElement(String name) throws SAXException {
         endElement(XHTML, name, name);
     }
