From 1a4bcc6aca9f3a8ec3afab53aab1f9ef29716806 Mon Sep 17 00:00:00 2001
From: Jukka Zitting <jukka@apache.org>
Date: Sat, 20 Aug 2011 20:22:22 +0200
Subject: [PATCH 2/2] TIKA-692: TikaCLI -x or -h on a Word doc sometimes adds newline after </b> tag

Automatically pretty-print the <head> section generated by the XHTMLContentHandler
---
 .../org/apache/tika/sax/XHTMLContentHandler.java   |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
index 656cd90..f0ddfda 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
@@ -127,8 +127,11 @@ public class XHTMLContentHandler extends SafeContentHandler {
             
             // Call directly, so we don't go through our startElement(), which will
             // ignore these elements.
+            newline();
             super.startElement(XHTML, "html", "html", EMPTY_ATTRIBUTES);
+            newline();
             super.startElement(XHTML, "head", "head", EMPTY_ATTRIBUTES);
+            newline();
         }
     }
 
@@ -165,6 +168,7 @@ public class XHTMLContentHandler extends SafeContentHandler {
                         attributes.addAttribute("", "content", "content", "CDATA", value);
                         super.startElement(XHTML, "meta", "meta", attributes);
                         super.endElement(XHTML, "meta", "meta");
+                        newline();
                     }
                 }
             }
@@ -175,10 +179,11 @@ public class XHTMLContentHandler extends SafeContentHandler {
                 char[] titleChars = title.toCharArray();
                 super.characters(titleChars, 0, titleChars.length);
             }
-            
             super.endElement(XHTML, "title", "title");
+            newline();
             
             super.endElement(XHTML, "head", "head");
+            newline();
             
             if (useFrameset) {
                 super.startElement(XHTML, "frameset", "frameset", EMPTY_ATTRIBUTES);
-- 
1.7.4

