Index: tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(revision 1206429)
+++ tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(working copy)
@@ -250,8 +250,28 @@
         content = content.replaceAll("[\\s\u00a0]+"," ");
         assertContains("Here is some text", content);
         assertEquals(-1, content.indexOf("Here is a comment"));
+
+        // TIKA-738: make sure no extra </p> tags
+        String xml = getXML("testAnnotations.pdf").xml;
+        assertEquals(substringCount("<p>", xml),
+                     substringCount("</p>", xml));
     }
 
+    private static int substringCount(String needle, String haystack) {
+        int upto = -1;
+        int count = 0;
+        while(true) {
+            final int next = haystack.indexOf(needle, upto);
+            if (next == -1) {
+                break;
+            }
+            count++;
+            upto = next+1;
+        }
+
+        return count;
+    }
+
     public void testPageNumber() throws Exception {
         final XMLResult result = getXML("testPageNumber.pdf");
         final String content = result.xml.replaceAll("\\s+","");
Index: tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(revision 1206429)
+++ tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(working copy)
@@ -126,20 +126,19 @@
     protected void startPage(PDPage page) throws IOException {
         try {
             handler.startElement("div", "class", "page");
-            handler.startElement("p");
         } catch (SAXException e) {
             throw new IOExceptionWithCause("Unable to start a page", e);
         }
+        writeParagraphStart();
     }
 
     @Override
     protected void endPage(PDPage page) throws IOException {
 
         try {
+            writeParagraphEnd();
             // TODO: remove once PDFBOX-1143 is fixed:
-            handler.endElement("p");
             if (extractAnnotationText) {
-                boolean foundTextAnnots = false;
                 for(Object o : page.getAnnotations()) {
                     if ((o instanceof PDAnnotation) && PDAnnotationMarkup.SUB_TYPE_FREETEXT.equals(((PDAnnotation) o).getSubtype())) {
                         // It's a text annotation:
@@ -149,11 +148,6 @@
                         String contents = annot.getContents();
                         // TODO: maybe also annot.getRichContents()?
                         if (title != null || subject != null || contents != null) {
-                            if (!foundTextAnnots) {
-                                handler.endElement("p");
-                                foundTextAnnots = true;
-                            }
-
                             handler.startElement("div", "class", "annotation");
 
                             if (title != null) {
Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 1206429)
+++ CHANGES.txt	(working copy)
@@ -13,7 +13,8 @@
    non-duplicated characters were incorrectly removed (TIKA-767).
    Allow controlling whether text tokens should be sorted by their x/y
    position before extracting text (TIKA-612); this is necessary for
-   certain PDFs.
+   certain PDFs.  Fixed cases where too many </p> tags appear in the
+   XHTML output, causing NPE for some PDFs (TIKA-738, TIKA-778).
 
  * RTF: Fixed case where a font change would result in processing
    bytes in the wrong font's charset, producing bogus text output