Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 1185749)
+++ CHANGES.txt	(working copy)
@@ -17,6 +17,8 @@
 
  * TIKA-753: Improve performance when extracting embedded office docs.
 
+ * TIKA-738: Optionally extract text from PDF annotations.
+
 Release 0.10 - 09/25/2011
 
 The most notable changes in Tika 0.10 over previous releases are:
Index: tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(revision 1185749)
+++ tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(working copy)
@@ -218,8 +218,7 @@
         //assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
     }
 
-    // TIKA-738: re-enable this
-    public void IGNOREtestAnnotations() throws Exception {
+    public void testAnnotations() throws Exception {
         Parser parser = new AutoDetectParser(); // Should auto-detect!
         ContentHandler handler = new BodyContentHandler();
         Metadata metadata = new Metadata();
@@ -234,6 +233,23 @@
         content = content.replaceAll("[\\s\u00a0]+"," ");
         assertContains("Here is some text", content);
         assertContains("Here is a comment", content);
+
+        // Same document again, parsed directly via PDFParser with annotation text disabled:
+        PDFParser pdfParser = new PDFParser();
+        pdfParser.setExtractAnnotationText(false);
+        handler = new BodyContentHandler();
+        metadata = new Metadata();
+        context = new ParseContext();
+        stream = getResourceAsStream("/test-documents/testAnnotations.pdf");
+        try {
+            pdfParser.parse(stream, handler, metadata, context);
+        } finally {
+            stream.close();
+        }
+        content = handler.toString();
+        content = content.replaceAll("[\\s\u00a0]+"," ");
+        assertContains("Here is some text", content);
+        assertEquals(-1, content.indexOf("Here is a comment"));
     }
 
     public void testPageNumber() throws Exception {
Index: tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java	(revision 1185749)
+++ tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java	(working copy)
@@ -61,6 +61,8 @@
      */
     public static final String PASSWORD = "org.apache.tika.parser.pdf.password";
 
+    private boolean extractAnnotationText = true;
+
     private static final Set<MediaType> SUPPORTED_TYPES =
         Collections.singleton(MediaType.application("pdf"));
 
@@ -88,7 +90,7 @@
             }
             metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
             extractMetadata(pdfDocument, metadata);
-            PDF2XHTML.process(pdfDocument, handler, metadata);
+            PDF2XHTML.process(pdfDocument, handler, metadata, extractAnnotationText);
         } finally {
             pdfDocument.close();
         }
@@ -165,4 +167,19 @@
             addMetadata(metadata, name, value.toString());
         }
     }
+
+    /**
+     * Sets whether text in annotations is extracted; if true
+     * (the default), annotation text will be extracted.
+     */
+    public void setExtractAnnotationText(boolean v) {
+        extractAnnotationText = v;
+    }
+
+    /**
+     * Returns whether text in annotations will be extracted.
+     */
+    public boolean getExtractAnnotationText() {
+        return extractAnnotationText;
+    }
 }
Index: tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(revision 1185749)
+++ tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(working copy)
@@ -22,6 +22,8 @@
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.util.PDFTextStripper;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
 import org.apache.pdfbox.util.TextPosition;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOExceptionWithCause;
@@ -51,12 +53,12 @@
      * @throws TikaException if the PDF document can not be processed
      */
     public static void process(
-            PDDocument document, ContentHandler handler, Metadata metadata)
+            PDDocument document, ContentHandler handler, Metadata metadata, boolean extractAnnotationText)
             throws SAXException, TikaException {
         try {
             // Extract text using a dummy Writer as we override the
             // key methods to output to the given content handler.
-            new PDF2XHTML(handler, metadata).writeText(document, new Writer() {
+            new PDF2XHTML(handler, metadata, extractAnnotationText).writeText(document, new Writer() {
                 @Override
                 public void write(char[] cbuf, int off, int len) {
                 }
@@ -77,10 +79,12 @@
     }
 
     private final XHTMLContentHandler handler;
+    private final boolean extractAnnotationText;
 
-    private PDF2XHTML(ContentHandler handler, Metadata metadata)
+    private PDF2XHTML(ContentHandler handler, Metadata metadata, boolean extractAnnotationText)
             throws IOException {
         this.handler = new XHTMLContentHandler(handler, metadata);
+        this.extractAnnotationText = extractAnnotationText;
         setForceParsing(true);
         setSortByPosition(false);
     }
@@ -115,8 +119,51 @@
 
     @Override
     protected void endPage(PDPage page) throws IOException {
+
         try {
             handler.endElement("p");
+            // The page paragraph is now closed; annotation output below must
+            // not end "p" again, or the emitted SAX events become unbalanced.
+            // TODO: remove once PDFBOX-1143 is fixed:
+            if (extractAnnotationText) {
+                for(Object o : page.getAnnotations()) {
+                    if ((o instanceof PDAnnotation) && PDAnnotationMarkup.SUB_TYPE_FREETEXT.equals(((PDAnnotation) o).getSubtype())) {
+                        // It's a text annotation:
+                        PDAnnotationMarkup annot = (PDAnnotationMarkup) o;
+                        // Title and subject have separate accessors; any of the
+                        // three values may be null, and null parts are skipped.
+                        String title = annot.getTitlePopup();
+                        String subject = annot.getSubject();
+                        String contents = annot.getContents();
+                        // TODO: maybe also annot.getRichContents()?
+                        if (title != null || subject != null || contents != null) {
+                            // One wrapper div per annotation, with a nested div
+                            // for each non-null part.
+                            handler.startElement("div", "class", "annotation");
+
+                            if (title != null) {
+                                handler.startElement("div", "class", "annotationTitle");
+                                handler.characters(title);
+                                handler.endElement("div");
+                            }
+
+                            if (subject != null) {
+                                handler.startElement("div", "class", "annotationSubject");
+                                handler.characters(subject);
+                                handler.endElement("div");
+                            }
+
+                            if (contents != null) {
+                                handler.startElement("div", "class", "annotationContents");
+                                handler.characters(contents);
+                                handler.endElement("div");
+                            }
+
+                            handler.endElement("div");
+                        }
+                    }
+                }
+            }
             handler.endElement("div");
         } catch (SAXException e) {
             throw new IOExceptionWithCause("Unable to end a page", e);
