Index: tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(revision 1202206)
+++ tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java	(working copy)
@@ -323,6 +323,39 @@
         assertContains("Text the first timesecond time", content);
     }
 
+    public void testSortByPosition() throws Exception {
+        PDFParser parser = new PDFParser();
+        parser.setEnableAutoSpace(false);
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        InputStream stream = getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf");
+        // First pass uses the default (sortByPosition == false): each column's lines stay together.
+        try {
+            parser.parse(stream, handler, metadata, context);
+        } finally {
+            stream.close();
+        }
+        String content = handler.toString();
+        content = content.replaceAll("\\s+", " "); // collapse every whitespace run to a single space
+        assertContains("Left column line 1 Left column line 2 Right column line 1 Right column line 2", content);
+
+        parser.setSortByPosition(true);
+        handler = new BodyContentHandler();
+        metadata = new Metadata();
+        context = new ParseContext();
+        stream = getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf");
+        try {
+            parser.parse(stream, handler, metadata, context);
+        } finally {
+            stream.close();
+        }
+        content = handler.toString();
+        content = content.replaceAll("\\s+", " ");
+        // Sorted by position, the columns interleave. NOTE(review): "colu mn" looks like real extractor output, not a typo -- confirm before editing.
+        assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", content);
+    }
+
     private static class XMLResult {
         public final String xml;
         public final Metadata metadata;
Index: tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java	(revision 1202206)
+++ tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java	(working copy)
@@ -60,6 +60,14 @@
     // True if we let PDFBox remove duplicate overlapping text:
     private boolean suppressDuplicateOverlappingText;
 
+    // True if we extract annotation text ourselves
+    // (workaround for PDFBOX-1143):
+    private boolean extractAnnotationText = true;
+
+    // True if text tokens are sorted by their x/y position before extraction
+    // (needed for some PDFs, but interleaves the text of multi-column PDFs):
+    private boolean sortByPosition = false;
+
     /**
      * Metadata key for giving the document password to the parser.
      *
@@ -67,8 +75,6 @@
      */
     public static final String PASSWORD = "org.apache.tika.parser.pdf.password";
 
-    private boolean extractAnnotationText = true;
-
     private static final Set<MediaType> SUPPORTED_TYPES =
         Collections.singleton(MediaType.application("pdf"));
 
@@ -96,7 +102,9 @@
             }
             metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
             extractMetadata(pdfDocument, metadata);
-            PDF2XHTML.process(pdfDocument, handler, metadata, extractAnnotationText, enableAutoSpace, suppressDuplicateOverlappingText);
+            PDF2XHTML.process(pdfDocument, handler, metadata,
+                              extractAnnotationText, enableAutoSpace,
+                              suppressDuplicateOverlappingText, sortByPosition);
         } finally {
             pdfDocument.close();
         }
@@ -222,4 +230,21 @@
         return suppressDuplicateOverlappingText;
     }
 
+    /**
+     *  Sets whether text tokens are sorted by their x/y position
+     *  before text is extracted.  Sorting may be necessary for PDFs
+     *  whose text tokens are not rendered "in order", but it can
+     *  produce the wrong result for others (for example, text in
+     *  2 columns will be interleaved).  Default is false.
+     *  @param v true to sort text tokens by position
+     */
+    public void setSortByPosition(boolean v) {
+        sortByPosition = v;
+    }
+
+    /** Returns true if text tokens are sorted by position; see {@link #setSortByPosition(boolean)}. */
+    public boolean getSortByPosition() {
+        return sortByPosition;
+    }
+
 }
Index: tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(revision 1202206)
+++ tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java	(working copy)
@@ -55,14 +55,14 @@
     public static void process(
             PDDocument document, ContentHandler handler, Metadata metadata,
             boolean extractAnnotationText, boolean enableAutoSpace,
-            boolean suppressDuplicateOverlappingText)
+            boolean suppressDuplicateOverlappingText, boolean sortByPosition)
             throws SAXException, TikaException {
         try {
             // Extract text using a dummy Writer as we override the
             // key methods to output to the given content handler.
             new PDF2XHTML(handler, metadata,
                           extractAnnotationText, enableAutoSpace,
-                          suppressDuplicateOverlappingText).writeText(document, new Writer() {
+                          suppressDuplicateOverlappingText, sortByPosition).writeText(document, new Writer() {
                 @Override
                 public void write(char[] cbuf, int off, int len) {
                 }
@@ -87,12 +87,12 @@
 
     private PDF2XHTML(ContentHandler handler, Metadata metadata,
                       boolean extractAnnotationText, boolean enableAutoSpace,
-                      boolean suppressDuplicateOverlappingText)
+                      boolean suppressDuplicateOverlappingText, boolean sortByPosition)
             throws IOException {
         this.handler = new XHTMLContentHandler(handler, metadata);
         this.extractAnnotationText = extractAnnotationText;
         setForceParsing(true);
-        setSortByPosition(false);
+        setSortByPosition(sortByPosition);
         if (enableAutoSpace) {
             setWordSeparator(" ");
         } else {
Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 1202206)
+++ CHANGES.txt	(working copy)
@@ -10,7 +10,10 @@
  * PDF: Allow controlling whether overlapping duplicated text should
    be removed.  Disabling this (the default) can give big
    speedups to text extraction and may workaround cases where
-   non-duplicated characters were incorrectly removed.  (TIKA-767)
+   non-duplicated characters were incorrectly removed (TIKA-767).
+   Allow controlling whether text tokens should be sorted by their x/y
+   position before extracting text (TIKA-612); this is necessary for
+   certain PDFs but interleaves multi-column text, so it is off by default.
 
  * RTF: Fixed case where a font change would result in processing
    bytes in the wrong font's charset, producing bogus text output