Index: pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
===================================================================
--- pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java	(Revision 1436253)
+++ pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java	(Arbeitskopie)
@@ -861,6 +861,19 @@
     }
 
     /**
+     * Write a Java string to the output stream. The default implementation ignores the <code>textPositions</code>
+     * and just calls {@link #writeString(String)}.
+     *
+     * @param text The text to write to the stream.
+     * @param textPositions The TextPositions belonging to the text (unused by the default implementation).
+     * @throws IOException If there is an error when writing the text.
+     */
+    protected void writeString(String text, List<TextPosition> textPositions) throws IOException
+    {
+        writeString(text);
+    }
+
+    /**
      * Write a Java string to the output stream.
      *
      * @param text The text to write to the stream.
@@ -1887,12 +1900,13 @@
      * @param isRtlDominant determines if rtl or ltl is dominant
      * @throws IOException if something went wrong
      */
-    private void writeLine(List<String> line, boolean isRtlDominant)throws IOException
+    private void writeLine(List<WordWithTextPositions> line, boolean isRtlDominant) throws IOException
     {
         int numberOfStrings = line.size();
         for(int i=0; i<numberOfStrings; i++)
         {
-            writeString(line.get(i));
+            WordWithTextPositions word = line.get(i);
+            writeString(word.getText(), word.getTextPositions());
             if (i < numberOfStrings-1)
             {
                 writeWordSeparator();
@@ -1907,55 +1921,68 @@
      * @param hasRtl determines if lines contains rtl formatted text(parts)
      * @return a list of strings, one string for every word
      */
-    private List<String> normalize(List<TextPosition> line, boolean isRtlDominant, boolean hasRtl)
+    private List<WordWithTextPositions> normalize(List<TextPosition> line, boolean isRtlDominant, boolean hasRtl)
     {
-        LinkedList<String> normalized = new LinkedList<String>();
+        LinkedList<WordWithTextPositions> normalized = new LinkedList<WordWithTextPositions>();
         StringBuilder lineBuilder = new StringBuilder();
+        List<TextPosition> wordPositions = new ArrayList<TextPosition>();
+
         // concatenate the pieces of text in opposite order if RTL is dominant
         if (isRtlDominant)
         {
             int numberOfPositions = line.size();
             for(int i = numberOfPositions-1;i>=0;i--)
             {
-                TextPosition text = line.get(i);
-                if (text instanceof WordSeparator) 
-                {
-                    normalized.add(normalize.normalizePres(lineBuilder.toString()));
-                    lineBuilder = new StringBuilder();
-                }
-                else 
-                {
-                    lineBuilder.append(text.getCharacter());
-                }
+                lineBuilder = normalizeAdd(normalized, lineBuilder, wordPositions, line.get(i));
             }
-            if (lineBuilder.length() > 0) 
-            {
-                normalized.add(normalize.normalizePres(lineBuilder.toString()));
-            }
         }
         else
         {
             for(TextPosition text : line)
             {
-                if (text instanceof WordSeparator) 
-                {
-                    normalized.add(normalize.normalizePres(lineBuilder.toString()));
-                    lineBuilder = new StringBuilder();
-                }
-                else 
-                {
-                    lineBuilder.append(text.getCharacter());
-                }
+                lineBuilder = normalizeAdd(normalized, lineBuilder, wordPositions, text);
             }
-            if (lineBuilder.length() > 0) 
-            {
-                normalized.add(normalize.normalizePres(lineBuilder.toString()));
-            }
         }
+
+        if (lineBuilder.length() > 0) 
+        {
+            normalized.add(createWord(lineBuilder.toString(), wordPositions));
+        }
+
         return normalized;
     }
 
     /**
+     * Creates one {@link WordWithTextPositions} entry for {@link #normalize(List, boolean, boolean)}. The word is run
+     * through normalizePres; <code>wordPositions</code> is copied because the caller clears and reuses that list.
+     */
+    private WordWithTextPositions createWord(String word, List<TextPosition> wordPositions)
+    {
+        return new WordWithTextPositions(normalize.normalizePres(word), new ArrayList<TextPosition>(wordPositions));
+    }
+
+    /**
+     * Used within {@link #normalize(List, boolean, boolean)} to handle a single {@link TextPosition}.
+     * A {@link WordSeparator} ends the current word: the collected text is added to <code>normalized</code>,
+     * <code>wordPositions</code> is cleared and a new, empty builder is returned. Any other position has its
+     * character appended to <code>lineBuilder</code> and is itself added to <code>wordPositions</code>.
+     * @return The StringBuilder that must be passed as <code>lineBuilder</code> on the next call.
+     */
+    private StringBuilder normalizeAdd(LinkedList<WordWithTextPositions> normalized,
+            StringBuilder lineBuilder, List<TextPosition> wordPositions, TextPosition text)
+    {
+        if (!(text instanceof WordSeparator))
+        {
+            lineBuilder.append(text.getCharacter());
+            wordPositions.add(text);
+            return lineBuilder;
+        }
+        normalized.add(createWord(lineBuilder.toString(), wordPositions));
+        wordPositions.clear();
+        return new StringBuilder();
+    }
+
+    /**
      * internal marker class.  Used as a place holder in
      * a line of TextPositions.
      * @author ME21969
@@ -1973,7 +2000,34 @@
         {
             return separator;
         }
+    }
 
+    /**
+     * Internal class that pairs a word with the list of {@link TextPosition}s it was built from; the
+     * list is stored as given, not copied. Note that the number of entries in that list may differ
+     * from the number of characters in the string due to normalization.
+     *
+     * @author Axel Dörfler
+     */
+    private static final class WordWithTextPositions
+    {
+        private final String text;
+        private final List<TextPosition> textPositions;
+
+        public WordWithTextPositions(String word, List<TextPosition> positions)
+        {
+            this.text = word;
+            this.textPositions = positions;
+        }
+
+        public String getText()
+        {
+            return text;
+        }
+
+        public List<TextPosition> getTextPositions()
+        {
+            return textPositions;
+        }
     }
-
 }
