Index: src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
===================================================================
--- src/test/java/org/apache/tika/parser/txt/TXTParserTest.java	(revision 0)
+++ src/test/java/org/apache/tika/parser/txt/TXTParserTest.java	(revision 0)
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.txt;
+
+import java.io.ByteArrayInputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+
+import junit.framework.TestCase;
+
+public class TXTParserTest extends TestCase {
+
+    private Parser parser = new TXTParser();
+
+    /** English ASCII text must yield text/plain and the language "en". */
+    public void testEnglishText() throws Exception {
+        String sample =
+            "Hello, World! This is simple UTF-8 text content written"
+            + " in English to test autodetection of both the character"
+            + " encoding and the language of the input stream.";
+        byte[] bytes = sample.getBytes("UTF-8");
+        Metadata meta = new Metadata();
+        String parsed = parser.parse(new ByteArrayInputStream(bytes), meta);
+
+        assertEquals("text/plain", meta.get(Metadata.CONTENT_TYPE));
+        assertEquals("en", meta.get(Metadata.CONTENT_LANGUAGE));
+        assertEquals("en", meta.get(Metadata.LANGUAGE));
+        // TODO: ICU reports the content encoding as ISO-8859-1, even though
+        // it could just as well be ASCII or UTF-8, so for now we don't
+        // assert on the Metadata.CONTENT_ENCODING field
+
+        assertTrue(parsed.contains("Hello"));
+        assertTrue(parsed.contains("World"));
+        assertTrue(parsed.contains("autodetection"));
+        assertTrue(parsed.contains("stream"));
+    }
+
+    /** Non-ASCII UTF-8 input must be reported as UTF-8 and survive parsing. */
+    public void testUTF8Text() throws Exception {
+        String sample =
+            "I\u00F1t\u00EBrn\u00E2ti\u00F4n\u00E0liz\u00E6ti\u00F8n";
+        byte[] bytes = sample.getBytes("UTF-8");
+        Metadata meta = new Metadata();
+        String parsed = parser.parse(new ByteArrayInputStream(bytes), meta);
+
+        assertEquals("text/plain", meta.get(Metadata.CONTENT_TYPE));
+        assertEquals("UTF-8", meta.get(Metadata.CONTENT_ENCODING));
+        assertTrue(parsed.contains(sample));
+    }
+
+    /** Zero-length input must still be text/plain and parse to "". */
+    public void testEmptyText() throws Exception {
+        Metadata meta = new Metadata();
+        byte[] nothing = new byte[0];
+        String parsed = parser.parse(new ByteArrayInputStream(nothing), meta);
+        assertEquals("text/plain", meta.get(Metadata.CONTENT_TYPE));
+        assertEquals("", parsed);
+    }
+}

Property changes on: src\test\java\org\apache\tika\parser\txt\TXTParserTest.java
___________________________________________________________________
Name: svn:eol-style
   + native

Index: src/main/java/org/apache/tika/parser/txt/TXTParser.java
===================================================================
--- src/main/java/org/apache/tika/parser/txt/TXTParser.java	(revision 582689)
+++ src/main/java/org/apache/tika/parser/txt/TXTParser.java	(working copy)
@@ -16,15 +16,17 @@
  */
 package org.apache.tika.parser.txt;
 
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.Parser;
 
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
 /**
  * Text parser
  */
@@ -32,16 +34,35 @@
 
     public String parse(InputStream stream, Metadata metadata)
             throws IOException, TikaException {
-        StringBuilder sb = new StringBuilder();
-        BufferedReader br = new BufferedReader(new InputStreamReader(stream));
+        CharsetDetector detector = new CharsetDetector();
 
-        int charAsInt;
+        // Use the declared character encoding, if available
+        String encoding = metadata.get(Metadata.CONTENT_ENCODING);
+        if (encoding != null) {
+            detector.setDeclaredEncoding(encoding);
+        }
 
-        while ((charAsInt = br.read()) != -1) {
-            sb.append((char) charAsInt);
+        // CharsetDetector expects a stream to support marks
+        if (!stream.markSupported()) {
+            stream = new BufferedInputStream(stream);
         }
 
-        return sb.toString();
+        detector.setText(stream);
+
+        CharsetMatch match = detector.detect();
+        if (match == null) {
+            throw new TikaException("Unable to detect character encoding");
+        }
+        // Publish the detection result; the language is set only when known
+        metadata.set(Metadata.CONTENT_TYPE, "text/plain");
+        metadata.set(Metadata.CONTENT_ENCODING, match.getName());
+        String language = match.getLanguage();
+        if (language != null) {
+            metadata.set(Metadata.CONTENT_LANGUAGE, language);
+            metadata.set(Metadata.LANGUAGE, language);
+        }
+        // The text content is whatever the selected match decodes the input to
+        return match.getString();
     }
 
 }
Index: pom.xml
===================================================================
--- pom.xml	(revision 582999)
+++ pom.xml	(working copy)
@@ -189,6 +189,11 @@
       <version>4aug2000r7-dev</version>
     </dependency>
     <dependency>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j</artifactId>
+      <version>3.4.4</version>
+    </dependency>
+    <dependency>
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
       <version>1.2.14</version>
