Index: tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java	(revision 1206200)
+++ tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java	(working copy)
@@ -140,6 +140,33 @@
         // With an incorrect filename of a different container type, data trumps filename
         assertTypeByNameAndData("testEXCEL.xlsx", "notOldExcel.xls", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
     }
+    
+    public void testDetectProtectedOLE2() throws Exception { // each *_protected_* OLE2 fixture must resolve to its specific legacy Office type
+        assertTypeByData("testEXCEL_protected_passtika.xls", "application/vnd.ms-excel");
+        assertTypeByData("testWORD_protected_passtika.doc", "application/msword");
+        assertTypeByData("testPPT_protected_passtika.ppt", "application/vnd.ms-powerpoint");
+    }
+    
+    /**
+     * Encrypted Microsoft Office OOXML files have OLE magic, so detection
+     * is expected to report only the generic OOXML type for them.
+     */
+    public void testDetectProtectedOOXML() throws Exception {
+        final String genericOoxml = "application/x-tika-ooxml";
+
+        // Detection from the data alone
+        assertTypeByData("testEXCEL_protected_passtika.xlsx", genericOoxml);
+        assertTypeByData("testWORD_protected_passtika.docx", genericOoxml);
+        assertTypeByData("testPPT_protected_passtika.pptx", genericOoxml);
+
+        // In theory the name-based detection is a specialization of the
+        // container-based detection, yet because the mime types registry
+        // always prefers magic-based over name-based matches, supplying the
+        // name does not improve the detection in this case.
+        assertTypeByNameAndData("testEXCEL_protected_passtika.xlsx", genericOoxml);
+        assertTypeByNameAndData("testWORD_protected_passtika.docx", genericOoxml);
+        assertTypeByNameAndData("testPPT_protected_passtika.pptx", genericOoxml);
+    }
 
     /**
      * Check that temporary files created by Tika are removed after
Index: tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java	(revision 1206200)
+++ tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java	(working copy)
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser.microsoft.ooxml;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
 import java.util.Locale;
@@ -26,6 +27,7 @@
 import javax.xml.transform.stream.StreamResult;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMetadataKeys;
@@ -34,6 +36,7 @@
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 public class OOXMLParserTest extends TikaTest {
 
@@ -267,6 +270,77 @@
             input.close();
         }
     }
+    
+    public void testProtectedDOCXNonStandardPasswordPureOOXMLParser() throws Exception {
+        testProtectedFileNonStandardPassword("testWORD_protected_passtika.docx"); // helper asserts PROTECTED is "true" and that no text was produced
+    }
+    
+    /**
+     * Parses protectedFile.xlsx from a plain {@link InputStream} and expects
+     * the spreadsheet type, the protected flag and the extracted text.
+     */
+    public void testProtectedXLSXStandardPasswordPureOOXMLParserStandardStream() throws Exception {
+        Metadata meta = new Metadata();
+        ContentHandler body = new BodyContentHandler();
+        InputStream in = OOXMLParserTest.class.getResourceAsStream(
+                "/test-documents/protectedFile.xlsx");
+        try {
+            new OOXMLParser().parse(in, body, meta, new ParseContext());
+
+            // The content type must be that of a regular spreadsheet ...
+            assertEquals(
+                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+                    meta.get(Metadata.CONTENT_TYPE));
+            // ... while the document is still flagged as protected ...
+            assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));
+            // ... and its text has been extracted
+            assertTrue(body.toString().contains("Office"));
+        } finally {
+            in.close();
+        }
+    }
+    
+    /**
+     * Parses protectedFile.xlsx through a {@link TikaInputStream} and expects
+     * the spreadsheet type, the protected flag and the extracted text.
+     */
+    public void testProtectedXLSXStandardPasswordPureOOXMLParserTikaStream() throws Exception {
+        InputStream wrapped = TikaInputStream.get(
+                OOXMLParserTest.class.getResourceAsStream("/test-documents/protectedFile.xlsx"));
+        try {
+            Metadata meta = new Metadata();
+            ContentHandler sink = new BodyContentHandler();
+            Parser ooxml = new OOXMLParser();
+            ooxml.parse(wrapped, sink, meta, new ParseContext());
+
+            String expectedType =
+                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
+            assertEquals(expectedType, meta.get(Metadata.CONTENT_TYPE));
+            assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));
+
+            String extracted = sink.toString();
+            assertTrue(extracted.contains("Office"));
+        } finally {
+            wrapped.close();
+        }
+    }
+    
+    /** Asserts that the named fixture is flagged as protected and yields no text. */
+    private void testProtectedFileNonStandardPassword(String name) throws IOException, SAXException, TikaException {
+        StringWriter text = new StringWriter();
+        Metadata meta = new Metadata();
+        InputStream doc = TikaInputStream.get(
+                OOXMLParser.class.getResourceAsStream("/test-documents/" + name));
+        try {
+            new OOXMLParser().parse(doc, new BodyContentHandler(text), meta, new ParseContext());
+            // The document must be marked as protected ...
+            assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));
+            // ... and nothing may have been written to the body
+            assertEquals(0, text.toString().length());
+        } finally {
+            doc.close();
+        }
+    }
 
     private static class XMLResult {
         public final String xml;
Index: tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
===================================================================
--- tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java	(revision 0)
+++ tika-parsers/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java	(revision 0)
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.txt;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class CharsetDetectorTest {
+  /** With the input filter enabled, the highest-confidence match for resume.html must be UTF-8. */
+  @Test
+  public void testTagDropper() throws IOException {
+    InputStream in = CharsetDetectorTest.class.getResourceAsStream( "/test-documents/resume.html" );
+    assertTrue( "Missing test resource /test-documents/resume.html", in != null ); // fail clearly, not via NPE in finally
+    try {
+      CharsetDetector detector = new CharsetDetector();
+      detector.enableInputFilter(true);
+      detector.setText(in);
+      CharsetMatch best = null;
+      for ( CharsetMatch candidate : detector.detectAll() ) {
+        if ( best == null || best.getConfidence() < candidate.getConfidence() ) {
+          best = candidate;
+        }
+      }
+      assertTrue( "No charset candidates returned", best != null );
+      assertEquals( "UTF-8", best.getName() );
+    } finally {
+      in.close();
+    }
+  }
+}
Index: tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java	(revision 1206200)
+++ tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java	(working copy)
@@ -18,7 +18,11 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.security.GeneralSecurityException;
+import java.util.HashSet;
 import java.util.Locale;
+import java.util.Set;
 
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
@@ -26,16 +30,27 @@
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.CloseShieldInputStream;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.EndDocumentShieldingContentHandler;
 import org.apache.xmlbeans.XmlException;
 import org.xml.sax.ContentHandler;
@@ -58,8 +73,54 @@
             OOXMLExtractor extractor;
 
             POIXMLTextExtractor poiExtractor;
+            POIFSFileSystem pfs = null;
+            
             TikaInputStream tis = TikaInputStream.cast(stream);
-            if (tis != null && tis.getOpenContainer() instanceof OPCPackage) {
+            
+            {
+                InputStream streamForHeaderChecking = null;
+                if (tis != null) {
+                    streamForHeaderChecking = tis;
+                } else {
+                    stream = new PushbackInputStream(stream, 8);
+                    streamForHeaderChecking = stream;
+                }
+                if (POIFSFileSystem.hasPOIFSHeader(streamForHeaderChecking)) {
+                    pfs = new POIFSFileSystem(streamForHeaderChecking);
+                    if (isEncryptedOOXML(pfs)) {
+                        DirectoryNode root = pfs.getRoot();
+                        EncryptionInfo info = new EncryptionInfo(root);
+                        Decryptor d = Decryptor.getInstance(info);
+                        metadata.add(TikaMetadataKeys.PROTECTED, "true");
+
+                        try {
+                            if (!d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
+                                return; // we've already marked this doc as
+                                // protected
+                            }
+
+                            OOXMLParser parser = new OOXMLParser();
+                            parser.parse(d.getDataStream(root), baseHandler,
+                                            metadata, context);
+                        } catch (GeneralSecurityException ex) {
+                            throw new EncryptedDocumentException(ex);
+                        }
+                        return;
+                    }
+                }
+            }
+            
+            if (pfs != null) {
+                /*
+                 * this means that the file is an OLE2 file, but it's not
+                 * an encrypted OOXML. There are cases where ExtractorFactory
+                 * could return a normal POITextExtractor which is not a
+                 * POIXMLTextExtractor. We don't support them here, hence
+                 * we simply disregard this case. It shouldn't have ended
+                 * up here. 
+                 */
+                throw new TikaException("Error creating OOXML extractor. Not an OOXML file");
+            } else if (tis != null && tis.getOpenContainer() instanceof OPCPackage) {
                 poiExtractor = ExtractorFactory.createExtractor(
                         (OPCPackage) tis.getOpenContainer());
             } else if (tis != null && tis.hasFile()) {
@@ -115,8 +176,42 @@
             throw new TikaException("Error creating OOXML extractor", e);
         } catch (XmlException e) {
             throw new TikaException("Error creating OOXML extractor", e);
+        }
+    }
 
+    private static boolean isEncryptedOOXML(POIFSFileSystem pfs) { // true only if the root holds all three entries tested below
+        Set<String> names = new HashSet<String>(); // names of the entries directly under the root
+        for (Entry entry : pfs.getRoot()) {
+            names.add(entry.getName());
         }
+        return names.contains("EncryptedPackage") && 
+                names.contains("EncryptionInfo") &&
+                names.contains("\u0006DataSpaces");
     }
 
+//    private static boolean isProtectedOOXML(InputStream tis) throws IOException {
+//        tis.mark(8);
+//        try {
+//        if (POIFSFileSystem.hasPOIFSHeader(tis)) {
+//            NPOIFSFileSystem pfs = null;
+//            if (tis.hasFile()) {
+//                pfs = new NPOIFSFileSystem(tis.getFile());
+//            } else {
+//                pfs = new NPOIFSFileSystem(tis);
+//            }
+//            tis.setOpenContainer(pfs);
+//            Set<String> names = new HashSet<String>();
+//            for (Entry entry : pfs.getRoot()) {
+//                names.add(entry.getName());
+//            }
+//            return names.contains("EncryptedPackage") && 
+//                    names.contains("EncryptionInfo") &&
+//                    names.contains("\u0006DataSpaces");
+//        }
+//        return false;
+//        } finally {
+//            tis.reset();
+//        }
+//    }
+
 }
Index: tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java	(revision 1206200)
+++ tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java	(working copy)
@@ -47,6 +47,9 @@
 
     /** The OLE base file format */
     public static final MediaType OLE = application("x-tika-msoffice");
+    
+    /** The OOXML base file format */
+    public static final MediaType OOXML = application("x-tika-ooxml");
 
     /** Microsoft Excel */
     public static final MediaType XLS = application("vnd.ms-excel");
@@ -121,6 +124,15 @@
         if (names != null) {
             if (names.contains("Workbook")) {
                 return XLS;
+            } else if (names.contains("EncryptedPackage") && 
+                    names.contains("EncryptionInfo") &&
+                    names.contains("\u0006DataSpaces")) {
+                // this particular combination of names has been found in
+                // protected OOXML files generated by Office 2007. We can't
+                // distinguish Word from Excel here, but we should at least
+                // abstain from lying, in the hope that name-based detection
+                // will have more luck
+                return OOXML;
             } else if (names.contains("EncryptedPackage")) {
                 return OLE;
             } else if (names.contains("WordDocument")) {
