Index: tika-core/src/main/resources/org/apache/tika/language/tika.language.properties
===================================================================
--- tika-core/src/main/resources/org/apache/tika/language/tika.language.properties	(revision 0)
+++ tika-core/src/main/resources/org/apache/tika/language/tika.language.properties	(revision 0)
@@ -0,0 +1,47 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+# This is a Tika LanguageIdentifier properties file.
+# Its name is org/apache/tika/language/tika.language.properties
+# You can override it by placing a copy on the classpath in a file called
+# org/apache/tika/language/tika.language.override.properties
+
+# List of languages for which there are <language>.ngp profiles
+# If there exists an ISO 639-1 2-letter code it should be used
+# If not, you can choose an ISO 639-2 3-letter code
+# See http://www.loc.gov/standards/iso639-2/php/code_list.php
+languages=da,de,et,el,en,es,fi,fr,hu,is,it,nl,no,pl,pt,ru,sv,th
+
+# List of language names in English
+name.da=Danish
+name.de=German
+name.et=Estonian
+name.el=Greek
+name.en=English
+name.es=Spanish
+name.fi=Finnish
+name.fr=French
+name.hu=Hungarian
+name.is=Icelandic
+name.it=Italian
+name.nl=Dutch
+name.no=Norwegian
+name.pl=Polish
+name.pt=Portuguese
+name.ru=Russian
+name.sv=Swedish
+name.th=Thai
\ No newline at end of file
Index: tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java
===================================================================
--- tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java	(revision 987766)
+++ tika-core/src/main/java/org/apache/tika/language/LanguageIdentifier.java	(working copy)
@@ -17,10 +17,13 @@
 package org.apache.tika.language;
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Properties;
+import java.util.logging.*;
 
 /**
  * Identifier of the language that best matches a given content profile.
@@ -30,26 +33,33 @@
  * @since Apache Tika 0.5
  * @see <a href="http://www.iccs.inf.ed.ac.uk/~pkoehn/publications/europarl/">
  *      Europarl: A Parallel Corpus for Statistical Machine Translation</a>
- * @see <a href="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">
+ * @see <a href="http://www.loc.gov/standards/iso639-2/php/code_list.php">
  *      ISO 639 Language Codes</a>
  */
 public class LanguageIdentifier {
-
+    private static Logger logger = Logger.getLogger("org.apache.tika.language.LanguageIdentifier");
+    
     /**
      * The available language profiles.
      */
     private static final Map<String, LanguageProfile> PROFILES =
         new HashMap<String, LanguageProfile>();
+    private static final String PROFILE_SUFFIX = ".ngp";
+    private static final String PROFILE_ENCODING = "UTF-8";
+
+    private static final String PROPERTIES_OVERRIDE_FILE = "tika.language.override.properties";
+    private static final String PROPERTIES_FILE = "tika.language.properties";
+    private static final String LANGUAGES_KEY = "languages";
 
-    private static void addProfile(String language) {
+    private static void addProfile(String language) throws Exception {
         try {
             LanguageProfile profile = new LanguageProfile();
 
             InputStream stream =
-                LanguageIdentifier.class.getResourceAsStream(language + ".ngp");
+                LanguageIdentifier.class.getResourceAsStream(language + PROFILE_SUFFIX);
             try {
                 BufferedReader reader =
-                    new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+                    new BufferedReader(new InputStreamReader(stream, PROFILE_ENCODING));
                 String line = reader.readLine();
                 while (line != null) {
                     if (line.length() > 0 && !line.startsWith("#")) {
@@ -66,36 +76,16 @@
 
             PROFILES.put(language, profile);
         } catch (Throwable t) {
-            // Failed to load this language profile. Log the problem?
+            throw new Exception("Failed trying to load language profile for language \""+language+"\". Error: "+t.getMessage());
         }
     }
 
-    static {
-        addProfile("da"); // Danish
-        addProfile("de"); // German
-        addProfile("et"); // Estonian
-        addProfile("el"); // Greek
-        addProfile("en"); // English
-        addProfile("es"); // Spanish
-        addProfile("fi"); // Finnish
-        addProfile("fr"); // French
-        addProfile("hu"); // Hungarian
-        addProfile("is"); // Icelandic
-        addProfile("it"); // Italian
-        addProfile("nl"); // Dutch
-        addProfile("no"); // Norwegian
-        addProfile("pl"); // Polish
-        addProfile("pt"); // Portuguese
-        addProfile("ru"); // Russian
-        addProfile("sv"); // Swedish
-        addProfile("th"); // Thai
-    }
-
     private final String language;
 
     private final double distance;
 
     public LanguageIdentifier(LanguageProfile profile) {
+        initProfiles(false);
         String minLanguage = "unknown";
         double minDistance = 1.0;
         for (Map.Entry<String, LanguageProfile> entry : PROFILES.entrySet()) {
@@ -122,6 +112,73 @@
         return distance < 0.022;
     }
 
+    /**
+     * Builds the language profiles if none are currently loaded.
+     * The list of languages is read from a property file named "tika.language.properties".
+     * If a file called "tika.language.override.properties" is found, it is used instead.
+     * The property file contains a key "languages" whose value is a comma-separated list
+     * of language codes; blank entries are ignored. Failures to find or load the property
+     * file or an individual profile are logged rather than thrown.
+     * This method is called in the constructor.
+     * @param force Forces reload of property file and language profiles
+     */
+    public void initProfiles(boolean force) {
+        if (force) {
+            PROFILES.clear();
+        }
+        if (!PROFILES.isEmpty()) {
+            return; // profiles already loaded
+        }
+
+        // Prefer the override file; fall back to the default property file.
+        String source = PROPERTIES_OVERRIDE_FILE;
+        InputStream stream = LanguageIdentifier.class.getResourceAsStream(source);
+        if (stream == null) {
+            source = PROPERTIES_FILE;
+            stream = LanguageIdentifier.class.getResourceAsStream(source);
+        }
+        if (stream == null) {
+            logger.warning("ERROR: Could not find property files, no languages loaded. Check your classpath");
+            return;
+        }
+        logger.info("Loading tika config from " + source);
+
+        Properties props = new Properties();
+        try {
+            props.load(stream);
+        } catch (IOException e) {
+            logger.warning("ERROR: Could not load property file, no languages loaded. Error: " + e.getMessage());
+            return;
+        } finally {
+            // Always release the resource stream once the properties are read.
+            try {
+                stream.close();
+            } catch (IOException ignored) {
+                // Nothing useful can be done if closing fails.
+            }
+        }
+
+        // getProperty returns null when the key is absent; guard against it
+        // instead of failing with a NullPointerException on split().
+        String languageList = props.getProperty(LANGUAGES_KEY);
+        if (languageList == null) {
+            logger.warning("ERROR: Property \"" + LANGUAGES_KEY + "\" not found, no languages loaded.");
+            return;
+        }
+        for (String language : languageList.split(",")) {
+            language = language.trim();
+            if (language.length() == 0) {
+                continue; // tolerate stray commas or an empty value
+            }
+            String name = props.getProperty("name." + language, "Unknown");
+            try {
+                addProfile(language);
+                logger.info("Added language profile for language code " + language + " (" + name + ")");
+            } catch (Exception e) {
+                logger.warning(e.getMessage());
+            }
+        }
+    }
+
     @Override
     public String toString() {
         return language + " (" + distance + ")";
