diff --git i/conf/nutch-default.xml w/conf/nutch-default.xml
index 67326ee..fd84d49 100644
--- i/conf/nutch-default.xml
+++ w/conf/nutch-default.xml
@@ -1504,6 +1504,15 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter-->
   </description>
 </property>
 
+<property>
+  <name>lang.strip.alpha2</name>
+  <value>false</value>
+  <description>If set to true, and if the detected/identified language code
+  carries a two-letter country/region suffix (for instance, 'en-US'), then strip
+  that suffix and keep only the language part (here, drop 'US' and keep 'en').
+  </description>
+</property>
+
 <!-- index-static plugin properties -->
 
 <property>
diff --git i/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java w/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
index cb8f8c1..ecab9a1 100644
--- i/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
+++ w/src/plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
@@ -119,17 +119,22 @@ public class HTMLLanguageParser implements HtmlParseFilter {
 
   /** Try to find the document's language from page headers and metadata */
   private String detectLanguage(Parse page, DocumentFragment doc) {
+    boolean stripAlpha2 = conf.getBoolean("lang.strip.alpha2", false);
+
     String lang = getLanguageFromMetadata(page.getData().getParseMeta());
     if (lang == null) {
-      LanguageParser parser = new LanguageParser(doc);
+      LanguageParser parser = new LanguageParser(doc, stripAlpha2);
       lang = parser.getLanguage();
     }
 
-    if (lang != null) {
-      return lang;
+    if (lang == null) {
+      lang = page.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
     }
 
-    lang = page.getData().getContentMeta().get(Response.CONTENT_LANGUAGE);
+    // When lang.strip.alpha2 is enabled, reduce e.g. 'en-US' to 'en'.
+    if (stripAlpha2) {
+      lang = LanguageParser.parseLanguage(lang);
+    }
 
     return lang;
   }
@@ -192,8 +197,8 @@ public class HTMLLanguageParser implements HtmlParseFilter {
     private String httpEquiv = null;
     private String language = null;
 
-    LanguageParser(Node node) {
-      parse(node);
+    LanguageParser(Node node, boolean stripAlpha2) {
+      parse(node, stripAlpha2);
       if (htmlAttribute != null) {
         language = htmlAttribute;
       } else if (dublinCore != null) {
@@ -207,7 +212,7 @@ public class HTMLLanguageParser implements HtmlParseFilter {
       return language;
     }
 
-    void parse(Node node) {
+    void parse(Node node, boolean stripAlpha2) {
 
       NodeWalker walker = new NodeWalker(node);
       while (walker.hasNext()) {
@@ -220,8 +225,11 @@ public class HTMLLanguageParser implements HtmlParseFilter {
 
           // Check for the lang HTML attribute
           if (htmlAttribute == null) {
-            htmlAttribute = parseLanguage(((Element) currentNode)
-                .getAttribute("lang"));
+            // getAttribute() yields "" when 'lang' is absent; keep looking then.
+            String langAttr = ((Element) currentNode).getAttribute("lang");
+            if (!langAttr.isEmpty()) {
+              htmlAttribute = stripAlpha2 ? parseLanguage(langAttr) : langAttr;
+            }
           }
 
           // Check for Meta
@@ -236,7 +244,10 @@ public class HTMLLanguageParser implements HtmlParseFilter {
                   if ("dc.language".equalsIgnoreCase(attrnode.getNodeValue())) {
                     Node valueattr = attrs.getNamedItem("content");
                     if (valueattr != null) {
-                      dublinCore = parseLanguage(valueattr.getNodeValue());
+                      // Keep the raw value unless stripping is enabled.
+                      String dcValue = valueattr.getNodeValue();
+                      dublinCore = stripAlpha2 ? parseLanguage(dcValue)
+                          : dcValue;
                     }
                   }
                 }
@@ -252,7 +263,10 @@ public class HTMLLanguageParser implements HtmlParseFilter {
                       .toLowerCase())) {
                     Node valueattr = attrs.getNamedItem("content");
                     if (valueattr != null) {
-                      httpEquiv = parseLanguage(valueattr.getNodeValue());
+                      // Keep the raw value unless stripping is enabled.
+                      String equivValue = valueattr.getNodeValue();
+                      httpEquiv = stripAlpha2 ? parseLanguage(equivValue)
+                          : equivValue;
                     }
                   }
                 }
