From 21da2d5f9e76e83a979cb23f68c2762ce2f27e0f Mon Sep 17 00:00:00 2001
From: "Kosmynin, Arkadi (CASS, Marsfield)" <Arkadi.Kosmynin@csiro.au>
Date: Thu, 30 Jul 2015 16:28:40 +1000
Subject: [PATCH] Fix for NUTCH-2071 contributed by Arkadi Kosmynin

---
 src/java/org/apache/nutch/parse/ParseUtil.java | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/java/org/apache/nutch/parse/ParseUtil.java b/src/java/org/apache/nutch/parse/ParseUtil.java
index 39024dc..d23c06a 100644
--- a/src/java/org/apache/nutch/parse/ParseUtil.java
+++ b/src/java/org/apache/nutch/parse/ParseUtil.java
@@ -91,10 +91,17 @@ public class ParseUtil {
         LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i]
             + "]");
       }
+    try
+    {
       if (maxParseTime != -1)
         parseResult = runParser(parsers[i], content);
       else
         parseResult = parsers[i].getParse(content);
+    } catch( Throwable e )
+    {
+        LOG.warn( "Parsing " + content.getUrl() + " with " + parsers[i].getClass().getName() + " failed: " + e.getMessage() ) ;
+        parseResult = null ;
+    }
 
       if (parseResult != null && !parseResult.isEmpty())
         return parseResult;
@@ -146,10 +153,19 @@ public class ParseUtil {
     }
 
     ParseResult parseResult = null;
-    if (maxParseTime != -1)
-      parseResult = runParser(p, content);
-    else
-      parseResult = p.getParse(content);
+    try
+    {
+      if (maxParseTime != -1)
+         parseResult = runParser(p, content);
+      else
+         parseResult = p.getParse(content);
+    } catch( Throwable e )
+    {
+       LOG.warn( "Parsing " + content.getUrl() + " with " + p.getClass().getName() + " failed: " + e.getMessage() ) ;
+       parseResult = null ; // a failed parse is handled like "no result" by the null/empty check that follows
+    }
+ 
+      
     if (parseResult != null && !parseResult.isEmpty()) {
       return parseResult;
     } else {
-- 
1.9.5.msysgit.0

