From 7168363ef2078b6f1cfcde4c84417ab4b4e4b203 Mon Sep 17 00:00:00 2001
From: Asitang Mishra <asitang@gmail.com>
Date: Thu, 26 Mar 2015 13:08:48 -0700
Subject: [PATCH] NUTCH -1941 itr 4

---
 .../apache/nutch/protocol/http/api/HttpBase.java   | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
index 846be0e..98b8cf9 100644
--- a/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
+++ b/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
@@ -17,11 +17,15 @@
 package org.apache.nutch.protocol.http.api;
 
 // JDK imports
+import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.Reader;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
+import java.util.concurrent.ThreadLocalRandom;
 
 // Logging imports
 import org.slf4j.Logger;
@@ -56,6 +60,8 @@ public abstract class HttpBase implements Protocol {
 
   private HttpRobotRulesParser robots = null;
 
+  private ArrayList<String> useragentnames=null; // NUTCH-1941
+
   /** The proxy hostname. */
   protected String proxyHost = null;
 
@@ -143,6 +149,43 @@ public abstract class HttpBase implements Protocol {
     this.enableIfModifiedsinceHeader = conf.getBoolean("http.enable.if.modified.since.header", true);
     this.robots.setConf(conf);
 
+    // NUTCH-1941: optionally load a list of user agent names to rotate through.
+    // Reset first so a disabled or failed load falls back to the default agent.
+    useragentnames = null;
+    if (conf.getBoolean("agent.rotate", false)) {
+      String agentsFile = conf.get("agent.rotate.file", "agents.txt");
+      // getConfResourceAsReader returns null when the resource cannot be opened.
+      Reader reader = conf.getConfResourceAsReader(agentsFile);
+      if (reader == null) {
+        logger.info(agentsFile + " was not found, so using the default agent"
+            + " name set in the nutch-site.xml");
+      } else {
+        // Collect into a local list so a partial read is never exposed.
+        ArrayList<String> agents = new ArrayList<String>();
+        BufferedReader br = new BufferedReader(reader);
+        try {
+          String word;
+          while ((word = br.readLine()) != null) {
+            // Skip blank lines; they would yield an empty User-Agent header.
+            if (!word.trim().isEmpty()) {
+              agents.add(word.trim());
+            }
+          }
+          // An empty file disables rotation instead of leaving an empty list.
+          useragentnames = agents.isEmpty() ? null : agents;
+        } catch (IOException e) {
+          logger.warn("Failed to read " + agentsFile
+              + ", so using the default agent name", e);
+        } finally {
+          try {
+            br.close();
+          } catch (IOException e) {
+            logger.warn("Failed to close " + agentsFile, e);
+          }
+        }
+      }
+    }
+
     String[] protocols = conf.getStrings("http.tls.supported.protocols",
         "TLSv1.2", "TLSv1.1", "TLSv1", "SSLv3");
     String[] ciphers = conf.getStrings("http.tls.supported.cipher.suites",
@@ -312,6 +355,10 @@ public abstract class HttpBase implements Protocol {
   }
 
   public String getUserAgent() {
+    // NUTCH-1941: rotate agents; nextInt's bound is exclusive, so pass size().
+    if (useragentnames != null && !useragentnames.isEmpty()) {
+      return useragentnames.get(ThreadLocalRandom.current().nextInt(useragentnames.size()));
+    }
     return userAgent;
   }
 
-- 
1.9.5 (Apple Git-50.3)

