org.supermind.crawl
Class Fetcher

java.lang.Object
  extended by org.supermind.crawl.Fetcher
All Implemented Interfaces:
org.springframework.beans.factory.BeanFactoryAware

public class Fetcher
extends java.lang.Object
implements org.springframework.beans.factory.BeanFactoryAware

Fetcher.


Field Summary
protected  org.springframework.beans.factory.BeanFactory beanFactory
           
protected  long bytes
           
protected  CrawlSeedSource crawlSeedSource
           
protected  int errors
           
protected  java.lang.ThreadGroup group
           
protected static java.util.logging.Logger LOG
          Logger.
protected  int maxPagesPerConnection
          The maximum number of pipelined HTTP GETs to perform per HTTP connection.
protected  int pages
           
protected  boolean parsing
           
protected  PostFetchProcessor postFetchProcessor
           
protected  long start
           
protected static java.lang.String THREAD_GROUP_NAME
           
protected  int threadCount
           
 
Constructor Summary
Fetcher()
           
 
Method Summary
protected  FetcherThread assignThread(java.net.URL url)
          Assign a FetcherThread to handle this URL.
protected  void close()
           
 org.apache.nutch.fetcher.Fetcher.FetcherStatus getStatus()
           
 void run()
           
 void setBeanFactory(org.springframework.beans.factory.BeanFactory beanFactory)
           
 void setMaxPagesPerConnection(int maxPagesPerConnection)
           
 void setParsing(boolean parsing)
           
 void setPostFetchProcessor(PostFetchProcessor postFetchProcessor)
           
 void setSeedSource(CrawlSeedSource crawlSource)
           
 void setThreadCount(int threadCount)
           
 void status()
          Display the status of the fetcher run.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

beanFactory

protected org.springframework.beans.factory.BeanFactory beanFactory

bytes

protected long bytes

crawlSeedSource

protected CrawlSeedSource crawlSeedSource

errors

protected int errors

group

protected final java.lang.ThreadGroup group

LOG

protected static java.util.logging.Logger LOG
Logger.


maxPagesPerConnection

protected int maxPagesPerConnection
The maximum number of pipelined HTTP GETs to perform per HTTP connection.


pages

protected int pages

parsing

protected boolean parsing

postFetchProcessor

protected PostFetchProcessor postFetchProcessor

start

protected long start

THREAD_GROUP_NAME

protected static final java.lang.String THREAD_GROUP_NAME
See Also:
Constant Field Values

threadCount

protected int threadCount
Constructor Detail

Fetcher

public Fetcher()
        throws java.lang.Exception
Throws:
java.lang.Exception
Method Detail

assignThread

protected FetcherThread assignThread(java.net.URL url)
Assign a FetcherThread to handle this URL. TODO: implement a more sophisticated assignment algorithm (e.g. one that load-balances across threads).

Parameters:
url - the URL to be handled
Returns:
the FetcherThread assigned to handle the URL

close

protected void close()
              throws java.io.IOException
Throws:
java.io.IOException

getStatus

public org.apache.nutch.fetcher.Fetcher.FetcherStatus getStatus()

run

public void run()
         throws java.io.IOException,
                java.lang.InterruptedException
Throws:
java.io.IOException
java.lang.InterruptedException

setBeanFactory

public void setBeanFactory(org.springframework.beans.factory.BeanFactory beanFactory)
Specified by:
setBeanFactory in interface org.springframework.beans.factory.BeanFactoryAware

setMaxPagesPerConnection

public void setMaxPagesPerConnection(int maxPagesPerConnection)

setParsing

public void setParsing(boolean parsing)

setPostFetchProcessor

public void setPostFetchProcessor(PostFetchProcessor postFetchProcessor)

setSeedSource

public void setSeedSource(CrawlSeedSource crawlSource)
                   throws java.io.IOException
Throws:
java.io.IOException

setThreadCount

public void setThreadCount(int threadCount)

status

public void status()
Display the status of the fetcher run.