java.lang.Object
  org.supermind.crawl.NutchFetchListCrawlSeedSource
public class NutchFetchListCrawlSeedSource
Uses a Nutch FetchList to seed a crawl.
Field Summary | |
---|---|
(package private) org.apache.nutch.io.ArrayFile.Reader |
fetchList
|
(package private) int |
idx
|
(package private) java.util.Iterator |
it
|
(package private) boolean |
next
|
(package private) org.apache.nutch.pagedb.FetchListEntry |
nextEntry
|
Constructor Summary | |
---|---|
NutchFetchListCrawlSeedSource()
|
Method Summary | |
---|---|
void |
close()
Close resources. |
SeedURL |
getSeedURL(int index)
Get seed URL corresponding to an index. |
java.util.Iterator<SeedURL> |
getSeedURLs()
Get iterator of seed URLs. |
void |
setFile(java.lang.String file)
|
void |
setNfs(org.apache.nutch.fs.NutchFileSystem nfs)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
org.apache.nutch.io.ArrayFile.Reader fetchList
int idx
java.util.Iterator it
boolean next
org.apache.nutch.pagedb.FetchListEntry nextEntry
Constructor Detail |
---|
public NutchFetchListCrawlSeedSource()
Method Detail |
---|
public void close() throws java.io.IOException
Description copied from interface: CrawlSeedSource
Close resources.
Specified by: close in interface CrawlSeedSource
Throws: java.io.IOException
public SeedURL getSeedURL(int index)
Description copied from interface: CrawlSeedSource
Get seed URL corresponding to an index.
Specified by: getSeedURL in interface CrawlSeedSource
public java.util.Iterator<SeedURL> getSeedURLs() throws java.io.IOException
Description copied from interface: CrawlSeedSource
Get iterator of seed URLs.
Specified by: getSeedURLs in interface CrawlSeedSource
Throws: java.io.IOException
public void setFile(java.lang.String file)
public void setNfs(org.apache.nutch.fs.NutchFileSystem nfs)