java.lang.Object org.supermind.crawl.InMemoryFetchedURLs
public class InMemoryFetchedURLs
Saves fetched URLs to a HashSet. Not recommended for large crawls.
Field Summary |
---|
Fields inherited from interface org.supermind.crawl.FetchedURLs |
---|
LOG |
Constructor Summary | |
---|---|
InMemoryFetchedURLs()
|
Method Summary | |
---|---|
void |
close()
|
boolean |
contains(java.lang.String url)
|
boolean |
contains(java.net.URL url)
Has the URL already been fetched? |
ScheduledURL |
get(long id)
Get a persisted URL. |
void |
init()
|
void |
insert(ScheduledURL url,
org.apache.nutch.protocol.ProtocolOutput output)
Insert a fetched URL. |
void |
insert(java.lang.String url)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public InMemoryFetchedURLs()
Method Detail |
---|
public void close() throws java.io.IOException
Specified by: close in interface FetchedURLs
Throws: java.io.IOException
public boolean contains(java.lang.String url)
public boolean contains(java.net.URL url)
FetchedURLs
contains
in interface FetchedURLs
public ScheduledURL get(long id)
FetchedURLs
get
in interface FetchedURLs
id
- ScheduledURL's id
public void init() throws java.io.IOException
Specified by: init in interface FetchedURLs
Throws: java.io.IOException
public void insert(ScheduledURL url, org.apache.nutch.protocol.ProtocolOutput output)
FetchedURLs
insert
in interface FetchedURLs
url
- url
output
- protocol output
public void insert(java.lang.String url)