java.lang.Object org.supermind.crawl.DefaultFetchedURLs
public class DefaultFetchedURLs
Default implementation of FetchedURLs. Saves URL checksums directly
to a MapFile.
Field Summary |
---|
Fields inherited from interface org.supermind.crawl.FetchedURLs |
---|
LOG |
Constructor Summary | |
---|---|
DefaultFetchedURLs()
|
Method Summary | |
---|---|
void |
close()
|
boolean |
contains(java.net.URL url)
Has this URL already been fetched? |
ScheduledURL |
get(long id)
Get a persisted URL. |
protected long |
getChecksum(java.net.URL url)
Create a 64-bit checksum by merging a 32-bit host checksum with the URL's 32-bit checksum. |
void |
init()
|
void |
insert(ScheduledURL url,
org.apache.nutch.protocol.ProtocolOutput output)
Insert a fetched URL. |
void |
setChecksum(java.util.zip.Checksum checksum)
|
void |
setPersister(LongPersister persister)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public DefaultFetchedURLs()
Method Detail |
---|
public void close() throws java.io.IOException
close
in interface FetchedURLs
java.io.IOException
public boolean contains(java.net.URL url)
contains
in interface FetchedURLs
url
-
public ScheduledURL get(long id)
Description copied from interface: FetchedURLs
Get a persisted URL.
get
in interface FetchedURLs
id
- ScheduledURL's id
protected long getChecksum(java.net.URL url)
url
-
public void init() throws java.io.IOException
init
in interface FetchedURLs
java.io.IOException
public void insert(ScheduledURL url, org.apache.nutch.protocol.ProtocolOutput output) throws java.io.IOException
Description copied from interface: FetchedURLs
Insert a fetched URL.
insert
in interface FetchedURLs
url
- url
output
- protocol output
java.io.IOException
public void setChecksum(java.util.zip.Checksum checksum)
public void setPersister(LongPersister persister) throws java.io.IOException
java.io.IOException