java.lang.Object org.supermind.crawl.util.MapFilePersister<K,V>
public abstract class MapFilePersister<K,V>
Helper class to simplify interaction with a MapFile. Because a MapFile is
optimal in batch-update scenarios, an in-memory buffer is used to perform
batch updates. This buffer also improves performance when reads/updates
have some locality.
Field Summary | |
---|---|
protected java.util.TreeMap<K,V> |
buffer
Buffer. |
protected static int |
maxBufferSize
Buffer size. |
protected org.apache.nutch.io.SequenceFile.Sorter |
sorter
|
protected org.apache.nutch.io.SequenceFile.Writer |
tmpWriter
|
Constructor Summary | |
---|---|
MapFilePersister()
|
Method Summary | |
---|---|
void |
add(K k,
V v)
Add a key/value pair. |
void |
close()
Close resources. |
void |
flushToDisk()
Flush the buffer to disk. |
protected abstract org.apache.nutch.io.WritableComparator |
getKeyComparator()
Get comparator for MapFile key class. |
protected abstract org.apache.nutch.io.WritableComparable |
getKeyInstance()
Return a new instance of the key. |
protected abstract java.lang.Class<? extends org.apache.nutch.io.WritableComparable> |
getMapFileKeyClass()
Get key class. |
protected java.lang.Class<? extends org.apache.nutch.io.Writable> |
getMapFileValueClass()
Get value class. |
protected abstract java.util.Comparator<K> |
getTypeComparator()
Get comparator for type. |
protected org.apache.nutch.io.Writable |
getValueInstance()
Return a new instance of the value. |
void |
init()
Initialize resources. |
protected void |
initTmpWriter()
Initialize tmpWriter. |
void |
setMapdir(java.lang.String mapdir)
Set location of directory where the MapFile will be created. |
void |
setNfs(org.apache.nutch.fs.NutchFileSystem nfs)
Set NutchFileSystem. |
void |
setOverwrite(boolean overwrite)
Set whether existing files/directories should be overwritten. |
void |
setTmpfile(java.lang.String tmpfile)
Set the location of temp file. |
protected abstract void |
writeBufferToTmp()
Write contents of buffer to tmpfile. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected java.util.TreeMap<K,V> buffer
protected static int maxBufferSize
protected org.apache.nutch.io.SequenceFile.Sorter sorter
protected org.apache.nutch.io.SequenceFile.Writer tmpWriter
Constructor Detail |
---|
public MapFilePersister()
Method Detail |
---|
public void add(K k, V v) throws java.io.IOException
Add a key/value pair.
Parameters:
k - the key to add
v - the value to add
Throws:
java.io.IOException
public void close() throws java.io.IOException
Close resources.
Throws:
java.io.IOException
public void flushToDisk() throws java.io.IOException
Flush the buffer to disk.
Throws:
java.io.IOException
protected abstract org.apache.nutch.io.WritableComparator getKeyComparator()
protected abstract org.apache.nutch.io.WritableComparable getKeyInstance()
protected abstract java.lang.Class<? extends org.apache.nutch.io.WritableComparable> getMapFileKeyClass()
protected java.lang.Class<? extends org.apache.nutch.io.Writable> getMapFileValueClass()
Get value class. Defaults to NullWritable.
protected abstract java.util.Comparator<K> getTypeComparator()
protected org.apache.nutch.io.Writable getValueInstance()
Return a new instance of the value. Defaults to NullWritable.get().
public void init() throws java.io.IOException
Initialize resources.
Throws:
java.io.IOException
protected void initTmpWriter() throws java.io.IOException
Initialize tmpWriter.
Throws:
java.io.IOException
public void setMapdir(java.lang.String mapdir)
Set location of directory where the MapFile will be created.
Parameters:
mapdir - directory where the MapFile will be created
public void setNfs(org.apache.nutch.fs.NutchFileSystem nfs)
Set NutchFileSystem.
Parameters:
nfs - the NutchFileSystem
public void setOverwrite(boolean overwrite)
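Set whether existing files/directories should be overwritten.
Parameters:
overwrite - whether existing files/directories should be overwritten
public void setTmpfile(java.lang.String tmpfile)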
Set the location of temp file.
Parameters:
tmpfile - location of the temp file
protected abstract void writeBufferToTmp() throws java.io.IOException
Write contents of buffer to tmpfile. Use tmpWriter to do this.
Throws:
java.io.IOException