diff --git ivy/ivy.xml ivy/ivy.xml
index 7d45e0f..eeabddb 100644
--- ivy/ivy.xml
+++ ivy/ivy.xml
@@ -102,8 +102,7 @@
-
-
+
-
-
+
diff --git src/gora/host.avsc src/gora/host.avsc
index e4165cb..f7e386d 100644
--- src/gora/host.avsc
+++ src/gora/host.avsc
@@ -2,8 +2,8 @@
"type": "record",
"namespace": "org.apache.nutch.storage",
"fields": [
- {"name": "metadata", "type": {"type": "map", "values": "bytes"}},
- {"name": "outlinks", "type": {"type": "map", "values": "string"}},
- {"name": "inlinks", "type": {"type": "map", "values": "string"}}
+ {"name": "metadata", "type": {"type": "map", "values": "bytes"}, "default":{}},
+ {"name": "outlinks", "type": {"type": "map", "values": "string"}, "default":{}},
+ {"name": "inlinks", "type": {"type": "map", "values": "string"}, "default":{}}
]
}
diff --git src/gora/webpage.avsc src/gora/webpage.avsc
index 601fae8..f025c74 100644
--- src/gora/webpage.avsc
+++ src/gora/webpage.avsc
@@ -2,47 +2,47 @@
"type": "record",
"namespace": "org.apache.nutch.storage",
"fields": [
- {"name": "baseUrl", "type": "string"},
- {"name": "status", "type": "int"},
- {"name": "fetchTime", "type": "long"},
- {"name": "prevFetchTime", "type": "long"},
- {"name": "fetchInterval", "type": "int"},
- {"name": "retriesSinceFetch", "type": "int"},
- {"name": "modifiedTime", "type": "long"},
- {"name": "prevModifiedTime", "type": "long"},
- {"name": "protocolStatus", "type": {
+ {"name": "baseUrl", "type": ["null","string"], "default":null},
+ {"name": "status", "type": "int", "default":0},
+ {"name": "fetchTime", "type": "long", "default":0},
+ {"name": "prevFetchTime", "type": "long", "default":0},
+ {"name": "fetchInterval", "type": "int", "default":0},
+ {"name": "retriesSinceFetch", "type": "int", "default":0},
+ {"name": "modifiedTime", "type": "long", "default":0},
+ {"name": "prevModifiedTime", "type": "long", "default":0},
+ {"name": "protocolStatus", "type": ["null", {
"name": "ProtocolStatus",
"type": "record",
"namespace": "org.apache.nutch.storage",
"fields": [
- {"name": "code", "type": "int"},
- {"name": "args", "type": {"type": "array", "items": "string"}},
- {"name": "lastModified", "type": "long"}
+ {"name": "code", "type": "int", "default":0},
+ {"name": "args", "type": {"type": "array", "items": "string"}, "default":[]},
+ {"name": "lastModified", "type": "long", "default":0}
]
- }},
- {"name": "content", "type": "bytes"},
- {"name": "contentType", "type": "string"},
- {"name": "prevSignature", "type": "bytes"},
- {"name": "signature", "type": "bytes"},
- {"name": "title", "type": "string"},
- {"name": "text", "type": "string"},
- {"name": "parseStatus", "type": {
+ }], "default":null},
+ {"name": "content", "type": ["null","bytes"], "default":null},
+ {"name": "contentType", "type": ["null","string"], "default":null},
+ {"name": "prevSignature", "type": ["null","bytes"], "default":null},
+ {"name": "signature", "type": ["null","bytes"], "default":null},
+ {"name": "title", "type": ["null","string"], "default":null},
+ {"name": "text", "type": ["null","string"], "default":null},
+ {"name": "parseStatus", "type": ["null", {
"name": "ParseStatus",
"type": "record",
"namespace": "org.apache.nutch.storage",
"fields": [
- {"name": "majorCode", "type": "int"},
- {"name": "minorCode", "type": "int"},
- {"name": "args", "type": {"type": "array", "items": "string"}}
+ {"name": "majorCode", "type": "int", "default":0},
+ {"name": "minorCode", "type": "int", "default":0},
+ {"name": "args", "type": {"type": "array", "items": "string"}, "default":[]}
]
- }},
- {"name": "score", "type": "float"},
- {"name": "reprUrl", "type": "string"},
- {"name": "headers", "type": {"type": "map", "values": "string"}},
- {"name": "outlinks", "type": {"type": "map", "values": "string"}},
- {"name": "inlinks", "type": {"type": "map", "values": "string"}},
- {"name": "markers", "type": {"type": "map", "values": "string"}},
- {"name": "metadata", "type": {"type": "map", "values": "bytes"}},
- {"name": "batchId", "type": "string"}
+ }], "default":null},
+ {"name": "score", "type": "float", "default":0},
+ {"name": "reprUrl", "type": ["null","string"], "default":null},
+ {"name": "headers", "type": {"type":"map", "values": ["null","string"]}, "default":{}},
+ {"name": "outlinks", "type": {"type": "map", "values": ["null","string"]}, "default":{}},
+ {"name": "inlinks", "type": {"type": "map", "values": ["null","string"]}, "default":{}},
+ {"name": "markers", "type": {"type": "map", "values": ["null","string"]}, "default":{}},
+ {"name": "metadata", "type": {"type": "map", "values": ["null","bytes"]}, "default":{}},
+ {"name": "batchId", "type": ["null","string"], "default":null}
]
}
diff --git src/java/org/apache/nutch/api/DbReader.java src/java/org/apache/nutch/api/DbReader.java
index 728da3a..68cc9ac 100644
--- src/java/org/apache/nutch/api/DbReader.java
+++ src/java/org/apache/nutch/api/DbReader.java
@@ -16,16 +16,7 @@
******************************************************************************/
package org.apache.nutch.api;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeSet;
-
+import org.apache.avro.Schema;
import org.apache.avro.util.Utf8;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
@@ -34,11 +25,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.parse.ParseStatusUtils;
import org.apache.nutch.protocol.ProtocolStatusUtils;
-import org.apache.nutch.storage.Mark;
-import org.apache.nutch.storage.ParseStatus;
-import org.apache.nutch.storage.ProtocolStatus;
-import org.apache.nutch.storage.StorageUtils;
-import org.apache.nutch.storage.WebPage;
+import org.apache.nutch.storage.*;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.StringUtil;
@@ -46,6 +33,11 @@ import org.apache.nutch.util.TableUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.Map.Entry;
+
public class DbReader {
private static final Logger LOG = LoggerFactory.getLogger(DbReader.class);
@@ -145,7 +137,7 @@ public class DbReader {
public Map next() {
url = res.getKey();
- page = (WebPage)res.get().clone();
+ page = WebPage.newBuilder(res.get()).build();
try {
advance();
if (!hasNext) {
@@ -169,16 +161,16 @@ public class DbReader {
if (fields == null || fields.contains("url")) {
res.put("url", TableUtil.unreverseUrl(url));
}
- String[] pfields = page.getFields();
- TreeSet flds = null;
+ List pfields = page.getSchema().getFields();
+ TreeSet flds = null;
if (fields != null) {
- flds = (TreeSet)fields.clone();
+ flds = (TreeSet) fields.clone();
} else {
- flds = new TreeSet(Arrays.asList(pfields));
+ flds = new TreeSet(pfields);
}
flds.retainAll(Arrays.asList(pfields));
- for (String f : flds) {
- int idx = page.getFieldIndex(f);
+ for (Schema.Field f : flds) {
+ int idx = f.pos();
if (idx < 0) {
continue;
}
@@ -187,43 +179,43 @@ public class DbReader {
continue;
}
- if ("metadata".equals(f)) {
+ if ("metadata".equals(f.name())) { // f is a Schema.Field now; match on its name, not the Field object
- Map metadata = page.getMetadata();
+ Map metadata = page.getMetadata();
Map simpleMeta = new HashMap();
if (metadata != null) {
- Iterator> iterator = metadata.entrySet()
+ Iterator> iterator = metadata.entrySet()
.iterator();
while (iterator.hasNext()) {
- Entry entry = iterator.next();
+ Entry entry = iterator.next();
simpleMeta.put(entry.getKey().toString(),
Bytes.toStringBinary(entry.getValue()));
}
}
- res.put(f, simpleMeta);
+ res.put(f.name(), simpleMeta);
- } else if ("protocolStatus".equals(f)) {
+ } else if ("protocolStatus".equals(f.name())) {
ProtocolStatus ps = page.getProtocolStatus();
- res.put(f, ProtocolStatusUtils.toString(ps));
+ res.put(f.name(), ProtocolStatusUtils.toString(ps));
- } else if ("parseStatus".equals(f)) {
+ } else if ("parseStatus".equals(f.name())) {
ParseStatus ps = page.getParseStatus();
- res.put(f, ParseStatusUtils.toString(ps));
+ res.put(f.name(), ParseStatusUtils.toString(ps));
- } else if ("signature".equals(f)) {
+ } else if ("signature".equals(f.name())) {
ByteBuffer bb = page.getSignature();
- res.put(f, StringUtil.toHexString(bb));
+ res.put(f.name(), StringUtil.toHexString(bb));
- } else if ("content".equals(f)) {
+ } else if ("content".equals(f.name())) {
ByteBuffer bb = page.getContent();
- res.put(f, Bytes.toStringBinary(bb));
+ res.put(f.name(), Bytes.toStringBinary(bb));
- } else if ("markers".equals(f)) {
+ } else if ("markers".equals(f.name())) {
- res.put(f, convertMap(page.getMarkers()));
+ res.put(f.name(), convertMap(page.getMarkers()));
- } else if ("inlinks".equals(f)) {
+ } else if ("inlinks".equals(f.name())) {
- res.put(f, convertMap(page.getInlinks()));
+ res.put(f.name(), convertMap(page.getInlinks()));
- } else if ("outlinks".equals(f)) {
+ } else if ("outlinks".equals(f.name())) {
- res.put(f, convertMap(page.getOutlinks()));
+ res.put(f.name(), convertMap(page.getOutlinks()));
} else {
if (val instanceof Utf8) {
val = val.toString();
} else if (val instanceof ByteBuffer) {
val = Bytes.toStringBinary((ByteBuffer)val);
}
- res.put(f, val);
+ res.put(f.name(), val);
}
}
return res;
diff --git src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
index 20ce90d..d9a9e8d 100755
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
@@ -17,14 +17,14 @@
package org.apache.nutch.crawl;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.nutch.storage.WebPage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashSet;
+import java.util.Set;
/**
* This class provides common methods for implementations of
@@ -196,7 +196,7 @@ implements FetchSchedule {
// reduce fetchInterval so that it fits within the max value
if (page.getFetchInterval() > maxInterval)
page.setFetchInterval(Math.round(maxInterval * 0.9f));
- page.setStatus(CrawlStatus.STATUS_UNFETCHED);
+ page.setStatus((int) CrawlStatus.STATUS_UNFETCHED);
page.setRetriesSinceFetch(0);
// TODO: row.setSignature(null) ??
page.setModifiedTime(0L);
diff --git src/java/org/apache/nutch/crawl/DbUpdateMapper.java src/java/org/apache/nutch/crawl/DbUpdateMapper.java
index 5bef21d..48e4913 100644
--- src/java/org/apache/nutch/crawl/DbUpdateMapper.java
+++ src/java/org/apache/nutch/crawl/DbUpdateMapper.java
@@ -67,11 +67,11 @@ extends GoraMapper {
String url = TableUtil.unreverseUrl(key);
scoreData.clear();
- Map outlinks = page.getOutlinks();
+ Map outlinks = page.getOutlinks();
if (outlinks != null) {
- for (Entry e : outlinks.entrySet()) {
+ for (Entry e : outlinks.entrySet()) {
int depth=Integer.MAX_VALUE;
- Utf8 depthUtf8=page.getFromMarkers(DbUpdaterJob.DISTANCE);
+ CharSequence depthUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
if (depthUtf8 != null) depth=Integer.parseInt(depthUtf8.toString());
scoreData.add(new ScoreDatum(0.0f, e.getKey().toString(),
e.getValue().toString(), depth));
diff --git src/java/org/apache/nutch/crawl/DbUpdateReducer.java src/java/org/apache/nutch/crawl/DbUpdateReducer.java
index a671694..89e57f5 100644
--- src/java/org/apache/nutch/crawl/DbUpdateReducer.java
+++ src/java/org/apache/nutch/crawl/DbUpdateReducer.java
@@ -22,7 +22,8 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.avro.util.Utf8;
-import org.slf4j.Logger;
+import org.apache.gora.mapreduce.GoraReducer;
+import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
@@ -35,7 +36,7 @@ import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.WebPageWritable;
-import org.apache.gora.mapreduce.GoraReducer;
+import org.slf4j.Logger;
public class DbUpdateReducer
extends GoraReducer {
@@ -97,16 +98,16 @@ extends GoraReducer {
if (!additionsAllowed) {
return;
}
- page = new WebPage();
+ page = WebPage.newBuilder().build();
schedule.initializeSchedule(url, page);
- page.setStatus(CrawlStatus.STATUS_UNFETCHED);
+ page.setStatus((int) CrawlStatus.STATUS_UNFETCHED);
try {
scoringFilters.initialScore(url, page);
} catch (ScoringFilterException e) {
page.setScore(0.0f);
}
} else {
- byte status = (byte)page.getStatus();
+ byte status = page.getStatus().byteValue();
switch (status) {
case CrawlStatus.STATUS_FETCHED: // succesful fetch
case CrawlStatus.STATUS_REDIR_TEMP: // successful fetch, redirected
@@ -129,7 +130,7 @@ extends GoraReducer {
long prevFetchTime = page.getPrevFetchTime();
long modifiedTime = page.getModifiedTime();
long prevModifiedTime = page.getPrevModifiedTime();
- Utf8 lastModified = page.getFromHeaders(new Utf8("Last-Modified"));
+ CharSequence lastModified = page.getHeaders().get(new Utf8("Last-Modified"));
if ( lastModified != null ){
try {
modifiedTime = HttpDateFormat.toLong(lastModified.toString());
@@ -145,9 +146,9 @@ extends GoraReducer {
case CrawlStatus.STATUS_RETRY:
schedule.setPageRetrySchedule(url, page, 0L, page.getPrevModifiedTime(), page.getFetchTime());
if (page.getRetriesSinceFetch() < retryMax) {
- page.setStatus(CrawlStatus.STATUS_UNFETCHED);
+ page.setStatus((int)CrawlStatus.STATUS_UNFETCHED);
} else {
- page.setStatus(CrawlStatus.STATUS_GONE);
+ page.setStatus((int)CrawlStatus.STATUS_GONE);
}
break;
case CrawlStatus.STATUS_GONE:
@@ -171,15 +172,15 @@ extends GoraReducer {
if (inlinkDist < smallestDist) {
smallestDist=inlinkDist;
}
- page.putToInlinks(new Utf8(inlink.getUrl()), new Utf8(inlink.getAnchor()));
+ page.getInlinks().put(new Utf8(inlink.getUrl()), new Utf8(inlink.getAnchor()));
}
if (smallestDist != Integer.MAX_VALUE) {
int oldDistance=Integer.MAX_VALUE;
- Utf8 oldDistUtf8 = page.getFromMarkers(DbUpdaterJob.DISTANCE);
+ CharSequence oldDistUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
if (oldDistUtf8 != null)oldDistance=Integer.parseInt(oldDistUtf8.toString());
int newDistance = smallestDist+1;
if (newDistance < oldDistance) {
- page.putToMarkers(DbUpdaterJob.DISTANCE, new Utf8(Integer.toString(newDistance)));
+ page.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(Integer.toString(newDistance)));
}
}
@@ -193,8 +194,8 @@ extends GoraReducer {
// clear markers
// But only delete when they exist. This is much faster for the underlying
// store. The markers are on the input anyway.
- if (page.getFromMetadata(FetcherJob.REDIRECT_DISCOVERED) != null) {
- page.removeFromMetadata(FetcherJob.REDIRECT_DISCOVERED);
+ if (page.getMetadata().get(FetcherJob.REDIRECT_DISCOVERED) != null) {
+ page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED, null);
}
Mark.GENERATE_MARK.removeMarkIfExist(page);
Mark.FETCH_MARK.removeMarkIfExist(page);
diff --git src/java/org/apache/nutch/crawl/GeneratorMapper.java src/java/org/apache/nutch/crawl/GeneratorMapper.java
index 144784a..6af4e75 100644
--- src/java/org/apache/nutch/crawl/GeneratorMapper.java
+++ src/java/org/apache/nutch/crawl/GeneratorMapper.java
@@ -16,9 +16,8 @@
******************************************************************************/
package org.apache.nutch.crawl;
-import java.io.IOException;
-import java.net.MalformedURLException;
-
+import org.apache.avro.util.Utf8;
+import org.apache.gora.mapreduce.GoraMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.GeneratorJob.SelectorEntry;
import org.apache.nutch.net.URLFilterException;
@@ -29,8 +28,11 @@ import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
-import org.apache.avro.util.Utf8;
-import org.apache.gora.mapreduce.GoraMapper;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
public class GeneratorMapper
extends GoraMapper {
@@ -59,7 +61,7 @@ extends GoraMapper {
//filter on distance
if (maxDistance > -1) {
- Utf8 distanceUtf8 = page.getFromMarkers(DbUpdaterJob.DISTANCE);
+ CharSequence distanceUtf8 = page.getMarkers().get(DbUpdaterJob.DISTANCE);
if (distanceUtf8 != null) {
int distance=Integer.parseInt(distanceUtf8.toString());
if (distance > maxDistance) {
diff --git src/java/org/apache/nutch/crawl/GeneratorReducer.java src/java/org/apache/nutch/crawl/GeneratorReducer.java
index fd5bc31..92019dd 100644
--- src/java/org/apache/nutch/crawl/GeneratorReducer.java
+++ src/java/org/apache/nutch/crawl/GeneratorReducer.java
@@ -22,6 +22,10 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.avro.util.Utf8;
+import org.apache.gora.mapreduce.GoraReducer;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.GeneratorJob.SelectorEntry;
import org.apache.nutch.fetcher.FetcherJob.FetcherMapper;
@@ -29,7 +33,6 @@ import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.URLUtil;
-import org.apache.gora.mapreduce.GoraReducer;
/** Reduce class for generate
*
diff --git src/java/org/apache/nutch/crawl/InjectorJob.java src/java/org/apache/nutch/crawl/InjectorJob.java
index bfb5f6d..b852dd5 100644
--- src/java/org/apache/nutch/crawl/InjectorJob.java
+++ src/java/org/apache/nutch/crawl/InjectorJob.java
@@ -16,15 +16,6 @@
******************************************************************************/
package org.apache.nutch.crawl;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.text.SimpleDateFormat;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraOutputFormat;
import org.apache.gora.persistency.Persistent;
@@ -47,15 +38,15 @@ import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.StorageUtils;
import org.apache.nutch.storage.WebPage;
-import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.util.NutchJob;
-import org.apache.nutch.util.NutchTool;
-import org.apache.nutch.util.TableUtil;
-import org.apache.nutch.util.TimingUtil;
-import org.apache.nutch.util.ToolUtil;
+import org.apache.nutch.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
/** This class takes a flat file of URLs and adds them to the of pages to be
* crawled. Useful for bootstrapping the system.
* The URL files contain one URL per line, optionally followed by custom metadata
@@ -160,7 +151,7 @@ public class InjectorJob extends NutchTool implements Tool {
return;
} else { // if it passes
String reversedUrl = TableUtil.reverseUrl(url); // collect it
- WebPage row = new WebPage();
+ WebPage row = WebPage.newBuilder().build();
row.setFetchTime(curTime);
row.setFetchInterval(customInterval);
@@ -169,7 +160,7 @@ public class InjectorJob extends NutchTool implements Tool {
while (keysIter.hasNext()) {
String keymd = keysIter.next();
String valuemd = metadata.get(keymd);
- row.putToMetadata(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
+ row.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
}
if (customScore != -1)
@@ -186,7 +177,7 @@ public class InjectorJob extends NutchTool implements Tool {
}
}
context.getCounter("injector", "urls_injected").increment(1);
- row.putToMarkers(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
+ row.getMarkers().put(DbUpdaterJob.DISTANCE, new Utf8(String.valueOf(0)));
Mark.INJECT_MARK.putMark(row, YES_STRING);
context.write(reversedUrl, row);
}
diff --git src/java/org/apache/nutch/crawl/MD5Signature.java src/java/org/apache/nutch/crawl/MD5Signature.java
index 8c09543..f0e0332 100644
--- src/java/org/apache/nutch/crawl/MD5Signature.java
+++ src/java/org/apache/nutch/crawl/MD5Signature.java
@@ -17,14 +17,14 @@
package org.apache.nutch.crawl;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.HashSet;
-
import org.apache.avro.util.Utf8;
import org.apache.hadoop.io.MD5Hash;
import org.apache.nutch.storage.WebPage;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.HashSet;
+
/**
* Default implementation of a page signature. It calculates an MD5 hash
* of the raw binary content of a page. In case there is no content, it
@@ -47,7 +47,7 @@ public class MD5Signature extends Signature {
int of;
int cb;
if (buf == null) {
- Utf8 baseUrl = page.getBaseUrl();
+ Utf8 baseUrl = (Utf8) page.getBaseUrl();
if (baseUrl == null) {
data = null;
of = 0;
@@ -56,7 +56,7 @@ public class MD5Signature extends Signature {
else {
data = baseUrl.getBytes();
of = 0;
- cb = baseUrl.getLength();
+ cb = baseUrl.getByteLength(); // byte count of the UTF-8 data; length() is the char count and differs for non-ASCII URLs
}
} else {
data = buf.array();
diff --git src/java/org/apache/nutch/crawl/WebTableReader.java src/java/org/apache/nutch/crawl/WebTableReader.java
index 167f122..0bebcbe 100644
--- src/java/org/apache/nutch/crawl/WebTableReader.java
+++ src/java/org/apache/nutch/crawl/WebTableReader.java
@@ -16,18 +16,11 @@
******************************************************************************/
package org.apache.nutch.crawl;
-import java.io.IOException;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-import java.util.regex.Pattern;
-
import org.apache.avro.util.Utf8;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.gora.mapreduce.GoraMapper;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -47,17 +40,18 @@ import org.apache.nutch.parse.ParseStatusUtils;
import org.apache.nutch.protocol.ProtocolStatusUtils;
import org.apache.nutch.storage.StorageUtils;
import org.apache.nutch.storage.WebPage;
-import org.apache.nutch.util.Bytes;
-import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.util.NutchJob;
-import org.apache.nutch.util.NutchTool;
-import org.apache.nutch.util.StringUtil;
-import org.apache.nutch.util.TableUtil;
-import org.apache.nutch.util.ToolUtil;
-import org.apache.gora.mapreduce.GoraMapper;
-import org.apache.gora.query.Query;
-import org.apache.gora.query.Result;
-import org.apache.gora.store.DataStore;
+import org.apache.nutch.util.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
/**
* Displays information about the entries of the webtable
@@ -344,7 +338,7 @@ public class WebTableReader extends NutchTool implements Tool {
sb.append("key:\t" + key).append("\n");
sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
sb.append("status:\t").append(page.getStatus()).append(" (").append(
- CrawlStatus.getName((byte) page.getStatus())).append(")\n");
+ CrawlStatus.getName(page.getStatus().byteValue())).append(")\n");
sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
@@ -366,41 +360,41 @@ public class WebTableReader extends NutchTool implements Tool {
sb.append("title:\t" + page.getTitle()).append("\n");
sb.append("score:\t" + page.getScore()).append("\n");
- Map markers = page.getMarkers();
+ Map markers = page.getMarkers();
sb.append("markers:\t" + markers).append("\n");
sb.append("reprUrl:\t" + page.getReprUrl()).append("\n");
- Utf8 batchId = page.getBatchId();
+ CharSequence batchId = page.getBatchId();
if (batchId != null) {
sb.append("batchId:\t" + batchId.toString()).append("\n");
}
- Map metadata = page.getMetadata();
+ Map metadata = page.getMetadata();
if (metadata != null) {
- Iterator> iterator = metadata.entrySet()
+ Iterator> iterator = metadata.entrySet()
.iterator();
while (iterator.hasNext()) {
- Entry entry = iterator.next();
+ Entry entry = iterator.next();
sb.append("metadata " + entry.getKey().toString()).append(" : \t")
.append(Bytes.toString(entry.getValue())).append("\n");
}
}
if (dumpLinks) {
- Map inlinks = page.getInlinks();
- Map outlinks = page.getOutlinks();
+ Map inlinks = page.getInlinks();
+ Map outlinks = page.getOutlinks();
if (outlinks != null) {
- for (Entry e : outlinks.entrySet()) {
+ for (Entry e : outlinks.entrySet()) {
sb.append("outlink:\t" + e.getKey() + "\t" + e.getValue() + "\n");
}
}
if (inlinks != null) {
- for (Entry e : inlinks.entrySet()) {
+ for (Entry e : inlinks.entrySet()) {
sb.append("inlink:\t" + e.getKey() + "\t" + e.getValue() + "\n");
}
}
}
if (dumpHeaders) {
- Map headers = page.getHeaders();
+ Map headers = page.getHeaders();
if (headers != null) {
- for (Entry e : headers.entrySet()) {
+ for (Entry e : headers.entrySet()) {
sb.append("header:\t" + e.getKey() + "\t" + e.getValue() + "\n");
}
}
@@ -412,7 +406,7 @@ public class WebTableReader extends NutchTool implements Tool {
sb.append(Bytes.toString(content));
sb.append("\ncontent:end:\n");
}
- Utf8 text = page.getText();
+ CharSequence text = page.getText();
if (text != null && dumpText) {
sb.append("text:start:\n");
sb.append(text.toString());
diff --git src/java/org/apache/nutch/fetcher/FetcherReducer.java src/java/org/apache/nutch/fetcher/FetcherReducer.java
index 055d0fd..069bb45 100644
--- src/java/org/apache/nutch/fetcher/FetcherReducer.java
+++ src/java/org/apache/nutch/fetcher/FetcherReducer.java
@@ -16,43 +16,19 @@
******************************************************************************/
package org.apache.nutch.fetcher;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.UnknownHostException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-
+import crawlercommons.robots.BaseRobotRules;
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.nutch.crawl.CrawlStatus;
-import org.apache.nutch.crawl.URLWebPage;
import org.apache.nutch.host.HostDb;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParserJob;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatusCodes;
-import org.apache.nutch.protocol.ProtocolStatusUtils;
-import org.apache.nutch.protocol.RobotRules;
+import org.apache.nutch.protocol.*;
import org.apache.nutch.storage.Host;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.ProtocolStatus;
@@ -61,7 +37,14 @@ import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
-import crawlercommons.robots.BaseRobotRules;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.URL;
+import java.net.UnknownHostException;
+import java.nio.ByteBuffer;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
public class FetcherReducer
extends GoraReducer {
@@ -478,7 +461,7 @@ extends GoraReducer {
}
}
lastRequestStart.set(System.currentTimeMillis());
- if (!fit.page.isReadable(WebPage.Field.REPR_URL.getIndex())) {
+ if (fit.page.getReprUrl() == null) {
reprUrl = fit.url;
} else {
reprUrl = TableUtil.toString(fit.page.getReprUrl());
@@ -615,8 +598,8 @@ extends GoraReducer {
}
}
- page.putToOutlinks(new Utf8(newUrl), new Utf8());
- page.putToMetadata(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
+ page.getOutlinks().put(new Utf8(newUrl), new Utf8());
+ page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
reprUrl = URLUtil.chooseRepr(reprUrl, newUrl, temp);
if (reprUrl == null) {
LOG.warn("reprUrl==null");
@@ -638,7 +621,7 @@ extends GoraReducer {
private void output(FetchItem fit, Content content,
ProtocolStatus pstatus, byte status)
throws IOException, InterruptedException {
- fit.page.setStatus(status);
+ fit.page.setStatus((int)status);
final long prevFetchTime = fit.page.getFetchTime();
fit.page.setPrevFetchTime(prevFetchTime);
fit.page.setFetchTime(System.currentTimeMillis());
diff --git src/java/org/apache/nutch/host/HostDbUpdateReducer.java src/java/org/apache/nutch/host/HostDbUpdateReducer.java
index d471e36..acba4ed 100644
--- src/java/org/apache/nutch/host/HostDbUpdateReducer.java
+++ src/java/org/apache/nutch/host/HostDbUpdateReducer.java
@@ -16,10 +16,6 @@
******************************************************************************/
package org.apache.nutch.host;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Set;
-
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
import org.apache.hadoop.io.Text;
@@ -29,6 +25,10 @@ import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Histogram;
import org.apache.nutch.util.URLUtil;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Set;
+
/**
* Combines all WebPages with the same host key to create a Host object,
* with some statistics.
@@ -58,15 +58,15 @@ public class HostDbUpdateReducer extends GoraReducer inlinks = page.getInlinks().keySet();
- for (Utf8 inlink: inlinks) {
+ Set inlinks = page.getInlinks().keySet();
+ for (CharSequence inlink: inlinks) {
String host = URLUtil.getHost(inlink.toString());
inlinkCount.add(host);
}
}
if (page.getOutlinks() != null) {
- Set outlinks = page.getOutlinks().keySet();
- for (Utf8 outlink: outlinks) {
+ Set outlinks = page.getOutlinks().keySet();
+ for (CharSequence outlink: outlinks) {
String host = URLUtil.getHost(outlink.toString());
outlinkCount.add(host);
}
@@ -76,15 +76,15 @@ public class HostDbUpdateReducer extends GoraReducer 0) {
- host.putToMetadata(new Utf8("f"),ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
+ host.getMetadata().put(new Utf8("f"),ByteBuffer.wrap(Integer.toString(numFetched).getBytes()));
}
for (String inlink: inlinkCount.getKeys()) {
- host.putToInlinks(new Utf8(inlink), new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
+ host.getInlinks().put(new Utf8(inlink), new Utf8(Integer.toString(inlinkCount.getCount(inlink))));
}
for (String outlink: outlinkCount.getKeys()) {
- host.putToOutlinks(new Utf8(outlink), new Utf8(Integer.toString(outlinkCount.getCount(outlink))));
+ host.getOutlinks().put(new Utf8(outlink), new Utf8(Integer.toString(outlinkCount.getCount(outlink)))); // outlink counts belong in the outlinks map, as before the migration
}
context.write(key.toString(), host);
diff --git src/java/org/apache/nutch/host/HostInjectorJob.java src/java/org/apache/nutch/host/HostInjectorJob.java
index 4910ae8..8780457 100644
--- src/java/org/apache/nutch/host/HostInjectorJob.java
+++ src/java/org/apache/nutch/host/HostInjectorJob.java
@@ -16,15 +16,6 @@
******************************************************************************/
package org.apache.nutch.host;
-import java.io.IOException;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraOutputFormat;
import org.apache.hadoop.conf.Configuration;
@@ -46,6 +37,11 @@ import org.apache.nutch.util.TableUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.*;
+
/**
* Creates or updates an existing host table from a text file.
* The files contain one host name per line, optionally followed by custom
@@ -127,7 +123,7 @@ public class HostInjectorJob implements Tool {
while (keysIter.hasNext()) {
String keymd = keysIter.next();
String valuemd = metadata.get(keymd);
- host.putToMetadata(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
+ host.getMetadata().put(new Utf8(keymd), ByteBuffer.wrap(valuemd.getBytes()));
}
String hostname;
if (url.indexOf("://")> -1) {
diff --git src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
index 8e70978..b7bbf97 100644
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
@@ -80,12 +80,12 @@ public class IndexingFiltersChecker extends Configured implements Tool {
ProtocolFactory factory = new ProtocolFactory(conf);
Protocol protocol = factory.getProtocol(url);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setBaseUrl(new org.apache.avro.util.Utf8(url));
ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
page.setProtocolStatus(protocolOutput.getStatus());
if (protocolOutput.getStatus().getCode() == ProtocolStatusCodes.SUCCESS) {
- page.setStatus(CrawlStatus.STATUS_FETCHED);
+ page.setStatus((int)CrawlStatus.STATUS_FETCHED);
page.setFetchTime(System.currentTimeMillis());
} else {
LOG.error("Fetch failed with protocol status: "
diff --git src/java/org/apache/nutch/parse/ParseStatusUtils.java src/java/org/apache/nutch/parse/ParseStatusUtils.java
index 179e85e..f642980 100644
--- src/java/org/apache/nutch/parse/ParseStatusUtils.java
+++ src/java/org/apache/nutch/parse/ParseStatusUtils.java
@@ -16,22 +16,23 @@
******************************************************************************/
package org.apache.nutch.parse;
-import java.util.HashMap;
-import java.util.Iterator;
-
import org.apache.avro.generic.GenericArray;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.storage.ParseStatus;
import org.apache.nutch.util.TableUtil;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+
public class ParseStatusUtils {
- public static ParseStatus STATUS_SUCCESS = new ParseStatus();
+ public static ParseStatus STATUS_SUCCESS = ParseStatus.newBuilder().build();
public static final HashMap minorCodes = new HashMap();
static {
- STATUS_SUCCESS.setMajorCode(ParseStatusCodes.SUCCESS);
+ STATUS_SUCCESS.setMajorCode((int)ParseStatusCodes.SUCCESS);
minorCodes.put(ParseStatusCodes.SUCCESS_OK, "ok");
minorCodes.put(ParseStatusCodes.SUCCESS_REDIRECT, "redirect");
minorCodes.put(ParseStatusCodes.FAILED_EXCEPTION, "exception");
@@ -52,7 +53,7 @@ public class ParseStatusUtils {
* argument, or null.
*/
public static String getMessage(ParseStatus status) {
- GenericArray args = status.getArgs();
+ List args = status.getArgs();
if (args != null && args.size() > 0) {
return TableUtil.toString(args.iterator().next());
}
@@ -60,12 +61,12 @@ public class ParseStatusUtils {
}
public static String getArg(ParseStatus status, int n) {
- GenericArray args = status.getArgs();
+ List args = status.getArgs();
if (args == null) {
return null;
}
int i = 0;
- for (Utf8 arg : args) {
+ for (CharSequence arg : args) {
if (i == n) {
return TableUtil.toString(arg);
}
@@ -75,19 +76,19 @@ public class ParseStatusUtils {
}
public static Parse getEmptyParse(Exception e, Configuration conf) {
- ParseStatus status = new ParseStatus();
- status.setMajorCode(ParseStatusCodes.FAILED);
- status.setMinorCode(ParseStatusCodes.FAILED_EXCEPTION);
- status.addToArgs(new Utf8(e.toString()));
+ ParseStatus status = ParseStatus.newBuilder().build();
+ status.setMajorCode((int)ParseStatusCodes.FAILED);
+ status.setMinorCode((int)ParseStatusCodes.FAILED_EXCEPTION);
+ status.getArgs().add(new Utf8(e.toString()));
return new Parse("", "", new Outlink[0], status);
}
public static Parse getEmptyParse(int minorCode, String message, Configuration conf) {
- ParseStatus status = new ParseStatus();
+ ParseStatus status = ParseStatus.newBuilder().build();
- status.setMajorCode(ParseStatusCodes.FAILED);
+ status.setMajorCode((int)ParseStatusCodes.FAILED);
status.setMinorCode(minorCode);
- status.addToArgs(new Utf8(message));
+ status.getArgs().add(new Utf8(message));
return new Parse("", "", new Outlink[0], status);
}
@@ -98,13 +99,13 @@ public class ParseStatusUtils {
}
StringBuilder sb = new StringBuilder();
sb.append(ParseStatusCodes.majorCodes[status.getMajorCode()] +
- "/" + minorCodes.get((short)status.getMinorCode()));
+ "/" + minorCodes.get(status.getMinorCode().shortValue()));
sb.append(" (" + status.getMajorCode() + "/" + status.getMinorCode() + ")");
sb.append(", args=[");
- GenericArray args = status.getArgs();
+ List args = status.getArgs();
if (args != null) {
int i = 0;
- Iterator it = args.iterator();
+ Iterator it = args.iterator();
while (it.hasNext()) {
if (i > 0) sb.append(',');
sb.append(it.next());
diff --git src/java/org/apache/nutch/parse/ParseUtil.java src/java/org/apache/nutch/parse/ParseUtil.java
index 04121a6..e517315 100644
--- src/java/org/apache/nutch/parse/ParseUtil.java
+++ src/java/org/apache/nutch/parse/ParseUtil.java
@@ -17,14 +17,8 @@
package org.apache.nutch.parse;
// Commons Logging imports
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
@@ -43,7 +37,13 @@ import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
/**
* A Utility class containing methods to simply perform parsing utilities such
@@ -162,7 +162,7 @@ public class ParseUtil extends Configured {
*/
public void process(String key, WebPage page) {
String url = TableUtil.unreverseUrl(key);
- byte status = (byte) page.getStatus();
+ byte status = page.getStatus().byteValue();
if (status != CrawlStatus.STATUS_FETCHED) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping " + url + " as status is: " + CrawlStatus.getName(status));
@@ -214,8 +214,8 @@ public class ParseUtil extends Configured {
LOG.warn("malformed url exception parsing redirect " + url);
return;
}
- page.putToOutlinks(new Utf8(newUrl), new Utf8());
- page.putToMetadata(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
+ page.getOutlinks().put(new Utf8(newUrl), new Utf8());
+ page.getMetadata().put(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
if (newUrl == null || newUrl.equals(url)) {
String reprUrl = URLUtil.chooseRepr(url, newUrl,
refreshTime < FetcherJob.PERM_REFRESH_TIME);
@@ -265,7 +265,7 @@ public class ParseUtil extends Configured {
continue;
}
Utf8 utf8ToUrl = new Utf8(toUrl);
- if (page.getFromOutlinks(utf8ToUrl) != null) {
+ if (page.getOutlinks().get(utf8ToUrl) != null) {
// skip duplicate outlinks
continue;
}
@@ -281,7 +281,7 @@ public class ParseUtil extends Configured {
}
}
validCount++;
- page.putToOutlinks(utf8ToUrl, new Utf8(outlinks[i].getAnchor()));
+ page.getOutlinks().put(utf8ToUrl, new Utf8(outlinks[i].getAnchor()));
}
Utf8 fetchMark = Mark.FETCH_MARK.checkMark(page);
if (fetchMark != null) {
diff --git src/java/org/apache/nutch/parse/ParserChecker.java src/java/org/apache/nutch/parse/ParserChecker.java
index 53f0e83..c3aac32 100644
--- src/java/org/apache/nutch/parse/ParserChecker.java
+++ src/java/org/apache/nutch/parse/ParserChecker.java
@@ -17,28 +17,24 @@
package org.apache.nutch.parse;
-import java.nio.ByteBuffer;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Map.Entry;
-
import org.apache.avro.util.Utf8;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.SignatureFactory;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatusUtils;
+import org.apache.nutch.protocol.*;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
/**
* Parser checker, useful for testing parser.
@@ -107,7 +103,7 @@ public class ParserChecker implements Tool {
ProtocolFactory factory = new ProtocolFactory(conf);
Protocol protocol = factory.getProtocol(url);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, page);
@@ -163,13 +159,13 @@ public class ParserChecker implements Tool {
LOG.info("---------\nUrl\n---------------\n");
System.out.print(url + "\n");
LOG.info("---------\nMetadata\n---------\n");
- Map metadata = page.getMetadata();
+ Map metadata = page.getMetadata();
StringBuffer sb = new StringBuffer();
if (metadata != null) {
- Iterator> iterator = metadata.entrySet()
+ Iterator> iterator = metadata.entrySet()
.iterator();
while (iterator.hasNext()) {
- Entry entry = iterator.next();
+ Entry entry = iterator.next();
sb.append(entry.getKey().toString()).append(" : \t")
.append(Bytes.toString(entry.getValue())).append("\n");
}
diff --git src/java/org/apache/nutch/parse/ParserJob.java src/java/org/apache/nutch/parse/ParserJob.java
index 9f8ce8f..ba17744 100644
--- src/java/org/apache/nutch/parse/ParserJob.java
+++ src/java/org/apache/nutch/parse/ParserJob.java
@@ -102,7 +102,7 @@ public class ParserJob extends NutchTool implements Tool {
@Override
public void map(String key, WebPage page, Context context)
throws IOException, InterruptedException {
- Utf8 mark = Mark.FETCH_MARK.checkMark(page);
+ CharSequence mark = Mark.FETCH_MARK.checkMark(page);
String unreverseKey = TableUtil.unreverseUrl(key);
if (batchId.equals(REPARSE)) {
LOG.debug("Reparsing " + unreverseKey);
@@ -161,7 +161,7 @@ public class ParserJob extends NutchTool implements Tool {
if (content == null) {
return false;
}
- Utf8 lengthUtf8 = page.getFromHeaders(new Utf8(HttpHeaders.CONTENT_LENGTH));
+ CharSequence lengthUtf8 = page.getHeaders().get(new Utf8(HttpHeaders.CONTENT_LENGTH));
if (lengthUtf8 == null) {
return false;
}
diff --git src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
index 438c967..7f1f11c 100644
--- src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
+++ src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
@@ -16,14 +16,15 @@
******************************************************************************/
package org.apache.nutch.protocol;
-import java.net.URL;
-import java.util.Iterator;
-
import org.apache.avro.generic.GenericArray;
import org.apache.avro.util.Utf8;
import org.apache.nutch.storage.ProtocolStatus;
import org.apache.nutch.util.TableUtil;
+import java.net.URL;
+import java.util.Iterator;
+import java.util.List;
+
public class ProtocolStatusUtils implements ProtocolStatusCodes {
// Useful static instances for status codes that don't usually require any
// additional arguments.
@@ -76,15 +77,15 @@ public class ProtocolStatusUtils implements ProtocolStatusCodes {
}
public static ProtocolStatus makeStatus(int code) {
- ProtocolStatus pstatus = new ProtocolStatus();
+ ProtocolStatus pstatus = ProtocolStatus.newBuilder().build();
pstatus.setCode(code);
- pstatus.setLastModified(0);
+ pstatus.setLastModified(0L);
return pstatus;
}
public static ProtocolStatus makeStatus(int code, String message) {
ProtocolStatus pstatus = makeStatus(code);
- pstatus.addToArgs(new Utf8(message));
+ pstatus.getArgs().add(new Utf8(message));
return pstatus;
}
@@ -93,7 +94,7 @@ public class ProtocolStatusUtils implements ProtocolStatusCodes {
}
public static String getMessage(ProtocolStatus pstatus) {
- GenericArray args = pstatus.getArgs();
+ List args = pstatus.getArgs();
if (args == null || args.size() == 0) {
return null;
}
@@ -107,10 +108,10 @@ public class ProtocolStatusUtils implements ProtocolStatusCodes {
StringBuilder sb = new StringBuilder();
sb.append(getName(status.getCode()));
sb.append(", args=[");
- GenericArray args = status.getArgs();
+ List args = status.getArgs();
if (args != null) {
int i = 0;
- Iterator it = args.iterator();
+ Iterator it = args.iterator();
while (it.hasNext()) {
if (i > 0) sb.append(',');
sb.append(it.next());
diff --git src/java/org/apache/nutch/storage/Host.java src/java/org/apache/nutch/storage/Host.java
index 941415f..b577344 100644
--- src/java/org/apache/nutch/storage/Host.java
+++ src/java/org/apache/nutch/storage/Host.java
@@ -1,157 +1,444 @@
-/*******************************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+/**
+ * Autogenerated by Avro
*
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
+ * DO NOT EDIT DIRECTLY
+ */
package org.apache.nutch.storage;
-import java.nio.ByteBuffer;
-import java.util.Map;
-import org.apache.avro.Schema;
-import org.apache.avro.AvroRuntimeException;
import org.apache.avro.util.Utf8;
-import org.apache.gora.persistency.StateManager;
-import org.apache.gora.persistency.impl.PersistentBase;
-import org.apache.gora.persistency.impl.StateManagerImpl;
-import org.apache.gora.persistency.StatefulHashMap;
import org.apache.nutch.util.Bytes;
@SuppressWarnings("all")
-public class Host extends PersistentBase {
- public static final org.apache.avro.Schema _SCHEMA = org.apache.avro.Schema.parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"bytes\"}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}}]}");
- public java.util.Map metadata;
- public java.util.Map outlinks;
- public java.util.Map inlinks;
-
+public class Host extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"bytes\"},\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"},\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"},\"default\":{}}]}");
+
+ /** Enum containing all data bean's fields. */
public static enum Field {
- METADATA(0,"metadata"),
- OUTLINKS(1,"outlinks"),
- INLINKS(2,"inlinks"),
+ __G__DIRTY(0, "__g__dirty"),
+ METADATA(1, "metadata"),
+ OUTLINKS(2, "outlinks"),
+ INLINKS(3, "inlinks"),
;
+ /**
+ * Field's index.
+ */
private int index;
+
+ /**
+ * Field's name.
+ */
private String name;
+
+ /**
+ * Field's constructor
+ * @param index field's index.
+ * @param name field's name.
+ */
Field(int index, String name) {this.index=index;this.name=name;}
+
+ /**
+ * Gets field's index.
+ * @return int field's index.
+ */
public int getIndex() {return index;}
+
+ /**
+ * Gets field's name.
+ * @return String field's name.
+ */
public String getName() {return name;}
+
+ /**
+ * Gets field's attributes to string.
+ * @return String field's attributes to string.
+ */
public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = {"metadata","outlinks","inlinks"};
- static {
- PersistentBase.registerFields(Host.class, _ALL_FIELDS);
+
+ public static final String[] _ALL_FIELDS = {
+ "__g__dirty",
+ "metadata",
+ "outlinks",
+ "inlinks",
+ };
+
+ /** Bytes used to represent whether or not a field is dirty. */
+ private java.nio.ByteBuffer __g__dirty = java.nio.ByteBuffer.wrap(new byte[1]);
+ private java.util.Map metadata;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
+ public Object get(int field$) {
+ switch (field$) {
+ case 0: return __g__dirty;
+ case 1: return metadata;
+ case 2: return outlinks;
+ case 3: return inlinks;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+ }
+ }
+
+ // Used by DatumReader. Applications should not call.
+ @SuppressWarnings(value="unchecked")
+ public void put(int field$, Object value) {
+ switch (field$) {
+ case 0: __g__dirty = (java.nio.ByteBuffer)(value); break;
+ case 1: metadata = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 2: outlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 3: inlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+ }
}
- public Host() {
- this(new StateManagerImpl());
+ /**
+ * Gets the value of the 'metadata' field.
+ */
+ public java.util.Map getMetadata() {
+ return metadata;
}
- public Host(StateManager stateManager) {
- super(stateManager);
- metadata = new StatefulHashMap();
- inlinks = new StatefulHashMap();
- outlinks = new StatefulHashMap();
+
+ /**
+ * Sets the value of the 'metadata' field.
+ * @param value the value to set.
+ */
+ public void setMetadata(java.util.Map value) {
+ this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ setDirty(1);
}
- public Host newInstance(StateManager stateManager) {
- return new Host(stateManager);
+
+ /**
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not used; the dirty flag is looked up by field index.
+ */
+ public boolean isMetadataDirty(java.util.Map value) {
+ return isDirty(1);
}
- public Schema getSchema() { return _SCHEMA; }
- public Object get(int _field) {
- switch (_field) {
- case 0: return metadata;
- case 1: return outlinks;
- case 2: return inlinks;
- default: throw new AvroRuntimeException("Bad index");
- }
+
+ /**
+ * Gets the value of the 'outlinks' field.
+ */
+ public java.util.Map getOutlinks() {
+ return outlinks;
}
- @SuppressWarnings(value="unchecked")
- public void put(int _field, Object _value) {
-
- if(isFieldEqual(_field, _value)) return;
- getStateManager().setDirty(this, _field);
- switch (_field) {
- case 0: metadata = (Map)_value; break;
- case 1: outlinks = (Map)_value; break;
- case 2: inlinks = (Map)_value; break;
- default: throw new AvroRuntimeException("Bad index");
- }
- }
- @SuppressWarnings("unchecked")
- public Map getMetadata() {
- return (Map) get(0);
- }
- public ByteBuffer getFromMetadata(Utf8 key) {
- if (metadata == null) { return null; }
- return metadata.get(key);
+
+ /**
+ * Sets the value of the 'outlinks' field.
+ * @param value the value to set.
+ */
+ public void setOutlinks(java.util.Map value) {
+ this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ setDirty(2);
}
- public void putToMetadata(Utf8 key, ByteBuffer value) {
- getStateManager().setDirty(this, 0);
- metadata.put(key, value);
+ /**
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not used; the dirty flag is looked up by field index.
+ */
+ public boolean isOutlinksDirty(java.util.Map value) {
+ return isDirty(2);
}
- public ByteBuffer removeFromMetadata(Utf8 key) {
- if (metadata == null) { return null; }
- getStateManager().setDirty(this, 0);
- return metadata.remove(key);
+
+ /**
+ * Gets the value of the 'inlinks' field.
+ */
+ public java.util.Map getInlinks() {
+ return inlinks;
}
- @SuppressWarnings("unchecked")
- public Map getOutlinks() {
- return (Map) get(1);
+
+ /**
+ * Sets the value of the 'inlinks' field.
+ * @param value the value to set.
+ */
+ public void setInlinks(java.util.Map value) {
+ this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ setDirty(3);
}
- public Utf8 getFromOutlinks(Utf8 key) {
- if (outlinks == null) { return null; }
- return outlinks.get(key);
+
+ /**
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not used; the dirty flag is looked up by field index.
+ */
+ public boolean isInlinksDirty(java.util.Map value) {
+ return isDirty(3);
}
- public void putToOutlinks(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 1);
- outlinks.put(key, value);
+
+ public boolean contains(String key) {
+ return metadata.containsKey(new Utf8(key));
}
- public Utf8 removeFromOutlinks(Utf8 key) {
- if (outlinks == null) { return null; }
- getStateManager().setDirty(this, 1);
- return outlinks.remove(key);
+
+ public String getValue(String key, String defaultValue) {
+ if (!contains(key))
+ return defaultValue;
+ return Bytes.toString(metadata.get(new Utf8(key)));
}
- @SuppressWarnings("unchecked")
- public Map getInlinks() {
- return (Map) get(2);
+
+ public int getInt(String key, int defaultValue) {
+ if (!contains(key))
+ return defaultValue;
+ return Integer.parseInt(getValue(key, null));
}
- public Utf8 getFromInlinks(Utf8 key) {
- if (inlinks == null) { return null; }
- return inlinks.get(key);
+
+ public long getLong(String key, long defaultValue) {
+ if (!contains(key))
+ return defaultValue;
+ return Long.parseLong(getValue(key, null));
}
- public void putToInlinks(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 2);
- inlinks.put(key, value);
+
+ /** Creates a new Host RecordBuilder */
+ public static Builder newBuilder() {
+ return new Builder();
}
- public Utf8 removeFromInlinks(Utf8 key) {
- if (inlinks == null) { return null; }
- getStateManager().setDirty(this, 2);
- return inlinks.remove(key);
+
+ /** Creates a new Host RecordBuilder by copying an existing Builder */
+ public static Builder newBuilder(Builder other) {
+ return new Builder(other);
}
- public boolean contains(String key) {
- return metadata.containsKey(new Utf8(key));
+ /** Creates a new Host RecordBuilder by copying an existing Host instance */
+ public static Builder newBuilder(Host other) {
+ return new Builder(other);
}
- public String getValue(String key, String defaultValue) {
- if (!contains(key)) return defaultValue;
- return Bytes.toString(metadata.get(new Utf8(key)));
+ private static java.nio.ByteBuffer deepCopyToWriteOnlyBuffer(
+ java.nio.ByteBuffer input) {
+ java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
+ int position = input.position();
+ input.reset();
+ int mark = input.position();
+ int limit = input.limit();
+ input.rewind();
+ input.limit(input.capacity());
+ copy.put(input);
+ input.rewind();
+ copy.rewind();
+ input.position(mark);
+ input.mark();
+ copy.position(mark);
+ copy.mark();
+ input.position(position);
+ copy.position(position);
+ input.limit(limit);
+ copy.limit(limit);
+ return copy.asReadOnlyBuffer();
}
- public int getInt(String key, int defaultValue) {
- if (!contains(key)) return defaultValue;
- return Integer.parseInt(getValue(key,null));
+ /**
+ * RecordBuilder for Host instances.
+ */
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
+
+ private java.nio.ByteBuffer __g__dirty;
+ private java.util.Map metadata;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
+
+ /** Creates a new Builder */
+ private Builder() {
+ super(Host.SCHEMA$);
+ }
+
+ /** Creates a Builder by copying an existing Builder */
+ private Builder(Builder other) {
+ super(other);
+ }
+
+ /** Creates a Builder by copying an existing Host instance */
+ private Builder(Host other) {
+ super(Host.SCHEMA$);
+ if (isValidValue(fields()[0], other.__g__dirty)) {
+ this.__g__dirty = (java.nio.ByteBuffer) data().deepCopy(fields()[0].schema(), other.__g__dirty);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.metadata)) {
+ this.metadata = (java.util.Map) data().deepCopy(fields()[1].schema(), other.metadata);
+ fieldSetFlags()[1] = true;
+ }
+ if (isValidValue(fields()[2], other.outlinks)) {
+ this.outlinks = (java.util.Map) data().deepCopy(fields()[2].schema(), other.outlinks);
+ fieldSetFlags()[2] = true;
+ }
+ if (isValidValue(fields()[3], other.inlinks)) {
+ this.inlinks = (java.util.Map) data().deepCopy(fields()[3].schema(), other.inlinks);
+ fieldSetFlags()[3] = true;
+ }
+ }
+
+ /** Gets the value of the 'metadata' field */
+ public java.util.Map getMetadata() {
+ return metadata;
+ }
+
+ /** Sets the value of the 'metadata' field */
+ public Builder setMetadata(java.util.Map value) {
+ validate(fields()[1], value);
+ this.metadata = value;
+ fieldSetFlags()[1] = true;
+ return this;
+ }
+
+ /** Checks whether the 'metadata' field has been set */
+ public boolean hasMetadata() {
+ return fieldSetFlags()[1];
+ }
+
+ /** Clears the value of the 'metadata' field */
+ public Builder clearMetadata() {
+ metadata = null;
+ fieldSetFlags()[1] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'outlinks' field */
+ public java.util.Map getOutlinks() {
+ return outlinks;
+ }
+
+ /** Sets the value of the 'outlinks' field */
+ public Builder setOutlinks(java.util.Map value) {
+ validate(fields()[2], value);
+ this.outlinks = value;
+ fieldSetFlags()[2] = true;
+ return this;
+ }
+
+ /** Checks whether the 'outlinks' field has been set */
+ public boolean hasOutlinks() {
+ return fieldSetFlags()[2];
+ }
+
+ /** Clears the value of the 'outlinks' field */
+ public Builder clearOutlinks() {
+ outlinks = null;
+ fieldSetFlags()[2] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'inlinks' field */
+ public java.util.Map getInlinks() {
+ return inlinks;
+ }
+
+ /** Sets the value of the 'inlinks' field */
+ public Builder setInlinks(java.util.Map value) {
+ validate(fields()[3], value);
+ this.inlinks = value;
+ fieldSetFlags()[3] = true;
+ return this;
+ }
+
+ /** Checks whether the 'inlinks' field has been set */
+ public boolean hasInlinks() {
+ return fieldSetFlags()[3];
+ }
+
+ /** Clears the value of the 'inlinks' field */
+ public Builder clearInlinks() {
+ inlinks = null;
+ fieldSetFlags()[3] = false;
+ return this;
+ }
+
+ @Override
+ public Host build() {
+ try {
+ Host record = new Host();
+ record.__g__dirty = fieldSetFlags()[0] ? this.__g__dirty : (java.nio.ByteBuffer) java.nio.ByteBuffer.wrap(new byte[1]);
+ record.metadata = fieldSetFlags()[1] ? this.metadata : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[1]));
+ record.outlinks = fieldSetFlags()[2] ? this.outlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[2]));
+ record.inlinks = fieldSetFlags()[3] ? this.inlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[3]));
+ return record;
+ } catch (Exception e) {
+ throw new org.apache.avro.AvroRuntimeException(e);
+ }
+ }
}
- public long getLong(String key, long defaultValue) {
- if (!contains(key)) return defaultValue;
- return Long.parseLong(getValue(key,null));
+
+ public Tombstone getTombstone(){
+ return TOMBSTONE;
+ }
+
+ public Host newInstance(){
+ return newBuilder().build();
+ }
+
+ private static final Tombstone TOMBSTONE = new Tombstone();
+
+ public static final class Tombstone extends Host implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'metadata' field.
+ */
+ public java.util.Map getMetadata() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'metadata' field.
+ * @param value the value to set.
+ */
+ public void setMetadata(java.util.Map value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value the value to set.
+ */
+ public boolean isMetadataDirty(java.util.Map value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'outlinks' field.
+ */
+ public java.util.Map getOutlinks() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'outlinks' field.
+ * @param value the value to set.
+ */
+ public void setOutlinks(java.util.Map value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value the value to set.
+ */
+ public boolean isOutlinksDirty(java.util.Map value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'inlinks' field.
+ */
+ public java.util.Map getInlinks() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'inlinks' field.
+ * @param value the value to set.
+ */
+ public void setInlinks(java.util.Map value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value the value to set.
+ */
+ public boolean isInlinksDirty(java.util.Map value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
}
-}
+
+}
\ No newline at end of file
diff --git src/java/org/apache/nutch/storage/Mark.java src/java/org/apache/nutch/storage/Mark.java
index f20ed9c..d334304 100644
--- src/java/org/apache/nutch/storage/Mark.java
+++ src/java/org/apache/nutch/storage/Mark.java
@@ -29,7 +29,7 @@ public enum Mark {
}
public void putMark(WebPage page, Utf8 markValue) {
- page.putToMarkers(name, markValue);
+ page.getMarkers().put(name, markValue);
}
public void putMark(WebPage page, String markValue) {
@@ -37,11 +37,11 @@ public enum Mark {
}
public Utf8 removeMark(WebPage page) {
- return page.removeFromMarkers(name);
+ return (Utf8) page.getMarkers().put(name, null); // NOTE(review): keeps the key mapped to null instead of removing it; returns the old value assuming java.util.Map semantics — confirm
}
public Utf8 checkMark(WebPage page) {
- return page.getFromMarkers(name);
+ return (Utf8) page.getMarkers().get(name);
}
/**
@@ -50,8 +50,8 @@ public enum Mark {
* @return If the mark was present.
*/
public Utf8 removeMarkIfExist(WebPage page) {
- if (page.getFromMarkers(name) != null) {
- return page.removeFromMarkers(name);
+ if (checkMark(page) != null) {
+ return removeMark(page);
}
return null;
}
diff --git src/java/org/apache/nutch/storage/ParseStatus.java src/java/org/apache/nutch/storage/ParseStatus.java
index 68dc651..60e8afc 100644
--- src/java/org/apache/nutch/storage/ParseStatus.java
+++ src/java/org/apache/nutch/storage/ParseStatus.java
@@ -1,113 +1,416 @@
-/*******************************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+/**
+ * Autogenerated by Avro
*
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-package org.apache.nutch.storage;
-
-import java.nio.ByteBuffer;
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.avro.Protocol;
-import org.apache.avro.Schema;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.avro.Protocol;
-import org.apache.avro.util.Utf8;
-import org.apache.avro.ipc.AvroRemoteException;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.specific.FixedSize;
-import org.apache.avro.specific.SpecificExceptionBase;
-import org.apache.avro.specific.SpecificRecordBase;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.avro.specific.SpecificFixed;
-import org.apache.gora.persistency.StateManager;
-import org.apache.gora.persistency.impl.PersistentBase;
-import org.apache.gora.persistency.impl.StateManagerImpl;
-import org.apache.gora.persistency.StatefulHashMap;
-import org.apache.gora.persistency.ListGenericArray;
-
+ * DO NOT EDIT DIRECTLY
+ */
+package org.apache.nutch.storage;
@SuppressWarnings("all")
-public class ParseStatus extends PersistentBase {
- public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
+public class ParseStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"majorCode\",\"type\":\"int\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]}]}");
+
+ /** Enum containing all data bean's fields. */
public static enum Field {
- MAJOR_CODE(0,"majorCode"),
- MINOR_CODE(1,"minorCode"),
- ARGS(2,"args"),
+ __G__DIRTY(0, "__g__dirty"),
+ MAJOR_CODE(1, "majorCode"),
+ MINOR_CODE(2, "minorCode"),
+ ARGS(3, "args"),
;
+ /**
+ * Field's index.
+ */
private int index;
+
+ /**
+ * Field's name.
+ */
private String name;
+
+ /**
+ * Field's constructor
+ * @param index field's index.
+ * @param name field's name.
+ */
Field(int index, String name) {this.index=index;this.name=name;}
+
+ /**
+ * Gets field's index.
+ * @return int field's index.
+ */
public int getIndex() {return index;}
+
+ /**
+ * Gets field's name.
+ * @return String field's name.
+ */
public String getName() {return name;}
+
+ /**
+ * Returns the field's name as its string representation.
+ * @return String field's name.
+ */
public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = {"majorCode","minorCode","args",};
- static {
- PersistentBase.registerFields(ParseStatus.class, _ALL_FIELDS);
- }
+
+ public static final String[] _ALL_FIELDS = {
+ "__g__dirty",
+ "majorCode",
+ "minorCode",
+ "args",
+ };
+
+ /** Bytes used to represent whether or not a field is dirty. */
+ private java.nio.ByteBuffer __g__dirty = java.nio.ByteBuffer.wrap(new byte[1]);
private int majorCode;
private int minorCode;
- private GenericArray args;
- public ParseStatus() {
- this(new StateManagerImpl());
- }
- public ParseStatus(StateManager stateManager) {
- super(stateManager);
- args = new ListGenericArray(getSchema().getField("args").schema());
- }
- public ParseStatus newInstance(StateManager stateManager) {
- return new ParseStatus(stateManager);
- }
- public Schema getSchema() { return _SCHEMA; }
- public Object get(int _field) {
- switch (_field) {
- case 0: return majorCode;
- case 1: return minorCode;
- case 2: return args;
- default: throw new AvroRuntimeException("Bad index");
+ private java.util.List args;
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
+ public Object get(int field$) {
+ switch (field$) {
+ case 0: return __g__dirty;
+ case 1: return majorCode;
+ case 2: return minorCode;
+ case 3: return args;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
+
+ // Used by DatumReader. Applications should not call.
@SuppressWarnings(value="unchecked")
- public void put(int _field, Object _value) {
- if(isFieldEqual(_field, _value)) return;
- getStateManager().setDirty(this, _field);
- switch (_field) {
- case 0:majorCode = (Integer)_value; break;
- case 1:minorCode = (Integer)_value; break;
- case 2:args = (GenericArray)_value; break;
- default: throw new AvroRuntimeException("Bad index");
+ public void put(int field$, Object value) {
+ switch (field$) {
+ case 0: __g__dirty = (java.nio.ByteBuffer)(value); break;
+ case 1: majorCode = (Integer)(value); break;
+ case 2: minorCode = (Integer)(value); break;
+ case 3: args = (java.util.List)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
- public int getMajorCode() {
- return (Integer) get(0);
+
+ /**
+ * Gets the value of the 'majorCode' field.
+ */
+ public Integer getMajorCode() {
+ return majorCode;
+ }
+
+ /**
+ * Sets the value of the 'majorCode' field.
+ * @param value the value to set.
+ */
+ public void setMajorCode(Integer value) {
+ this.majorCode = value;
+ setDirty(1);
+ }
+
+ /**
+ * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isMajorCodeDirty(Integer value) {
+ return isDirty(1);
+ }
+
+ /**
+ * Gets the value of the 'minorCode' field.
+ */
+ public Integer getMinorCode() {
+ return minorCode;
+ }
+
+ /**
+ * Sets the value of the 'minorCode' field.
+ * @param value the value to set.
+ */
+ public void setMinorCode(Integer value) {
+ this.minorCode = value;
+ setDirty(2);
+ }
+
+ /**
+ * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isMinorCodeDirty(Integer value) {
+ return isDirty(2);
+ }
+
+ /**
+ * Gets the value of the 'args' field.
+ */
+ public java.util.List getArgs() {
+ return args;
+ }
+
+ /**
+ * Sets the value of the 'args' field.
+ * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+ setDirty(3);
+ }
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ return isDirty(3);
}
- public void setMajorCode(int value) {
- put(0, value);
+
+ /** Creates a new ParseStatus RecordBuilder */
+ public static Builder newBuilder() {
+ return new Builder();
}
- public int getMinorCode() {
- return (Integer) get(1);
+
+ /** Creates a new ParseStatus RecordBuilder by copying an existing Builder */
+ public static Builder newBuilder(Builder other) {
+ return new Builder(other);
}
- public void setMinorCode(int value) {
- put(1, value);
+
+ /** Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus instance */
+ public static Builder newBuilder(ParseStatus other) {
+ return new Builder(other);
}
- @SuppressWarnings("unchecked")
- public GenericArray getArgs() {
- return (GenericArray) get(2);
+
+ private static java.nio.ByteBuffer deepCopyToWriteOnlyBuffer(
+ java.nio.ByteBuffer input) {
+ java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
+ int position = input.position();
+ input.reset();
+ int mark = input.position();
+ int limit = input.limit();
+ input.rewind();
+ input.limit(input.capacity());
+ copy.put(input);
+ input.rewind();
+ copy.rewind();
+ input.position(mark);
+ input.mark();
+ copy.position(mark);
+ copy.mark();
+ input.position(position);
+ copy.position(position);
+ input.limit(limit);
+ copy.limit(limit);
+ return copy.asReadOnlyBuffer();
}
- public void addToArgs(Utf8 element) {
- getStateManager().setDirty(this, 2);
- args.add(element);
+
+ /**
+ * RecordBuilder for ParseStatus instances.
+ */
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
+
+ private java.nio.ByteBuffer __g__dirty;
+ private int majorCode;
+ private int minorCode;
+ private java.util.List args;
+
+ /** Creates a new Builder */
+ private Builder() {
+ super(ParseStatus.SCHEMA$);
+ }
+
+ /** Creates a Builder by copying an existing Builder */
+ private Builder(Builder other) {
+ super(other);
+ }
+
+ /** Creates a Builder by copying an existing ParseStatus instance */
+ private Builder(ParseStatus other) {
+ super(ParseStatus.SCHEMA$);
+ if (isValidValue(fields()[0], other.__g__dirty)) {
+ this.__g__dirty = (java.nio.ByteBuffer) data().deepCopy(fields()[0].schema(), other.__g__dirty);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.majorCode)) {
+ this.majorCode = (Integer) data().deepCopy(fields()[1].schema(), other.majorCode);
+ fieldSetFlags()[1] = true;
+ }
+ if (isValidValue(fields()[2], other.minorCode)) {
+ this.minorCode = (Integer) data().deepCopy(fields()[2].schema(), other.minorCode);
+ fieldSetFlags()[2] = true;
+ }
+ if (isValidValue(fields()[3], other.args)) {
+ this.args = (java.util.List) data().deepCopy(fields()[3].schema(), other.args);
+ fieldSetFlags()[3] = true;
+ }
+ }
+
+ /** Gets the value of the 'majorCode' field */
+ public Integer getMajorCode() {
+ return majorCode;
+ }
+
+ /** Sets the value of the 'majorCode' field */
+ public Builder setMajorCode(int value) {
+ validate(fields()[1], value);
+ this.majorCode = value;
+ fieldSetFlags()[1] = true;
+ return this;
+ }
+
+ /** Checks whether the 'majorCode' field has been set */
+ public boolean hasMajorCode() {
+ return fieldSetFlags()[1];
+ }
+
+ /** Clears the value of the 'majorCode' field */
+ public Builder clearMajorCode() {
+ fieldSetFlags()[1] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'minorCode' field */
+ public Integer getMinorCode() {
+ return minorCode;
+ }
+
+ /** Sets the value of the 'minorCode' field */
+ public Builder setMinorCode(int value) {
+ validate(fields()[2], value);
+ this.minorCode = value;
+ fieldSetFlags()[2] = true;
+ return this;
+ }
+
+ /** Checks whether the 'minorCode' field has been set */
+ public boolean hasMinorCode() {
+ return fieldSetFlags()[2];
+ }
+
+ /** Clears the value of the 'minorCode' field */
+ public Builder clearMinorCode() {
+ fieldSetFlags()[2] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'args' field */
+ public java.util.List getArgs() {
+ return args;
+ }
+
+ /** Sets the value of the 'args' field */
+ public Builder setArgs(java.util.List value) {
+ validate(fields()[3], value);
+ this.args = value;
+ fieldSetFlags()[3] = true;
+ return this;
+ }
+
+ /** Checks whether the 'args' field has been set */
+ public boolean hasArgs() {
+ return fieldSetFlags()[3];
+ }
+
+ /** Clears the value of the 'args' field */
+ public Builder clearArgs() {
+ args = null;
+ fieldSetFlags()[3] = false;
+ return this;
+ }
+
+ @Override
+ public ParseStatus build() {
+ try {
+ ParseStatus record = new ParseStatus();
+ record.__g__dirty = fieldSetFlags()[0] ? this.__g__dirty : (java.nio.ByteBuffer) java.nio.ByteBuffer.wrap(new byte[1]);
+ record.majorCode = fieldSetFlags()[1] ? this.majorCode : (Integer) defaultValue(fields()[1]);
+ record.minorCode = fieldSetFlags()[2] ? this.minorCode : (Integer) defaultValue(fields()[2]);
+ record.args = fieldSetFlags()[3] ? this.args : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[3]));
+ return record;
+ } catch (Exception e) {
+ throw new org.apache.avro.AvroRuntimeException(e);
+ }
+ }
+ }
+
+ public Tombstone getTombstone(){
+ return TOMBSTONE;
+ }
+
+ public ParseStatus newInstance(){
+ return newBuilder().build();
+ }
+
+ private static final Tombstone TOMBSTONE = new Tombstone();
+
+ public static final class Tombstone extends ParseStatus implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'majorCode' field.
+ */
+ public Integer getMajorCode() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'majorCode' field.
+ * @param value the value to set.
+ */
+ public void setMajorCode(Integer value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isMajorCodeDirty(Integer value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'minorCode' field.
+ */
+ public Integer getMinorCode() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'minorCode' field.
+ * @param value the value to set.
+ */
+ public void setMinorCode(Integer value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isMinorCodeDirty(Integer value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'args' field.
+ */
+ public java.util.List getArgs() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'args' field.
+ * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
}
-}
+
+}
\ No newline at end of file
diff --git src/java/org/apache/nutch/storage/ProtocolStatus.java src/java/org/apache/nutch/storage/ProtocolStatus.java
index b0845b6..d8acaa9 100644
--- src/java/org/apache/nutch/storage/ProtocolStatus.java
+++ src/java/org/apache/nutch/storage/ProtocolStatus.java
@@ -1,122 +1,428 @@
-/*******************************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+/**
+ * Autogenerated by Avro
*
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-package org.apache.nutch.storage;
-
-import java.nio.ByteBuffer;
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.avro.Protocol;
-import org.apache.avro.Schema;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.avro.Protocol;
-import org.apache.avro.util.Utf8;
-import org.apache.avro.ipc.AvroRemoteException;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.specific.FixedSize;
-import org.apache.avro.specific.SpecificExceptionBase;
-import org.apache.avro.specific.SpecificRecordBase;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.avro.specific.SpecificFixed;
-import org.apache.gora.persistency.StateManager;
-import org.apache.gora.persistency.impl.PersistentBase;
-import org.apache.gora.persistency.impl.StateManagerImpl;
-import org.apache.gora.persistency.StatefulHashMap;
-import org.apache.gora.persistency.ListGenericArray;
+ * DO NOT EDIT DIRECTLY
+ */
+package org.apache.nutch.storage;
+
import org.apache.nutch.protocol.ProtocolStatusUtils;
@SuppressWarnings("all")
-public class ProtocolStatus extends PersistentBase {
- public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}");
+public class ProtocolStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"code\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"default\":0}]}");
+
+ /** Enum containing all data bean's fields. */
public static enum Field {
- CODE(0,"code"),
- ARGS(1,"args"),
- LAST_MODIFIED(2,"lastModified"),
+ __G__DIRTY(0, "__g__dirty"),
+ CODE(1, "code"),
+ ARGS(2, "args"),
+ LAST_MODIFIED(3, "lastModified"),
;
+ /**
+ * Field's index.
+ */
private int index;
+
+ /**
+ * Field's name.
+ */
private String name;
+
+ /**
+ * Field's constructor
+ * @param index field's index.
+ * @param name field's name.
+ */
Field(int index, String name) {this.index=index;this.name=name;}
+
+ /**
+ * Gets field's index.
+ * @return int field's index.
+ */
public int getIndex() {return index;}
+
+ /**
+ * Gets field's name.
+ * @return String field's name.
+ */
public String getName() {return name;}
+
+ /**
+ * Returns the field's name as its string representation.
+ * @return String field's name.
+ */
public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = {"code","args","lastModified",};
- static {
- PersistentBase.registerFields(ProtocolStatus.class, _ALL_FIELDS);
- }
+
+ public static final String[] _ALL_FIELDS = {
+ "__g__dirty",
+ "code",
+ "args",
+ "lastModified",
+ };
+
+ /** Bytes used to represent whether or not a field is dirty. */
+ private java.nio.ByteBuffer __g__dirty = java.nio.ByteBuffer.wrap(new byte[1]);
private int code;
- private GenericArray args;
+ private java.util.List args;
private long lastModified;
- public ProtocolStatus() {
- this(new StateManagerImpl());
- }
- public ProtocolStatus(StateManager stateManager) {
- super(stateManager);
- args = new ListGenericArray(getSchema().getField("args").schema());
- }
- public ProtocolStatus newInstance(StateManager stateManager) {
- return new ProtocolStatus(stateManager);
- }
- public Schema getSchema() { return _SCHEMA; }
- public Object get(int _field) {
- switch (_field) {
- case 0: return code;
- case 1: return args;
- case 2: return lastModified;
- default: throw new AvroRuntimeException("Bad index");
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
+ public Object get(int field$) {
+ switch (field$) {
+ case 0: return __g__dirty;
+ case 1: return code;
+ case 2: return args;
+ case 3: return lastModified;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
+
+ // Used by DatumReader. Applications should not call.
@SuppressWarnings(value="unchecked")
- public void put(int _field, Object _value) {
- if(isFieldEqual(_field, _value)) return;
- getStateManager().setDirty(this, _field);
- switch (_field) {
- case 0:code = (Integer)_value; break;
- case 1:args = (GenericArray)_value; break;
- case 2:lastModified = (Long)_value; break;
- default: throw new AvroRuntimeException("Bad index");
+ public void put(int field$, Object value) {
+ switch (field$) {
+ case 0: __g__dirty = (java.nio.ByteBuffer)(value); break;
+ case 1: code = (Integer)(value); break;
+ case 2: args = (java.util.List)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
+ case 3: lastModified = (Long)(value); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
- public int getCode() {
- return (Integer) get(0);
+
+ /**
+ * Gets the value of the 'code' field.
+ */
+ public Integer getCode() {
+ return code;
+ }
+
+ /**
+ * Sets the value of the 'code' field.
+ * @param value the value to set.
+ */
+ public void setCode(Integer value) {
+ this.code = value;
+ setDirty(1);
+ }
+
+ /**
+ * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isCodeDirty(Integer value) {
+ return isDirty(1);
}
- public void setCode(int value) {
- put(0, value);
+
+ /**
+ * Gets the value of the 'args' field.
+ */
+ public java.util.List getArgs() {
+ return args;
}
- @SuppressWarnings("unchecked")
- public GenericArray getArgs() {
- return (GenericArray) get(1);
+
+ /**
+ * Sets the value of the 'args' field.
+ * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+ setDirty(2);
}
- public void addToArgs(Utf8 element) {
- getStateManager().setDirty(this, 1);
- args.add(element);
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ return isDirty(2);
}
- public long getLastModified() {
- return (Long) get(2);
+
+ /**
+ * Gets the value of the 'lastModified' field.
+ */
+ public Long getLastModified() {
+ return lastModified;
}
- public void setLastModified(long value) {
- put(2, value);
+
+ /**
+ * Sets the value of the 'lastModified' field.
+ * @param value the value to set.
+ */
+ public void setLastModified(Long value) {
+ this.lastModified = value;
+ setDirty(3);
}
/**
+ * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; only the field's index is passed to isDirty.
+ */
+ public boolean isLastModifiedDirty(Long value) {
+ return isDirty(3);
+ }
+
+ /**
* A convenience method which returns a successful {@link ProtocolStatus}.
+ *
* @return the {@link ProtocolStatus} value for 200 (success).
*/
public boolean isSuccess() {
- return code == ProtocolStatusUtils.SUCCESS;
+ return code == ProtocolStatusUtils.SUCCESS;
+ }
+
+ /** Creates a new ProtocolStatus RecordBuilder */
+ public static Builder newBuilder() {
+ return new Builder();
+ }
+
+ /** Creates a new ProtocolStatus RecordBuilder by copying an existing Builder */
+ public static Builder newBuilder(Builder other) {
+ return new Builder(other);
}
-}
+
+ /** Creates a new ProtocolStatus RecordBuilder by copying an existing ProtocolStatus instance */
+ public static Builder newBuilder(ProtocolStatus other) {
+ return new Builder(other);
+ }
+
+ private static java.nio.ByteBuffer deepCopyToWriteOnlyBuffer(
+ java.nio.ByteBuffer input) {
+ java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
+ int position = input.position();
+ input.reset();
+ int mark = input.position();
+ int limit = input.limit();
+ input.rewind();
+ input.limit(input.capacity());
+ copy.put(input);
+ input.rewind();
+ copy.rewind();
+ input.position(mark);
+ input.mark();
+ copy.position(mark);
+ copy.mark();
+ input.position(position);
+ copy.position(position);
+ input.limit(limit);
+ copy.limit(limit);
+ return copy.asReadOnlyBuffer();
+ }
+
+ /**
+ * RecordBuilder for ProtocolStatus instances.
+ */
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
+
+ private java.nio.ByteBuffer __g__dirty;
+ private int code;
+ private java.util.List args;
+ private long lastModified;
+
+ /** Creates a new Builder */
+ private Builder() {
+ super(ProtocolStatus.SCHEMA$);
+ }
+
+ /** Creates a Builder by copying an existing Builder */
+ private Builder(Builder other) {
+ super(other);
+ }
+
+ /** Creates a Builder by copying an existing ProtocolStatus instance */
+ private Builder(ProtocolStatus other) {
+ super(ProtocolStatus.SCHEMA$);
+ if (isValidValue(fields()[0], other.__g__dirty)) {
+ this.__g__dirty = (java.nio.ByteBuffer) data().deepCopy(fields()[0].schema(), other.__g__dirty);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.code)) {
+ this.code = (Integer) data().deepCopy(fields()[1].schema(), other.code);
+ fieldSetFlags()[1] = true;
+ }
+ if (isValidValue(fields()[2], other.args)) {
+ this.args = (java.util.List) data().deepCopy(fields()[2].schema(), other.args);
+ fieldSetFlags()[2] = true;
+ }
+ if (isValidValue(fields()[3], other.lastModified)) {
+ this.lastModified = (Long) data().deepCopy(fields()[3].schema(), other.lastModified);
+ fieldSetFlags()[3] = true;
+ }
+ }
+
+ /** Gets the value of the 'code' field */
+ public Integer getCode() {
+ return code;
+ }
+
+ /** Sets the value of the 'code' field */
+ public Builder setCode(int value) {
+ validate(fields()[1], value);
+ this.code = value;
+ fieldSetFlags()[1] = true;
+ return this;
+ }
+
+ /** Checks whether the 'code' field has been set */
+ public boolean hasCode() {
+ return fieldSetFlags()[1];
+ }
+
+ /** Clears the value of the 'code' field */
+ public Builder clearCode() {
+ fieldSetFlags()[1] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'args' field */
+ public java.util.List getArgs() {
+ return args;
+ }
+
+ /** Sets the value of the 'args' field */
+ public Builder setArgs(java.util.List value) {
+ validate(fields()[2], value);
+ this.args = value;
+ fieldSetFlags()[2] = true;
+ return this;
+ }
+
+ /** Checks whether the 'args' field has been set */
+ public boolean hasArgs() {
+ return fieldSetFlags()[2];
+ }
+
+ /** Clears the value of the 'args' field */
+ public Builder clearArgs() {
+ args = null;
+ fieldSetFlags()[2] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'lastModified' field */
+ public Long getLastModified() {
+ return lastModified;
+ }
+
+ /** Sets the value of the 'lastModified' field */
+ public Builder setLastModified(long value) {
+ validate(fields()[3], value);
+ this.lastModified = value;
+ fieldSetFlags()[3] = true;
+ return this;
+ }
+
+ /** Checks whether the 'lastModified' field has been set */
+ public boolean hasLastModified() {
+ return fieldSetFlags()[3];
+ }
+
+ /** Clears the value of the 'lastModified' field */
+ public Builder clearLastModified() {
+ fieldSetFlags()[3] = false;
+ return this;
+ }
+
+ @Override
+ public ProtocolStatus build() {
+ try {
+ ProtocolStatus record = new ProtocolStatus();
+ record.__g__dirty = fieldSetFlags()[0] ? this.__g__dirty : (java.nio.ByteBuffer) java.nio.ByteBuffer.wrap(new byte[1]);
+ record.code = fieldSetFlags()[1] ? this.code : (Integer) defaultValue(fields()[1]);
+ record.args = fieldSetFlags()[2] ? this.args : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[2]));
+ record.lastModified = fieldSetFlags()[3] ? this.lastModified : (Long) defaultValue(fields()[3]);
+ return record;
+ } catch (Exception e) {
+ throw new org.apache.avro.AvroRuntimeException(e);
+ }
+ }
+ }
+
+ public Tombstone getTombstone(){
+ return TOMBSTONE;
+ }
+
+ public ProtocolStatus newInstance(){
+ return newBuilder().build();
+ }
+
+ private static final Tombstone TOMBSTONE = new Tombstone();
+
+ public static final class Tombstone extends ProtocolStatus implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'code' field.
+ */
+ public Integer getCode() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'code' field.
+ * @param value the value to set.
+ */
+ public void setCode(Integer value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isCodeDirty(Integer value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'args' field.
+ */
+ public java.util.List getArgs() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'args' field.
+ * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'lastModified' field.
+ */
+ public Long getLastModified() {
+ throw new UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'lastModified' field.
+ * @param value the value to set.
+ */
+ public void setLastModified(Long value) {
+ throw new UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value ignored; this tombstone method always throws.
+ */
+ public boolean isLastModifiedDirty(Long value) {
+ throw new UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
+ }
+
+}
\ No newline at end of file
diff --git src/java/org/apache/nutch/storage/StorageUtils.java src/java/org/apache/nutch/storage/StorageUtils.java
index 1540e3e..4bdc775 100644
--- src/java/org/apache/nutch/storage/StorageUtils.java
+++ src/java/org/apache/nutch/storage/StorageUtils.java
@@ -16,10 +16,6 @@
******************************************************************************/
package org.apache.nutch.storage;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
-
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.gora.mapreduce.GoraOutputFormat;
import org.apache.gora.mapreduce.GoraReducer;
@@ -33,6 +29,10 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.nutch.metadata.Nutch;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+
/**
* Entry point to Gora store/mapreduce functionality.
* Translates the concept of "crawlid" to the corresponding Gora support.
@@ -52,23 +52,23 @@ public class StorageUtils {
@SuppressWarnings("unchecked")
public static DataStore createWebStore(Configuration conf,
Class keyClass, Class persistentClass) throws ClassNotFoundException, GoraException {
-
- String schema = null;
+
+ String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
+ String schemaPrefix = "";
+ if (!crawlId.isEmpty()) {
+ schemaPrefix = crawlId + "_";
+ }
+
+ String schema;
if (WebPage.class.equals(persistentClass)) {
schema = conf.get("storage.schema.webpage", "webpage");
+ conf.set("preferred.schema.name", schemaPrefix + "webpage");
} else if (Host.class.equals(persistentClass)) {
schema = conf.get("storage.schema.host", "host");
+ conf.set("preferred.schema.name", schemaPrefix + "host");
} else {
throw new UnsupportedOperationException("Unable to create store for class " + persistentClass);
}
-
- String crawlId = conf.get(Nutch.CRAWL_ID_KEY, "");
-
- if (!crawlId.isEmpty()) {
- conf.set("schema.prefix", crawlId + "_");
- } else {
- conf.set("schema.prefix", "");
- }
Class extends DataStore> dataStoreClass =
(Class extends DataStore>) getDataStoreClass(conf);
diff --git src/java/org/apache/nutch/storage/WebPage.java src/java/org/apache/nutch/storage/WebPage.java
index ddfc6b9..eaba3e2 100644
--- src/java/org/apache/nutch/storage/WebPage.java
+++ src/java/org/apache/nutch/storage/WebPage.java
@@ -1,84 +1,108 @@
-/*******************************************************************************
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+/**
+ * Autogenerated by Avro
*
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-
-package org.apache.nutch.storage;
-
-import java.nio.ByteBuffer;
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.avro.Protocol;
-import org.apache.avro.Schema;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.avro.Protocol;
-import org.apache.avro.util.Utf8;
-import org.apache.avro.ipc.AvroRemoteException;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.specific.FixedSize;
-import org.apache.avro.specific.SpecificExceptionBase;
-import org.apache.avro.specific.SpecificRecordBase;
-import org.apache.avro.specific.SpecificRecord;
-import org.apache.avro.specific.SpecificFixed;
-import org.apache.gora.persistency.StateManager;
-import org.apache.gora.persistency.impl.PersistentBase;
-import org.apache.gora.persistency.impl.StateManagerImpl;
-import org.apache.gora.persistency.StatefulHashMap;
-import org.apache.gora.persistency.ListGenericArray;
-
+ * DO NOT EDIT DIRECTLY
+ */
+package org.apache.nutch.storage;
@SuppressWarnings("all")
-public class WebPage extends PersistentBase {
- public static final Schema _SCHEMA = Schema.parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"baseUrl\",\"type\":\"string\"},{\"name\":\"status\",\"type\":\"int\"},{\"name\":\"fetchTime\",\"type\":\"long\"},{\"name\":\"prevFetchTime\",\"type\":\"long\"},{\"name\":\"fetchInterval\",\"type\":\"int\"},{\"name\":\"retriesSinceFetch\",\"type\":\"int\"},{\"name\":\"modifiedTime\",\"type\":\"long\"},{\"name\":\"prevModifiedTime\",\"type\":\"long\"},{\"name\":\"protocolStatus\",\"type\":{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"fields\":[{\"name\":\"code\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},{\"name\":\"lastModified\",\"type\":\"long\"}]}},{\"name\":\"content\",\"type\":\"bytes\"},{\"name\":\"contentType\",\"type\":\"string\"},{\"name\":\"prevSignature\",\"type\":\"bytes\"},{\"name\":\"signature\",\"type\":\"bytes\"},{\"name\":\"title\",\"type\":\"string\"},{\"name\":\"text\",\"type\":\"string\"},{\"name\":\"parseStatus\",\"type\":{\"type\":\"record\",\"name\":\"ParseStatus\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\"},{\"name\":\"minorCode\",\"type\":\"int\"},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}},{\"name\":\"score\",\"type\":\"float\"},{\"name\":\"reprUrl\",\"type\":\"string\"},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":\"string\"}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":\"bytes\"}},{\"name\":\"batchId\",\"type\":\"string\"}]}");
+public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AAAAAA==\"},{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is dirty.\",\"default\":\"AA==\"},{\"name\":\"code\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"fields\":[{\"name\":\"__g__dirty\",\"type\":\"bytes\",\"doc\":\"Bytes used to represent weather or not a field is 
dirty.\",\"default\":\"AA==\"},{\"name\":\"majorCode\",\"type\":\"int\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"default\":null}]}");
+
+ /** Enum containing all data bean's fields. */
public static enum Field {
- BASE_URL(0,"baseUrl"),
- STATUS(1,"status"),
- FETCH_TIME(2,"fetchTime"),
- PREV_FETCH_TIME(3,"prevFetchTime"),
- FETCH_INTERVAL(4,"fetchInterval"),
- RETRIES_SINCE_FETCH(5,"retriesSinceFetch"),
- MODIFIED_TIME(6,"modifiedTime"),
- PREV_MODIFIED_TIME(7,"prevModifiedTime"),
- PROTOCOL_STATUS(8,"protocolStatus"),
- CONTENT(9,"content"),
- CONTENT_TYPE(10,"contentType"),
- PREV_SIGNATURE(11,"prevSignature"),
- SIGNATURE(12,"signature"),
- TITLE(13,"title"),
- TEXT(14,"text"),
- PARSE_STATUS(15,"parseStatus"),
- SCORE(16,"score"),
- REPR_URL(17,"reprUrl"),
- HEADERS(18,"headers"),
- OUTLINKS(19,"outlinks"),
- INLINKS(20,"inlinks"),
- MARKERS(21,"markers"),
- METADATA(22,"metadata"),
- BATCH_ID(23,"batchId"),
+ __G__DIRTY(0, "__g__dirty"),
+ BASE_URL(1, "baseUrl"),
+ STATUS(2, "status"),
+ FETCH_TIME(3, "fetchTime"),
+ PREV_FETCH_TIME(4, "prevFetchTime"),
+ FETCH_INTERVAL(5, "fetchInterval"),
+ RETRIES_SINCE_FETCH(6, "retriesSinceFetch"),
+ MODIFIED_TIME(7, "modifiedTime"),
+ PREV_MODIFIED_TIME(8, "prevModifiedTime"),
+ PROTOCOL_STATUS(9, "protocolStatus"),
+ CONTENT(10, "content"),
+ CONTENT_TYPE(11, "contentType"),
+ PREV_SIGNATURE(12, "prevSignature"),
+ SIGNATURE(13, "signature"),
+ TITLE(14, "title"),
+ TEXT(15, "text"),
+ PARSE_STATUS(16, "parseStatus"),
+ SCORE(17, "score"),
+ REPR_URL(18, "reprUrl"),
+ HEADERS(19, "headers"),
+ OUTLINKS(20, "outlinks"),
+ INLINKS(21, "inlinks"),
+ MARKERS(22, "markers"),
+ METADATA(23, "metadata"),
+ BATCH_ID(24, "batchId"),
;
+ /**
+ * Field's index.
+ */
private int index;
+
+ /**
+ * Field's name.
+ */
private String name;
+
+ /**
+ * Field's constructor
+ * @param index field's index.
+ * @param name field's name.
+ */
Field(int index, String name) {this.index=index;this.name=name;}
+
+ /**
+ * Gets field's index.
+ * @return int field's index.
+ */
public int getIndex() {return index;}
+
+ /**
+ * Gets field's name.
+ * @return String field's name.
+ */
public String getName() {return name;}
+
+ /**
+ * Returns the field's name as its string representation.
+ * @return String field's name.
+ */
public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = {"baseUrl","status","fetchTime","prevFetchTime","fetchInterval","retriesSinceFetch","modifiedTime","prevModifiedTime","protocolStatus","content","contentType","prevSignature","signature","title","text","parseStatus","score","reprUrl","headers","outlinks","inlinks","markers","metadata","batchId",};
- static {
- PersistentBase.registerFields(WebPage.class, _ALL_FIELDS);
- }
- private Utf8 baseUrl;
+
+ public static final String[] _ALL_FIELDS = {
+ "__g__dirty",
+ "baseUrl",
+ "status",
+ "fetchTime",
+ "prevFetchTime",
+ "fetchInterval",
+ "retriesSinceFetch",
+ "modifiedTime",
+ "prevModifiedTime",
+ "protocolStatus",
+ "content",
+ "contentType",
+ "prevSignature",
+ "signature",
+ "title",
+ "text",
+ "parseStatus",
+ "score",
+ "reprUrl",
+ "headers",
+ "outlinks",
+ "inlinks",
+ "markers",
+ "metadata",
+ "batchId",
+ };
+
+ /** Bytes used to represent whether or not a field is dirty. */
+ private java.nio.ByteBuffer __g__dirty = java.nio.ByteBuffer.wrap(new byte[4]);
+ private CharSequence baseUrl;
private int status;
private long fetchTime;
private long prevFetchTime;
@@ -87,294 +111,2041 @@ public class WebPage extends PersistentBase {
private long modifiedTime;
private long prevModifiedTime;
private ProtocolStatus protocolStatus;
- private ByteBuffer content;
- private Utf8 contentType;
- private ByteBuffer prevSignature;
- private ByteBuffer signature;
- private Utf8 title;
- private Utf8 text;
+ private java.nio.ByteBuffer content;
+ private CharSequence contentType;
+ private java.nio.ByteBuffer prevSignature;
+ private java.nio.ByteBuffer signature;
+ private CharSequence title;
+ private CharSequence text;
private ParseStatus parseStatus;
private float score;
- private Utf8 reprUrl;
- private Map headers;
- private Map outlinks;
- private Map inlinks;
- private Map markers;
- private Map metadata;
- private Utf8 batchId;
- public WebPage() {
- this(new StateManagerImpl());
- }
- public WebPage(StateManager stateManager) {
- super(stateManager);
- headers = new StatefulHashMap();
- outlinks = new StatefulHashMap();
- inlinks = new StatefulHashMap();
- markers = new StatefulHashMap();
- metadata = new StatefulHashMap();
- }
- public WebPage newInstance(StateManager stateManager) {
- return new WebPage(stateManager);
- }
- public Schema getSchema() { return _SCHEMA; }
- public Object get(int _field) {
- switch (_field) {
- case 0: return baseUrl;
- case 1: return status;
- case 2: return fetchTime;
- case 3: return prevFetchTime;
- case 4: return fetchInterval;
- case 5: return retriesSinceFetch;
- case 6: return modifiedTime;
- case 7: return prevModifiedTime;
- case 8: return protocolStatus;
- case 9: return content;
- case 10: return contentType;
- case 11: return prevSignature;
- case 12: return signature;
- case 13: return title;
- case 14: return text;
- case 15: return parseStatus;
- case 16: return score;
- case 17: return reprUrl;
- case 18: return headers;
- case 19: return outlinks;
- case 20: return inlinks;
- case 21: return markers;
- case 22: return metadata;
- case 23: return batchId;
- default: throw new AvroRuntimeException("Bad index");
+ private CharSequence reprUrl;
+ private java.util.Map headers;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
+ private java.util.Map markers;
+ private java.util.Map metadata;
+ private CharSequence batchId;
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
+ public Object get(int field$) {
+ switch (field$) {
+ case 0: return __g__dirty;
+ case 1: return baseUrl;
+ case 2: return status;
+ case 3: return fetchTime;
+ case 4: return prevFetchTime;
+ case 5: return fetchInterval;
+ case 6: return retriesSinceFetch;
+ case 7: return modifiedTime;
+ case 8: return prevModifiedTime;
+ case 9: return protocolStatus;
+ case 10: return content;
+ case 11: return contentType;
+ case 12: return prevSignature;
+ case 13: return signature;
+ case 14: return title;
+ case 15: return text;
+ case 16: return parseStatus;
+ case 17: return score;
+ case 18: return reprUrl;
+ case 19: return headers;
+ case 20: return outlinks;
+ case 21: return inlinks;
+ case 22: return markers;
+ case 23: return metadata;
+ case 24: return batchId;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
+
+ // Used by DatumReader. Applications should not call.
@SuppressWarnings(value="unchecked")
- public void put(int _field, Object _value) {
- if(isFieldEqual(_field, _value)) return;
- getStateManager().setDirty(this, _field);
- switch (_field) {
- case 0:baseUrl = (Utf8)_value; break;
- case 1:status = (Integer)_value; break;
- case 2:fetchTime = (Long)_value; break;
- case 3:prevFetchTime = (Long)_value; break;
- case 4:fetchInterval = (Integer)_value; break;
- case 5:retriesSinceFetch = (Integer)_value; break;
- case 6:modifiedTime = (Long)_value; break;
- case 7:prevModifiedTime = (Long)_value; break;
- case 8:protocolStatus = (ProtocolStatus)_value; break;
- case 9:content = (ByteBuffer)_value; break;
- case 10:contentType = (Utf8)_value; break;
- case 11:prevSignature = (ByteBuffer)_value; break;
- case 12:signature = (ByteBuffer)_value; break;
- case 13:title = (Utf8)_value; break;
- case 14:text = (Utf8)_value; break;
- case 15:parseStatus = (ParseStatus)_value; break;
- case 16:score = (Float)_value; break;
- case 17:reprUrl = (Utf8)_value; break;
- case 18:headers = (Map)_value; break;
- case 19:outlinks = (Map)_value; break;
- case 20:inlinks = (Map)_value; break;
- case 21:markers = (Map)_value; break;
- case 22:metadata = (Map)_value; break;
- case 23:batchId = (Utf8)_value; break;
- default: throw new AvroRuntimeException("Bad index");
- }
- }
- public Utf8 getBaseUrl() {
- return (Utf8) get(0);
- }
- public void setBaseUrl(Utf8 value) {
- put(0, value);
- }
- public int getStatus() {
- return (Integer) get(1);
- }
- public void setStatus(int value) {
- put(1, value);
- }
- public long getFetchTime() {
- return (Long) get(2);
- }
- public void setFetchTime(long value) {
- put(2, value);
- }
- public long getPrevFetchTime() {
- return (Long) get(3);
- }
- public void setPrevFetchTime(long value) {
- put(3, value);
- }
- public int getFetchInterval() {
- return (Integer) get(4);
- }
- public void setFetchInterval(int value) {
- put(4, value);
- }
- public int getRetriesSinceFetch() {
- return (Integer) get(5);
- }
- public void setRetriesSinceFetch(int value) {
- put(5, value);
- }
- public long getModifiedTime() {
- return (Long) get(6);
- }
- public void setModifiedTime(long value) {
- put(6, value);
- }
- public long getPrevModifiedTime() {
- return (Long) get(7);
- }
- public void setPrevModifiedTime(long value) {
- put(7, value);
+ public void put(int field$, Object value) {
+ switch (field$) {
+ case 0: __g__dirty = (java.nio.ByteBuffer)(value); break;
+ case 1: baseUrl = (CharSequence)(value); break;
+ case 2: status = (Integer)(value); break;
+ case 3: fetchTime = (Long)(value); break;
+ case 4: prevFetchTime = (Long)(value); break;
+ case 5: fetchInterval = (Integer)(value); break;
+ case 6: retriesSinceFetch = (Integer)(value); break;
+ case 7: modifiedTime = (Long)(value); break;
+ case 8: prevModifiedTime = (Long)(value); break;
+ case 9: protocolStatus = (ProtocolStatus)(value); break;
+ case 10: content = (java.nio.ByteBuffer)(value); break;
+ case 11: contentType = (CharSequence)(value); break;
+ case 12: prevSignature = (java.nio.ByteBuffer)(value); break;
+ case 13: signature = (java.nio.ByteBuffer)(value); break;
+ case 14: title = (CharSequence)(value); break;
+ case 15: text = (CharSequence)(value); break;
+ case 16: parseStatus = (ParseStatus)(value); break;
+ case 17: score = (Float)(value); break;
+ case 18: reprUrl = (CharSequence)(value); break;
+ case 19: headers = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 20: outlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 21: inlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 22: markers = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 23: metadata = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 24: batchId = (CharSequence)(value); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+ }
}
- public ProtocolStatus getProtocolStatus() {
- return (ProtocolStatus) get(8);
+
+ /**
+ * Gets the value of the 'baseUrl' field.
+ */
+ public CharSequence getBaseUrl() {
+ return baseUrl;
}
- public void setProtocolStatus(ProtocolStatus value) {
- put(8, value);
+
+ /**
+ * Sets the value of the 'baseUrl' field.
+ * @param value the value to set.
+ */
+ public void setBaseUrl(CharSequence value) {
+ this.baseUrl = value;
+ setDirty(1);
+ }
+
+ /**
+ * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isBaseUrlDirty(CharSequence value) {
+ return isDirty(1);
}
- public ByteBuffer getContent() {
- return (ByteBuffer) get(9);
+
+ /**
+ * Gets the value of the 'status' field.
+ */
+ public Integer getStatus() {
+ return status;
}
- public void setContent(ByteBuffer value) {
- put(9, value);
+
+ /**
+ * Sets the value of the 'status' field.
+ * @param value the value to set.
+ */
+ public void setStatus(Integer value) {
+ this.status = value;
+ setDirty(2);
+ }
+
+ /**
+ * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isStatusDirty(Integer value) {
+ return isDirty(2);
}
- public Utf8 getContentType() {
- return (Utf8) get(10);
+
+ /**
+ * Gets the value of the 'fetchTime' field.
+ */
+ public Long getFetchTime() {
+ return fetchTime;
}
- public void setContentType(Utf8 value) {
- put(10, value);
+
+ /**
+ * Sets the value of the 'fetchTime' field.
+ * @param value the value to set.
+ */
+ public void setFetchTime(Long value) {
+ this.fetchTime = value;
+ setDirty(3);
+ }
+
+ /**
+ * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isFetchTimeDirty(Long value) {
+ return isDirty(3);
}
- public ByteBuffer getPrevSignature() {
- return (ByteBuffer) get(11);
+
+ /**
+ * Gets the value of the 'prevFetchTime' field.
+ */
+ public Long getPrevFetchTime() {
+ return prevFetchTime;
}
- public void setPrevSignature(ByteBuffer value) {
- put(11, value);
+
+ /**
+ * Sets the value of the 'prevFetchTime' field.
+ * @param value the value to set.
+ */
+ public void setPrevFetchTime(Long value) {
+ this.prevFetchTime = value;
+ setDirty(4);
+ }
+
+ /**
+ * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isPrevFetchTimeDirty(Long value) {
+ return isDirty(4);
}
- public ByteBuffer getSignature() {
- return (ByteBuffer) get(12);
+
+ /**
+ * Gets the value of the 'fetchInterval' field.
+ */
+ public Integer getFetchInterval() {
+ return fetchInterval;
}
- public void setSignature(ByteBuffer value) {
- put(12, value);
+
+ /**
+ * Sets the value of the 'fetchInterval' field.
+ * @param value the value to set.
+ */
+ public void setFetchInterval(Integer value) {
+ this.fetchInterval = value;
+ setDirty(5);
+ }
+
+ /**
+ * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isFetchIntervalDirty(Integer value) {
+ return isDirty(5);
}
- public Utf8 getTitle() {
- return (Utf8) get(13);
+
+ /**
+ * Gets the value of the 'retriesSinceFetch' field.
+ */
+ public Integer getRetriesSinceFetch() {
+ return retriesSinceFetch;
}
- public void setTitle(Utf8 value) {
- put(13, value);
+
+ /**
+ * Sets the value of the 'retriesSinceFetch' field.
+ * @param value the value to set.
+ */
+ public void setRetriesSinceFetch(Integer value) {
+ this.retriesSinceFetch = value;
+ setDirty(6);
+ }
+
+ /**
+ * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isRetriesSinceFetchDirty(Integer value) {
+ return isDirty(6);
}
- public Utf8 getText() {
- return (Utf8) get(14);
+
+ /**
+ * Gets the value of the 'modifiedTime' field.
+ */
+ public Long getModifiedTime() {
+ return modifiedTime;
}
- public void setText(Utf8 value) {
- put(14, value);
+
+ /**
+ * Sets the value of the 'modifiedTime' field.
+ * @param value the value to set.
+ */
+ public void setModifiedTime(Long value) {
+ this.modifiedTime = value;
+ setDirty(7);
+ }
+
+ /**
+ * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isModifiedTimeDirty(Long value) {
+ return isDirty(7);
}
- public ParseStatus getParseStatus() {
- return (ParseStatus) get(15);
+
+ /**
+ * Gets the value of the 'prevModifiedTime' field.
+ */
+ public Long getPrevModifiedTime() {
+ return prevModifiedTime;
}
- public void setParseStatus(ParseStatus value) {
- put(15, value);
+
+ /**
+ * Sets the value of the 'prevModifiedTime' field.
+ * @param value the value to set.
+ */
+ public void setPrevModifiedTime(Long value) {
+ this.prevModifiedTime = value;
+ setDirty(8);
+ }
+
+ /**
+ * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isPrevModifiedTimeDirty(Long value) {
+ return isDirty(8);
}
- public float getScore() {
- return (Float) get(16);
+
+ /**
+ * Gets the value of the 'protocolStatus' field.
+ */
+ public ProtocolStatus getProtocolStatus() {
+ return protocolStatus;
}
- public void setScore(float value) {
- put(16, value);
+
+ /**
+ * Sets the value of the 'protocolStatus' field.
+ * @param value the value to set.
+ */
+ public void setProtocolStatus(ProtocolStatus value) {
+ this.protocolStatus = value;
+ setDirty(9);
}
- public Utf8 getReprUrl() {
- return (Utf8) get(17);
+
+ /**
+ * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isProtocolStatusDirty(ProtocolStatus value) {
+ return isDirty(9);
}
- public void setReprUrl(Utf8 value) {
- put(17, value);
+
+ /**
+ * Gets the value of the 'content' field.
+ */
+ public java.nio.ByteBuffer getContent() {
+ return content;
}
- @SuppressWarnings("unchecked")
- public Map getHeaders() {
- return (Map) get(18);
+
+ /**
+ * Sets the value of the 'content' field.
+ * @param value the value to set.
+ */
+ public void setContent(java.nio.ByteBuffer value) {
+ this.content = value;
+ setDirty(10);
+ }
+
+ /**
+ * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isContentDirty(java.nio.ByteBuffer value) {
+ return isDirty(10);
}
- public Utf8 getFromHeaders(Utf8 key) {
- if (headers == null) { return null; }
- return headers.get(key);
+
+ /**
+ * Gets the value of the 'contentType' field.
+ */
+ public CharSequence getContentType() {
+ return contentType;
}
- public void putToHeaders(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 18);
- headers.put(key, value);
+
+ /**
+ * Sets the value of the 'contentType' field.
+ * @param value the value to set.
+ */
+ public void setContentType(CharSequence value) {
+ this.contentType = value;
+ setDirty(11);
+ }
+
+ /**
+ * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isContentTypeDirty(CharSequence value) {
+ return isDirty(11);
}
- public Utf8 removeFromHeaders(Utf8 key) {
- if (headers == null) { return null; }
- getStateManager().setDirty(this, 18);
- return headers.remove(key);
+
+ /**
+ * Gets the value of the 'prevSignature' field.
+ */
+ public java.nio.ByteBuffer getPrevSignature() {
+ return prevSignature;
}
- @SuppressWarnings("unchecked")
- public Map getOutlinks() {
- return (Map) get(19);
+
+ /**
+ * Sets the value of the 'prevSignature' field.
+ * @param value the value to set.
+ */
+ public void setPrevSignature(java.nio.ByteBuffer value) {
+ this.prevSignature = value;
+ setDirty(12);
+ }
+
+ /**
+ * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
+ return isDirty(12);
}
- public Utf8 getFromOutlinks(Utf8 key) {
- if (outlinks == null) { return null; }
- return outlinks.get(key);
+
+ /**
+ * Gets the value of the 'signature' field.
+ */
+ public java.nio.ByteBuffer getSignature() {
+ return signature;
}
- public void putToOutlinks(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 19);
- outlinks.put(key, value);
+
+ /**
+ * Sets the value of the 'signature' field.
+ * @param value the value to set.
+ */
+ public void setSignature(java.nio.ByteBuffer value) {
+ this.signature = value;
+ setDirty(13);
+ }
+
+ /**
+ * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isSignatureDirty(java.nio.ByteBuffer value) {
+ return isDirty(13);
}
- public Utf8 removeFromOutlinks(Utf8 key) {
- if (outlinks == null) { return null; }
- getStateManager().setDirty(this, 19);
- return outlinks.remove(key);
+
+ /**
+ * Gets the value of the 'title' field.
+ */
+ public CharSequence getTitle() {
+ return title;
}
- @SuppressWarnings("unchecked")
- public Map getInlinks() {
- return (Map) get(20);
+
+ /**
+ * Sets the value of the 'title' field.
+ * @param value the value to set.
+ */
+ public void setTitle(CharSequence value) {
+ this.title = value;
+ setDirty(14);
+ }
+
+ /**
+ * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isTitleDirty(CharSequence value) {
+ return isDirty(14);
}
- public Utf8 getFromInlinks(Utf8 key) {
- if (inlinks == null) { return null; }
- return inlinks.get(key);
+
+ /**
+ * Gets the value of the 'text' field.
+ */
+ public CharSequence getText() {
+ return text;
+ }
+
+ /**
+ * Sets the value of the 'text' field.
+ * @param value the value to set.
+ */
+ public void setText(CharSequence value) {
+ this.text = value;
+ setDirty(15);
+ }
+
+ /**
+ * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isTextDirty(CharSequence value) {
+ return isDirty(15);
+ }
+
+ /**
+ * Gets the value of the 'parseStatus' field.
+ */
+ public ParseStatus getParseStatus() {
+ return parseStatus;
+ }
+
+ /**
+ * Sets the value of the 'parseStatus' field.
+ * @param value the value to set.
+ */
+ public void setParseStatus(ParseStatus value) {
+ this.parseStatus = value;
+ setDirty(16);
+ }
+
+ /**
+ * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index, not from this argument.
+ */
+ public boolean isParseStatusDirty(ParseStatus value) {
+ return isDirty(16);
+ }
+
+ /**
+ * Gets the value of the 'score' field.
+ */
+ public Float getScore() {
+ return score;
+ }
+
+ /**
+ * Replaces the 'score' value and marks field 17 as dirty.
+ * @param value the new value; stored as passed in.
+ */
+ public void setScore(Float value) {
+ score = value;
+ setDirty(17);
+ }
+
+ /**
+ * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(17)} whatever is passed.
+ */
+ public boolean isScoreDirty(Float value) {
+ return isDirty(17);
+ }
+
+ /**
+ * Returns the current 'reprUrl' value held by this record.
+ */
+ public CharSequence getReprUrl() {
+ return this.reprUrl;
+ }
+
+ /**
+ * Replaces the 'reprUrl' value and marks field 18 as dirty.
+ * @param value the new value; stored as passed in.
+ */
+ public void setReprUrl(CharSequence value) {
+ reprUrl = value;
+ setDirty(18);
+ }
+
+ /**
+ * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(18)} whatever is passed.
+ */
+ public boolean isReprUrlDirty(CharSequence value) {
+ return isDirty(18);
+ }
+
+ /**
+ * Returns the current 'headers' map held by this record (the stored reference, not a copy).
+ */
+ public java.util.Map getHeaders() {
+ return this.headers;
+ }
+
+ /**
+ * Replaces the 'headers' map and marks field 19 as dirty; a map that is not already Dirtyable is wrapped in a DirtyMapWrapper.
+ * @param value the new map.
+ */
+ public void setHeaders(java.util.Map value) {
+ this.headers = !(value instanceof org.apache.gora.persistency.Dirtyable) ? new org.apache.gora.persistency.impl.DirtyMapWrapper(value) : value;
+ setDirty(19);
+ }
+
+ /**
+ * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(19)} whatever is passed.
+ */
+ public boolean isHeadersDirty(java.util.Map value) {
+ return isDirty(19);
+ }
+
+ /**
+ * Returns the current 'outlinks' map held by this record (the stored reference, not a copy).
+ */
+ public java.util.Map getOutlinks() {
+ return this.outlinks;
}
- public void putToInlinks(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 20);
- inlinks.put(key, value);
+
+ /**
+ * Replaces the 'outlinks' map and marks field 20 as dirty; a map that is not already Dirtyable is wrapped in a DirtyMapWrapper.
+ * @param value the new map.
+ */
+ public void setOutlinks(java.util.Map value) {
+ this.outlinks = !(value instanceof org.apache.gora.persistency.Dirtyable) ? new org.apache.gora.persistency.impl.DirtyMapWrapper(value) : value;
+ setDirty(20);
+ }
+
+ /**
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(20)} whatever is passed.
+ */
+ public boolean isOutlinksDirty(java.util.Map value) {
+ return isDirty(20);
}
- public Utf8 removeFromInlinks(Utf8 key) {
- if (inlinks == null) { return null; }
- getStateManager().setDirty(this, 20);
- return inlinks.remove(key);
+
+ /**
+ * Returns the current 'inlinks' map held by this record (the stored reference, not a copy).
+ */
+ public java.util.Map getInlinks() {
+ return this.inlinks;
}
- @SuppressWarnings("unchecked")
- public Map getMarkers() {
- return (Map) get(21);
+
+ /**
+ * Replaces the 'inlinks' map and marks field 21 as dirty; a map that is not already Dirtyable is wrapped in a DirtyMapWrapper.
+ * @param value the new map.
+ */
+ public void setInlinks(java.util.Map value) {
+ this.inlinks = !(value instanceof org.apache.gora.persistency.Dirtyable) ? new org.apache.gora.persistency.impl.DirtyMapWrapper(value) : value;
+ setDirty(21);
+ }
+
+ /**
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(21)} whatever is passed.
+ */
+ public boolean isInlinksDirty(java.util.Map value) {
+ return isDirty(21);
}
- public Utf8 getFromMarkers(Utf8 key) {
- if (markers == null) { return null; }
- return markers.get(key);
+
+ /**
+ * Returns the current 'markers' map held by this record (the stored reference, not a copy).
+ */
+ public java.util.Map getMarkers() {
+ return this.markers;
}
- public void putToMarkers(Utf8 key, Utf8 value) {
- getStateManager().setDirty(this, 21);
- markers.put(key, value);
+
+ /**
+ * Replaces the 'markers' map and marks field 22 as dirty; a map that is not already Dirtyable is wrapped in a DirtyMapWrapper.
+ * @param value the new map.
+ */
+ public void setMarkers(java.util.Map value) {
+ this.markers = !(value instanceof org.apache.gora.persistency.Dirtyable) ? new org.apache.gora.persistency.impl.DirtyMapWrapper(value) : value;
+ setDirty(22);
+ }
+
+ /**
+ * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(22)} whatever is passed.
+ */
+ public boolean isMarkersDirty(java.util.Map value) {
+ return isDirty(22);
}
- public Utf8 removeFromMarkers(Utf8 key) {
- if (markers == null) { return null; }
- getStateManager().setDirty(this, 21);
- return markers.remove(key);
+
+ /**
+ * Returns the current 'metadata' map held by this record (the stored reference, not a copy).
+ */
+ public java.util.Map getMetadata() {
+ return this.metadata;
}
- @SuppressWarnings("unchecked")
- public Map getMetadata() {
- return (Map) get(22);
+
+ /**
+ * Replaces the 'metadata' map and marks field 23 as dirty; a map that is not already Dirtyable is wrapped in a DirtyMapWrapper.
+ * @param value the new map.
+ */
+ public void setMetadata(java.util.Map value) {
+ this.metadata = !(value instanceof org.apache.gora.persistency.Dirtyable) ? new org.apache.gora.persistency.impl.DirtyMapWrapper(value) : value;
+ setDirty(23);
+ }
+
+ /**
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(23)} whatever is passed.
+ */
+ public boolean isMetadataDirty(java.util.Map value) {
+ return isDirty(23);
}
- public ByteBuffer getFromMetadata(Utf8 key) {
- if (metadata == null) { return null; }
- return metadata.get(key);
+
+ /**
+ * Returns the current 'batchId' value held by this record.
+ */
+ public CharSequence getBatchId() {
+ return this.batchId;
}
- public void putToMetadata(Utf8 key, ByteBuffer value) {
- getStateManager().setDirty(this, 22);
- metadata.put(key, value);
+
+ /**
+ * Replaces the 'batchId' value and marks field 24 as dirty.
+ * @param value the new value; stored as passed in.
+ */
+ public void setBatchId(CharSequence value) {
+ batchId = value;
+ setDirty(24);
+ }
+
+ /**
+ * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value not read by this method; the result is {@code isDirty(24)} whatever is passed.
+ */
+ public boolean isBatchIdDirty(CharSequence value) {
+ return isDirty(24);
}
- public ByteBuffer removeFromMetadata(Utf8 key) {
- if (metadata == null) { return null; }
- getStateManager().setDirty(this, 22);
- return metadata.remove(key);
+
+ /** Returns a new {@link Builder} for WebPage, created through the no-argument Builder constructor. */
+ public static Builder newBuilder() {
+ return new Builder();
+ }
+
+ /** Returns a new {@link Builder} created from {@code other} through the Builder copy constructor. */
+ public static Builder newBuilder(Builder other) {
+ return new Builder(other);
+ }
+
+ /** Returns a new {@link Builder} pre-populated from the fields of the existing WebPage {@code other}. */
+ public static Builder newBuilder(WebPage other) {
+ return new Builder(other);
+ }
+
+ /** Copies all of {@code input} (bytes, position, limit, mark if set) into a new buffer; despite the name the result is READ-ONLY. {@code input} is left untouched. */
+ private static java.nio.ByteBuffer deepCopyToWriteOnlyBuffer(
+ java.nio.ByteBuffer input) {
+ // Work on a duplicate: it shares the bytes but has its own position/limit/mark.
+ java.nio.ByteBuffer source = input.duplicate();
+ int position = source.position();
+ int limit = source.limit();
+ int mark = -1; // -1 means "no mark set"
+ try {
+ source.reset();
+ mark = source.position();
+ } catch (java.nio.InvalidMarkException noMark) {
+ // reset() is the only way to read a mark; an unmarked buffer is valid input, not an error.
+ }
+ source.clear(); // expose the full capacity so every byte is copied
+ java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(source.capacity());
+ copy.put(source).clear();
+ if (mark >= 0) { copy.position(mark); copy.mark(); }
+ copy.position(position);
+ copy.limit(limit);
+ return copy.asReadOnlyBuffer();
+ }
+
+ /**
+ * RecordBuilder for WebPage instances.
+ */
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
+
+ private java.nio.ByteBuffer __g__dirty;
+ private CharSequence baseUrl;
+ private int status;
+ private long fetchTime;
+ private long prevFetchTime;
+ private int fetchInterval;
+ private int retriesSinceFetch;
+ private long modifiedTime;
+ private long prevModifiedTime;
+ private ProtocolStatus protocolStatus;
+ private java.nio.ByteBuffer content;
+ private CharSequence contentType;
+ private java.nio.ByteBuffer prevSignature;
+ private java.nio.ByteBuffer signature;
+ private CharSequence title;
+ private CharSequence text;
+ private ParseStatus parseStatus;
+ private float score;
+ private CharSequence reprUrl;
+ private java.util.Map headers;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
+ private java.util.Map markers;
+ private java.util.Map metadata;
+ private CharSequence batchId;
+
+ /** Creates a new Builder bound to WebPage.SCHEMA$; this constructor assigns no field values. */
+ private Builder() {
+ super(WebPage.SCHEMA$);
+ }
+
+ /** Creates a Builder by copying an existing Builder. NOTE(review): only delegates to super(other); none of this class's own field values are assigned here — confirm the superclass copies them, otherwise a copied builder may report fields as set while holding default values. */
+ private Builder(Builder other) {
+ super(other);
+ }
+
+ /** Creates a Builder from an existing WebPage: every field index 0..24 that passes isValidValue is copied with data().deepCopy and flagged as set; a field that fails the check is left unset. */
+ private Builder(WebPage other) {
+ super(WebPage.SCHEMA$);
+ if (isValidValue(fields()[0], other.__g__dirty)) {
+ this.__g__dirty = (java.nio.ByteBuffer) data().deepCopy(fields()[0].schema(), other.__g__dirty);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.baseUrl)) {
+ this.baseUrl = (CharSequence) data().deepCopy(fields()[1].schema(), other.baseUrl);
+ fieldSetFlags()[1] = true;
+ }
+ if (isValidValue(fields()[2], other.status)) {
+ this.status = (Integer) data().deepCopy(fields()[2].schema(), other.status);
+ fieldSetFlags()[2] = true;
+ }
+ if (isValidValue(fields()[3], other.fetchTime)) {
+ this.fetchTime = (Long) data().deepCopy(fields()[3].schema(), other.fetchTime);
+ fieldSetFlags()[3] = true;
+ }
+ if (isValidValue(fields()[4], other.prevFetchTime)) {
+ this.prevFetchTime = (Long) data().deepCopy(fields()[4].schema(), other.prevFetchTime);
+ fieldSetFlags()[4] = true;
+ }
+ if (isValidValue(fields()[5], other.fetchInterval)) {
+ this.fetchInterval = (Integer) data().deepCopy(fields()[5].schema(), other.fetchInterval);
+ fieldSetFlags()[5] = true;
+ }
+ if (isValidValue(fields()[6], other.retriesSinceFetch)) {
+ this.retriesSinceFetch = (Integer) data().deepCopy(fields()[6].schema(), other.retriesSinceFetch);
+ fieldSetFlags()[6] = true;
+ }
+ if (isValidValue(fields()[7], other.modifiedTime)) {
+ this.modifiedTime = (Long) data().deepCopy(fields()[7].schema(), other.modifiedTime);
+ fieldSetFlags()[7] = true;
+ }
+ if (isValidValue(fields()[8], other.prevModifiedTime)) {
+ this.prevModifiedTime = (Long) data().deepCopy(fields()[8].schema(), other.prevModifiedTime);
+ fieldSetFlags()[8] = true;
+ }
+ if (isValidValue(fields()[9], other.protocolStatus)) {
+ this.protocolStatus = (ProtocolStatus) data().deepCopy(fields()[9].schema(), other.protocolStatus);
+ fieldSetFlags()[9] = true;
+ }
+ if (isValidValue(fields()[10], other.content)) {
+ this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[10].schema(), other.content);
+ fieldSetFlags()[10] = true;
+ }
+ if (isValidValue(fields()[11], other.contentType)) {
+ this.contentType = (CharSequence) data().deepCopy(fields()[11].schema(), other.contentType);
+ fieldSetFlags()[11] = true;
+ }
+ if (isValidValue(fields()[12], other.prevSignature)) {
+ this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.prevSignature);
+ fieldSetFlags()[12] = true;
+ }
+ if (isValidValue(fields()[13], other.signature)) {
+ this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[13].schema(), other.signature);
+ fieldSetFlags()[13] = true;
+ }
+ if (isValidValue(fields()[14], other.title)) {
+ this.title = (CharSequence) data().deepCopy(fields()[14].schema(), other.title);
+ fieldSetFlags()[14] = true;
+ }
+ if (isValidValue(fields()[15], other.text)) {
+ this.text = (CharSequence) data().deepCopy(fields()[15].schema(), other.text);
+ fieldSetFlags()[15] = true;
+ }
+ if (isValidValue(fields()[16], other.parseStatus)) {
+ this.parseStatus = (ParseStatus) data().deepCopy(fields()[16].schema(), other.parseStatus);
+ fieldSetFlags()[16] = true;
+ }
+ if (isValidValue(fields()[17], other.score)) {
+ this.score = (Float) data().deepCopy(fields()[17].schema(), other.score);
+ fieldSetFlags()[17] = true;
+ }
+ if (isValidValue(fields()[18], other.reprUrl)) {
+ this.reprUrl = (CharSequence) data().deepCopy(fields()[18].schema(), other.reprUrl);
+ fieldSetFlags()[18] = true;
+ }
+ if (isValidValue(fields()[19], other.headers)) {
+ this.headers = (java.util.Map) data().deepCopy(fields()[19].schema(), other.headers);
+ fieldSetFlags()[19] = true;
+ }
+ if (isValidValue(fields()[20], other.outlinks)) {
+ this.outlinks = (java.util.Map) data().deepCopy(fields()[20].schema(), other.outlinks);
+ fieldSetFlags()[20] = true;
+ }
+ if (isValidValue(fields()[21], other.inlinks)) {
+ this.inlinks = (java.util.Map) data().deepCopy(fields()[21].schema(), other.inlinks);
+ fieldSetFlags()[21] = true;
+ }
+ if (isValidValue(fields()[22], other.markers)) {
+ this.markers = (java.util.Map) data().deepCopy(fields()[22].schema(), other.markers);
+ fieldSetFlags()[22] = true;
+ }
+ if (isValidValue(fields()[23], other.metadata)) {
+ this.metadata = (java.util.Map) data().deepCopy(fields()[23].schema(), other.metadata);
+ fieldSetFlags()[23] = true;
+ }
+ if (isValidValue(fields()[24], other.batchId)) {
+ this.batchId = (CharSequence) data().deepCopy(fields()[24].schema(), other.batchId);
+ fieldSetFlags()[24] = true;
+ }
+ }
+
+ /** Returns the 'baseUrl' value currently held by this builder. */
+ public CharSequence getBaseUrl() {
+ return this.baseUrl;
+ }
+
+ /** Validates and stores 'baseUrl', marks it as set, and returns this builder for chaining. */
+ public Builder setBaseUrl(CharSequence value) {
+ validate(fields()[1], value);
+ fieldSetFlags()[1] = true;
+ this.baseUrl = value;
+ return this;
+ }
+
+ /** Reports whether 'baseUrl' is currently flagged as set on this builder. */
+ public boolean hasBaseUrl() {
+ return this.fieldSetFlags()[1];
+ }
+
+ /** Resets 'baseUrl' to null and marks it as not set. */
+ public Builder clearBaseUrl() {
+ fieldSetFlags()[1] = false;
+ this.baseUrl = null;
+ return this;
+ }
+
+ /** Returns the 'status' value currently held by this builder. */
+ public Integer getStatus() {
+ return this.status;
+ }
+
+ /** Validates and stores 'status', marks it as set, and returns this builder for chaining. */
+ public Builder setStatus(int value) {
+ validate(fields()[2], value);
+ fieldSetFlags()[2] = true;
+ this.status = value;
+ return this;
+ }
+
+ /** Reports whether 'status' is currently flagged as set on this builder. */
+ public boolean hasStatus() {
+ return this.fieldSetFlags()[2];
+ }
+
+ /** Marks 'status' as not set; the stored primitive value is left as is. */
+ public Builder clearStatus() {
+ this.fieldSetFlags()[2] = false;
+ return this;
+ }
+
+ /** Returns the 'fetchTime' value currently held by this builder. */
+ public Long getFetchTime() {
+ return this.fetchTime;
+ }
+
+ /** Validates and stores 'fetchTime', marks it as set, and returns this builder for chaining. */
+ public Builder setFetchTime(long value) {
+ validate(fields()[3], value);
+ fieldSetFlags()[3] = true;
+ this.fetchTime = value;
+ return this;
+ }
+
+ /** Reports whether 'fetchTime' is currently flagged as set on this builder. */
+ public boolean hasFetchTime() {
+ return this.fieldSetFlags()[3];
+ }
+
+ /** Marks 'fetchTime' as not set; the stored primitive value is left as is. */
+ public Builder clearFetchTime() {
+ this.fieldSetFlags()[3] = false;
+ return this;
+ }
+
+ /** Returns the 'prevFetchTime' value currently held by this builder. */
+ public Long getPrevFetchTime() {
+ return this.prevFetchTime;
+ }
+
+ /** Validates and stores 'prevFetchTime', marks it as set, and returns this builder for chaining. */
+ public Builder setPrevFetchTime(long value) {
+ validate(fields()[4], value);
+ fieldSetFlags()[4] = true;
+ this.prevFetchTime = value;
+ return this;
+ }
+
+ /** Reports whether 'prevFetchTime' is currently flagged as set on this builder. */
+ public boolean hasPrevFetchTime() {
+ return this.fieldSetFlags()[4];
+ }
+
+ /** Marks 'prevFetchTime' as not set; the stored primitive value is left as is. */
+ public Builder clearPrevFetchTime() {
+ this.fieldSetFlags()[4] = false;
+ return this;
+ }
+
+ /** Returns the 'fetchInterval' value currently held by this builder. */
+ public Integer getFetchInterval() {
+ return this.fetchInterval;
+ }
+
+ /** Validates and stores 'fetchInterval', marks it as set, and returns this builder for chaining. */
+ public Builder setFetchInterval(int value) {
+ validate(fields()[5], value);
+ fieldSetFlags()[5] = true;
+ this.fetchInterval = value;
+ return this;
+ }
+
+ /** Reports whether 'fetchInterval' is currently flagged as set on this builder. */
+ public boolean hasFetchInterval() {
+ return this.fieldSetFlags()[5];
+ }
+
+ /** Marks 'fetchInterval' as not set; the stored primitive value is left as is. */
+ public Builder clearFetchInterval() {
+ this.fieldSetFlags()[5] = false;
+ return this;
+ }
+
+ /** Returns the 'retriesSinceFetch' value currently held by this builder. */
+ public Integer getRetriesSinceFetch() {
+ return this.retriesSinceFetch;
+ }
+
+ /** Validates and stores 'retriesSinceFetch', marks it as set, and returns this builder for chaining. */
+ public Builder setRetriesSinceFetch(int value) {
+ validate(fields()[6], value);
+ fieldSetFlags()[6] = true;
+ this.retriesSinceFetch = value;
+ return this;
+ }
+
+ /** Reports whether 'retriesSinceFetch' is currently flagged as set on this builder. */
+ public boolean hasRetriesSinceFetch() {
+ return this.fieldSetFlags()[6];
+ }
+
+ /** Marks 'retriesSinceFetch' as not set; the stored primitive value is left as is. */
+ public Builder clearRetriesSinceFetch() {
+ this.fieldSetFlags()[6] = false;
+ return this;
+ }
+
+ /** Returns the 'modifiedTime' value currently held by this builder. */
+ public Long getModifiedTime() {
+ return this.modifiedTime;
+ }
+
+ /** Validates and stores 'modifiedTime', marks it as set, and returns this builder for chaining. */
+ public Builder setModifiedTime(long value) {
+ validate(fields()[7], value);
+ fieldSetFlags()[7] = true;
+ this.modifiedTime = value;
+ return this;
+ }
+
+ /** Reports whether 'modifiedTime' is currently flagged as set on this builder. */
+ public boolean hasModifiedTime() {
+ return this.fieldSetFlags()[7];
+ }
+
+ /** Marks 'modifiedTime' as not set; the stored primitive value is left as is. */
+ public Builder clearModifiedTime() {
+ this.fieldSetFlags()[7] = false;
+ return this;
+ }
+
+ /** Returns the 'prevModifiedTime' value currently held by this builder. */
+ public Long getPrevModifiedTime() {
+ return this.prevModifiedTime;
+ }
+
+ /** Validates and stores 'prevModifiedTime', marks it as set, and returns this builder for chaining. */
+ public Builder setPrevModifiedTime(long value) {
+ validate(fields()[8], value);
+ fieldSetFlags()[8] = true;
+ this.prevModifiedTime = value;
+ return this;
+ }
+
+ /** Reports whether 'prevModifiedTime' is currently flagged as set on this builder. */
+ public boolean hasPrevModifiedTime() {
+ return this.fieldSetFlags()[8];
+ }
+
+ /** Marks 'prevModifiedTime' as not set; the stored primitive value is left as is. */
+ public Builder clearPrevModifiedTime() {
+ this.fieldSetFlags()[8] = false;
+ return this;
+ }
+
+ /** Returns the 'protocolStatus' value currently held by this builder. */
+ public ProtocolStatus getProtocolStatus() {
+ return this.protocolStatus;
+ }
+
+ /** Validates and stores 'protocolStatus', marks it as set, and returns this builder for chaining. */
+ public Builder setProtocolStatus(ProtocolStatus value) {
+ validate(fields()[9], value);
+ fieldSetFlags()[9] = true;
+ this.protocolStatus = value;
+ return this;
+ }
+
+ /** Reports whether 'protocolStatus' is currently flagged as set on this builder. */
+ public boolean hasProtocolStatus() {
+ return this.fieldSetFlags()[9];
+ }
+
+ /** Resets 'protocolStatus' to null and marks it as not set. */
+ public Builder clearProtocolStatus() {
+ fieldSetFlags()[9] = false;
+ this.protocolStatus = null;
+ return this;
+ }
+
+ /** Returns the 'content' buffer currently held by this builder. */
+ public java.nio.ByteBuffer getContent() {
+ return this.content;
+ }
+
+ /** Validates and stores 'content', marks it as set, and returns this builder for chaining. */
+ public Builder setContent(java.nio.ByteBuffer value) {
+ validate(fields()[10], value);
+ fieldSetFlags()[10] = true;
+ this.content = value;
+ return this;
+ }
+
+ /** Reports whether 'content' is currently flagged as set on this builder. */
+ public boolean hasContent() {
+ return this.fieldSetFlags()[10];
+ }
+
+ /** Resets 'content' to null and marks it as not set. */
+ public Builder clearContent() {
+ fieldSetFlags()[10] = false;
+ this.content = null;
+ return this;
+ }
+
+ /** Returns the 'contentType' value currently held by this builder. */
+ public CharSequence getContentType() {
+ return this.contentType;
+ }
+
+ /** Validates and stores 'contentType', marks it as set, and returns this builder for chaining. */
+ public Builder setContentType(CharSequence value) {
+ validate(fields()[11], value);
+ fieldSetFlags()[11] = true;
+ this.contentType = value;
+ return this;
+ }
+
+ /** Reports whether 'contentType' is currently flagged as set on this builder. */
+ public boolean hasContentType() {
+ return this.fieldSetFlags()[11];
+ }
+
+ /** Resets 'contentType' to null and marks it as not set. */
+ public Builder clearContentType() {
+ fieldSetFlags()[11] = false;
+ this.contentType = null;
+ return this;
+ }
+
+ /** Returns the 'prevSignature' buffer currently held by this builder. */
+ public java.nio.ByteBuffer getPrevSignature() {
+ return this.prevSignature;
+ }
+
+ /** Validates and stores 'prevSignature', marks it as set, and returns this builder for chaining. */
+ public Builder setPrevSignature(java.nio.ByteBuffer value) {
+ validate(fields()[12], value);
+ fieldSetFlags()[12] = true;
+ this.prevSignature = value;
+ return this;
+ }
+
+ /** Reports whether 'prevSignature' is currently flagged as set on this builder. */
+ public boolean hasPrevSignature() {
+ return this.fieldSetFlags()[12];
+ }
+
+ /** Resets 'prevSignature' to null and marks it as not set. */
+ public Builder clearPrevSignature() {
+ fieldSetFlags()[12] = false;
+ this.prevSignature = null;
+ return this;
+ }
+
+ /** Returns the 'signature' buffer currently held by this builder. */
+ public java.nio.ByteBuffer getSignature() {
+ return this.signature;
+ }
+
+ /** Validates and stores 'signature', marks it as set, and returns this builder for chaining. */
+ public Builder setSignature(java.nio.ByteBuffer value) {
+ validate(fields()[13], value);
+ fieldSetFlags()[13] = true;
+ this.signature = value;
+ return this;
+ }
+
+ /** Reports whether 'signature' is currently flagged as set on this builder. */
+ public boolean hasSignature() {
+ return this.fieldSetFlags()[13];
+ }
+
+ /** Resets 'signature' to null and marks it as not set. */
+ public Builder clearSignature() {
+ fieldSetFlags()[13] = false;
+ this.signature = null;
+ return this;
+ }
+
+ /** Returns the 'title' value currently held by this builder. */
+ public CharSequence getTitle() {
+ return this.title;
+ }
+
+ /** Validates and stores 'title', marks it as set, and returns this builder for chaining. */
+ public Builder setTitle(CharSequence value) {
+ validate(fields()[14], value);
+ fieldSetFlags()[14] = true;
+ this.title = value;
+ return this;
+ }
+
+ /** Reports whether 'title' is currently flagged as set on this builder. */
+ public boolean hasTitle() {
+ return this.fieldSetFlags()[14];
+ }
+
+ /** Resets 'title' to null and marks it as not set. */
+ public Builder clearTitle() {
+ fieldSetFlags()[14] = false;
+ this.title = null;
+ return this;
+ }
+
+ /** Returns the 'text' value currently held by this builder. */
+ public CharSequence getText() {
+ return this.text;
+ }
+
+ /** Validates and stores 'text', marks it as set, and returns this builder for chaining. */
+ public Builder setText(CharSequence value) {
+ validate(fields()[15], value);
+ fieldSetFlags()[15] = true;
+ this.text = value;
+ return this;
+ }
+
+ /** Reports whether 'text' is currently flagged as set on this builder. */
+ public boolean hasText() {
+ return this.fieldSetFlags()[15];
+ }
+
+ /** Resets 'text' to null and marks it as not set. */
+ public Builder clearText() {
+ fieldSetFlags()[15] = false;
+ this.text = null;
+ return this;
+ }
+
+ /** Returns the 'parseStatus' value currently held by this builder. */
+ public ParseStatus getParseStatus() {
+ return this.parseStatus;
+ }
+
+ /** Validates and stores 'parseStatus', marks it as set, and returns this builder for chaining. */
+ public Builder setParseStatus(ParseStatus value) {
+ validate(fields()[16], value);
+ fieldSetFlags()[16] = true;
+ this.parseStatus = value;
+ return this;
+ }
+
+ /** Reports whether 'parseStatus' is currently flagged as set on this builder. */
+ public boolean hasParseStatus() {
+ return this.fieldSetFlags()[16];
+ }
+
+ /** Resets 'parseStatus' to null and marks it as not set. */
+ public Builder clearParseStatus() {
+ fieldSetFlags()[16] = false;
+ this.parseStatus = null;
+ return this;
+ }
+
+ /** Returns the 'score' value currently held by this builder. */
+ public Float getScore() {
+ return this.score;
+ }
+
+ /** Validates and stores 'score', marks it as set, and returns this builder for chaining. */
+ public Builder setScore(float value) {
+ validate(fields()[17], value);
+ fieldSetFlags()[17] = true;
+ this.score = value;
+ return this;
+ }
+
+ /** Reports whether 'score' is currently flagged as set on this builder. */
+ public boolean hasScore() {
+ return this.fieldSetFlags()[17];
+ }
+
+ /** Marks 'score' as not set; the stored primitive value is left as is. */
+ public Builder clearScore() {
+ this.fieldSetFlags()[17] = false;
+ return this;
+ }
+
+ /** Returns the 'reprUrl' value currently held by this builder. */
+ public CharSequence getReprUrl() {
+ return this.reprUrl;
+ }
+
+ /** Validates and stores 'reprUrl', marks it as set, and returns this builder for chaining. */
+ public Builder setReprUrl(CharSequence value) {
+ validate(fields()[18], value);
+ fieldSetFlags()[18] = true;
+ this.reprUrl = value;
+ return this;
+ }
+
+ /** Reports whether 'reprUrl' is currently flagged as set on this builder. */
+ public boolean hasReprUrl() {
+ return this.fieldSetFlags()[18];
+ }
+
+ /** Resets 'reprUrl' to null and marks it as not set. */
+ public Builder clearReprUrl() {
+ fieldSetFlags()[18] = false;
+ this.reprUrl = null;
+ return this;
+ }
+
+ /** Returns the 'headers' map currently held by this builder. */
+ public java.util.Map getHeaders() {
+ return this.headers;
+ }
+
+ /** Validates and stores 'headers', marks it as set, and returns this builder for chaining. */
+ public Builder setHeaders(java.util.Map value) {
+ validate(fields()[19], value);
+ fieldSetFlags()[19] = true;
+ this.headers = value;
+ return this;
+ }
+
+ /** Reports whether 'headers' is currently flagged as set on this builder. */
+ public boolean hasHeaders() {
+ return this.fieldSetFlags()[19];
+ }
+
+ /** Resets 'headers' to null and marks it as not set. */
+ public Builder clearHeaders() {
+ fieldSetFlags()[19] = false;
+ this.headers = null;
+ return this;
+ }
+
+ /** Returns the 'outlinks' map currently held by this builder. */
+ public java.util.Map getOutlinks() {
+ return this.outlinks;
+ }
+
+ /** Validates and stores 'outlinks', marks it as set, and returns this builder for chaining. */
+ public Builder setOutlinks(java.util.Map value) {
+ validate(fields()[20], value);
+ fieldSetFlags()[20] = true;
+ this.outlinks = value;
+ return this;
+ }
+
+ /** Reports whether 'outlinks' is currently flagged as set on this builder. */
+ public boolean hasOutlinks() {
+ return this.fieldSetFlags()[20];
+ }
+
+ /** Resets 'outlinks' to null and marks it as not set. */
+ public Builder clearOutlinks() {
+ fieldSetFlags()[20] = false;
+ this.outlinks = null;
+ return this;
+ }
+
+ /** Returns the 'inlinks' map currently held by this builder. */
+ public java.util.Map getInlinks() {
+ return this.inlinks;
+ }
+
+ /** Validates and stores 'inlinks', marks it as set, and returns this builder for chaining. */
+ public Builder setInlinks(java.util.Map value) {
+ validate(fields()[21], value);
+ fieldSetFlags()[21] = true;
+ this.inlinks = value;
+ return this;
+ }
+
+ /** Reports whether 'inlinks' is currently flagged as set on this builder. */
+ public boolean hasInlinks() {
+ return this.fieldSetFlags()[21];
+ }
+
+ /** Resets 'inlinks' to null and marks it as not set. */
+ public Builder clearInlinks() {
+ fieldSetFlags()[21] = false;
+ this.inlinks = null;
+ return this;
+ }
+
+ /** Returns the 'markers' map currently held by this builder. */
+ public java.util.Map getMarkers() {
+ return this.markers;
+ }
+
+ /** Validates and stores 'markers', marks it as set, and returns this builder for chaining. */
+ public Builder setMarkers(java.util.Map value) {
+ validate(fields()[22], value);
+ fieldSetFlags()[22] = true;
+ this.markers = value;
+ return this;
+ }
+
+ /** Reports whether 'markers' is currently flagged as set on this builder. */
+ public boolean hasMarkers() {
+ return this.fieldSetFlags()[22];
+ }
+
+ /** Resets 'markers' to null and marks it as not set. */
+ public Builder clearMarkers() {
+ fieldSetFlags()[22] = false;
+ this.markers = null;
+ return this;
+ }
+
+ /** Returns the 'metadata' map currently held by this builder. */
+ public java.util.Map getMetadata() {
+ return this.metadata;
+ }
+
+ /** Validates and stores 'metadata', marks it as set, and returns this builder for chaining. */
+ public Builder setMetadata(java.util.Map value) {
+ validate(fields()[23], value);
+ fieldSetFlags()[23] = true;
+ this.metadata = value;
+ return this;
+ }
+
+ /** Reports whether 'metadata' is currently flagged as set on this builder. */
+ public boolean hasMetadata() {
+ return this.fieldSetFlags()[23];
+ }
+
+ /** Resets 'metadata' to null and marks it as not set. */
+ public Builder clearMetadata() {
+ fieldSetFlags()[23] = false;
+ this.metadata = null;
+ return this;
+ }
+
+ /** Returns the 'batchId' value currently held by this builder. */
+ public CharSequence getBatchId() {
+ return this.batchId;
+ }
+
+ /** Validates and stores 'batchId', marks it as set, and returns this builder for chaining. */
+ public Builder setBatchId(CharSequence value) {
+ validate(fields()[24], value);
+ fieldSetFlags()[24] = true;
+ this.batchId = value;
+ return this;
+ }
+
+ /** Reports whether 'batchId' is currently flagged as set on this builder. */
+ public boolean hasBatchId() {
+ return this.fieldSetFlags()[24];
+ }
+
+ /** Resets 'batchId' to null and marks it as not set. */
+ public Builder clearBatchId() {
+ fieldSetFlags()[24] = false;
+ this.batchId = null;
+ return this;
+ }
+
+ @Override
+ public WebPage build() {
+ try {
+ WebPage record = new WebPage();
+ record.__g__dirty = fieldSetFlags()[0] ? this.__g__dirty : (java.nio.ByteBuffer) java.nio.ByteBuffer.wrap(new byte[4]);
+ record.baseUrl = fieldSetFlags()[1] ? this.baseUrl : (CharSequence) defaultValue(fields()[1]);
+ record.status = fieldSetFlags()[2] ? this.status : (Integer) defaultValue(fields()[2]);
+ record.fetchTime = fieldSetFlags()[3] ? this.fetchTime : (Long) defaultValue(fields()[3]);
+ record.prevFetchTime = fieldSetFlags()[4] ? this.prevFetchTime : (Long) defaultValue(fields()[4]);
+ record.fetchInterval = fieldSetFlags()[5] ? this.fetchInterval : (Integer) defaultValue(fields()[5]);
+ record.retriesSinceFetch = fieldSetFlags()[6] ? this.retriesSinceFetch : (Integer) defaultValue(fields()[6]);
+ record.modifiedTime = fieldSetFlags()[7] ? this.modifiedTime : (Long) defaultValue(fields()[7]);
+ record.prevModifiedTime = fieldSetFlags()[8] ? this.prevModifiedTime : (Long) defaultValue(fields()[8]);
+ record.protocolStatus = fieldSetFlags()[9] ? this.protocolStatus : (ProtocolStatus) defaultValue(fields()[9]);
+ record.content = fieldSetFlags()[10] ? this.content : (java.nio.ByteBuffer) defaultValue(fields()[10]);
+ record.contentType = fieldSetFlags()[11] ? this.contentType : (CharSequence) defaultValue(fields()[11]);
+ record.prevSignature = fieldSetFlags()[12] ? this.prevSignature : (java.nio.ByteBuffer) defaultValue(fields()[12]);
+ record.signature = fieldSetFlags()[13] ? this.signature : (java.nio.ByteBuffer) defaultValue(fields()[13]);
+ record.title = fieldSetFlags()[14] ? this.title : (CharSequence) defaultValue(fields()[14]);
+ record.text = fieldSetFlags()[15] ? this.text : (CharSequence) defaultValue(fields()[15]);
+ record.parseStatus = fieldSetFlags()[16] ? this.parseStatus : (ParseStatus) defaultValue(fields()[16]);
+ record.score = fieldSetFlags()[17] ? this.score : (Float) defaultValue(fields()[17]);
+ record.reprUrl = fieldSetFlags()[18] ? this.reprUrl : (CharSequence) defaultValue(fields()[18]);
+ record.headers = fieldSetFlags()[19] ? this.headers : (java.util.Map