Index: build.xml
===================================================================
--- build.xml (revision 1663736)
+++ build.xml (working copy)
@@ -540,21 +540,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
+
Index: conf/gora-cassandra-mapping.xml
===================================================================
--- conf/gora-cassandra-mapping.xml (revision 1663736)
+++ conf/gora-cassandra-mapping.xml (working copy)
@@ -17,34 +17,46 @@
-->
-
+
-
+
+
-
@@ -78,10 +90,10 @@
-
-
-
-
+
+
+
+
Index: conf/gora.properties
===================================================================
--- conf/gora.properties (revision 1663736)
+++ conf/gora.properties (working copy)
@@ -66,8 +66,16 @@
# CassandraStore properties #
#############################
-# gora.cassandrastore.servers=localhost:9160
+#gora.datastore.default=org.apache.gora.cassandra.CassandraStore
+#gora.cassandrastore.cluster=Test Cluster
+#gora.cassandrastore.host=localhost:9160
+# The consistency level properties below are described in CassandraClient#checkKeyspace();
+# options are ANY, ONE, TWO, THREE, LOCAL_QUORUM, EACH_QUORUM, QUORUM and ALL.
+#gora.cassandrastore.cf.consistency.level=ONE
+#gora.cassandrastore.read.consistency.level=QUORUM
+#gora.cassandrastore.write.consistency.level=ONE
+
#######################
# MemStore properties #
#######################
Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml (revision 1663736)
+++ conf/nutch-default.xml (working copy)
@@ -398,13 +398,13 @@
db.fetch.schedule.adaptive.min_interval
- 60.0
+ 60
Minimum fetchInterval, in seconds.
db.fetch.schedule.adaptive.max_interval
- 31536000.0
+ 31536000
Maximum fetchInterval, in seconds (365 days).
NOTE: this is limited by db.fetch.interval.max. Pages with
fetchInterval larger than db.fetch.interval.max
@@ -1353,4 +1353,14 @@
+
+ io.serializations
+ org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.JavaSerialization
+
+ A list of serialization classes that can be used for
+ obtaining serializers and deserializers.
+
+
Index: default.properties
===================================================================
--- default.properties (revision 1663736)
+++ default.properties (working copy)
@@ -44,7 +44,7 @@
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
javadoc.link.java=http://docs.oracle.com/javase/7/docs/api/
-javadoc.link.hadoop=http://hadoop.apache.org/docs/r1.2.0/api/
+javadoc.link.hadoop=http://hadoop.apache.org/docs/r2.5.2/api/
javadoc.packages=org.apache.nutch.*
dist.dir=./dist
Index: ivy/ivy.xml
===================================================================
--- ivy/ivy.xml (revision 1663736)
+++ ivy/ivy.xml (working copy)
@@ -46,13 +46,13 @@
-
+
@@ -67,7 +67,7 @@
-
+
@@ -80,12 +80,12 @@
-
+
@@ -103,7 +103,9 @@
-
+
+
-
+
+
+
-
-
+
+
+
+
+
+
Index: ivy/ivysettings.xml
===================================================================
--- ivy/ivysettings.xml (revision 1663736)
+++ ivy/ivysettings.xml (working copy)
@@ -39,6 +39,10 @@
value="[organisation]/[module]/[revision]/[module]-[revision]"/>
+
+
+
+
@@ -63,15 +67,29 @@
pattern="${maven2.pattern.ext}"
m2compatible="true"
/>
-
+
+
+
+
+
+
+
+
@@ -93,6 +111,7 @@
rather than look for them online.
-->
+
Index: src/java/org/apache/nutch/api/model/request/SeedList.java
===================================================================
--- src/java/org/apache/nutch/api/model/request/SeedList.java (revision 1663736)
+++ src/java/org/apache/nutch/api/model/request/SeedList.java (working copy)
@@ -21,6 +21,11 @@
public class SeedList implements Serializable {
+ /**
+ * Version identifier used by Java serialization to check class compatibility.
+ */
+ private static final long serialVersionUID = 1L;
+
private Long id;
private String name;
Index: src/java/org/apache/nutch/api/model/request/SeedUrl.java
===================================================================
--- src/java/org/apache/nutch/api/model/request/SeedUrl.java (revision 1663736)
+++ src/java/org/apache/nutch/api/model/request/SeedUrl.java (working copy)
@@ -4,6 +4,11 @@
public class SeedUrl implements Serializable {
+ /**
+ * Version identifier used by Java serialization to check class compatibility.
+ */
+ private static final long serialVersionUID = 1L;
+
private Long id;
private SeedList seedList;
Index: src/java/org/apache/nutch/crawl/DbUpdateMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdateMapper.java (revision 1663736)
+++ src/java/org/apache/nutch/crawl/DbUpdateMapper.java (working copy)
@@ -25,9 +25,7 @@
import org.apache.avro.util.Utf8;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.storage.Mark;
-import org.apache.nutch.util.NutchJob;
import org.slf4j.Logger;
-import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.scoring.ScoreDatum;
import org.apache.nutch.scoring.ScoringFilterException;
Index: src/java/org/apache/nutch/crawl/DbUpdaterJob.java
===================================================================
--- src/java/org/apache/nutch/crawl/DbUpdaterJob.java (revision 1663736)
+++ src/java/org/apache/nutch/crawl/DbUpdaterJob.java (working copy)
@@ -24,7 +24,6 @@
import org.apache.avro.util.Utf8;
import org.apache.gora.filter.FilterOp;
import org.apache.gora.filter.MapFieldValueFilter;
-import org.apache.gora.filter.SingleFieldValueFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
Index: src/java/org/apache/nutch/crawl/GeneratorMapper.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorMapper.java (revision 1663736)
+++ src/java/org/apache/nutch/crawl/GeneratorMapper.java (working copy)
@@ -16,7 +16,6 @@
******************************************************************************/
package org.apache.nutch.crawl;
-import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.GeneratorJob.SelectorEntry;
@@ -31,8 +30,6 @@
import java.io.IOException;
import java.net.MalformedURLException;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
public class GeneratorMapper extends
GoraMapper {
Index: src/java/org/apache/nutch/crawl/GeneratorReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/GeneratorReducer.java (revision 1663736)
+++ src/java/org/apache/nutch/crawl/GeneratorReducer.java (working copy)
@@ -23,9 +23,6 @@
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
-import org.apache.gora.query.Query;
-import org.apache.gora.query.Result;
-import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.GeneratorJob.SelectorEntry;
import org.apache.nutch.fetcher.FetcherJob.FetcherMapper;
Index: src/java/org/apache/nutch/crawl/WebTableReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/WebTableReader.java (revision 1663736)
+++ src/java/org/apache/nutch/crawl/WebTableReader.java (working copy)
@@ -339,6 +339,7 @@
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
+ @SuppressWarnings("unused")
boolean success = job.waitForCompletion(true);
if (LOG.isInfoEnabled()) {
Index: src/java/org/apache/nutch/fetcher/FetcherJob.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherJob.java (revision 1663736)
+++ src/java/org/apache/nutch/fetcher/FetcherJob.java (working copy)
@@ -18,13 +18,10 @@
import java.io.IOException;
import java.text.SimpleDateFormat;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
-import java.util.StringTokenizer;
-
import org.apache.avro.util.Utf8;
import org.apache.gora.filter.FilterOp;
import org.apache.gora.filter.MapFieldValueFilter;
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (revision 1663736)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (working copy)
@@ -19,7 +19,6 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
@@ -45,7 +44,6 @@
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
@@ -370,7 +368,7 @@
getConf().set(SolrConstants.SERVER_URL, solrUrl);
- Job job = new Job(getConf(), "solrdedup");
+ Job job = Job.getInstance(getConf(), "solrdedup");
job.setInputFormatClass(SolrInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java (revision 1663736)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java (working copy)
@@ -1,5 +1,5 @@
/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
+ * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
Index: src/java/org/apache/nutch/parse/ParseStatusCodes.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusCodes.java (revision 1663736)
+++ src/java/org/apache/nutch/parse/ParseStatusCodes.java (working copy)
@@ -16,8 +16,6 @@
******************************************************************************/
package org.apache.nutch.parse;
-import java.util.HashMap;
-
public interface ParseStatusCodes {
// Primary status codes:
Index: src/java/org/apache/nutch/parse/ParseStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatusUtils.java (revision 1663736)
+++ src/java/org/apache/nutch/parse/ParseStatusUtils.java (working copy)
@@ -16,7 +16,6 @@
******************************************************************************/
package org.apache.nutch.parse;
-import org.apache.avro.generic.GenericArray;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.storage.ParseStatus;
Index: src/java/org/apache/nutch/parse/ParserJob.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserJob.java (revision 1663736)
+++ src/java/org/apache/nutch/parse/ParserJob.java (working copy)
@@ -33,7 +33,6 @@
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.GeneratorJob;
import org.apache.nutch.crawl.SignatureFactory;
-import org.apache.nutch.crawl.URLWebPage;
import org.apache.nutch.metadata.HttpHeaders;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.storage.Mark;
@@ -49,7 +48,6 @@
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.ToolUtil;
import org.apache.gora.filter.FilterOp;
-import org.apache.gora.filter.SingleFieldValueFilter;
import org.apache.gora.mapreduce.GoraMapper;
public class ParserJob extends NutchTool implements Tool {
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java (revision 1663736)
+++ src/java/org/apache/nutch/plugin/Extension.java (working copy)
@@ -153,7 +153,7 @@
synchronized (getId()) {
try {
PluginRepository pluginRepository = PluginRepository.get(conf);
- Class extensionClazz = pluginRepository.getCachedClass(fDescriptor,
+ Class<?> extensionClazz = pluginRepository.getCachedClass(fDescriptor,
getClazz());
// lazy loading of Plugin in case there is no instance of the plugin
// already.
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java (revision 1663736)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java (working copy)
@@ -315,7 +315,7 @@
}
}
- public Class getCachedClass(PluginDescriptor pDescriptor, String className)
+ public Class<?> getCachedClass(PluginDescriptor pDescriptor, String className)
throws ClassNotFoundException {
Map descMap = CLASS_CACHE.get(className);
if (descMap == null) {
Index: src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java (revision 1663736)
+++ src/java/org/apache/nutch/protocol/ProtocolStatusUtils.java (working copy)
@@ -16,7 +16,6 @@
******************************************************************************/
package org.apache.nutch.protocol;
-import org.apache.avro.generic.GenericArray;
import org.apache.avro.util.Utf8;
import org.apache.nutch.storage.ProtocolStatus;
import org.apache.nutch.util.TableUtil;
Index: src/java/org/apache/nutch/protocol/RobotRulesParser.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRulesParser.java (revision 1663736)
+++ src/java/org/apache/nutch/protocol/RobotRulesParser.java (working copy)
@@ -22,7 +22,6 @@
import java.io.FileReader;
import java.io.LineNumberReader;
import java.net.URL;
-import java.util.ArrayList;
import java.util.Hashtable;
import java.util.StringTokenizer;
@@ -33,8 +32,6 @@
// Nutch imports
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.io.Text;
-
import com.google.common.io.Files;
import crawlercommons.robots.BaseRobotRules;
Index: src/java/org/apache/nutch/storage/Host.java
===================================================================
--- src/java/org/apache/nutch/storage/Host.java (revision 1663736)
+++ src/java/org/apache/nutch/storage/Host.java (working copy)
@@ -1,12 +1,12 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
+ * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -13,13 +13,13 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+ */
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
-package org.apache.nutch.storage;
+package org.apache.nutch.storage;
import org.apache.avro.util.Utf8;
import org.apache.nutch.util.Bytes;
@@ -26,15 +26,15 @@
@SuppressWarnings("all")
/** Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet */
-public class Host extends org.apache.gora.persistency.impl.PersistentBase
- implements org.apache.avro.specific.SpecificRecord,
- org.apache.gora.persistency.Persistent {
- public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
- .parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
+public class Host extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Host\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"Host represents a store of webpages or other data which resides on a server or other computer so that it can be accessed over the Internet\",\"fields\":[{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics\",\"default\":{}}]}");
/** Enum containing all data bean's fields. */
public static enum Field {
- METADATA(0, "metadata"), OUTLINKS(1, "outlinks"), INLINKS(2, "inlinks"), ;
+ METADATA(0, "metadata"),
+ OUTLINKS(1, "outlinks"),
+ INLINKS(2, "inlinks"),
+ ;
/**
* Field's index.
*/
@@ -47,51 +47,38 @@
/**
* Field's constructor
- *
- * @param index
- * field's index.
- * @param name
- * field's name.
+ * @param index field's index.
+ * @param name field's name.
*/
- Field(int index, String name) {
- this.index = index;
- this.name = name;
- }
+ Field(int index, String name) {this.index=index;this.name=name;}
/**
* Gets field's index.
- *
* @return int field's index.
*/
- public int getIndex() {
- return index;
- }
+ public int getIndex() {return index;}
/**
* Gets field's name.
- *
* @return String field's name.
*/
- public String getName() {
- return name;
- }
+ public String getName() {return name;}
/**
* Gets field's attributes to string.
- *
* @return String field's attributes to string.
*/
- public String toString() {
- return name;
- }
+ public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = { "metadata", "outlinks",
- "inlinks", };
+ public static final String[] _ALL_FIELDS = {
+ "metadata",
+ "outlinks",
+ "inlinks",
+ };
/**
* Gets the total field count.
- *
* @return int field count
*/
public int getFieldsCount() {
@@ -98,160 +85,103 @@
return Host._ALL_FIELDS.length;
}
- /**
- * A multivalued metadata container used for storing a wide variety of host
- * metadata such as structured web server characterists etc
- */
- private java.util.Map metadata;
- /**
- * Hyperlinks which direct outside of the current host domain these can used
- * in a histogram style manner to generate host statistics
- */
- private java.util.Map outlinks;
- /**
- * Hyperlinks which link to pages within the current host domain these can
- * used in a histogram style manner to generate host statistics
- */
- private java.util.Map inlinks;
-
- public org.apache.avro.Schema getSchema() {
- return SCHEMA$;
- }
-
- // Used by DatumWriter. Applications should not call.
+ /** A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc */
+ private java.util.Map metadata;
+ /** Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics */
+ private java.util.Map outlinks;
+ /** Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics */
+ private java.util.Map inlinks;
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
public java.lang.Object get(int field$) {
switch (field$) {
- case 0:
- return metadata;
- case 1:
- return outlinks;
- case 2:
- return inlinks;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: return metadata;
+ case 1: return outlinks;
+ case 2: return inlinks;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
-
- // Used by DatumReader. Applications should not call.
- @SuppressWarnings(value = "unchecked")
+
+ // Used by DatumReader. Applications should not call.
+ @SuppressWarnings(value="unchecked")
public void put(int field$, java.lang.Object value) {
switch (field$) {
- case 0:
- metadata = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 1:
- outlinks = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 2:
- inlinks = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: metadata = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 1: outlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 2: inlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
- * Gets the value of the 'metadata' field. A multivalued metadata container
- * used for storing a wide variety of host metadata such as structured web
- * server characterists etc
- */
- public java.util.Map getMetadata() {
+ * Gets the value of the 'metadata' field.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc */
+ public java.util.Map getMetadata() {
return metadata;
}
/**
- * Sets the value of the 'metadata' field. A multivalued metadata container
- * used for storing a wide variety of host metadata such as structured web
- * server characterists etc * @param value the value to set.
+ * Sets the value of the 'metadata' field.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc * @param value the value to set.
*/
- public void setMetadata(
- java.util.Map value) {
- this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setMetadata(java.util.Map value) {
+ this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(0);
}
-
+
/**
- * Checks the dirty status of the 'metadata' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * multivalued metadata container used for storing a wide variety of host
- * metadata such as structured web server characterists etc * @param value the
- * value to set.
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characterists etc * @param value the value to set.
*/
- public boolean isMetadataDirty(
- java.util.Map value) {
+ public boolean isMetadataDirty(java.util.Map value) {
return isDirty(0);
}
/**
- * Gets the value of the 'outlinks' field. Hyperlinks which direct outside of
- * the current host domain these can used in a histogram style manner to
- * generate host statistics
- */
- public java.util.Map getOutlinks() {
+ * Gets the value of the 'outlinks' field.
+ * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics */
+ public java.util.Map getOutlinks() {
return outlinks;
}
/**
- * Sets the value of the 'outlinks' field. Hyperlinks which direct outside of
- * the current host domain these can used in a histogram style manner to
- * generate host statistics * @param value the value to set.
+ * Sets the value of the 'outlinks' field.
+ * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics * @param value the value to set.
*/
- public void setOutlinks(
- java.util.Map value) {
- this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setOutlinks(java.util.Map value) {
+ this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(1);
}
-
+
/**
- * Checks the dirty status of the 'outlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Hyperlinks which direct outside of the current host domain these can used
- * in a histogram style manner to generate host statistics * @param value the
- * value to set.
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Hyperlinks which direct outside of the current host domain these can used in a histogram style manner to generate host statistics * @param value the value to set.
*/
- public boolean isOutlinksDirty(
- java.util.Map value) {
+ public boolean isOutlinksDirty(java.util.Map value) {
return isDirty(1);
}
/**
- * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
- * within the current host domain these can used in a histogram style manner
- * to generate host statistics
- */
- public java.util.Map getInlinks() {
+ * Gets the value of the 'inlinks' field.
+ * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics */
+ public java.util.Map getInlinks() {
return inlinks;
}
/**
- * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
- * within the current host domain these can used in a histogram style manner
- * to generate host statistics * @param value the value to set.
+ * Sets the value of the 'inlinks' field.
+ * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics * @param value the value to set.
*/
- public void setInlinks(
- java.util.Map value) {
- this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setInlinks(java.util.Map value) {
+ this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(2);
}
-
+
/**
- * Checks the dirty status of the 'inlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Hyperlinks which link to pages within the current host domain these can
- * used in a histogram style manner to generate host statistics * @param value
- * the value to set.
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Hyperlinks which link to pages within the current host domain these can used in a histogram style manner to generate host statistics * @param value the value to set.
*/
- public boolean isInlinksDirty(
- java.util.Map value) {
+ public boolean isInlinksDirty(java.util.Map value) {
return isDirty(2);
}
@@ -259,19 +189,17 @@
public static org.apache.nutch.storage.Host.Builder newBuilder() {
return new org.apache.nutch.storage.Host.Builder();
}
-
+
/** Creates a new Host RecordBuilder by copying an existing Builder */
- public static org.apache.nutch.storage.Host.Builder newBuilder(
- org.apache.nutch.storage.Host.Builder other) {
+ public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host.Builder other) {
return new org.apache.nutch.storage.Host.Builder(other);
}
-
+
/** Creates a new Host RecordBuilder by copying an existing Host instance */
- public static org.apache.nutch.storage.Host.Builder newBuilder(
- org.apache.nutch.storage.Host other) {
+ public static org.apache.nutch.storage.Host.Builder newBuilder(org.apache.nutch.storage.Host other) {
return new org.apache.nutch.storage.Host.Builder(other);
}
-
+
private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
java.nio.ByteBuffer input) {
java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -294,67 +222,80 @@
copy.limit(limit);
return copy.asReadOnlyBuffer();
}
-
+
+ public boolean contains(String key) {
+ return metadata.containsKey(new Utf8(key));
+ }
+
+ public String getValue(String key, String defaultValue) {
+ if (!contains(key)) return defaultValue;
+ return Bytes.toString(metadata.get(new Utf8(key)));
+ }
+
+ public int getInt(String key, int defaultValue) {
+ if (!contains(key)) return defaultValue;
+ return Integer.parseInt(getValue(key,null));
+ }
+ public long getLong(String key, long defaultValue) {
+ if (!contains(key)) return defaultValue;
+ return Long.parseLong(getValue(key,null));
+ }
+
/**
* RecordBuilder for Host instances.
*/
- public static class Builder extends
- org.apache.avro.specific.SpecificRecordBuilderBase implements
- org.apache.avro.data.RecordBuilder {
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
- private java.util.Map metadata;
- private java.util.Map outlinks;
- private java.util.Map inlinks;
+ private java.util.Map metadata;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
/** Creates a new Builder */
private Builder() {
super(org.apache.nutch.storage.Host.SCHEMA$);
}
-
+
/** Creates a Builder by copying an existing Builder */
private Builder(org.apache.nutch.storage.Host.Builder other) {
super(other);
}
-
+
/** Creates a Builder by copying an existing Host instance */
private Builder(org.apache.nutch.storage.Host other) {
- super(org.apache.nutch.storage.Host.SCHEMA$);
+ super(org.apache.nutch.storage.Host.SCHEMA$);
if (isValidValue(fields()[0], other.metadata)) {
- this.metadata = (java.util.Map) data()
- .deepCopy(fields()[0].schema(), other.metadata);
+ this.metadata = (java.util.Map) data().deepCopy(fields()[0].schema(), other.metadata);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.outlinks)) {
- this.outlinks = (java.util.Map) data()
- .deepCopy(fields()[1].schema(), other.outlinks);
+ this.outlinks = (java.util.Map) data().deepCopy(fields()[1].schema(), other.outlinks);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.inlinks)) {
- this.inlinks = (java.util.Map) data()
- .deepCopy(fields()[2].schema(), other.inlinks);
+ this.inlinks = (java.util.Map) data().deepCopy(fields()[2].schema(), other.inlinks);
fieldSetFlags()[2] = true;
}
}
/** Gets the value of the 'metadata' field */
- public java.util.Map getMetadata() {
+ public java.util.Map getMetadata() {
return metadata;
}
-
+
/** Sets the value of the 'metadata' field */
- public org.apache.nutch.storage.Host.Builder setMetadata(
- java.util.Map value) {
+ public org.apache.nutch.storage.Host.Builder setMetadata(java.util.Map value) {
validate(fields()[0], value);
this.metadata = value;
fieldSetFlags()[0] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'metadata' field has been set */
public boolean hasMetadata() {
return fieldSetFlags()[0];
}
-
+
/** Clears the value of the 'metadata' field */
public org.apache.nutch.storage.Host.Builder clearMetadata() {
metadata = null;
@@ -361,26 +302,25 @@
fieldSetFlags()[0] = false;
return this;
}
-
+
/** Gets the value of the 'outlinks' field */
- public java.util.Map getOutlinks() {
+ public java.util.Map getOutlinks() {
return outlinks;
}
-
+
/** Sets the value of the 'outlinks' field */
- public org.apache.nutch.storage.Host.Builder setOutlinks(
- java.util.Map value) {
+ public org.apache.nutch.storage.Host.Builder setOutlinks(java.util.Map value) {
validate(fields()[1], value);
this.outlinks = value;
fieldSetFlags()[1] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'outlinks' field has been set */
public boolean hasOutlinks() {
return fieldSetFlags()[1];
}
-
+
/** Clears the value of the 'outlinks' field */
public org.apache.nutch.storage.Host.Builder clearOutlinks() {
outlinks = null;
@@ -387,26 +327,25 @@
fieldSetFlags()[1] = false;
return this;
}
-
+
/** Gets the value of the 'inlinks' field */
- public java.util.Map getInlinks() {
+ public java.util.Map getInlinks() {
return inlinks;
}
-
+
/** Sets the value of the 'inlinks' field */
- public org.apache.nutch.storage.Host.Builder setInlinks(
- java.util.Map value) {
+ public org.apache.nutch.storage.Host.Builder setInlinks(java.util.Map value) {
validate(fields()[2], value);
this.inlinks = value;
fieldSetFlags()[2] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'inlinks' field has been set */
public boolean hasInlinks() {
return fieldSetFlags()[2];
}
-
+
/** Clears the value of the 'inlinks' field */
public org.apache.nutch.storage.Host.Builder clearInlinks() {
inlinks = null;
@@ -413,20 +352,14 @@
fieldSetFlags()[2] = false;
return this;
}
-
+
@Override
public Host build() {
try {
Host record = new Host();
- record.metadata = fieldSetFlags()[0] ? this.metadata
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[0]));
- record.outlinks = fieldSetFlags()[1] ? this.outlinks
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[1]));
- record.inlinks = fieldSetFlags()[2] ? this.inlinks
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[2]));
+ record.metadata = fieldSetFlags()[0] ? this.metadata : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[0]));
+ record.outlinks = fieldSetFlags()[1] ? this.outlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[1]));
+ record.inlinks = fieldSetFlags()[2] ? this.inlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[2]));
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
@@ -433,155 +366,92 @@
}
}
}
-
- public Host.Tombstone getTombstone() {
- return TOMBSTONE;
+
+ public Host.Tombstone getTombstone(){
+ return TOMBSTONE;
}
- public Host newInstance() {
+ public Host newInstance(){
return newBuilder().build();
}
- // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
- // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
- public boolean contains(String key) {
- return metadata.containsKey(new Utf8(key));
- }
-
- // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
- // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
- public String getValue(String key, String defaultValue) {
- if (!contains(key))
- return defaultValue;
- return Bytes.toString(metadata.get(new Utf8(key)));
- }
-
- // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
- // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
- public int getInt(String key, int defaultValue) {
- if (!contains(key))
- return defaultValue;
- return Integer.parseInt(getValue(key, null));
- }
-
- // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
- // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
- public long getLong(String key, long defaultValue) {
- if (!contains(key))
- return defaultValue;
- return Long.parseLong(getValue(key, null));
- }
-
private static final Tombstone TOMBSTONE = new Tombstone();
-
- public static final class Tombstone extends Host implements
- org.apache.gora.persistency.Tombstone {
-
- private Tombstone() {
- }
-
- /**
- * Gets the value of the 'metadata' field. A multivalued metadata container
- * used for storing a wide variety of host metadata such as structured web
- * server characterists etc
- */
- public java.util.Map getMetadata() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'metadata' field. A multivalued metadata container
- * used for storing a wide variety of host metadata such as structured web
- * server characterists etc * @param value the value to set.
- */
- public void setMetadata(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'metadata' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * multivalued metadata container used for storing a wide variety of host
- * metadata such as structured web server characterists etc * @param value
- * the value to set.
- */
- public boolean isMetadataDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'outlinks' field. Hyperlinks which direct outside
- * of the current host domain these can used in a histogram style manner to
- * generate host statistics
- */
- public java.util.Map getOutlinks() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'outlinks' field. Hyperlinks which direct outside
- * of the current host domain these can used in a histogram style manner to
- * generate host statistics * @param value the value to set.
- */
- public void setOutlinks(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'outlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Hyperlinks which direct outside of the current host domain these can used
- * in a histogram style manner to generate host statistics * @param value
- * the value to set.
- */
- public boolean isOutlinksDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'inlinks' field. Hyperlinks which link to pages
- * within the current host domain these can used in a histogram style manner
- * to generate host statistics
- */
- public java.util.Map getInlinks() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'inlinks' field. Hyperlinks which link to pages
- * within the current host domain these can used in a histogram style manner
- * to generate host statistics * @param value the value to set.
- */
- public void setInlinks(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'inlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Hyperlinks which link to pages within the current host domain these can
- * used in a histogram style manner to generate host statistics * @param
- * value the value to set.
- */
- public boolean isInlinksDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
+
+ public static final class Tombstone extends Host implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'metadata' field.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characteristics etc */
+ public java.util.Map getMetadata() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'metadata' field.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characteristics etc * @param value the value to set.
+ */
+ public void setMetadata(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A multivalued metadata container used for storing a wide variety of host metadata such as structured web server characteristics etc * @param value the value to set.
+ */
+ public boolean isMetadataDirty(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'outlinks' field.
+ * Hyperlinks which direct outside of the current host domain; these can be used in a histogram style manner to generate host statistics */
+ public java.util.Map getOutlinks() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'outlinks' field.
+ * Hyperlinks which direct outside of the current host domain; these can be used in a histogram style manner to generate host statistics * @param value the value to set.
+ */
+ public void setOutlinks(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Hyperlinks which direct outside of the current host domain; these can be used in a histogram style manner to generate host statistics * @param value the value to set.
+ */
+ public boolean isOutlinksDirty(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'inlinks' field.
+ * Hyperlinks which link to pages within the current host domain; these can be used in a histogram style manner to generate host statistics */
+ public java.util.Map getInlinks() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'inlinks' field.
+ * Hyperlinks which link to pages within the current host domain; these can be used in a histogram style manner to generate host statistics * @param value the value to set.
+ */
+ public void setInlinks(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Hyperlinks which link to pages within the current host domain; these can be used in a histogram style manner to generate host statistics * @param value the value to set.
+ */
+ public boolean isInlinksDirty(java.util.Map value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
}
+
+}
-}
Index: src/java/org/apache/nutch/storage/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ParseStatus.java (revision 1663736)
+++ src/java/org/apache/nutch/storage/ParseStatus.java (working copy)
@@ -1,12 +1,12 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
+ * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -13,26 +13,24 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+ */
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
-package org.apache.nutch.storage;
-
+package org.apache.nutch.storage;
@SuppressWarnings("all")
/** A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage */
-public class ParseStatus extends
- org.apache.gora.persistency.impl.PersistentBase implements
- org.apache.avro.specific.SpecificRecord,
- org.apache.gora.persistency.Persistent {
- public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
- .parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
+public class ParseStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ParseStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}");
/** Enum containing all data bean's fields. */
public static enum Field {
- MAJOR_CODE(0, "majorCode"), MINOR_CODE(1, "minorCode"), ARGS(2, "args"), ;
+ MAJOR_CODE(0, "majorCode"),
+ MINOR_CODE(1, "minorCode"),
+ ARGS(2, "args"),
+ ;
/**
* Field's index.
*/
@@ -45,51 +43,38 @@
/**
* Field's constructor
- *
- * @param index
- * field's index.
- * @param name
- * field's name.
+ * @param index field's index.
+ * @param name field's name.
*/
- Field(int index, String name) {
- this.index = index;
- this.name = name;
- }
+ Field(int index, String name) {this.index=index;this.name=name;}
/**
* Gets field's index.
- *
* @return int field's index.
*/
- public int getIndex() {
- return index;
- }
+ public int getIndex() {return index;}
/**
* Gets field's name.
- *
* @return String field's name.
*/
- public String getName() {
- return name;
- }
+ public String getName() {return name;}
/**
* Gets field's attributes to string.
- *
* @return String field's attributes to string.
*/
- public String toString() {
- return name;
- }
+ public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = { "majorCode", "minorCode",
- "args", };
+ public static final String[] _ALL_FIELDS = {
+ "majorCode",
+ "minorCode",
+ "args",
+ };
/**
* Gets the total field count.
- *
* @return int field count
*/
public int getFieldsCount() {
@@ -96,98 +81,53 @@
return ParseStatus._ALL_FIELDS.length;
}
- /**
- * Major parsing status' including NOTPARSED (Parsing was not performed),
- * SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more
- * specific error message in arguments.)
- */
+ /** Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) */
private int majorCode;
- /**
- * Minor parsing status' including SUCCESS_OK - Successful parse devoid of
- * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive
- * to redirect to another URL. The target URL can be retrieved from the
- * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
- * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
- * Content was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
- * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other
- * related parts of the content are needed to complete parsing. The list of
- * URLs to missing parts may be provided in arguments. The Fetcher may decide
- * to fetch these parts at once, then put them into Content.metadata, and
- * supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There
- * was no content to be parsed - probably caused by errors at protocol stage.
- */
+ /** Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISSING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. */
private int minorCode;
- /**
- * Optional arguments supplied to compliment and/or justify the parse status
- * code.
- */
+ /** Optional arguments supplied to complement and/or justify the parse status code. */
private java.util.List args;
-
- public org.apache.avro.Schema getSchema() {
- return SCHEMA$;
- }
-
- // Used by DatumWriter. Applications should not call.
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
public java.lang.Object get(int field$) {
switch (field$) {
- case 0:
- return majorCode;
- case 1:
- return minorCode;
- case 2:
- return args;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: return majorCode;
+ case 1: return minorCode;
+ case 2: return args;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
-
- // Used by DatumReader. Applications should not call.
- @SuppressWarnings(value = "unchecked")
+
+ // Used by DatumReader. Applications should not call.
+ @SuppressWarnings(value="unchecked")
public void put(int field$, java.lang.Object value) {
switch (field$) {
- case 0:
- majorCode = (java.lang.Integer) (value);
- break;
- case 1:
- minorCode = (java.lang.Integer) (value);
- break;
- case 2:
- args = (java.util.List) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyListWrapper(
- (java.util.List) value));
- break;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: majorCode = (java.lang.Integer)(value); break;
+ case 1: minorCode = (java.lang.Integer)(value); break;
+ case 2: args = (java.util.List)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
- * Gets the value of the 'majorCode' field. Major parsing status' including
- * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
- * (General failure. There may be a more specific error message in arguments.)
- */
+ * Gets the value of the 'majorCode' field.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) */
public java.lang.Integer getMajorCode() {
return majorCode;
}
/**
- * Sets the value of the 'majorCode' field. Major parsing status' including
- * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED
- * (General failure. There may be a more specific error message in arguments.)
- * * @param value the value to set.
+ * Sets the value of the 'majorCode' field.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) * @param value the value to set.
*/
public void setMajorCode(java.lang.Integer value) {
this.majorCode = value;
setDirty(0);
}
-
+
/**
- * Checks the dirty status of the 'majorCode' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Major
- * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
- * (Parsing succeeded), FAILED (General failure. There may be a more specific
- * error message in arguments.) * @param value the value to set.
+ * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) * @param value the value to set.
*/
public boolean isMajorCodeDirty(java.lang.Integer value) {
return isDirty(0);
@@ -194,65 +134,24 @@
}
/**
- * Gets the value of the 'minorCode' field. Minor parsing status' including
- * SUCCESS_OK - Successful parse devoid of anomalies or issues,
- * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
- * another URL. The target URL can be retrieved from the arguments.,
- * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
- * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
- * was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
- * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
- * Other related parts of the content are needed to complete parsing. The list
- * of URLs to missing parts may be provided in arguments. The Fetcher may
- * decide to fetch these parts at once, then put them into Content.metadata,
- * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
- * There was no content to be parsed - probably caused by errors at protocol
- * stage.
- */
+ * Gets the value of the 'minorCode' field.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISSING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. */
public java.lang.Integer getMinorCode() {
return minorCode;
}
/**
- * Sets the value of the 'minorCode' field. Minor parsing status' including
- * SUCCESS_OK - Successful parse devoid of anomalies or issues,
- * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
- * another URL. The target URL can be retrieved from the arguments.,
- * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
- * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
- * was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
- * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
- * Other related parts of the content are needed to complete parsing. The list
- * of URLs to missing parts may be provided in arguments. The Fetcher may
- * decide to fetch these parts at once, then put them into Content.metadata,
- * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
- * There was no content to be parsed - probably caused by errors at protocol
- * stage. * @param value the value to set.
+ * Sets the value of the 'minorCode' field.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISSING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. * @param value the value to set.
*/
public void setMinorCode(java.lang.Integer value) {
this.minorCode = value;
setDirty(1);
}
-
+
/**
- * Checks the dirty status of the 'minorCode' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Minor
- * parsing status' including SUCCESS_OK - Successful parse devoid of anomalies
- * or issues, SUCCESS_REDIRECT - Parsed content contains a directive to
- * redirect to another URL. The target URL can be retrieved from the
- * arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which
- * may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed.
- * Content was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may
- * be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed.
- * Other related parts of the content are needed to complete parsing. The list
- * of URLs to missing parts may be provided in arguments. The Fetcher may
- * decide to fetch these parts at once, then put them into Content.metadata,
- * and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed.
- * There was no content to be parsed - probably caused by errors at protocol
- * stage. * @param value the value to set.
+ * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISSING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. * @param value the value to set.
*/
public boolean isMinorCodeDirty(java.lang.Integer value) {
return isDirty(1);
@@ -259,29 +158,24 @@
}
/**
- * Gets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the parse status code.
- */
+ * Gets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the parse status code. */
public java.util.List getArgs() {
return args;
}
/**
- * Sets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the parse status code. * @param value the value
- * to set.
+ * Sets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the parse status code. * @param value the value to set.
*/
public void setArgs(java.util.List value) {
- this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+ this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
setDirty(2);
}
-
+
/**
- * Checks the dirty status of the 'args' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Optional
- * arguments supplied to compliment and/or justify the parse status code. * @param
- * value the value to set.
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Optional arguments supplied to complement and/or justify the parse status code. * @param value the value to set.
*/
public boolean isArgsDirty(java.util.List value) {
return isDirty(2);
@@ -291,22 +185,17 @@
public static org.apache.nutch.storage.ParseStatus.Builder newBuilder() {
return new org.apache.nutch.storage.ParseStatus.Builder();
}
-
+
/** Creates a new ParseStatus RecordBuilder by copying an existing Builder */
- public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
- org.apache.nutch.storage.ParseStatus.Builder other) {
+ public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus.Builder other) {
return new org.apache.nutch.storage.ParseStatus.Builder(other);
}
-
- /**
- * Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus
- * instance
- */
- public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(
- org.apache.nutch.storage.ParseStatus other) {
+
+ /** Creates a new ParseStatus RecordBuilder by copying an existing ParseStatus instance */
+ public static org.apache.nutch.storage.ParseStatus.Builder newBuilder(org.apache.nutch.storage.ParseStatus other) {
return new org.apache.nutch.storage.ParseStatus.Builder(other);
}
-
+
private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
java.nio.ByteBuffer input) {
java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -329,13 +218,12 @@
copy.limit(limit);
return copy.asReadOnlyBuffer();
}
-
+
/**
* RecordBuilder for ParseStatus instances.
*/
- public static class Builder extends
- org.apache.avro.specific.SpecificRecordBuilderBase implements
- org.apache.avro.data.RecordBuilder {
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
private int majorCode;
private int minorCode;
@@ -345,28 +233,25 @@
private Builder() {
super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
}
-
+
/** Creates a Builder by copying an existing Builder */
private Builder(org.apache.nutch.storage.ParseStatus.Builder other) {
super(other);
}
-
+
/** Creates a Builder by copying an existing ParseStatus instance */
private Builder(org.apache.nutch.storage.ParseStatus other) {
- super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
+ super(org.apache.nutch.storage.ParseStatus.SCHEMA$);
if (isValidValue(fields()[0], other.majorCode)) {
- this.majorCode = (java.lang.Integer) data().deepCopy(
- fields()[0].schema(), other.majorCode);
+ this.majorCode = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.majorCode);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.minorCode)) {
- this.minorCode = (java.lang.Integer) data().deepCopy(
- fields()[1].schema(), other.minorCode);
+ this.minorCode = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.minorCode);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.args)) {
- this.args = (java.util.List) data().deepCopy(
- fields()[2].schema(), other.args);
+ this.args = (java.util.List) data().deepCopy(fields()[2].schema(), other.args);
fieldSetFlags()[2] = true;
}
}
@@ -375,69 +260,68 @@
public java.lang.Integer getMajorCode() {
return majorCode;
}
-
+
/** Sets the value of the 'majorCode' field */
public org.apache.nutch.storage.ParseStatus.Builder setMajorCode(int value) {
validate(fields()[0], value);
this.majorCode = value;
fieldSetFlags()[0] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'majorCode' field has been set */
public boolean hasMajorCode() {
return fieldSetFlags()[0];
}
-
+
/** Clears the value of the 'majorCode' field */
public org.apache.nutch.storage.ParseStatus.Builder clearMajorCode() {
fieldSetFlags()[0] = false;
return this;
}
-
+
/** Gets the value of the 'minorCode' field */
public java.lang.Integer getMinorCode() {
return minorCode;
}
-
+
/** Sets the value of the 'minorCode' field */
public org.apache.nutch.storage.ParseStatus.Builder setMinorCode(int value) {
validate(fields()[1], value);
this.minorCode = value;
fieldSetFlags()[1] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'minorCode' field has been set */
public boolean hasMinorCode() {
return fieldSetFlags()[1];
}
-
+
/** Clears the value of the 'minorCode' field */
public org.apache.nutch.storage.ParseStatus.Builder clearMinorCode() {
fieldSetFlags()[1] = false;
return this;
}
-
+
/** Gets the value of the 'args' field */
public java.util.List getArgs() {
return args;
}
-
+
/** Sets the value of the 'args' field */
- public org.apache.nutch.storage.ParseStatus.Builder setArgs(
- java.util.List value) {
+ public org.apache.nutch.storage.ParseStatus.Builder setArgs(java.util.List value) {
validate(fields()[2], value);
this.args = value;
fieldSetFlags()[2] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'args' field has been set */
public boolean hasArgs() {
return fieldSetFlags()[2];
}
-
+
/** Clears the value of the 'args' field */
public org.apache.nutch.storage.ParseStatus.Builder clearArgs() {
args = null;
@@ -444,18 +328,14 @@
fieldSetFlags()[2] = false;
return this;
}
-
+
@Override
public ParseStatus build() {
try {
ParseStatus record = new ParseStatus();
- record.majorCode = fieldSetFlags()[0] ? this.majorCode
- : (java.lang.Integer) defaultValue(fields()[0]);
- record.minorCode = fieldSetFlags()[1] ? this.minorCode
- : (java.lang.Integer) defaultValue(fields()[1]);
- record.args = fieldSetFlags()[2] ? this.args
- : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper(
- (java.util.List) defaultValue(fields()[2]));
+ record.majorCode = fieldSetFlags()[0] ? this.majorCode : (java.lang.Integer) defaultValue(fields()[0]);
+ record.minorCode = fieldSetFlags()[1] ? this.minorCode : (java.lang.Integer) defaultValue(fields()[1]);
+ record.args = fieldSetFlags()[2] ? this.args : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[2]));
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
@@ -462,155 +342,92 @@
}
}
}
-
- public ParseStatus.Tombstone getTombstone() {
- return TOMBSTONE;
+
+ public ParseStatus.Tombstone getTombstone(){
+ return TOMBSTONE;
}
- public ParseStatus newInstance() {
+ public ParseStatus newInstance(){
return newBuilder().build();
}
private static final Tombstone TOMBSTONE = new Tombstone();
-
- public static final class Tombstone extends ParseStatus implements
- org.apache.gora.persistency.Tombstone {
-
- private Tombstone() {
- }
-
- /**
- * Gets the value of the 'majorCode' field. Major parsing status' including
- * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
- * FAILED (General failure. There may be a more specific error message in
- * arguments.)
- */
- public java.lang.Integer getMajorCode() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'majorCode' field. Major parsing status' including
- * NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded),
- * FAILED (General failure. There may be a more specific error message in
- * arguments.) * @param value the value to set.
- */
- public void setMajorCode(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'majorCode' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Major
- * parsing status' including NOTPARSED (Parsing was not performed), SUCCESS
- * (Parsing succeeded), FAILED (General failure. There may be a more
- * specific error message in arguments.) * @param value the value to set.
- */
- public boolean isMajorCodeDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'minorCode' field. Minor parsing status' including
- * SUCCESS_OK - Successful parse devoid of anomalies or issues,
- * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
- * another URL. The target URL can be retrieved from the arguments.,
- * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
- * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
- * was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
- * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
- * failed. Other related parts of the content are needed to complete
- * parsing. The list of URLs to missing parts may be provided in arguments.
- * The Fetcher may decide to fetch these parts at once, then put them into
- * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
- * - Parsing failed. There was no content to be parsed - probably caused by
- * errors at protocol stage.
- */
- public java.lang.Integer getMinorCode() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'minorCode' field. Minor parsing status' including
- * SUCCESS_OK - Successful parse devoid of anomalies or issues,
- * SUCCESS_REDIRECT - Parsed content contains a directive to redirect to
- * another URL. The target URL can be retrieved from the arguments.,
- * FAILED_EXCEPTION - Parsing failed. An Exception occured which may be
- * retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content
- * was truncated, but the parser cannot handle incomplete content.,
- * FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content
- * may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing
- * failed. Other related parts of the content are needed to complete
- * parsing. The list of URLs to missing parts may be provided in arguments.
- * The Fetcher may decide to fetch these parts at once, then put them into
- * Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT
- * - Parsing failed. There was no content to be parsed - probably caused by
- * errors at protocol stage. * @param value the value to set.
- */
- public void setMinorCode(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'minorCode' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Minor
- * parsing status' including SUCCESS_OK - Successful parse devoid of
- * anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a
- * directive to redirect to another URL. The target URL can be retrieved
- * from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception
- * occured which may be retrieved from the arguments., FAILED_TRUNCATED -
- * Parsing failed. Content was truncated, but the parser cannot handle
- * incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid
- * format e.g. the content may be corrupted or of wrong type.,
- * FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content
- * are needed to complete parsing. The list of URLs to missing parts may be
- * provided in arguments. The Fetcher may decide to fetch these parts at
- * once, then put them into Content.metadata, and supply them for
- * re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content
- * to be parsed - probably caused by errors at protocol stage. * @param
- * value the value to set.
- */
- public boolean isMinorCodeDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the parse status code.
- */
- public java.util.List getArgs() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the parse status code. * @param value the value
- * to set.
- */
- public void setArgs(java.util.List value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'args' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Optional arguments supplied to compliment and/or justify the parse status
- * code. * @param value the value to set.
- */
- public boolean isArgsDirty(java.util.List value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
+
+ public static final class Tombstone extends ParseStatus implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'majorCode' field.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) */
+ public java.lang.Integer getMajorCode() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'majorCode' field.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) * @param value the value to set.
+ */
+ public void setMajorCode(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'majorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.) * @param value the value to set.
+ */
+ public boolean isMajorCodeDirty(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'minorCode' field.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. */
+ public java.lang.Integer getMinorCode() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'minorCode' field.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. * @param value the value to set.
+ */
+ public void setMinorCode(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'minorCode' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occurred which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - probably caused by errors at protocol stage. * @param value the value to set.
+ */
+ public boolean isMinorCodeDirty(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the parse status code. */
+ public java.util.List getArgs() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the parse status code. * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Optional arguments supplied to complement and/or justify the parse status code. * @param value the value to set.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
}
+
+}
-}
Index: src/java/org/apache/nutch/storage/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/storage/ProtocolStatus.java (revision 1663736)
+++ src/java/org/apache/nutch/storage/ProtocolStatus.java (working copy)
@@ -1,12 +1,12 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
+ * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -13,28 +13,27 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+ */
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
-package org.apache.nutch.storage;
+package org.apache.nutch.storage;
import org.apache.nutch.protocol.ProtocolStatusUtils;
@SuppressWarnings("all")
/** A nested container representing data captured from web server responses. */
-public class ProtocolStatus extends
- org.apache.gora.persistency.impl.PersistentBase implements
- org.apache.avro.specific.SpecificRecord,
- org.apache.gora.persistency.Persistent {
- public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
- .parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
+public class ProtocolStatus extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}");
/** Enum containing all data bean's fields. */
public static enum Field {
- CODE(0, "code"), ARGS(1, "args"), LAST_MODIFIED(2, "lastModified"), ;
+ CODE(0, "code"),
+ ARGS(1, "args"),
+ LAST_MODIFIED(2, "lastModified"),
+ ;
/**
* Field's index.
*/
@@ -47,50 +46,38 @@
/**
* Field's constructor
- *
- * @param index
- * field's index.
- * @param name
- * field's name.
+ * @param index field's index.
+ * @param name field's name.
*/
- Field(int index, String name) {
- this.index = index;
- this.name = name;
- }
+ Field(int index, String name) {this.index=index;this.name=name;}
/**
* Gets field's index.
- *
* @return int field's index.
*/
- public int getIndex() {
- return index;
- }
+ public int getIndex() {return index;}
/**
* Gets field's name.
- *
* @return String field's name.
*/
- public String getName() {
- return name;
- }
+ public String getName() {return name;}
/**
* Gets field's attributes to string.
- *
* @return String field's attributes to string.
*/
- public String toString() {
- return name;
- }
+ public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = { "code", "args", "lastModified", };
+ public static final String[] _ALL_FIELDS = {
+ "code",
+ "args",
+ "lastModified",
+ };
/**
* Gets the total field count.
- *
* @return int field count
*/
public int getFieldsCount() {
@@ -97,140 +84,53 @@
return ProtocolStatus._ALL_FIELDS.length;
}
- /**
- * A protocol response code which can be one of SUCCESS - content was
- * retrieved without errors, FAILED - Content was not retrieved. Any further
- * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
- * found. Application may attempt to retry later, GONE - Resource is gone,
- * MOVED - Resource has moved permanently. New url should be found in args,
- * TEMP_MOVED - Resource has moved temporarily. New url should be found in
- * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
- * Application may retry immediately., EXCEPTION - Unspecified exception
- * occured. Further information may be provided in args., ACCESS_DENIED -
- * Access denied - authorization required, but missing/incorrect.,
- * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
- * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since
- * the last fetch., WOULDBLOCK - Request was refused by protocol plugins,
- * because it would block. The expected number of milliseconds to wait before
- * retry may be provided in args., BLOCKED - Thread was blocked http.max.delays
- * times during fetching.
- */
+ /** A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching. */
private int code;
- /**
- * Optional arguments supplied to compliment and/or justify the response code.
- */
+ /** Optional arguments supplied to complement and/or justify the response code. */
private java.util.List args;
- /**
- * A server reponse indicating when this page was last modified, this can be
- * unreliable at times hence this is used as a default fall back value for the
- * preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage
- * itself.
- */
+ /** A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. */
private long lastModified;
-
- public org.apache.avro.Schema getSchema() {
- return SCHEMA$;
- }
-
- // Used by DatumWriter. Applications should not call.
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
public java.lang.Object get(int field$) {
switch (field$) {
- case 0:
- return code;
- case 1:
- return args;
- case 2:
- return lastModified;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: return code;
+ case 1: return args;
+ case 2: return lastModified;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
-
- // Used by DatumReader. Applications should not call.
- @SuppressWarnings(value = "unchecked")
+
+ // Used by DatumReader. Applications should not call.
+ @SuppressWarnings(value="unchecked")
public void put(int field$, java.lang.Object value) {
switch (field$) {
- case 0:
- code = (java.lang.Integer) (value);
- break;
- case 1:
- args = (java.util.List) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyListWrapper(
- (java.util.List) value));
- break;
- case 2:
- lastModified = (java.lang.Long) (value);
- break;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: code = (java.lang.Integer)(value); break;
+ case 1: args = (java.util.List)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)value)); break;
+ case 2: lastModified = (java.lang.Long)(value); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
- * Gets the value of the 'code' field. A protocol response code which can be
- * one of SUCCESS - content was retrieved without errors, FAILED - Content was
- * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
- * - This protocol was not found. Application may attempt to retry later, GONE
- * - Resource is gone, MOVED - Resource has moved permanently. New url should
- * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
- * should be found in args., NOTFOUND - Resource was not found, RETRY -
- * Temporary failure. Application may retry immediately., EXCEPTION -
- * Unspecified exception occured. Further information may be provided in
- * args., ACCESS_DENIED - Access denied - authorization required, but
- * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
- * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
- * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
- * refused by protocol plugins, because it would block. The expected number of
- * milliseconds to wait before retry may be provided in args., BLOCKED -
- * Thread was blocked http.max.delays times during fetching.
- */
+ * Gets the value of the 'code' field.
+ * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching. */
public java.lang.Integer getCode() {
return code;
}
/**
- * Sets the value of the 'code' field. A protocol response code which can be
- * one of SUCCESS - content was retrieved without errors, FAILED - Content was
- * not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND
- * - This protocol was not found. Application may attempt to retry later, GONE
- * - Resource is gone, MOVED - Resource has moved permanently. New url should
- * be found in args, TEMP_MOVED - Resource has moved temporarily. New url
- * should be found in args., NOTFOUND - Resource was not found, RETRY -
- * Temporary failure. Application may retry immediately., EXCEPTION -
- * Unspecified exception occured. Further information may be provided in
- * args., ACCESS_DENIED - Access denied - authorization required, but
- * missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules.,
- * REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching.,
- * NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was
- * refused by protocol plugins, because it would block. The expected number of
- * milliseconds to wait before retry may be provided in args., BLOCKED -
- * Thread was blocked http.max.delays times during fetching. * @param value
- * the value to set.
+ * Sets the value of the 'code' field. A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+ * @param value the value to set.
*/
public void setCode(java.lang.Integer value) {
this.code = value;
setDirty(0);
}
-
+
/**
- * Checks the dirty status of the 'code' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * protocol response code which can be one of SUCCESS - content was retrieved
- * without errors, FAILED - Content was not retrieved. Any further errors may
- * be indicated in args, PROTO_NOT_FOUND - This protocol was not found.
- * Application may attempt to retry later, GONE - Resource is gone, MOVED -
- * Resource has moved permanently. New url should be found in args, TEMP_MOVED
- * - Resource has moved temporarily. New url should be found in args.,
- * NOTFOUND - Resource was not found, RETRY - Temporary failure. Application
- * may retry immediately., EXCEPTION - Unspecified exception occured. Further
- * information may be provided in args., ACCESS_DENIED - Access denied -
- * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
- * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
- * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch.,
- * WOULDBLOCK - Request was refused by protocol plugins, because it would
- * block. The expected number of milliseconds to wait before retry may be
- * provided in args., BLOCKED - Thread was blocked http.max.delays times
- * during fetching. * @param value the value to set.
+ * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database. A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+ * @param value not used by this check.
*/
public boolean isCodeDirty(java.lang.Integer value) {
return isDirty(0);
@@ -237,29 +137,24 @@
}
/**
- * Gets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the response code.
- */
+ * Gets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the response code. */
public java.util.List getArgs() {
return args;
}
/**
- * Sets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the response code. * @param value the value to
- * set.
+ * Sets the value of the 'args' field. Optional arguments supplied to complement and/or justify the response code.
+ * @param value the value to set.
*/
public void setArgs(java.util.List value) {
- this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
+ this.args = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyListWrapper(value);
setDirty(1);
}
-
+
/**
- * Checks the dirty status of the 'args' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Optional
- * arguments supplied to compliment and/or justify the response code. * @param
- * value the value to set.
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database. Optional arguments supplied to complement and/or justify the response code.
+ * @param value not used by this check.
*/
public boolean isArgsDirty(java.util.List value) {
return isDirty(1);
@@ -266,34 +161,24 @@
}
/**
- * Gets the value of the 'lastModified' field. A server reponse indicating
- * when this page was last modified, this can be unreliable at times hence
- * this is used as a default fall back value for the preferred 'modifiedTime'
- * and 'preModifiedTime' obtained from the WebPage itself.
- */
+ * Gets the value of the 'lastModified' field.
+ * A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. */
public java.lang.Long getLastModified() {
return lastModified;
}
/**
- * Sets the value of the 'lastModified' field. A server reponse indicating
- * when this page was last modified, this can be unreliable at times hence
- * this is used as a default fall back value for the preferred 'modifiedTime'
- * and 'preModifiedTime' obtained from the WebPage itself. * @param value the
- * value to set.
+ * Sets the value of the 'lastModified' field. A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+ * @param value the value to set.
*/
public void setLastModified(java.lang.Long value) {
this.lastModified = value;
setDirty(2);
}
-
+
/**
- * Checks the dirty status of the 'lastModified' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A server
- * reponse indicating when this page was last modified, this can be unreliable
- * at times hence this is used as a default fall back value for the preferred
- * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. * @param
- * value the value to set.
+ * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database. A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+ * @param value not used by this check.
*/
public boolean isLastModifiedDirty(java.lang.Long value) {
return isDirty(2);
@@ -303,22 +188,17 @@
public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder() {
return new org.apache.nutch.storage.ProtocolStatus.Builder();
}
-
+
/** Creates a new ProtocolStatus RecordBuilder by copying an existing Builder */
- public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
- org.apache.nutch.storage.ProtocolStatus.Builder other) {
+ public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
}
-
- /**
- * Creates a new ProtocolStatus RecordBuilder by copying an existing
- * ProtocolStatus instance
- */
- public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(
- org.apache.nutch.storage.ProtocolStatus other) {
+
+ /** Creates a new ProtocolStatus RecordBuilder by copying an existing ProtocolStatus instance */
+ public static org.apache.nutch.storage.ProtocolStatus.Builder newBuilder(org.apache.nutch.storage.ProtocolStatus other) {
return new org.apache.nutch.storage.ProtocolStatus.Builder(other);
}
-
+
private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
java.nio.ByteBuffer input) {
java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -341,13 +221,20 @@
copy.limit(limit);
return copy.asReadOnlyBuffer();
}
-
+
/**
+ * A convenience method which reports whether this {@link ProtocolStatus} represents success.
+ * @return true if the 'code' field equals {@link ProtocolStatusUtils#SUCCESS}, false otherwise.
+ */
+ public boolean isSuccess() {
+ return code == ProtocolStatusUtils.SUCCESS;
+ }
+
+ /**
* RecordBuilder for ProtocolStatus instances.
*/
- public static class Builder extends
- org.apache.avro.specific.SpecificRecordBuilderBase
- implements org.apache.avro.data.RecordBuilder {
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
private int code;
private java.util.List args;
@@ -357,28 +244,25 @@
private Builder() {
super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
}
-
+
/** Creates a Builder by copying an existing Builder */
private Builder(org.apache.nutch.storage.ProtocolStatus.Builder other) {
super(other);
}
-
+
/** Creates a Builder by copying an existing ProtocolStatus instance */
private Builder(org.apache.nutch.storage.ProtocolStatus other) {
- super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
+ super(org.apache.nutch.storage.ProtocolStatus.SCHEMA$);
if (isValidValue(fields()[0], other.code)) {
- this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(),
- other.code);
+ this.code = (java.lang.Integer) data().deepCopy(fields()[0].schema(), other.code);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.args)) {
- this.args = (java.util.List) data().deepCopy(
- fields()[1].schema(), other.args);
+ this.args = (java.util.List) data().deepCopy(fields()[1].schema(), other.args);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.lastModified)) {
- this.lastModified = (java.lang.Long) data().deepCopy(
- fields()[2].schema(), other.lastModified);
+ this.lastModified = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.lastModified);
fieldSetFlags()[2] = true;
}
}
@@ -387,45 +271,44 @@
public java.lang.Integer getCode() {
return code;
}
-
+
/** Sets the value of the 'code' field */
public org.apache.nutch.storage.ProtocolStatus.Builder setCode(int value) {
validate(fields()[0], value);
this.code = value;
fieldSetFlags()[0] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'code' field has been set */
public boolean hasCode() {
return fieldSetFlags()[0];
}
-
+
/** Clears the value of the 'code' field */
public org.apache.nutch.storage.ProtocolStatus.Builder clearCode() {
fieldSetFlags()[0] = false;
return this;
}
-
+
/** Gets the value of the 'args' field */
public java.util.List getArgs() {
return args;
}
-
+
/** Sets the value of the 'args' field */
- public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(
- java.util.List value) {
+ public org.apache.nutch.storage.ProtocolStatus.Builder setArgs(java.util.List value) {
validate(fields()[1], value);
this.args = value;
fieldSetFlags()[1] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'args' field has been set */
public boolean hasArgs() {
return fieldSetFlags()[1];
}
-
+
/** Clears the value of the 'args' field */
public org.apache.nutch.storage.ProtocolStatus.Builder clearArgs() {
args = null;
@@ -432,43 +315,38 @@
fieldSetFlags()[1] = false;
return this;
}
-
+
/** Gets the value of the 'lastModified' field */
public java.lang.Long getLastModified() {
return lastModified;
}
-
+
/** Sets the value of the 'lastModified' field */
- public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(
- long value) {
+ public org.apache.nutch.storage.ProtocolStatus.Builder setLastModified(long value) {
validate(fields()[2], value);
this.lastModified = value;
fieldSetFlags()[2] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'lastModified' field has been set */
public boolean hasLastModified() {
return fieldSetFlags()[2];
}
-
+
/** Clears the value of the 'lastModified' field */
public org.apache.nutch.storage.ProtocolStatus.Builder clearLastModified() {
fieldSetFlags()[2] = false;
return this;
}
-
+
@Override
public ProtocolStatus build() {
try {
ProtocolStatus record = new ProtocolStatus();
- record.code = fieldSetFlags()[0] ? this.code
- : (java.lang.Integer) defaultValue(fields()[0]);
- record.args = fieldSetFlags()[1] ? this.args
- : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper(
- (java.util.List) defaultValue(fields()[1]));
- record.lastModified = fieldSetFlags()[2] ? this.lastModified
- : (java.lang.Long) defaultValue(fields()[2]);
+ record.code = fieldSetFlags()[0] ? this.code : (java.lang.Integer) defaultValue(fields()[0]);
+ record.args = fieldSetFlags()[1] ? this.args : (java.util.List) new org.apache.gora.persistency.impl.DirtyListWrapper((java.util.List)defaultValue(fields()[1]));
+ record.lastModified = fieldSetFlags()[2] ? this.lastModified : (java.lang.Long) defaultValue(fields()[2]);
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
@@ -475,173 +353,92 @@
}
}
}
-
- public ProtocolStatus.Tombstone getTombstone() {
- return TOMBSTONE;
+
+ public ProtocolStatus.Tombstone getTombstone(){
+ return TOMBSTONE;
}
- public ProtocolStatus newInstance() {
+ public ProtocolStatus newInstance(){
return newBuilder().build();
}
- // TODO NUTCH-1709 Generated classes o.a.n.storage.Host and
- // o.a.n.storage.ProtocolStatus contain methods not defined in source .avsc
- /**
- * A convenience method which returns a successful {@link ProtocolStatus}.
- *
- * @return the {@link ProtocolStatus} value for 200 (success).
- */
- public boolean isSuccess() {
- return code == ProtocolStatusUtils.SUCCESS;
- }
-
private static final Tombstone TOMBSTONE = new Tombstone();
-
- public static final class Tombstone extends ProtocolStatus implements
- org.apache.gora.persistency.Tombstone {
-
- private Tombstone() {
- }
-
- /**
- * Gets the value of the 'code' field. A protocol response code which can be
- * one of SUCCESS - content was retrieved without errors, FAILED - Content
- * was not retrieved. Any further errors may be indicated in args,
- * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
- * retry later, GONE - Resource is gone, MOVED - Resource has moved
- * permanently. New url should be found in args, TEMP_MOVED - Resource has
- * moved temporarily. New url should be found in args., NOTFOUND - Resource
- * was not found, RETRY - Temporary failure. Application may retry
- * immediately., EXCEPTION - Unspecified exception occured. Further
- * information may be provided in args., ACCESS_DENIED - Access denied -
- * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
- * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
- * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
- * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
- * would block. The expected number of milliseconds to wait before retry may
- * be provided in args., BLOCKED - Thread was blocked http.max.delays times
- * during fetching.
- */
- public java.lang.Integer getCode() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'code' field. A protocol response code which can be
- * one of SUCCESS - content was retrieved without errors, FAILED - Content
- * was not retrieved. Any further errors may be indicated in args,
- * PROTO_NOT_FOUND - This protocol was not found. Application may attempt to
- * retry later, GONE - Resource is gone, MOVED - Resource has moved
- * permanently. New url should be found in args, TEMP_MOVED - Resource has
- * moved temporarily. New url should be found in args., NOTFOUND - Resource
- * was not found, RETRY - Temporary failure. Application may retry
- * immediately., EXCEPTION - Unspecified exception occured. Further
- * information may be provided in args., ACCESS_DENIED - Access denied -
- * authorization required, but missing/incorrect., ROBOTS_DENIED - Access
- * denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects.,
- * NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last
- * fetch., WOULDBLOCK - Request was refused by protocol plugins, because it
- * would block. The expected number of milliseconds to wait before retry may
- * be provided in args., BLOCKED - Thread was blocked http.max.delays times
- * during fetching. * @param value the value to set.
- */
- public void setCode(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'code' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * protocol response code which can be one of SUCCESS - content was
- * retrieved without errors, FAILED - Content was not retrieved. Any further
- * errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not
- * found. Application may attempt to retry later, GONE - Resource is gone,
- * MOVED - Resource has moved permanently. New url should be found in args,
- * TEMP_MOVED - Resource has moved temporarily. New url should be found in
- * args., NOTFOUND - Resource was not found, RETRY - Temporary failure.
- * Application may retry immediately., EXCEPTION - Unspecified exception
- * occured. Further information may be provided in args., ACCESS_DENIED -
- * Access denied - authorization required, but missing/incorrect.,
- * ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too
- * many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged
- * since the last fetch., WOULDBLOCK - Request was refused by protocol
- * plugins, because it would block. The expected number of milliseconds to
- * wait before retry may be provided in args., BLOCKED - Thread was blocked
- * http.max.delays times during fetching. * @param value the value to set.
- */
- public boolean isCodeDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the response code.
- */
- public java.util.List getArgs() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'args' field. Optional arguments supplied to
- * compliment and/or justify the response code. * @param value the value to
- * set.
- */
- public void setArgs(java.util.List value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'args' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Optional arguments supplied to compliment and/or justify the response
- * code. * @param value the value to set.
- */
- public boolean isArgsDirty(java.util.List value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'lastModified' field. A server reponse indicating
- * when this page was last modified, this can be unreliable at times hence
- * this is used as a default fall back value for the preferred
- * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
- */
- public java.lang.Long getLastModified() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'lastModified' field. A server reponse indicating
- * when this page was last modified, this can be unreliable at times hence
- * this is used as a default fall back value for the preferred
- * 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. * @param
- * value the value to set.
- */
- public void setLastModified(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'lastModified' field. A field is dirty if
- * it represents a change that has not yet been written to the database. A
- * server reponse indicating when this page was last modified, this can be
- * unreliable at times hence this is used as a default fall back value for
- * the preferred 'modifiedTime' and 'preModifiedTime' obtained from the
- * WebPage itself. * @param value the value to set.
- */
- public boolean isLastModifiedDirty(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
+
+ public static final class Tombstone extends ProtocolStatus implements org.apache.gora.persistency.Tombstone {
+
+ private Tombstone() { }
+
+ /**
+ * Gets the value of the 'code' field.
+ * A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching. */
+ public java.lang.Integer getCode() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'code' field. A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+ * @param value the value to set.
+ */
+ public void setCode(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'code' field. A field is dirty if it represents a change that has not yet been written to the database. A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occurred. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.
+ * @param value not used; this override always throws.
+ */
+ public boolean isCodeDirty(java.lang.Integer value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'args' field.
+ * Optional arguments supplied to complement and/or justify the response code. */
+ public java.util.List getArgs() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'args' field. Optional arguments supplied to complement and/or justify the response code.
+ * @param value the value to set.
+ */
+ public void setArgs(java.util.List value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'args' field. A field is dirty if it represents a change that has not yet been written to the database. Optional arguments supplied to complement and/or justify the response code.
+ * @param value not used; this override always throws.
+ */
+ public boolean isArgsDirty(java.util.List value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+ /**
+ * Gets the value of the 'lastModified' field.
+ * A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself. */
+ public java.lang.Long getLastModified() {
+ throw new java.lang.UnsupportedOperationException("Get is not supported on tombstones");
+ }
+
+ /**
+ * Sets the value of the 'lastModified' field. A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+ * @param value the value to set.
+ */
+ public void setLastModified(java.lang.Long value) {
+ throw new java.lang.UnsupportedOperationException("Set is not supported on tombstones");
+ }
+
+ /**
+ * Checks the dirty status of the 'lastModified' field. A field is dirty if it represents a change that has not yet been written to the database. A server response indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.
+ * @param value not used; this override always throws.
+ */
+ public boolean isLastModifiedDirty(java.lang.Long value) {
+ throw new java.lang.UnsupportedOperationException("IsDirty is not supported on tombstones");
+ }
+
+
}
+
+}
-}
Index: src/java/org/apache/nutch/storage/WebPage.java
===================================================================
--- src/java/org/apache/nutch/storage/WebPage.java (revision 1663736)
+++ src/java/org/apache/nutch/storage/WebPage.java (working copy)
@@ -1,12 +1,12 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
+ * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -13,34 +13,45 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+ */
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
-package org.apache.nutch.storage;
-
+package org.apache.nutch.storage;
@SuppressWarnings("all")
/** WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time */
-public class WebPage extends org.apache.gora.persistency.impl.PersistentBase
- implements org.apache.avro.specific.SpecificRecord,
- org.apache.gora.persistency.Persistent {
- public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
- .parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
+public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The original associated with this WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedle is implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The number of retried attempts at fetching the WebPage since it was last successfully fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A nested container representing data captured from web server responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A protocol response code which can be one of SUCCESS - content was retrieved without errors, FAILED - Content was not retrieved. Any further errors may be indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved permanently. New url should be found in args, TEMP_MOVED - Resource has moved temporarily. New url should be found in args., NOTFOUND - Resource was not found, RETRY - Temporary failure. Application may retry immediately., EXCEPTION - Unspecified exception occured. Further information may be provided in args., ACCESS_DENIED - Access denied - authorization required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol plugins, because it would block. 
The expected number of milliseconds to wait before retry may be provided in args., BLOCKED - Thread was blocked http.max.delays times during fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the response code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A server reponse indicating when this page was last modified, this can be unreliable at times hence this is used as a default fall back value for the preferred 'modifiedTime' and 'preModifiedTime' obtained from the WebPage itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The entire raw document content e.g. raw XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. 
This is essentially the WebPage's fingerprint represnting its state for any point in time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The title of the WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The textual content of the WebPage devoid from native markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A nested container representing parse status data captured from invocation of parsers on fetch of a WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED (General failure. There may be a more specific error message in arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to another URL. The target URL can be retrieved from the arguments., FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - Parsing failed. Invalid format e.g. the content may be corrupted or of wrong type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the content are needed to complete parsing. The list of URLs to missing parts may be provided in arguments. The Fetcher may decide to fetch these parts at once, then put them into Content.metadata, and supply them for re-parsing., FAILED_MISING_CONTENT - Parsing failed. 
There was no content to be parsed - probably caused by errors at protocol stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional arguments supplied to compliment and/or justify the parse status code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header information returned from the web server used to server the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which direct outside of the current domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded hyperlinks which link to pages within the current domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers flags which represent user and machine decisions which have affected influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. 
They are assigned to a WebPage on a job-by-job basis and thier values indicative of what actions should be associated with a WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A multi-valued metadata container used for storing everything from structured WebPage characterists, to ad-hoc extraction and metadata augmentation for any given WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId.\",\"default\":null}]}");
/** Enum containing all data bean's fields. */
public static enum Field {
- BASE_URL(0, "baseUrl"), STATUS(1, "status"), FETCH_TIME(2, "fetchTime"), PREV_FETCH_TIME(
- 3, "prevFetchTime"), FETCH_INTERVAL(4, "fetchInterval"), RETRIES_SINCE_FETCH(
- 5, "retriesSinceFetch"), MODIFIED_TIME(6, "modifiedTime"), PREV_MODIFIED_TIME(
- 7, "prevModifiedTime"), PROTOCOL_STATUS(8, "protocolStatus"), CONTENT(
- 9, "content"), CONTENT_TYPE(10, "contentType"), PREV_SIGNATURE(11,
- "prevSignature"), SIGNATURE(12, "signature"), TITLE(13, "title"), TEXT(
- 14, "text"), PARSE_STATUS(15, "parseStatus"), SCORE(16, "score"), REPR_URL(
- 17, "reprUrl"), HEADERS(18, "headers"), OUTLINKS(19, "outlinks"), INLINKS(
- 20, "inlinks"), MARKERS(21, "markers"), METADATA(22, "metadata"), BATCH_ID(
- 23, "batchId"), ;
+ BASE_URL(0, "baseUrl"),
+ STATUS(1, "status"),
+ FETCH_TIME(2, "fetchTime"),
+ PREV_FETCH_TIME(3, "prevFetchTime"),
+ FETCH_INTERVAL(4, "fetchInterval"),
+ RETRIES_SINCE_FETCH(5, "retriesSinceFetch"),
+ MODIFIED_TIME(6, "modifiedTime"),
+ PREV_MODIFIED_TIME(7, "prevModifiedTime"),
+ PROTOCOL_STATUS(8, "protocolStatus"),
+ CONTENT(9, "content"),
+ CONTENT_TYPE(10, "contentType"),
+ PREV_SIGNATURE(11, "prevSignature"),
+ SIGNATURE(12, "signature"),
+ TITLE(13, "title"),
+ TEXT(14, "text"),
+ PARSE_STATUS(15, "parseStatus"),
+ SCORE(16, "score"),
+ REPR_URL(17, "reprUrl"),
+ HEADERS(18, "headers"),
+ OUTLINKS(19, "outlinks"),
+ INLINKS(20, "inlinks"),
+ MARKERS(21, "markers"),
+ METADATA(22, "metadata"),
+ BATCH_ID(23, "batchId"),
+ ;
/**
* Field's index.
*/
@@ -53,55 +64,59 @@
/**
* Field's constructor
- *
- * @param index
- * field's index.
- * @param name
- * field's name.
+ * @param index field's index.
+ * @param name field's name.
*/
- Field(int index, String name) {
- this.index = index;
- this.name = name;
- }
+ Field(int index, String name) {this.index=index;this.name=name;}
/**
* Gets field's index.
- *
* @return int field's index.
*/
- public int getIndex() {
- return index;
- }
+ public int getIndex() {return index;}
/**
* Gets field's name.
- *
* @return String field's name.
*/
- public String getName() {
- return name;
- }
+ public String getName() {return name;}
/**
* Gets field's attributes to string.
- *
* @return String field's attributes to string.
*/
- public String toString() {
- return name;
- }
+ public String toString() {return name;}
};
- public static final String[] _ALL_FIELDS = { "baseUrl", "status",
- "fetchTime", "prevFetchTime", "fetchInterval", "retriesSinceFetch",
- "modifiedTime", "prevModifiedTime", "protocolStatus", "content",
- "contentType", "prevSignature", "signature", "title", "text",
- "parseStatus", "score", "reprUrl", "headers", "outlinks", "inlinks",
- "markers", "metadata", "batchId", };
+ public static final String[] _ALL_FIELDS = {
+ "baseUrl",
+ "status",
+ "fetchTime",
+ "prevFetchTime",
+ "fetchInterval",
+ "retriesSinceFetch",
+ "modifiedTime",
+ "prevModifiedTime",
+ "protocolStatus",
+ "content",
+ "contentType",
+ "prevSignature",
+ "signature",
+ "title",
+ "text",
+ "parseStatus",
+ "score",
+ "reprUrl",
+ "headers",
+ "outlinks",
+ "inlinks",
+ "markers",
+ "metadata",
+ "batchId",
+ };
/**
* Gets the total field count.
- *
* @return int field count
*/
public int getFieldsCount() {
@@ -110,72 +125,28 @@
/** The original associated with this WebPage. */
private java.lang.CharSequence baseUrl;
- /**
- * A crawl status associated with the WebPage, can be of value
- * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
- * was successfully fetched, STATUS_GONE - WebPage no longer exists,
- * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
- * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
- * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
- * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
- */
+ /** A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified */
private int status;
/** The system time in milliseconds for when the page was fetched. */
private long fetchTime;
- /**
- * The system time in milliseconds for when the page was last fetched if it
- * was previously fetched which can be used to calculate time delta within a
- * fetching schedule implementation
- */
+ /** The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation */
private long prevFetchTime;
- /**
- * The default number of seconds between re-fetches of a page. The default is
- * considered as 30 days unless a custom fetch schedle is implemented.
- */
+ /** The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented. */
private int fetchInterval;
- /**
- * The number of retried attempts at fetching the WebPage since it was last
- * successfully fetched.
- */
+ /** The number of retried attempts at fetching the WebPage since it was last successfully fetched. */
private int retriesSinceFetch;
- /**
- * The system time in milliseconds for when this WebPage was modified by the
- * WebPage author, if this is not available we default to the server for this
- * information. This is important to understand the changing nature of the
- * WebPage.
- */
+ /** The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. */
private long modifiedTime;
- /**
- * The system time in milliseconds for when this WebPage was previously
- * modified by the author, if this is not available then we default to the
- * server for this information. This is important to understand the changing
- * nature of a WebPage.
- */
+ /** The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. */
private long prevModifiedTime;
private org.apache.nutch.storage.ProtocolStatus protocolStatus;
/** The entire raw document content e.g. raw XHTML */
private java.nio.ByteBuffer content;
- /**
- * The type of the content contained within the document itself. ContentType
- * is an alias for MimeType. Historically, this parameter was only called
- * MimeType, but since this is actually the value included in the HTTP
- * Content-Type header, it can also include the character set encoding, which
- * makes it more than just a MimeType specification. If MimeType is specified
- * e.g. not None, that value is used. Otherwise, ContentType is used. If
- * neither is given, the DEFAULT_CONTENT_TYPE setting is used.
- */
+ /** The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. */
private java.lang.CharSequence contentType;
- /**
- * An implementation of a WebPage's previous signature from which it can be
- * identified and referenced at any point in time. This can be used to
- * uniquely identify WebPage deltas based on page fingerprints.
- */
+ /** An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. */
private java.nio.ByteBuffer prevSignature;
- /**
- * An implementation of a WebPage's signature from which it can be identified
- * and referenced at any point in time. This is essentially the WebPage's
- * fingerprint represnting its state for any point in time.
- */
+ /** An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time. */
private java.nio.ByteBuffer signature;
/** The title of the WebPage. */
private java.lang.CharSequence title;
@@ -182,223 +153,105 @@
/** The textual content of the WebPage devoid from native markup. */
private java.lang.CharSequence text;
private org.apache.nutch.storage.ParseStatus parseStatus;
- /**
- * A score used to determine a WebPage's relevance within the web graph it is
- * part of. This score may change over time based on graph characteristics.
- */
+ /** A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. */
private float score;
- /**
- * In the case where we are given two urls, a source and a destination of a
- * redirect, we should determine and persist the representative url. The logic
- * used to determine this is based largely on Yahoo!'s Slurp Crawler
- */
+ /** In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler */
private java.lang.CharSequence reprUrl;
- /**
- * Header information returned from the web server used to server the content
- * which is subsequently fetched from. This includes keys such as
- * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
- * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
- * LAST_MODIFIED and LOCATION.
- */
- private java.util.Map headers;
+ /** Header information returned from the web server used to serve the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. */
+ private java.util.Map headers;
/** Embedded hyperlinks which direct outside of the current domain. */
- private java.util.Map outlinks;
+ private java.util.Map outlinks;
/** Embedded hyperlinks which link to pages within the current domain. */
- private java.util.Map inlinks;
- /**
- * Markers flags which represent user and machine decisions which have
- * affected influenced a WebPage's current state. Markers can be system
- * specific and user machine driven in nature. They are assigned to a WebPage
- * on a job-by-job basis and thier values indicative of what actions should be
- * associated with a WebPage.
- */
- private java.util.Map markers;
- /**
- * A multi-valued metadata container used for storing everything from
- * structured WebPage characterists, to ad-hoc extraction and metadata
- * augmentation for any given WebPage.
- */
- private java.util.Map metadata;
- /**
- * A batchId that this WebPage is assigned to. WebPage's are fetched in
- * batches, called fetchlists. Pages are partitioned but can always be
- * associated and fetched alongside pages of similar value (within a crawl
- * cycle) based on batchId.
- */
+ private java.util.Map inlinks;
+ /** Marker flags which represent user and machine decisions which have affected or influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage. */
+ private java.util.Map markers;
+ /** A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage. */
+ private java.util.Map metadata;
+ /** A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. */
private java.lang.CharSequence batchId;
-
- public org.apache.avro.Schema getSchema() {
- return SCHEMA$;
- }
-
- // Used by DatumWriter. Applications should not call.
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
public java.lang.Object get(int field$) {
switch (field$) {
- case 0:
- return baseUrl;
- case 1:
- return status;
- case 2:
- return fetchTime;
- case 3:
- return prevFetchTime;
- case 4:
- return fetchInterval;
- case 5:
- return retriesSinceFetch;
- case 6:
- return modifiedTime;
- case 7:
- return prevModifiedTime;
- case 8:
- return protocolStatus;
- case 9:
- return content;
- case 10:
- return contentType;
- case 11:
- return prevSignature;
- case 12:
- return signature;
- case 13:
- return title;
- case 14:
- return text;
- case 15:
- return parseStatus;
- case 16:
- return score;
- case 17:
- return reprUrl;
- case 18:
- return headers;
- case 19:
- return outlinks;
- case 20:
- return inlinks;
- case 21:
- return markers;
- case 22:
- return metadata;
- case 23:
- return batchId;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: return baseUrl;
+ case 1: return status;
+ case 2: return fetchTime;
+ case 3: return prevFetchTime;
+ case 4: return fetchInterval;
+ case 5: return retriesSinceFetch;
+ case 6: return modifiedTime;
+ case 7: return prevModifiedTime;
+ case 8: return protocolStatus;
+ case 9: return content;
+ case 10: return contentType;
+ case 11: return prevSignature;
+ case 12: return signature;
+ case 13: return title;
+ case 14: return text;
+ case 15: return parseStatus;
+ case 16: return score;
+ case 17: return reprUrl;
+ case 18: return headers;
+ case 19: return outlinks;
+ case 20: return inlinks;
+ case 21: return markers;
+ case 22: return metadata;
+ case 23: return batchId;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
-
- // Used by DatumReader. Applications should not call.
- @SuppressWarnings(value = "unchecked")
+
+ // Used by DatumReader. Applications should not call.
+ @SuppressWarnings(value="unchecked")
public void put(int field$, java.lang.Object value) {
switch (field$) {
- case 0:
- baseUrl = (java.lang.CharSequence) (value);
- break;
- case 1:
- status = (java.lang.Integer) (value);
- break;
- case 2:
- fetchTime = (java.lang.Long) (value);
- break;
- case 3:
- prevFetchTime = (java.lang.Long) (value);
- break;
- case 4:
- fetchInterval = (java.lang.Integer) (value);
- break;
- case 5:
- retriesSinceFetch = (java.lang.Integer) (value);
- break;
- case 6:
- modifiedTime = (java.lang.Long) (value);
- break;
- case 7:
- prevModifiedTime = (java.lang.Long) (value);
- break;
- case 8:
- protocolStatus = (org.apache.nutch.storage.ProtocolStatus) (value);
- break;
- case 9:
- content = (java.nio.ByteBuffer) (value);
- break;
- case 10:
- contentType = (java.lang.CharSequence) (value);
- break;
- case 11:
- prevSignature = (java.nio.ByteBuffer) (value);
- break;
- case 12:
- signature = (java.nio.ByteBuffer) (value);
- break;
- case 13:
- title = (java.lang.CharSequence) (value);
- break;
- case 14:
- text = (java.lang.CharSequence) (value);
- break;
- case 15:
- parseStatus = (org.apache.nutch.storage.ParseStatus) (value);
- break;
- case 16:
- score = (java.lang.Float) (value);
- break;
- case 17:
- reprUrl = (java.lang.CharSequence) (value);
- break;
- case 18:
- headers = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 19:
- outlinks = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 20:
- inlinks = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 21:
- markers = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 22:
- metadata = (java.util.Map) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) value));
- break;
- case 23:
- batchId = (java.lang.CharSequence) (value);
- break;
- default:
- throw new org.apache.avro.AvroRuntimeException("Bad index");
+ case 0: baseUrl = (java.lang.CharSequence)(value); break;
+ case 1: status = (java.lang.Integer)(value); break;
+ case 2: fetchTime = (java.lang.Long)(value); break;
+ case 3: prevFetchTime = (java.lang.Long)(value); break;
+ case 4: fetchInterval = (java.lang.Integer)(value); break;
+ case 5: retriesSinceFetch = (java.lang.Integer)(value); break;
+ case 6: modifiedTime = (java.lang.Long)(value); break;
+ case 7: prevModifiedTime = (java.lang.Long)(value); break;
+ case 8: protocolStatus = (org.apache.nutch.storage.ProtocolStatus)(value); break;
+ case 9: content = (java.nio.ByteBuffer)(value); break;
+ case 10: contentType = (java.lang.CharSequence)(value); break;
+ case 11: prevSignature = (java.nio.ByteBuffer)(value); break;
+ case 12: signature = (java.nio.ByteBuffer)(value); break;
+ case 13: title = (java.lang.CharSequence)(value); break;
+ case 14: text = (java.lang.CharSequence)(value); break;
+ case 15: parseStatus = (org.apache.nutch.storage.ParseStatus)(value); break;
+ case 16: score = (java.lang.Float)(value); break;
+ case 17: reprUrl = (java.lang.CharSequence)(value); break;
+ case 18: headers = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 19: outlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 20: inlinks = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 21: markers = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 22: metadata = (java.util.Map)((value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
+ case 23: batchId = (java.lang.CharSequence)(value); break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
- * Gets the value of the 'baseUrl' field. The original associated with this
- * WebPage.
- */
+ * Gets the value of the 'baseUrl' field.
+ * The original associated with this WebPage. */
public java.lang.CharSequence getBaseUrl() {
return baseUrl;
}
/**
- * Sets the value of the 'baseUrl' field. The original associated with this
- * WebPage. * @param value the value to set.
+ * Sets the value of the 'baseUrl' field.
+ * The original associated with this WebPage. * @param value the value to set.
*/
public void setBaseUrl(java.lang.CharSequence value) {
this.baseUrl = value;
setDirty(0);
}
-
+
/**
- * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * original associated with this WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'baseUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The original associated with this WebPage. * @param value the value to set.
*/
public boolean isBaseUrlDirty(java.lang.CharSequence value) {
return isDirty(0);
@@ -405,43 +258,24 @@
}
/**
- * Gets the value of the 'status' field. A crawl status associated with the
- * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
- * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
- * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
- * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
- * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
- * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
- */
+ * Gets the value of the 'status' field.
+ * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified */
public java.lang.Integer getStatus() {
return status;
}
/**
- * Sets the value of the 'status' field. A crawl status associated with the
- * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
- * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
- * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
- * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
- * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
- * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
- * * @param value the value to set.
+ * Sets the value of the 'status' field.
+ * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified * @param value the value to set.
*/
public void setStatus(java.lang.Integer value) {
this.status = value;
setDirty(1);
}
-
+
/**
- * Checks the dirty status of the 'status' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A crawl
- * status associated with the WebPage, can be of value STATUS_UNFETCHED -
- * WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully
- * fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP -
- * WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage
- * permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful,
- * needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching
- * successful - page is not modified * @param value the value to set.
+ * Checks the dirty status of the 'status' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching successful - page is not modified * @param value the value to set.
*/
public boolean isStatusDirty(java.lang.Integer value) {
return isDirty(1);
@@ -448,27 +282,24 @@
}
/**
- * Gets the value of the 'fetchTime' field. The system time in milliseconds
- * for when the page was fetched.
- */
+ * Gets the value of the 'fetchTime' field.
+ * The system time in milliseconds for when the page was fetched. */
public java.lang.Long getFetchTime() {
return fetchTime;
}
/**
- * Sets the value of the 'fetchTime' field. The system time in milliseconds
- * for when the page was fetched. * @param value the value to set.
+ * Sets the value of the 'fetchTime' field.
+ * The system time in milliseconds for when the page was fetched. * @param value the value to set.
*/
public void setFetchTime(java.lang.Long value) {
this.fetchTime = value;
setDirty(2);
}
-
+
/**
- * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * system time in milliseconds for when the page was fetched. * @param value
- * the value to set.
+ * Checks the dirty status of the 'fetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The system time in milliseconds for when the page was fetched. * @param value the value to set.
*/
public boolean isFetchTimeDirty(java.lang.Long value) {
return isDirty(2);
@@ -475,32 +306,24 @@
}
/**
- * Gets the value of the 'prevFetchTime' field. The system time in
- * milliseconds for when the page was last fetched if it was previously
- * fetched which can be used to calculate time delta within a fetching
- * schedule implementation
- */
+ * Gets the value of the 'prevFetchTime' field.
+ * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation */
public java.lang.Long getPrevFetchTime() {
return prevFetchTime;
}
/**
- * Sets the value of the 'prevFetchTime' field. The system time in
- * milliseconds for when the page was last fetched if it was previously
- * fetched which can be used to calculate time delta within a fetching
- * schedule implementation * @param value the value to set.
+ * Sets the value of the 'prevFetchTime' field.
+ * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation * @param value the value to set.
*/
public void setPrevFetchTime(java.lang.Long value) {
this.prevFetchTime = value;
setDirty(3);
}
-
+
/**
- * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * system time in milliseconds for when the page was last fetched if it was
- * previously fetched which can be used to calculate time delta within a
- * fetching schedule implementation * @param value the value to set.
+ * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The system time in milliseconds for when the page was last fetched if it was previously fetched which can be used to calculate time delta within a fetching schedule implementation * @param value the value to set.
*/
public boolean isPrevFetchTimeDirty(java.lang.Long value) {
return isDirty(3);
@@ -507,30 +330,24 @@
}
/**
- * Gets the value of the 'fetchInterval' field. The default number of seconds
- * between re-fetches of a page. The default is considered as 30 days unless a
- * custom fetch schedle is implemented.
- */
+ * Gets the value of the 'fetchInterval' field.
+ * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented. */
public java.lang.Integer getFetchInterval() {
return fetchInterval;
}
/**
- * Sets the value of the 'fetchInterval' field. The default number of seconds
- * between re-fetches of a page. The default is considered as 30 days unless a
- * custom fetch schedle is implemented. * @param value the value to set.
+ * Sets the value of the 'fetchInterval' field.
+ * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented. * @param value the value to set.
*/
public void setFetchInterval(java.lang.Integer value) {
this.fetchInterval = value;
setDirty(4);
}
-
+
/**
- * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * default number of seconds between re-fetches of a page. The default is
- * considered as 30 days unless a custom fetch schedle is implemented. * @param
- * value the value to set.
+ * Checks the dirty status of the 'fetchInterval' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The default number of seconds between re-fetches of a page. The default is considered as 30 days unless a custom fetch schedule is implemented. * @param value the value to set.
*/
public boolean isFetchIntervalDirty(java.lang.Integer value) {
return isDirty(4);
@@ -537,28 +354,24 @@
}
/**
- * Gets the value of the 'retriesSinceFetch' field. The number of retried
- * attempts at fetching the WebPage since it was last successfully fetched.
- */
+ * Gets the value of the 'retriesSinceFetch' field.
+ * The number of retried attempts at fetching the WebPage since it was last successfully fetched. */
public java.lang.Integer getRetriesSinceFetch() {
return retriesSinceFetch;
}
/**
- * Sets the value of the 'retriesSinceFetch' field. The number of retried
- * attempts at fetching the WebPage since it was last successfully fetched. * @param
- * value the value to set.
+ * Sets the value of the 'retriesSinceFetch' field.
+ * The number of retried attempts at fetching the WebPage since it was last successfully fetched. * @param value the value to set.
*/
public void setRetriesSinceFetch(java.lang.Integer value) {
this.retriesSinceFetch = value;
setDirty(5);
}
-
+
/**
- * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty
- * if it represents a change that has not yet been written to the database.
- * The number of retried attempts at fetching the WebPage since it was last
- * successfully fetched. * @param value the value to set.
+ * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The number of retried attempts at fetching the WebPage since it was last successfully fetched. * @param value the value to set.
*/
public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
return isDirty(5);
@@ -565,34 +378,24 @@
}
/**
- * Gets the value of the 'modifiedTime' field. The system time in milliseconds
- * for when this WebPage was modified by the WebPage author, if this is not
- * available we default to the server for this information. This is important
- * to understand the changing nature of the WebPage.
- */
+ * Gets the value of the 'modifiedTime' field.
+ * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. */
public java.lang.Long getModifiedTime() {
return modifiedTime;
}
/**
- * Sets the value of the 'modifiedTime' field. The system time in milliseconds
- * for when this WebPage was modified by the WebPage author, if this is not
- * available we default to the server for this information. This is important
- * to understand the changing nature of the WebPage. * @param value the value
- * to set.
+ * Sets the value of the 'modifiedTime' field.
+ * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. * @param value the value to set.
*/
public void setModifiedTime(java.lang.Long value) {
this.modifiedTime = value;
setDirty(6);
}
-
+
/**
- * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * system time in milliseconds for when this WebPage was modified by the
- * WebPage author, if this is not available we default to the server for this
- * information. This is important to understand the changing nature of the
- * WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The system time in milliseconds for when this WebPage was modified by the WebPage author, if this is not available we default to the server for this information. This is important to understand the changing nature of the WebPage. * @param value the value to set.
*/
public boolean isModifiedTimeDirty(java.lang.Long value) {
return isDirty(6);
@@ -599,35 +402,24 @@
}
/**
- * Gets the value of the 'prevModifiedTime' field. The system time in
- * milliseconds for when this WebPage was previously modified by the author,
- * if this is not available then we default to the server for this
- * information. This is important to understand the changing nature of a
- * WebPage.
- */
+ * Gets the value of the 'prevModifiedTime' field.
+ * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. */
public java.lang.Long getPrevModifiedTime() {
return prevModifiedTime;
}
/**
- * Sets the value of the 'prevModifiedTime' field. The system time in
- * milliseconds for when this WebPage was previously modified by the author,
- * if this is not available then we default to the server for this
- * information. This is important to understand the changing nature of a
- * WebPage. * @param value the value to set.
+ * Sets the value of the 'prevModifiedTime' field.
+ * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. * @param value the value to set.
*/
public void setPrevModifiedTime(java.lang.Long value) {
this.prevModifiedTime = value;
setDirty(7);
}
-
+
/**
- * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
- * if it represents a change that has not yet been written to the database.
- * The system time in milliseconds for when this WebPage was previously
- * modified by the author, if this is not available then we default to the
- * server for this information. This is important to understand the changing
- * nature of a WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The system time in milliseconds for when this WebPage was previously modified by the author, if this is not available then we default to the server for this information. This is important to understand the changing nature of a WebPage. * @param value the value to set.
*/
public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
return isDirty(7);
@@ -642,48 +434,40 @@
/**
* Sets the value of the 'protocolStatus' field.
- *
- * @param value
- * the value to set.
+ * @param value the value to set.
*/
public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
this.protocolStatus = value;
setDirty(8);
}
-
+
/**
- * Checks the dirty status of the 'protocolStatus' field. A field is dirty if
- * it represents a change that has not yet been written to the database.
- *
- * @param value
- * the value to set.
+ * Checks the dirty status of the 'protocolStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value the value to set.
*/
- public boolean isProtocolStatusDirty(
- org.apache.nutch.storage.ProtocolStatus value) {
+ public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus value) {
return isDirty(8);
}
/**
- * Gets the value of the 'content' field. The entire raw document content e.g.
- * raw XHTML
- */
+ * Gets the value of the 'content' field.
+ * The entire raw document content e.g. raw XHTML */
public java.nio.ByteBuffer getContent() {
return content;
}
/**
- * Sets the value of the 'content' field. The entire raw document content e.g.
- * raw XHTML * @param value the value to set.
+ * Sets the value of the 'content' field.
+ * The entire raw document content e.g. raw XHTML * @param value the value to set.
*/
public void setContent(java.nio.ByteBuffer value) {
this.content = value;
setDirty(9);
}
-
+
/**
- * Checks the dirty status of the 'content' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * entire raw document content e.g. raw XHTML * @param value the value to set.
+ * Checks the dirty status of the 'content' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The entire raw document content e.g. raw XHTML * @param value the value to set.
*/
public boolean isContentDirty(java.nio.ByteBuffer value) {
return isDirty(9);
@@ -690,44 +474,24 @@
}
/**
- * Gets the value of the 'contentType' field. The type of the content
- * contained within the document itself. ContentType is an alias for MimeType.
- * Historically, this parameter was only called MimeType, but since this is
- * actually the value included in the HTTP Content-Type header, it can also
- * include the character set encoding, which makes it more than just a
- * MimeType specification. If MimeType is specified e.g. not None, that value
- * is used. Otherwise, ContentType is used. If neither is given, the
- * DEFAULT_CONTENT_TYPE setting is used.
- */
+ * Gets the value of the 'contentType' field.
+ * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. */
public java.lang.CharSequence getContentType() {
return contentType;
}
/**
- * Sets the value of the 'contentType' field. The type of the content
- * contained within the document itself. ContentType is an alias for MimeType.
- * Historically, this parameter was only called MimeType, but since this is
- * actually the value included in the HTTP Content-Type header, it can also
- * include the character set encoding, which makes it more than just a
- * MimeType specification. If MimeType is specified e.g. not None, that value
- * is used. Otherwise, ContentType is used. If neither is given, the
- * DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
+ * Sets the value of the 'contentType' field.
+ * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
*/
public void setContentType(java.lang.CharSequence value) {
this.contentType = value;
setDirty(10);
}
-
+
/**
- * Checks the dirty status of the 'contentType' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The type
- * of the content contained within the document itself. ContentType is an
- * alias for MimeType. Historically, this parameter was only called MimeType,
- * but since this is actually the value included in the HTTP Content-Type
- * header, it can also include the character set encoding, which makes it more
- * than just a MimeType specification. If MimeType is specified e.g. not None,
- * that value is used. Otherwise, ContentType is used. If neither is given,
- * the DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
+ * Checks the dirty status of the 'contentType' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The type of the content contained within the document itself. ContentType is an alias for MimeType. Historically, this parameter was only called MimeType, but since this is actually the value included in the HTTP Content-Type header, it can also include the character set encoding, which makes it more than just a MimeType specification. If MimeType is specified e.g. not None, that value is used. Otherwise, ContentType is used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
*/
public boolean isContentTypeDirty(java.lang.CharSequence value) {
return isDirty(10);
@@ -734,33 +498,24 @@
}
/**
- * Gets the value of the 'prevSignature' field. An implementation of a
- * WebPage's previous signature from which it can be identified and referenced
- * at any point in time. This can be used to uniquely identify WebPage deltas
- * based on page fingerprints.
- */
+ * Gets the value of the 'prevSignature' field.
+ * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. */
public java.nio.ByteBuffer getPrevSignature() {
return prevSignature;
}
/**
- * Sets the value of the 'prevSignature' field. An implementation of a
- * WebPage's previous signature from which it can be identified and referenced
- * at any point in time. This can be used to uniquely identify WebPage deltas
- * based on page fingerprints. * @param value the value to set.
+ * Sets the value of the 'prevSignature' field.
+ * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. * @param value the value to set.
*/
public void setPrevSignature(java.nio.ByteBuffer value) {
this.prevSignature = value;
setDirty(11);
}
-
+
/**
- * Checks the dirty status of the 'prevSignature' field. A field is dirty if
- * it represents a change that has not yet been written to the database. An
- * implementation of a WebPage's previous signature from which it can be
- * identified and referenced at any point in time. This can be used to
- * uniquely identify WebPage deltas based on page fingerprints. * @param value
- * the value to set.
+ * Checks the dirty status of the 'prevSignature' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * An implementation of a WebPage's previous signature from which it can be identified and referenced at any point in time. This can be used to uniquely identify WebPage deltas based on page fingerprints. * @param value the value to set.
*/
public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
return isDirty(11);
@@ -767,33 +522,24 @@
}
/**
- * Gets the value of the 'signature' field. An implementation of a WebPage's
- * signature from which it can be identified and referenced at any point in
- * time. This is essentially the WebPage's fingerprint represnting its state
- * for any point in time.
- */
+ * Gets the value of the 'signature' field.
+ * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time. */
public java.nio.ByteBuffer getSignature() {
return signature;
}
/**
- * Sets the value of the 'signature' field. An implementation of a WebPage's
- * signature from which it can be identified and referenced at any point in
- * time. This is essentially the WebPage's fingerprint represnting its state
- * for any point in time. * @param value the value to set.
+ * Sets the value of the 'signature' field.
+ * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time. * @param value the value to set.
*/
public void setSignature(java.nio.ByteBuffer value) {
this.signature = value;
setDirty(12);
}
-
+
/**
- * Checks the dirty status of the 'signature' field. A field is dirty if it
- * represents a change that has not yet been written to the database. An
- * implementation of a WebPage's signature from which it can be identified and
- * referenced at any point in time. This is essentially the WebPage's
- * fingerprint represnting its state for any point in time. * @param value the
- * value to set.
+ * Checks the dirty status of the 'signature' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * An implementation of a WebPage's signature from which it can be identified and referenced at any point in time. This is essentially the WebPage's fingerprint representing its state for any point in time. * @param value the value to set.
*/
public boolean isSignatureDirty(java.nio.ByteBuffer value) {
return isDirty(12);
@@ -800,25 +546,24 @@
}
/**
- * Gets the value of the 'title' field. The title of the WebPage.
- */
+ * Gets the value of the 'title' field.
+ * The title of the WebPage. */
public java.lang.CharSequence getTitle() {
return title;
}
/**
- * Sets the value of the 'title' field. The title of the WebPage. * @param
- * value the value to set.
+ * Sets the value of the 'title' field.
+ * The title of the WebPage. * @param value the value to set.
*/
public void setTitle(java.lang.CharSequence value) {
this.title = value;
setDirty(13);
}
-
+
/**
- * Checks the dirty status of the 'title' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * title of the WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'title' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The title of the WebPage. * @param value the value to set.
*/
public boolean isTitleDirty(java.lang.CharSequence value) {
return isDirty(13);
@@ -825,27 +570,24 @@
}
/**
- * Gets the value of the 'text' field. The textual content of the WebPage
- * devoid from native markup.
- */
+ * Gets the value of the 'text' field.
+ * The textual content of the WebPage devoid of native markup. */
public java.lang.CharSequence getText() {
return text;
}
/**
- * Sets the value of the 'text' field. The textual content of the WebPage
- * devoid from native markup. * @param value the value to set.
+ * Sets the value of the 'text' field.
+ * The textual content of the WebPage devoid of native markup. * @param value the value to set.
*/
public void setText(java.lang.CharSequence value) {
this.text = value;
setDirty(14);
}
-
+
/**
- * Checks the dirty status of the 'text' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * textual content of the WebPage devoid from native markup. * @param value
- * the value to set.
+ * Checks the dirty status of the 'text' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * The textual content of the WebPage devoid of native markup. * @param value the value to set.
*/
public boolean isTextDirty(java.lang.CharSequence value) {
return isDirty(14);
@@ -860,21 +602,16 @@
/**
* Sets the value of the 'parseStatus' field.
- *
- * @param value
- * the value to set.
+ * @param value the value to set.
*/
public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
this.parseStatus = value;
setDirty(15);
}
-
+
/**
- * Checks the dirty status of the 'parseStatus' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- *
- * @param value
- * the value to set.
+ * Checks the dirty status of the 'parseStatus' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * @param value unused; the dirty flag is looked up by field index only.
*/
public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
return isDirty(15);
@@ -881,30 +618,24 @@
}
/**
- * Gets the value of the 'score' field. A score used to determine a WebPage's
- * relevance within the web graph it is part of. This score may change over
- * time based on graph characteristics.
- */
+ * Gets the value of the 'score' field.
+ * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. */
public java.lang.Float getScore() {
return score;
}
/**
- * Sets the value of the 'score' field. A score used to determine a WebPage's
- * relevance within the web graph it is part of. This score may change over
- * time based on graph characteristics. * @param value the value to set.
+ * Sets the value of the 'score' field.
+ * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. * @param value the value to set.
*/
public void setScore(java.lang.Float value) {
this.score = value;
setDirty(16);
}
-
+
/**
- * Checks the dirty status of the 'score' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A score
- * used to determine a WebPage's relevance within the web graph it is part of.
- * This score may change over time based on graph characteristics. * @param
- * value the value to set.
+ * Checks the dirty status of the 'score' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A score used to determine a WebPage's relevance within the web graph it is part of. This score may change over time based on graph characteristics. * @param value the value to set.
*/
public boolean isScoreDirty(java.lang.Float value) {
return isDirty(16);
@@ -911,33 +642,24 @@
}
/**
- * Gets the value of the 'reprUrl' field. In the case where we are given two
- * urls, a source and a destination of a redirect, we should determine and
- * persist the representative url. The logic used to determine this is based
- * largely on Yahoo!'s Slurp Crawler
- */
+ * Gets the value of the 'reprUrl' field.
+ * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler */
public java.lang.CharSequence getReprUrl() {
return reprUrl;
}
/**
- * Sets the value of the 'reprUrl' field. In the case where we are given two
- * urls, a source and a destination of a redirect, we should determine and
- * persist the representative url. The logic used to determine this is based
- * largely on Yahoo!'s Slurp Crawler * @param value the value to set.
+ * Sets the value of the 'reprUrl' field.
+ * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler * @param value the value to set.
*/
public void setReprUrl(java.lang.CharSequence value) {
this.reprUrl = value;
setDirty(17);
}
-
+
/**
- * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
- * represents a change that has not yet been written to the database. In the
- * case where we are given two urls, a source and a destination of a redirect,
- * we should determine and persist the representative url. The logic used to
- * determine this is based largely on Yahoo!'s Slurp Crawler * @param value
- * the value to set.
+ * Checks the dirty status of the 'reprUrl' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * In the case where we are given two urls, a source and a destination of a redirect, we should determine and persist the representative url. The logic used to determine this is based largely on Yahoo!'s Slurp Crawler * @param value the value to set.
*/
public boolean isReprUrlDirty(java.lang.CharSequence value) {
return isDirty(17);
@@ -944,208 +666,144 @@
}
/**
- * Gets the value of the 'headers' field. Header information returned from the
- * web server used to server the content which is subsequently fetched from.
- * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
- * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
- * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
- */
- public java.util.Map getHeaders() {
+ * Gets the value of the 'headers' field.
+ * Header information returned from the web server used to serve the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. */
+ public java.util.Map getHeaders() {
return headers;
}
/**
- * Sets the value of the 'headers' field. Header information returned from the
- * web server used to server the content which is subsequently fetched from.
- * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
- * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
- * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the
- * value to set.
+ * Sets the value of the 'headers' field.
+ * Header information returned from the web server used to serve the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the value to set.
*/
- public void setHeaders(
- java.util.Map value) {
- this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setHeaders(java.util.Map value) {
+ this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(18);
}
-
+
/**
- * Checks the dirty status of the 'headers' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Header
- * information returned from the web server used to server the content which
- * is subsequently fetched from. This includes keys such as TRANSFER_ENCODING,
- * CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION,
- * CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
- * * @param value the value to set.
+ * Checks the dirty status of the 'headers' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Header information returned from the web server used to serve the content which is subsequently fetched from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the value to set.
*/
- public boolean isHeadersDirty(
- java.util.Map value) {
+ public boolean isHeadersDirty(java.util.Map value) {
return isDirty(18);
}
/**
- * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
- * outside of the current domain.
- */
- public java.util.Map getOutlinks() {
+ * Gets the value of the 'outlinks' field.
+ * Embedded hyperlinks which direct outside of the current domain. */
+ public java.util.Map getOutlinks() {
return outlinks;
}
/**
- * Sets the value of the 'outlinks' field. Embedded hyperlinks which direct
- * outside of the current domain. * @param value the value to set.
+ * Sets the value of the 'outlinks' field.
+ * Embedded hyperlinks which direct outside of the current domain. * @param value the value to set.
*/
- public void setOutlinks(
- java.util.Map value) {
- this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setOutlinks(java.util.Map value) {
+ this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(19);
}
-
+
/**
- * Checks the dirty status of the 'outlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Embedded
- * hyperlinks which direct outside of the current domain. * @param value the
- * value to set.
+ * Checks the dirty status of the 'outlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Embedded hyperlinks which direct outside of the current domain. * @param value the value to set.
*/
- public boolean isOutlinksDirty(
- java.util.Map value) {
+ public boolean isOutlinksDirty(java.util.Map value) {
return isDirty(19);
}
/**
- * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
- * pages within the current domain.
- */
- public java.util.Map getInlinks() {
+ * Gets the value of the 'inlinks' field.
+ * Embedded hyperlinks which link to pages within the current domain. */
+ public java.util.Map getInlinks() {
return inlinks;
}
/**
- * Sets the value of the 'inlinks' field. Embedded hyperlinks which link to
- * pages within the current domain. * @param value the value to set.
+ * Sets the value of the 'inlinks' field.
+ * Embedded hyperlinks which link to pages within the current domain. * @param value the value to set.
*/
- public void setInlinks(
- java.util.Map value) {
- this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setInlinks(java.util.Map value) {
+ this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(20);
}
-
+
/**
- * Checks the dirty status of the 'inlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Embedded
- * hyperlinks which link to pages within the current domain. * @param value
- * the value to set.
+ * Checks the dirty status of the 'inlinks' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Embedded hyperlinks which link to pages within the current domain. * @param value the value to set.
*/
- public boolean isInlinksDirty(
- java.util.Map value) {
+ public boolean isInlinksDirty(java.util.Map value) {
return isDirty(20);
}
/**
- * Gets the value of the 'markers' field. Markers flags which represent user
- * and machine decisions which have affected influenced a WebPage's current
- * state. Markers can be system specific and user machine driven in nature.
- * They are assigned to a WebPage on a job-by-job basis and thier values
- * indicative of what actions should be associated with a WebPage.
- */
- public java.util.Map getMarkers() {
+ * Gets the value of the 'markers' field.
+ * Markers flags which represent user and machine decisions which have affected or influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage. */
+ public java.util.Map getMarkers() {
return markers;
}
/**
- * Sets the value of the 'markers' field. Markers flags which represent user
- * and machine decisions which have affected influenced a WebPage's current
- * state. Markers can be system specific and user machine driven in nature.
- * They are assigned to a WebPage on a job-by-job basis and thier values
- * indicative of what actions should be associated with a WebPage. * @param
- * value the value to set.
+ * Sets the value of the 'markers' field.
+ * Markers flags which represent user and machine decisions which have affected or influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage. * @param value the value to set.
*/
- public void setMarkers(
- java.util.Map value) {
- this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setMarkers(java.util.Map value) {
+ this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(21);
}
-
+
/**
- * Checks the dirty status of the 'markers' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Markers
- * flags which represent user and machine decisions which have affected
- * influenced a WebPage's current state. Markers can be system specific and
- * user machine driven in nature. They are assigned to a WebPage on a
- * job-by-job basis and thier values indicative of what actions should be
- * associated with a WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'markers' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * Markers flags which represent user and machine decisions which have affected or influenced a WebPage's current state. Markers can be system specific and user machine driven in nature. They are assigned to a WebPage on a job-by-job basis and their values are indicative of what actions should be associated with a WebPage. * @param value the value to set.
*/
- public boolean isMarkersDirty(
- java.util.Map value) {
+ public boolean isMarkersDirty(java.util.Map value) {
return isDirty(21);
}
/**
- * Gets the value of the 'metadata' field. A multi-valued metadata container
- * used for storing everything from structured WebPage characterists, to
- * ad-hoc extraction and metadata augmentation for any given WebPage.
- */
- public java.util.Map getMetadata() {
+ * Gets the value of the 'metadata' field.
+ * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage. */
+ public java.util.Map getMetadata() {
return metadata;
}
/**
- * Sets the value of the 'metadata' field. A multi-valued metadata container
- * used for storing everything from structured WebPage characterists, to
- * ad-hoc extraction and metadata augmentation for any given WebPage. * @param
- * value the value to set.
+ * Sets the value of the 'metadata' field.
+ * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage. * @param value the value to set.
*/
- public void setMetadata(
- java.util.Map value) {
- this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value
- : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+ public void setMetadata(java.util.Map value) {
+ this.metadata = (value instanceof org.apache.gora.persistency.Dirtyable) ? value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
setDirty(22);
}
-
+
/**
- * Checks the dirty status of the 'metadata' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * multi-valued metadata container used for storing everything from structured
- * WebPage characterists, to ad-hoc extraction and metadata augmentation for
- * any given WebPage. * @param value the value to set.
+ * Checks the dirty status of the 'metadata' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A multi-valued metadata container used for storing everything from structured WebPage characteristics, to ad-hoc extraction and metadata augmentation for any given WebPage. * @param value the value to set.
*/
- public boolean isMetadataDirty(
- java.util.Map value) {
+ public boolean isMetadataDirty(java.util.Map value) {
return isDirty(22);
}
/**
- * Gets the value of the 'batchId' field. A batchId that this WebPage is
- * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
- * partitioned but can always be associated and fetched alongside pages of
- * similar value (within a crawl cycle) based on batchId.
- */
+ * Gets the value of the 'batchId' field.
+ * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. */
public java.lang.CharSequence getBatchId() {
return batchId;
}
/**
- * Sets the value of the 'batchId' field. A batchId that this WebPage is
- * assigned to. WebPage's are fetched in batches, called fetchlists. Pages are
- * partitioned but can always be associated and fetched alongside pages of
- * similar value (within a crawl cycle) based on batchId. * @param value the
- * value to set.
+ * Sets the value of the 'batchId' field.
+ * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. * @param value the value to set.
*/
public void setBatchId(java.lang.CharSequence value) {
this.batchId = value;
setDirty(23);
}
-
+
/**
- * Checks the dirty status of the 'batchId' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * batchId that this WebPage is assigned to. WebPage's are fetched in batches,
- * called fetchlists. Pages are partitioned but can always be associated and
- * fetched alongside pages of similar value (within a crawl cycle) based on
- * batchId. * @param value the value to set.
+ * Checks the dirty status of the 'batchId' field. A field is dirty if it represents a change that has not yet been written to the database.
+ * A batchId that this WebPage is assigned to. WebPage's are fetched in batches, called fetchlists. Pages are partitioned but can always be associated and fetched alongside pages of similar value (within a crawl cycle) based on batchId. * @param value the value to set.
*/
public boolean isBatchIdDirty(java.lang.CharSequence value) {
return isDirty(23);
@@ -1155,21 +813,17 @@
public static org.apache.nutch.storage.WebPage.Builder newBuilder() {
return new org.apache.nutch.storage.WebPage.Builder();
}
-
+
/** Creates a new WebPage RecordBuilder by copying an existing Builder */
- public static org.apache.nutch.storage.WebPage.Builder newBuilder(
- org.apache.nutch.storage.WebPage.Builder other) {
+ public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage.Builder other) {
return new org.apache.nutch.storage.WebPage.Builder(other);
}
-
- /**
- * Creates a new WebPage RecordBuilder by copying an existing WebPage instance
- */
- public static org.apache.nutch.storage.WebPage.Builder newBuilder(
- org.apache.nutch.storage.WebPage other) {
+
+ /** Creates a new WebPage RecordBuilder by copying an existing WebPage instance */
+ public static org.apache.nutch.storage.WebPage.Builder newBuilder(org.apache.nutch.storage.WebPage other) {
return new org.apache.nutch.storage.WebPage.Builder(other);
}
-
+
private static java.nio.ByteBuffer deepCopyToReadOnlyBuffer(
java.nio.ByteBuffer input) {
java.nio.ByteBuffer copy = java.nio.ByteBuffer.allocate(input.capacity());
@@ -1192,13 +846,12 @@
copy.limit(limit);
return copy.asReadOnlyBuffer();
}
-
+
/**
* RecordBuilder for WebPage instances.
*/
- public static class Builder extends
- org.apache.avro.specific.SpecificRecordBuilderBase implements
- org.apache.avro.data.RecordBuilder {
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase
+ implements org.apache.avro.data.RecordBuilder {
private java.lang.CharSequence baseUrl;
private int status;
@@ -1218,11 +871,11 @@
private org.apache.nutch.storage.ParseStatus parseStatus;
private float score;
private java.lang.CharSequence reprUrl;
- private java.util.Map headers;
- private java.util.Map outlinks;
- private java.util.Map inlinks;
- private java.util.Map markers;
- private java.util.Map metadata;
+ private java.util.Map headers;
+ private java.util.Map outlinks;
+ private java.util.Map inlinks;
+ private java.util.Map markers;
+ private java.util.Map metadata;
private java.lang.CharSequence batchId;
/** Creates a new Builder */
@@ -1229,133 +882,109 @@
private Builder() {
super(org.apache.nutch.storage.WebPage.SCHEMA$);
}
-
+
/** Creates a Builder by copying an existing Builder */
private Builder(org.apache.nutch.storage.WebPage.Builder other) {
super(other);
}
-
+
/** Creates a Builder by copying an existing WebPage instance */
private Builder(org.apache.nutch.storage.WebPage other) {
- super(org.apache.nutch.storage.WebPage.SCHEMA$);
+ super(org.apache.nutch.storage.WebPage.SCHEMA$);
if (isValidValue(fields()[0], other.baseUrl)) {
- this.baseUrl = (java.lang.CharSequence) data().deepCopy(
- fields()[0].schema(), other.baseUrl);
+ this.baseUrl = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.baseUrl);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.status)) {
- this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(),
- other.status);
+ this.status = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.status);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.fetchTime)) {
- this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(),
- other.fetchTime);
+ this.fetchTime = (java.lang.Long) data().deepCopy(fields()[2].schema(), other.fetchTime);
fieldSetFlags()[2] = true;
}
if (isValidValue(fields()[3], other.prevFetchTime)) {
- this.prevFetchTime = (java.lang.Long) data().deepCopy(
- fields()[3].schema(), other.prevFetchTime);
+ this.prevFetchTime = (java.lang.Long) data().deepCopy(fields()[3].schema(), other.prevFetchTime);
fieldSetFlags()[3] = true;
}
if (isValidValue(fields()[4], other.fetchInterval)) {
- this.fetchInterval = (java.lang.Integer) data().deepCopy(
- fields()[4].schema(), other.fetchInterval);
+ this.fetchInterval = (java.lang.Integer) data().deepCopy(fields()[4].schema(), other.fetchInterval);
fieldSetFlags()[4] = true;
}
if (isValidValue(fields()[5], other.retriesSinceFetch)) {
- this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(
- fields()[5].schema(), other.retriesSinceFetch);
+ this.retriesSinceFetch = (java.lang.Integer) data().deepCopy(fields()[5].schema(), other.retriesSinceFetch);
fieldSetFlags()[5] = true;
}
if (isValidValue(fields()[6], other.modifiedTime)) {
- this.modifiedTime = (java.lang.Long) data().deepCopy(
- fields()[6].schema(), other.modifiedTime);
+ this.modifiedTime = (java.lang.Long) data().deepCopy(fields()[6].schema(), other.modifiedTime);
fieldSetFlags()[6] = true;
}
if (isValidValue(fields()[7], other.prevModifiedTime)) {
- this.prevModifiedTime = (java.lang.Long) data().deepCopy(
- fields()[7].schema(), other.prevModifiedTime);
+ this.prevModifiedTime = (java.lang.Long) data().deepCopy(fields()[7].schema(), other.prevModifiedTime);
fieldSetFlags()[7] = true;
}
if (isValidValue(fields()[8], other.protocolStatus)) {
- this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data()
- .deepCopy(fields()[8].schema(), other.protocolStatus);
+ this.protocolStatus = (org.apache.nutch.storage.ProtocolStatus) data().deepCopy(fields()[8].schema(), other.protocolStatus);
fieldSetFlags()[8] = true;
}
if (isValidValue(fields()[9], other.content)) {
- this.content = (java.nio.ByteBuffer) data().deepCopy(
- fields()[9].schema(), other.content);
+ this.content = (java.nio.ByteBuffer) data().deepCopy(fields()[9].schema(), other.content);
fieldSetFlags()[9] = true;
}
if (isValidValue(fields()[10], other.contentType)) {
- this.contentType = (java.lang.CharSequence) data().deepCopy(
- fields()[10].schema(), other.contentType);
+ this.contentType = (java.lang.CharSequence) data().deepCopy(fields()[10].schema(), other.contentType);
fieldSetFlags()[10] = true;
}
if (isValidValue(fields()[11], other.prevSignature)) {
- this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(
- fields()[11].schema(), other.prevSignature);
+ this.prevSignature = (java.nio.ByteBuffer) data().deepCopy(fields()[11].schema(), other.prevSignature);
fieldSetFlags()[11] = true;
}
if (isValidValue(fields()[12], other.signature)) {
- this.signature = (java.nio.ByteBuffer) data().deepCopy(
- fields()[12].schema(), other.signature);
+ this.signature = (java.nio.ByteBuffer) data().deepCopy(fields()[12].schema(), other.signature);
fieldSetFlags()[12] = true;
}
if (isValidValue(fields()[13], other.title)) {
- this.title = (java.lang.CharSequence) data().deepCopy(
- fields()[13].schema(), other.title);
+ this.title = (java.lang.CharSequence) data().deepCopy(fields()[13].schema(), other.title);
fieldSetFlags()[13] = true;
}
if (isValidValue(fields()[14], other.text)) {
- this.text = (java.lang.CharSequence) data().deepCopy(
- fields()[14].schema(), other.text);
+ this.text = (java.lang.CharSequence) data().deepCopy(fields()[14].schema(), other.text);
fieldSetFlags()[14] = true;
}
if (isValidValue(fields()[15], other.parseStatus)) {
- this.parseStatus = (org.apache.nutch.storage.ParseStatus) data()
- .deepCopy(fields()[15].schema(), other.parseStatus);
+ this.parseStatus = (org.apache.nutch.storage.ParseStatus) data().deepCopy(fields()[15].schema(), other.parseStatus);
fieldSetFlags()[15] = true;
}
if (isValidValue(fields()[16], other.score)) {
- this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(),
- other.score);
+ this.score = (java.lang.Float) data().deepCopy(fields()[16].schema(), other.score);
fieldSetFlags()[16] = true;
}
if (isValidValue(fields()[17], other.reprUrl)) {
- this.reprUrl = (java.lang.CharSequence) data().deepCopy(
- fields()[17].schema(), other.reprUrl);
+ this.reprUrl = (java.lang.CharSequence) data().deepCopy(fields()[17].schema(), other.reprUrl);
fieldSetFlags()[17] = true;
}
if (isValidValue(fields()[18], other.headers)) {
- this.headers = (java.util.Map) data()
- .deepCopy(fields()[18].schema(), other.headers);
+ this.headers = (java.util.Map) data().deepCopy(fields()[18].schema(), other.headers);
fieldSetFlags()[18] = true;
}
if (isValidValue(fields()[19], other.outlinks)) {
- this.outlinks = (java.util.Map) data()
- .deepCopy(fields()[19].schema(), other.outlinks);
+ this.outlinks = (java.util.Map) data().deepCopy(fields()[19].schema(), other.outlinks);
fieldSetFlags()[19] = true;
}
if (isValidValue(fields()[20], other.inlinks)) {
- this.inlinks = (java.util.Map) data()
- .deepCopy(fields()[20].schema(), other.inlinks);
+ this.inlinks = (java.util.Map) data().deepCopy(fields()[20].schema(), other.inlinks);
fieldSetFlags()[20] = true;
}
if (isValidValue(fields()[21], other.markers)) {
- this.markers = (java.util.Map) data()
- .deepCopy(fields()[21].schema(), other.markers);
+ this.markers = (java.util.Map) data().deepCopy(fields()[21].schema(), other.markers);
fieldSetFlags()[21] = true;
}
if (isValidValue(fields()[22], other.metadata)) {
- this.metadata = (java.util.Map) data()
- .deepCopy(fields()[22].schema(), other.metadata);
+ this.metadata = (java.util.Map) data().deepCopy(fields()[22].schema(), other.metadata);
fieldSetFlags()[22] = true;
}
if (isValidValue(fields()[23], other.batchId)) {
- this.batchId = (java.lang.CharSequence) data().deepCopy(
- fields()[23].schema(), other.batchId);
+ this.batchId = (java.lang.CharSequence) data().deepCopy(fields()[23].schema(), other.batchId);
fieldSetFlags()[23] = true;
}
}
@@ -1364,21 +993,20 @@
public java.lang.CharSequence getBaseUrl() {
return baseUrl;
}
-
+
/** Sets the value of the 'baseUrl' field */
- public org.apache.nutch.storage.WebPage.Builder setBaseUrl(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setBaseUrl(java.lang.CharSequence value) {
validate(fields()[0], value);
this.baseUrl = value;
fieldSetFlags()[0] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'baseUrl' field has been set */
public boolean hasBaseUrl() {
return fieldSetFlags()[0];
}
-
+
/** Clears the value of the 'baseUrl' field */
public org.apache.nutch.storage.WebPage.Builder clearBaseUrl() {
baseUrl = null;
@@ -1385,196 +1013,193 @@
fieldSetFlags()[0] = false;
return this;
}
-
+
/** Gets the value of the 'status' field */
public java.lang.Integer getStatus() {
return status;
}
-
+
/** Sets the value of the 'status' field */
public org.apache.nutch.storage.WebPage.Builder setStatus(int value) {
validate(fields()[1], value);
this.status = value;
fieldSetFlags()[1] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'status' field has been set */
public boolean hasStatus() {
return fieldSetFlags()[1];
}
-
+
/** Clears the value of the 'status' field */
public org.apache.nutch.storage.WebPage.Builder clearStatus() {
fieldSetFlags()[1] = false;
return this;
}
-
+
/** Gets the value of the 'fetchTime' field */
public java.lang.Long getFetchTime() {
return fetchTime;
}
-
+
/** Sets the value of the 'fetchTime' field */
public org.apache.nutch.storage.WebPage.Builder setFetchTime(long value) {
validate(fields()[2], value);
this.fetchTime = value;
fieldSetFlags()[2] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'fetchTime' field has been set */
public boolean hasFetchTime() {
return fieldSetFlags()[2];
}
-
+
/** Clears the value of the 'fetchTime' field */
public org.apache.nutch.storage.WebPage.Builder clearFetchTime() {
fieldSetFlags()[2] = false;
return this;
}
-
+
/** Gets the value of the 'prevFetchTime' field */
public java.lang.Long getPrevFetchTime() {
return prevFetchTime;
}
-
+
/** Sets the value of the 'prevFetchTime' field */
public org.apache.nutch.storage.WebPage.Builder setPrevFetchTime(long value) {
validate(fields()[3], value);
this.prevFetchTime = value;
fieldSetFlags()[3] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'prevFetchTime' field has been set */
public boolean hasPrevFetchTime() {
return fieldSetFlags()[3];
}
-
+
/** Clears the value of the 'prevFetchTime' field */
public org.apache.nutch.storage.WebPage.Builder clearPrevFetchTime() {
fieldSetFlags()[3] = false;
return this;
}
-
+
/** Gets the value of the 'fetchInterval' field */
public java.lang.Integer getFetchInterval() {
return fetchInterval;
}
-
+
/** Sets the value of the 'fetchInterval' field */
public org.apache.nutch.storage.WebPage.Builder setFetchInterval(int value) {
validate(fields()[4], value);
this.fetchInterval = value;
fieldSetFlags()[4] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'fetchInterval' field has been set */
public boolean hasFetchInterval() {
return fieldSetFlags()[4];
}
-
+
/** Clears the value of the 'fetchInterval' field */
public org.apache.nutch.storage.WebPage.Builder clearFetchInterval() {
fieldSetFlags()[4] = false;
return this;
}
-
+
/** Gets the value of the 'retriesSinceFetch' field */
public java.lang.Integer getRetriesSinceFetch() {
return retriesSinceFetch;
}
-
+
/** Sets the value of the 'retriesSinceFetch' field */
- public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(
- int value) {
+ public org.apache.nutch.storage.WebPage.Builder setRetriesSinceFetch(int value) {
validate(fields()[5], value);
this.retriesSinceFetch = value;
fieldSetFlags()[5] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'retriesSinceFetch' field has been set */
public boolean hasRetriesSinceFetch() {
return fieldSetFlags()[5];
}
-
+
/** Clears the value of the 'retriesSinceFetch' field */
public org.apache.nutch.storage.WebPage.Builder clearRetriesSinceFetch() {
fieldSetFlags()[5] = false;
return this;
}
-
+
/** Gets the value of the 'modifiedTime' field */
public java.lang.Long getModifiedTime() {
return modifiedTime;
}
-
+
/** Sets the value of the 'modifiedTime' field */
public org.apache.nutch.storage.WebPage.Builder setModifiedTime(long value) {
validate(fields()[6], value);
this.modifiedTime = value;
fieldSetFlags()[6] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'modifiedTime' field has been set */
public boolean hasModifiedTime() {
return fieldSetFlags()[6];
}
-
+
/** Clears the value of the 'modifiedTime' field */
public org.apache.nutch.storage.WebPage.Builder clearModifiedTime() {
fieldSetFlags()[6] = false;
return this;
}
-
+
/** Gets the value of the 'prevModifiedTime' field */
public java.lang.Long getPrevModifiedTime() {
return prevModifiedTime;
}
-
+
/** Sets the value of the 'prevModifiedTime' field */
- public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(
- long value) {
+ public org.apache.nutch.storage.WebPage.Builder setPrevModifiedTime(long value) {
validate(fields()[7], value);
this.prevModifiedTime = value;
fieldSetFlags()[7] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'prevModifiedTime' field has been set */
public boolean hasPrevModifiedTime() {
return fieldSetFlags()[7];
}
-
+
/** Clears the value of the 'prevModifiedTime' field */
public org.apache.nutch.storage.WebPage.Builder clearPrevModifiedTime() {
fieldSetFlags()[7] = false;
return this;
}
-
+
/** Gets the value of the 'protocolStatus' field */
public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
return protocolStatus;
}
-
+
/** Sets the value of the 'protocolStatus' field */
- public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(
- org.apache.nutch.storage.ProtocolStatus value) {
+ public org.apache.nutch.storage.WebPage.Builder setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
validate(fields()[8], value);
this.protocolStatus = value;
fieldSetFlags()[8] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'protocolStatus' field has been set */
public boolean hasProtocolStatus() {
return fieldSetFlags()[8];
}
-
+
/** Clears the value of the 'protocolStatus' field */
public org.apache.nutch.storage.WebPage.Builder clearProtocolStatus() {
protocolStatus = null;
@@ -1581,26 +1206,25 @@
fieldSetFlags()[8] = false;
return this;
}
-
+
/** Gets the value of the 'content' field */
public java.nio.ByteBuffer getContent() {
return content;
}
-
+
/** Sets the value of the 'content' field */
- public org.apache.nutch.storage.WebPage.Builder setContent(
- java.nio.ByteBuffer value) {
+ public org.apache.nutch.storage.WebPage.Builder setContent(java.nio.ByteBuffer value) {
validate(fields()[9], value);
this.content = value;
fieldSetFlags()[9] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'content' field has been set */
public boolean hasContent() {
return fieldSetFlags()[9];
}
-
+
/** Clears the value of the 'content' field */
public org.apache.nutch.storage.WebPage.Builder clearContent() {
content = null;
@@ -1607,26 +1231,25 @@
fieldSetFlags()[9] = false;
return this;
}
-
+
/** Gets the value of the 'contentType' field */
public java.lang.CharSequence getContentType() {
return contentType;
}
-
+
/** Sets the value of the 'contentType' field */
- public org.apache.nutch.storage.WebPage.Builder setContentType(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setContentType(java.lang.CharSequence value) {
validate(fields()[10], value);
this.contentType = value;
fieldSetFlags()[10] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'contentType' field has been set */
public boolean hasContentType() {
return fieldSetFlags()[10];
}
-
+
/** Clears the value of the 'contentType' field */
public org.apache.nutch.storage.WebPage.Builder clearContentType() {
contentType = null;
@@ -1633,26 +1256,25 @@
fieldSetFlags()[10] = false;
return this;
}
-
+
/** Gets the value of the 'prevSignature' field */
public java.nio.ByteBuffer getPrevSignature() {
return prevSignature;
}
-
+
/** Sets the value of the 'prevSignature' field */
- public org.apache.nutch.storage.WebPage.Builder setPrevSignature(
- java.nio.ByteBuffer value) {
+ public org.apache.nutch.storage.WebPage.Builder setPrevSignature(java.nio.ByteBuffer value) {
validate(fields()[11], value);
this.prevSignature = value;
fieldSetFlags()[11] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'prevSignature' field has been set */
public boolean hasPrevSignature() {
return fieldSetFlags()[11];
}
-
+
/** Clears the value of the 'prevSignature' field */
public org.apache.nutch.storage.WebPage.Builder clearPrevSignature() {
prevSignature = null;
@@ -1659,26 +1281,25 @@
fieldSetFlags()[11] = false;
return this;
}
-
+
/** Gets the value of the 'signature' field */
public java.nio.ByteBuffer getSignature() {
return signature;
}
-
+
/** Sets the value of the 'signature' field */
- public org.apache.nutch.storage.WebPage.Builder setSignature(
- java.nio.ByteBuffer value) {
+ public org.apache.nutch.storage.WebPage.Builder setSignature(java.nio.ByteBuffer value) {
validate(fields()[12], value);
this.signature = value;
fieldSetFlags()[12] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'signature' field has been set */
public boolean hasSignature() {
return fieldSetFlags()[12];
}
-
+
/** Clears the value of the 'signature' field */
public org.apache.nutch.storage.WebPage.Builder clearSignature() {
signature = null;
@@ -1685,26 +1306,25 @@
fieldSetFlags()[12] = false;
return this;
}
-
+
/** Gets the value of the 'title' field */
public java.lang.CharSequence getTitle() {
return title;
}
-
+
/** Sets the value of the 'title' field */
- public org.apache.nutch.storage.WebPage.Builder setTitle(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setTitle(java.lang.CharSequence value) {
validate(fields()[13], value);
this.title = value;
fieldSetFlags()[13] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'title' field has been set */
public boolean hasTitle() {
return fieldSetFlags()[13];
}
-
+
/** Clears the value of the 'title' field */
public org.apache.nutch.storage.WebPage.Builder clearTitle() {
title = null;
@@ -1711,26 +1331,25 @@
fieldSetFlags()[13] = false;
return this;
}
-
+
/** Gets the value of the 'text' field */
public java.lang.CharSequence getText() {
return text;
}
-
+
/** Sets the value of the 'text' field */
- public org.apache.nutch.storage.WebPage.Builder setText(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setText(java.lang.CharSequence value) {
validate(fields()[14], value);
this.text = value;
fieldSetFlags()[14] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'text' field has been set */
public boolean hasText() {
return fieldSetFlags()[14];
}
-
+
/** Clears the value of the 'text' field */
public org.apache.nutch.storage.WebPage.Builder clearText() {
text = null;
@@ -1737,26 +1356,25 @@
fieldSetFlags()[14] = false;
return this;
}
-
+
/** Gets the value of the 'parseStatus' field */
public org.apache.nutch.storage.ParseStatus getParseStatus() {
return parseStatus;
}
-
+
/** Sets the value of the 'parseStatus' field */
- public org.apache.nutch.storage.WebPage.Builder setParseStatus(
- org.apache.nutch.storage.ParseStatus value) {
+ public org.apache.nutch.storage.WebPage.Builder setParseStatus(org.apache.nutch.storage.ParseStatus value) {
validate(fields()[15], value);
this.parseStatus = value;
fieldSetFlags()[15] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'parseStatus' field has been set */
public boolean hasParseStatus() {
return fieldSetFlags()[15];
}
-
+
/** Clears the value of the 'parseStatus' field */
public org.apache.nutch.storage.WebPage.Builder clearParseStatus() {
parseStatus = null;
@@ -1763,50 +1381,49 @@
fieldSetFlags()[15] = false;
return this;
}
-
+
/** Gets the value of the 'score' field */
public java.lang.Float getScore() {
return score;
}
-
+
/** Sets the value of the 'score' field */
public org.apache.nutch.storage.WebPage.Builder setScore(float value) {
validate(fields()[16], value);
this.score = value;
fieldSetFlags()[16] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'score' field has been set */
public boolean hasScore() {
return fieldSetFlags()[16];
}
-
+
/** Clears the value of the 'score' field */
public org.apache.nutch.storage.WebPage.Builder clearScore() {
fieldSetFlags()[16] = false;
return this;
}
-
+
/** Gets the value of the 'reprUrl' field */
public java.lang.CharSequence getReprUrl() {
return reprUrl;
}
-
+
/** Sets the value of the 'reprUrl' field */
- public org.apache.nutch.storage.WebPage.Builder setReprUrl(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setReprUrl(java.lang.CharSequence value) {
validate(fields()[17], value);
this.reprUrl = value;
fieldSetFlags()[17] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'reprUrl' field has been set */
public boolean hasReprUrl() {
return fieldSetFlags()[17];
}
-
+
/** Clears the value of the 'reprUrl' field */
public org.apache.nutch.storage.WebPage.Builder clearReprUrl() {
reprUrl = null;
@@ -1813,26 +1430,25 @@
fieldSetFlags()[17] = false;
return this;
}
-
+
/** Gets the value of the 'headers' field */
- public java.util.Map getHeaders() {
+ public java.util.Map getHeaders() {
return headers;
}
-
+
/** Sets the value of the 'headers' field */
- public org.apache.nutch.storage.WebPage.Builder setHeaders(
- java.util.Map value) {
+ public org.apache.nutch.storage.WebPage.Builder setHeaders(java.util.Map value) {
validate(fields()[18], value);
this.headers = value;
fieldSetFlags()[18] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'headers' field has been set */
public boolean hasHeaders() {
return fieldSetFlags()[18];
}
-
+
/** Clears the value of the 'headers' field */
public org.apache.nutch.storage.WebPage.Builder clearHeaders() {
headers = null;
@@ -1839,26 +1455,25 @@
fieldSetFlags()[18] = false;
return this;
}
-
+
/** Gets the value of the 'outlinks' field */
- public java.util.Map getOutlinks() {
+ public java.util.Map getOutlinks() {
return outlinks;
}
-
+
/** Sets the value of the 'outlinks' field */
- public org.apache.nutch.storage.WebPage.Builder setOutlinks(
- java.util.Map value) {
+ public org.apache.nutch.storage.WebPage.Builder setOutlinks(java.util.Map value) {
validate(fields()[19], value);
this.outlinks = value;
fieldSetFlags()[19] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'outlinks' field has been set */
public boolean hasOutlinks() {
return fieldSetFlags()[19];
}
-
+
/** Clears the value of the 'outlinks' field */
public org.apache.nutch.storage.WebPage.Builder clearOutlinks() {
outlinks = null;
@@ -1865,26 +1480,25 @@
fieldSetFlags()[19] = false;
return this;
}
-
+
/** Gets the value of the 'inlinks' field */
- public java.util.Map getInlinks() {
+ public java.util.Map getInlinks() {
return inlinks;
}
-
+
/** Sets the value of the 'inlinks' field */
- public org.apache.nutch.storage.WebPage.Builder setInlinks(
- java.util.Map value) {
+ public org.apache.nutch.storage.WebPage.Builder setInlinks(java.util.Map value) {
validate(fields()[20], value);
this.inlinks = value;
fieldSetFlags()[20] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'inlinks' field has been set */
public boolean hasInlinks() {
return fieldSetFlags()[20];
}
-
+
/** Clears the value of the 'inlinks' field */
public org.apache.nutch.storage.WebPage.Builder clearInlinks() {
inlinks = null;
@@ -1891,26 +1505,25 @@
fieldSetFlags()[20] = false;
return this;
}
-
+
/** Gets the value of the 'markers' field */
- public java.util.Map getMarkers() {
+ public java.util.Map getMarkers() {
return markers;
}
-
+
/** Sets the value of the 'markers' field */
- public org.apache.nutch.storage.WebPage.Builder setMarkers(
- java.util.Map value) {
+ public org.apache.nutch.storage.WebPage.Builder setMarkers(java.util.Map value) {
validate(fields()[21], value);
this.markers = value;
fieldSetFlags()[21] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'markers' field has been set */
public boolean hasMarkers() {
return fieldSetFlags()[21];
}
-
+
/** Clears the value of the 'markers' field */
public org.apache.nutch.storage.WebPage.Builder clearMarkers() {
markers = null;
@@ -1917,26 +1530,25 @@
fieldSetFlags()[21] = false;
return this;
}
-
+
/** Gets the value of the 'metadata' field */
- public java.util.Map getMetadata() {
+ public java.util.Map getMetadata() {
return metadata;
}
-
+
/** Sets the value of the 'metadata' field */
- public org.apache.nutch.storage.WebPage.Builder setMetadata(
- java.util.Map value) {
+ public org.apache.nutch.storage.WebPage.Builder setMetadata(java.util.Map value) {
validate(fields()[22], value);
this.metadata = value;
fieldSetFlags()[22] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'metadata' field has been set */
public boolean hasMetadata() {
return fieldSetFlags()[22];
}
-
+
/** Clears the value of the 'metadata' field */
public org.apache.nutch.storage.WebPage.Builder clearMetadata() {
metadata = null;
@@ -1943,26 +1555,25 @@
fieldSetFlags()[22] = false;
return this;
}
-
+
/** Gets the value of the 'batchId' field */
public java.lang.CharSequence getBatchId() {
return batchId;
}
-
+
/** Sets the value of the 'batchId' field */
- public org.apache.nutch.storage.WebPage.Builder setBatchId(
- java.lang.CharSequence value) {
+ public org.apache.nutch.storage.WebPage.Builder setBatchId(java.lang.CharSequence value) {
validate(fields()[23], value);
this.batchId = value;
fieldSetFlags()[23] = true;
- return this;
+ return this;
}
-
+
/** Checks whether the 'batchId' field has been set */
public boolean hasBatchId() {
return fieldSetFlags()[23];
}
-
+
/** Clears the value of the 'batchId' field */
public org.apache.nutch.storage.WebPage.Builder clearBatchId() {
batchId = null;
@@ -1969,64 +1580,35 @@
fieldSetFlags()[23] = false;
return this;
}
-
+
@Override
public WebPage build() {
try {
WebPage record = new WebPage();
- record.baseUrl = fieldSetFlags()[0] ? this.baseUrl
- : (java.lang.CharSequence) defaultValue(fields()[0]);
- record.status = fieldSetFlags()[1] ? this.status
- : (java.lang.Integer) defaultValue(fields()[1]);
- record.fetchTime = fieldSetFlags()[2] ? this.fetchTime
- : (java.lang.Long) defaultValue(fields()[2]);
- record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime
- : (java.lang.Long) defaultValue(fields()[3]);
- record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval
- : (java.lang.Integer) defaultValue(fields()[4]);
- record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch
- : (java.lang.Integer) defaultValue(fields()[5]);
- record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime
- : (java.lang.Long) defaultValue(fields()[6]);
- record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime
- : (java.lang.Long) defaultValue(fields()[7]);
- record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus
- : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
- record.content = fieldSetFlags()[9] ? this.content
- : (java.nio.ByteBuffer) defaultValue(fields()[9]);
- record.contentType = fieldSetFlags()[10] ? this.contentType
- : (java.lang.CharSequence) defaultValue(fields()[10]);
- record.prevSignature = fieldSetFlags()[11] ? this.prevSignature
- : (java.nio.ByteBuffer) defaultValue(fields()[11]);
- record.signature = fieldSetFlags()[12] ? this.signature
- : (java.nio.ByteBuffer) defaultValue(fields()[12]);
- record.title = fieldSetFlags()[13] ? this.title
- : (java.lang.CharSequence) defaultValue(fields()[13]);
- record.text = fieldSetFlags()[14] ? this.text
- : (java.lang.CharSequence) defaultValue(fields()[14]);
- record.parseStatus = fieldSetFlags()[15] ? this.parseStatus
- : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
- record.score = fieldSetFlags()[16] ? this.score
- : (java.lang.Float) defaultValue(fields()[16]);
- record.reprUrl = fieldSetFlags()[17] ? this.reprUrl
- : (java.lang.CharSequence) defaultValue(fields()[17]);
- record.headers = fieldSetFlags()[18] ? this.headers
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[18]));
- record.outlinks = fieldSetFlags()[19] ? this.outlinks
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[19]));
- record.inlinks = fieldSetFlags()[20] ? this.inlinks
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[20]));
- record.markers = fieldSetFlags()[21] ? this.markers
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[21]));
- record.metadata = fieldSetFlags()[22] ? this.metadata
- : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper(
- (java.util.Map) defaultValue(fields()[22]));
- record.batchId = fieldSetFlags()[23] ? this.batchId
- : (java.lang.CharSequence) defaultValue(fields()[23]);
+ record.baseUrl = fieldSetFlags()[0] ? this.baseUrl : (java.lang.CharSequence) defaultValue(fields()[0]);
+ record.status = fieldSetFlags()[1] ? this.status : (java.lang.Integer) defaultValue(fields()[1]);
+ record.fetchTime = fieldSetFlags()[2] ? this.fetchTime : (java.lang.Long) defaultValue(fields()[2]);
+ record.prevFetchTime = fieldSetFlags()[3] ? this.prevFetchTime : (java.lang.Long) defaultValue(fields()[3]);
+ record.fetchInterval = fieldSetFlags()[4] ? this.fetchInterval : (java.lang.Integer) defaultValue(fields()[4]);
+ record.retriesSinceFetch = fieldSetFlags()[5] ? this.retriesSinceFetch : (java.lang.Integer) defaultValue(fields()[5]);
+ record.modifiedTime = fieldSetFlags()[6] ? this.modifiedTime : (java.lang.Long) defaultValue(fields()[6]);
+ record.prevModifiedTime = fieldSetFlags()[7] ? this.prevModifiedTime : (java.lang.Long) defaultValue(fields()[7]);
+ record.protocolStatus = fieldSetFlags()[8] ? this.protocolStatus : (org.apache.nutch.storage.ProtocolStatus) defaultValue(fields()[8]);
+ record.content = fieldSetFlags()[9] ? this.content : (java.nio.ByteBuffer) defaultValue(fields()[9]);
+ record.contentType = fieldSetFlags()[10] ? this.contentType : (java.lang.CharSequence) defaultValue(fields()[10]);
+ record.prevSignature = fieldSetFlags()[11] ? this.prevSignature : (java.nio.ByteBuffer) defaultValue(fields()[11]);
+ record.signature = fieldSetFlags()[12] ? this.signature : (java.nio.ByteBuffer) defaultValue(fields()[12]);
+ record.title = fieldSetFlags()[13] ? this.title : (java.lang.CharSequence) defaultValue(fields()[13]);
+ record.text = fieldSetFlags()[14] ? this.text : (java.lang.CharSequence) defaultValue(fields()[14]);
+ record.parseStatus = fieldSetFlags()[15] ? this.parseStatus : (org.apache.nutch.storage.ParseStatus) defaultValue(fields()[15]);
+ record.score = fieldSetFlags()[16] ? this.score : (java.lang.Float) defaultValue(fields()[16]);
+ record.reprUrl = fieldSetFlags()[17] ? this.reprUrl : (java.lang.CharSequence) defaultValue(fields()[17]);
+ record.headers = fieldSetFlags()[18] ? this.headers : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[18]));
+ record.outlinks = fieldSetFlags()[19] ? this.outlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[19]));
+ record.inlinks = fieldSetFlags()[20] ? this.inlinks : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[20]));
+ record.markers = fieldSetFlags()[21] ? this.markers : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[21]));
+ record.metadata = fieldSetFlags()[22] ? this.metadata : (java.util.Map) new org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)defaultValue(fields()[22]));
+ record.batchId = fieldSetFlags()[23] ? this.batchId : (java.lang.CharSequence) defaultValue(fields()[23]);
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
@@ -2033,847 +1615,575 @@
}
}
}
-
- public WebPage.Tombstone getTombstone() {
- return TOMBSTONE;
+
+ public WebPage.Tombstone getTombstone(){
+ return TOMBSTONE;
}
- public WebPage newInstance() {
+ public WebPage newInstance(){
return newBuilder().build();
}
private static final Tombstone TOMBSTONE = new Tombstone();
-
- public static final class Tombstone extends WebPage implements
- org.apache.gora.persistency.Tombstone {
-
- private Tombstone() {
- }
-
- /**
- * Gets the value of the 'baseUrl' field. The original associated with this
- * WebPage.
- */
- public java.lang.CharSequence getBaseUrl() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'baseUrl' field. The original associated with this
- * WebPage. * @param value the value to set.
- */
- public void setBaseUrl(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * original associated with this WebPage. * @param value the value to set.
- */
- public boolean isBaseUrlDirty(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'status' field. A crawl status associated with the
- * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
- * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
- * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
- * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
- * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
- * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
- * not modified
- */
- public java.lang.Integer getStatus() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'status' field. A crawl status associated with the
- * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
- * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage
- * no longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to
- * other page, STATUS_REDIR_PERM - WebPage permanently redirects to other
- * page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g.
- * transient errors and STATUS_NOTMODIFIED - fetching successful - page is
- * not modified * @param value the value to set.
- */
- public void setStatus(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'status' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * crawl status associated with the WebPage, can be of value
- * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
- * was successfully fetched, STATUS_GONE - WebPage no longer exists,
- * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
- * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
- * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
- * errors and STATUS_NOTMODIFIED - fetching successful - page is not
- * modified * @param value the value to set.
- */
- public boolean isStatusDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'fetchTime' field. The system time in milliseconds
- * for when the page was fetched.
- */
- public java.lang.Long getFetchTime() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'fetchTime' field. The system time in milliseconds
- * for when the page was fetched. * @param value the value to set.
- */
- public void setFetchTime(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * system time in milliseconds for when the page was fetched. * @param value
- * the value to set.
- */
- public boolean isFetchTimeDirty(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'prevFetchTime' field. The system time in
- * milliseconds for when the page was last fetched if it was previously
- * fetched which can be used to calculate time delta within a fetching
- * schedule implementation
- */
- public java.lang.Long getPrevFetchTime() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'prevFetchTime' field. The system time in
- * milliseconds for when the page was last fetched if it was previously
- * fetched which can be used to calculate time delta within a fetching
- * schedule implementation * @param value the value to set.
- */
- public void setPrevFetchTime(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * system time in milliseconds for when the page was last fetched if it was
- * previously fetched which can be used to calculate time delta within a
- * fetching schedule implementation * @param value the value to set.
- */
- public boolean isPrevFetchTimeDirty(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'fetchInterval' field. The default number of
- * seconds between re-fetches of a page. The default is considered as 30
- * days unless a custom fetch schedle is implemented.
- */
- public java.lang.Integer getFetchInterval() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'fetchInterval' field. The default number of
- * seconds between re-fetches of a page. The default is considered as 30
- * days unless a custom fetch schedle is implemented. * @param value the
- * value to set.
- */
- public void setFetchInterval(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * default number of seconds between re-fetches of a page. The default is
- * considered as 30 days unless a custom fetch schedle is implemented. * @param
- * value the value to set.
- */
- public boolean isFetchIntervalDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'retriesSinceFetch' field. The number of retried
- * attempts at fetching the WebPage since it was last successfully fetched.
- */
- public java.lang.Integer getRetriesSinceFetch() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'retriesSinceFetch' field. The number of retried
- * attempts at fetching the WebPage since it was last successfully fetched.
- * * @param value the value to set.
- */
- public void setRetriesSinceFetch(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'retriesSinceFetch' field. A field is
- * dirty if it represents a change that has not yet been written to the
- * database. The number of retried attempts at fetching the WebPage since it
- * was last successfully fetched. * @param value the value to set.
- */
- public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'modifiedTime' field. The system time in
- * milliseconds for when this WebPage was modified by the WebPage author, if
- * this is not available we default to the server for this information. This
- * is important to understand the changing nature of the WebPage.
- */
- public java.lang.Long getModifiedTime() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'modifiedTime' field. The system time in
- * milliseconds for when this WebPage was modified by the WebPage author, if
- * this is not available we default to the server for this information. This
- * is important to understand the changing nature of the WebPage. * @param
- * value the value to set.
- */
- public void setModifiedTime(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'modifiedTime' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * system time in milliseconds for when this WebPage was modified by the
- * WebPage author, if this is not available we default to the server for
- * this information. This is important to understand the changing nature of
- * the WebPage. * @param value the value to set.
- */
- public boolean isModifiedTimeDirty(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'prevModifiedTime' field. The system time in
- * milliseconds for when this WebPage was previously modified by the author,
- * if this is not available then we default to the server for this
- * information. This is important to understand the changing nature of a
- * WebPage.
- */
- public java.lang.Long getPrevModifiedTime() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'prevModifiedTime' field. The system time in
- * milliseconds for when this WebPage was previously modified by the author,
- * if this is not available then we default to the server for this
- * information. This is important to understand the changing nature of a
- * WebPage. * @param value the value to set.
- */
- public void setPrevModifiedTime(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
- * if it represents a change that has not yet been written to the database.
- * The system time in milliseconds for when this WebPage was previously
- * modified by the author, if this is not available then we default to the
- * server for this information. This is important to understand the changing
- * nature of a WebPage. * @param value the value to set.
- */
- public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'protocolStatus' field.
- */
- public org.apache.nutch.storage.ProtocolStatus getProtocolStatus() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'protocolStatus' field.
- *
- * @param value
- * the value to set.
- */
- public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'protocolStatus' field. A field is dirty
- * if it represents a change that has not yet been written to the database.
- *
- * @param value
- * the value to set.
- */
- public boolean isProtocolStatusDirty(
- org.apache.nutch.storage.ProtocolStatus value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'content' field. The entire raw document content
- * e.g. raw XHTML
- */
- public java.nio.ByteBuffer getContent() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'content' field. The entire raw document content
- * e.g. raw XHTML * @param value the value to set.
- */
- public void setContent(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'content' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * entire raw document content e.g. raw XHTML * @param value the value to
- * set.
- */
- public boolean isContentDirty(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'contentType' field. The type of the content
- * contained within the document itself. ContentType is an alias for
- * MimeType. Historically, this parameter was only called MimeType, but
- * since this is actually the value included in the HTTP Content-Type
- * header, it can also include the character set encoding, which makes it
- * more than just a MimeType specification. If MimeType is specified e.g.
- * not None, that value is used. Otherwise, ContentType is used. If neither
- * is given, the DEFAULT_CONTENT_TYPE setting is used.
- */
- public java.lang.CharSequence getContentType() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'contentType' field. The type of the content
- * contained within the document itself. ContentType is an alias for
- * MimeType. Historically, this parameter was only called MimeType, but
- * since this is actually the value included in the HTTP Content-Type
- * header, it can also include the character set encoding, which makes it
- * more than just a MimeType specification. If MimeType is specified e.g.
- * not None, that value is used. Otherwise, ContentType is used. If neither
- * is given, the DEFAULT_CONTENT_TYPE setting is used. * @param value the
- * value to set.
- */
- public void setContentType(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'contentType' field. A field is dirty if
- * it represents a change that has not yet been written to the database. The
- * type of the content contained within the document itself. ContentType is
- * an alias for MimeType. Historically, this parameter was only called
- * MimeType, but since this is actually the value included in the HTTP
- * Content-Type header, it can also include the character set encoding,
- * which makes it more than just a MimeType specification. If MimeType is
- * specified e.g. not None, that value is used. Otherwise, ContentType is
- * used. If neither is given, the DEFAULT_CONTENT_TYPE setting is used. * @param
- * value the value to set.
- */
- public boolean isContentTypeDirty(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'prevSignature' field. An implementation of a
- * WebPage's previous signature from which it can be identified and
- * referenced at any point in time. This can be used to uniquely identify
- * WebPage deltas based on page fingerprints.
- */
- public java.nio.ByteBuffer getPrevSignature() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'prevSignature' field. An implementation of a
- * WebPage's previous signature from which it can be identified and
- * referenced at any point in time. This can be used to uniquely identify
- * WebPage deltas based on page fingerprints. * @param value the value to
- * set.
- */
- public void setPrevSignature(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'prevSignature' field. A field is dirty if
- * it represents a change that has not yet been written to the database. An
- * implementation of a WebPage's previous signature from which it can be
- * identified and referenced at any point in time. This can be used to
- * uniquely identify WebPage deltas based on page fingerprints. * @param
- * value the value to set.
- */
- public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'signature' field. An implementation of a WebPage's
- * signature from which it can be identified and referenced at any point in
- * time. This is essentially the WebPage's fingerprint represnting its state
- * for any point in time.
- */
- public java.nio.ByteBuffer getSignature() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'signature' field. An implementation of a WebPage's
- * signature from which it can be identified and referenced at any point in
- * time. This is essentially the WebPage's fingerprint represnting its state
- * for any point in time. * @param value the value to set.
- */
- public void setSignature(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'signature' field. A field is dirty if it
- * represents a change that has not yet been written to the database. An
- * implementation of a WebPage's signature from which it can be identified
- * and referenced at any point in time. This is essentially the WebPage's
- * fingerprint represnting its state for any point in time. * @param value
- * the value to set.
- */
- public boolean isSignatureDirty(java.nio.ByteBuffer value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'title' field. The title of the WebPage.
- */
- public java.lang.CharSequence getTitle() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'title' field. The title of the WebPage. * @param
- * value the value to set.
- */
- public void setTitle(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'title' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * title of the WebPage. * @param value the value to set.
- */
- public boolean isTitleDirty(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'text' field. The textual content of the WebPage
- * devoid from native markup.
- */
- public java.lang.CharSequence getText() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'text' field. The textual content of the WebPage
- * devoid from native markup. * @param value the value to set.
- */
- public void setText(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'text' field. A field is dirty if it
- * represents a change that has not yet been written to the database. The
- * textual content of the WebPage devoid from native markup. * @param value
- * the value to set.
- */
- public boolean isTextDirty(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'parseStatus' field.
- */
- public org.apache.nutch.storage.ParseStatus getParseStatus() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'parseStatus' field.
- *
- * @param value
- * the value to set.
- */
- public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'parseStatus' field. A field is dirty if
- * it represents a change that has not yet been written to the database.
- *
- * @param value
- * the value to set.
- */
- public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'score' field. A score used to determine a
- * WebPage's relevance within the web graph it is part of. This score may
- * change over time based on graph characteristics.
- */
- public java.lang.Float getScore() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'score' field. A score used to determine a
- * WebPage's relevance within the web graph it is part of. This score may
- * change over time based on graph characteristics. * @param value the value
- * to set.
- */
- public void setScore(java.lang.Float value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'score' field. A field is dirty if it
- * represents a change that has not yet been written to the database. A
- * score used to determine a WebPage's relevance within the web graph it is
- * part of. This score may change over time based on graph characteristics.
- * * @param value the value to set.
- */
- public boolean isScoreDirty(java.lang.Float value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'reprUrl' field. In the case where we are given two
- * urls, a source and a destination of a redirect, we should determine and
- * persist the representative url. The logic used to determine this is based
- * largely on Yahoo!'s Slurp Crawler
- */
- public java.lang.CharSequence getReprUrl() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'reprUrl' field. In the case where we are given two
- * urls, a source and a destination of a redirect, we should determine and
- * persist the representative url. The logic used to determine this is based
- * largely on Yahoo!'s Slurp Crawler * @param value the value to set.
- */
- public void setReprUrl(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
- * represents a change that has not yet been written to the database. In the
- * case where we are given two urls, a source and a destination of a
- * redirect, we should determine and persist the representative url. The
- * logic used to determine this is based largely on Yahoo!'s Slurp Crawler * @param
- * value the value to set.
- */
- public boolean isReprUrlDirty(java.lang.CharSequence value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'headers' field. Header information returned from
- * the web server used to server the content which is subsequently fetched
- * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
- * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
- * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
- */
- public java.util.Map getHeaders() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'headers' field. Header information returned from
- * the web server used to server the content which is subsequently fetched
- * from. This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
- * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
- * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the
- * value to set.
- */
- public void setHeaders(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'headers' field. A field is dirty if it
- * represents a change that has not yet been written to the database. Header
- * information returned from the web server used to server the content which
- * is subsequently fetched from. This includes keys such as
- * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
- * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
- * LAST_MODIFIED and LOCATION. * @param value the value to set.
- */
- public boolean isHeadersDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
- * outside of the current domain.
- */
- public java.util.Map getOutlinks() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'outlinks' field. Embedded hyperlinks which direct
- * outside of the current domain. * @param value the value to set.
- */
- public void setOutlinks(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'outlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Embedded hyperlinks which direct outside of the current domain. * @param
- * value the value to set.
- */
- public boolean isOutlinksDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
- * pages within the current domain.
- */
- public java.util.Map getInlinks() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'inlinks' field. Embedded hyperlinks which link to
- * pages within the current domain. * @param value the value to set.
- */
- public void setInlinks(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'inlinks' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Embedded hyperlinks which link to pages within the current domain. * @param
- * value the value to set.
- */
- public boolean isInlinksDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'markers' field. Markers flags which represent user
- * and machine decisions which have affected influenced a WebPage's current
- * state. Markers can be system specific and user machine driven in nature.
- * They are assigned to a WebPage on a job-by-job basis and thier values
- * indicative of what actions should be associated with a WebPage.
- */
- public java.util.Map getMarkers() {
- throw new java.lang.UnsupportedOperationException(
- "Get is not supported on tombstones");
- }
-
- /**
- * Sets the value of the 'markers' field. Markers flags which represent user
- * and machine decisions which have affected influenced a WebPage's current
- * state. Markers can be system specific and user machine driven in nature.
- * They are assigned to a WebPage on a job-by-job basis and thier values
- * indicative of what actions should be associated with a WebPage. * @param
- * value the value to set.
- */
- public void setMarkers(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "Set is not supported on tombstones");
- }
-
- /**
- * Checks the dirty status of the 'markers' field. A field is dirty if it
- * represents a change that has not yet been written to the database.
- * Markers flags which represent user and machine decisions which have
- * affected influenced a WebPage's current state. Markers can be system
- * specific and user machine driven in nature. They are assigned to a
- * WebPage on a job-by-job basis and thier values indicative of what actions
- * should be associated with a WebPage. * @param value the value to set.
- */
- public boolean isMarkersDirty(
- java.util.Map value) {
- throw new java.lang.UnsupportedOperationException(
- "IsDirty is not supported on tombstones");
- }
-
- /**
- * Gets the value of the 'metadata' field. A multi-valued metadata container
- * used for storing everything from structured WebPage characterists, to
- * ad-hoc extraction and metadata augmentation for any given WebPage.
- */
- public java.util.Map