diff --git a/build.xml b/build.xml
index 7dfd15e..c36d7ac 100644
--- a/build.xml
+++ b/build.xml
@@ -154,6 +154,7 @@
+
@@ -201,6 +202,7 @@
+
@@ -566,6 +568,7 @@
+
@@ -613,6 +616,7 @@
+
@@ -928,7 +932,8 @@
-
+
+
diff --git a/conf/log4j.properties b/conf/log4j.properties
index 710a095..eb828a5 100644
--- a/conf/log4j.properties
+++ b/conf/log4j.properties
@@ -32,12 +32,9 @@ log4j.logger.org.apache.nutch.crawl.DbUpdaterJob=INFO,cmdstdout
log4j.logger.org.apache.nutch.host.HostDbUpdateJob=INFO,cmdstdout
log4j.logger.org.apache.nutch.fetcher.FetcherJob=INFO,cmdstdout
log4j.logger.org.apache.nutch.parse.ParserJob=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.IndexerJob=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.solr.SolrIndexerJob=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.solr.SolrWriter=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.IndexingJob=INFO,cmdstdout
log4j.logger.org.apache.nutch.indexer.DeleteDuplicates=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.solr.SolrClean=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.IndexCleanerJob=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.CleaningJob=INFO,cmdstdout
log4j.logger.org.apache.nutch.crawl.WebTableReader=INFO,cmdstdout
log4j.logger.org.apache.nutch.host.HostDbReader=INFO,cmdstdout
log4j.logger.org.apache.nutch.parse.ParserChecker=INFO,cmdstdout
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 9803eb8..39c0930 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -859,7 +859,7 @@
plugin.includes
- protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor)|urlnormalizer-(pass|regex|basic)|scoring-opic
+ protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor)|indexer-solr|urlnormalizer-(pass|regex|basic)|scoring-opic
Regular expression naming plugin directory names to
include. Any plugin not matching this expression is excluded.
In any case you need at least include the nutch-extensionpoints plugin. By
diff --git a/conf/schema-solr4.xml b/conf/schema-solr4.xml
deleted file mode 100644
index 4d9ba9c..0000000
--- a/conf/schema-solr4.xml
+++ /dev/null
@@ -1,367 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
- text
-
-
-
-
-
-
-
-
-
-
-
diff --git a/conf/schema.xml b/conf/schema.xml
index f3de41c..4d9ba9c 100644
--- a/conf/schema.xml
+++ b/conf/schema.xml
@@ -1,124 +1,367 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
- content
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+ text
+
+
+
+
+
+
+
+
+
+
diff --git a/default.properties b/default.properties
index 843d85f..1782839 100644
--- a/default.properties
+++ b/default.properties
@@ -142,6 +142,11 @@ plugins.index=\
org.apache.nutch.indexer.subcollection*:\
org.apache.nutch.indexer.tld*
+# Indexing Backend Plugins
+#
+plugins.indexer=\
+ org.apache.nutch.indexwriter.solr*
+
#
# Misc. Plugins
#
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index c722702..4e908d0 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -35,7 +35,7 @@
-
@@ -44,8 +44,8 @@
conf="*->default" />
-
+
@@ -111,9 +111,9 @@
-->
-
+
+
+
+
+
+
diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml
new file mode 100644
index 0000000..259e7c3
--- /dev/null
+++ b/src/plugin/indexer-solr/ivy.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+ Apache Nutch
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/plugin/indexer-solr/plugin.xml b/src/plugin/indexer-solr/plugin.xml
new file mode 100644
index 0000000..b3a263e
--- /dev/null
+++ b/src/plugin/indexer-solr/plugin.xml
@@ -0,0 +1,54 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
new file mode 100644
index 0000000..28d3c6c
--- /dev/null
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+/** Configuration property names and document field names used by the Solr indexing plugin. */
+public interface SolrConstants {
+
+  /** Prefix shared by every Solr configuration property below. */
+  String SOLR_PREFIX = "solr.";
+  /** Property holding the URL of the Solr server. */
+  String SERVER_URL = SOLR_PREFIX + "server.url";
+  /** Property holding the number of documents buffered per batch. */
+  String COMMIT_SIZE = SOLR_PREFIX + "commit.size";
+  /** Property {@code solr.commit.index}; presumably toggles committing - TODO confirm. */
+  String COMMIT_INDEX = SOLR_PREFIX + "commit.index";
+  /** Property naming the field mapping file. */
+  String MAPPING_FILE = SOLR_PREFIX + "mapping.file";
+  /** Boolean property that switches authentication on. */
+  String USE_AUTH = SOLR_PREFIX + "auth";
+  /** Property holding the user name for authentication. */
+  String USERNAME = SOLR_PREFIX + "auth.username";
+  /** Property holding the password for authentication. */
+  String PASSWORD = SOLR_PREFIX + "auth.password";
+
+  // Document field names.
+  String ID_FIELD = "id";
+  String URL_FIELD = "url";
+  String BOOST_FIELD = "boost";
+  String TIMESTAMP_FIELD = "tstamp";
+  String DIGEST_FIELD = "digest";
+
+}
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
new file mode 100644
index 0000000..d09203e
--- /dev/null
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.common.SolrInputDocument;
+
+/** {@link IndexWriter} that buffers documents and sends them to Solr in batches. */
+public class SolrIndexWriter implements IndexWriter {
+  public static final Logger LOG = LoggerFactory.getLogger(SolrIndexWriter.class);
+
+  private HttpSolrServer solr;
+  private SolrMappingReader solrMapping;
+  private Configuration config;
+
+  /** Documents waiting to be sent to Solr in the next batch. */
+  private final List<SolrInputDocument> inputDocs = new ArrayList<SolrInputDocument>();
+
+  private int batchSize;
+  private int numDeletes = 0;
+  // NOTE(review): nothing in this class sets this to true, so delete(String) does nothing.
+  private boolean delete = false;
+
+  protected static long documentCount = 0;
+
+  /** Creates the Solr client and loads the batch size and the field mapping. */
+  @Override
+  public void open(Configuration conf) throws IOException {
+    solr = SolrUtils.getHttpSolrServer(conf);
+    batchSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
+    solrMapping = SolrMappingReader.getInstance(conf);
+  }
+
+  /** Buffers the document for Solr and flushes once the batch size is reached. */
+  @Override
+  public void write(NutchDocument doc) throws IOException {
+    final SolrInputDocument inputDoc = new SolrInputDocument();
+    for (final Entry<String, List<String>> e : doc) {
+      final String key = e.getKey();
+      final String mapped = solrMapping.mapKey(key);
+      final String copy = solrMapping.mapCopyKey(key);
+      final boolean strip = key.equals("content") || key.equals("title");
+      for (final String val : e.getValue()) {
+        final Object val2 = strip ? SolrUtils.stripNonCharCodepoints(val) : val;
+        inputDoc.addField(mapped, val2);
+        // Compare by value; identity comparison (!=) is unreliable for strings.
+        if (!copy.equals(key)) {
+          inputDoc.addField(copy, val2);
+        }
+      }
+    }
+    inputDoc.setDocumentBoost(doc.getScore());
+    inputDocs.add(inputDoc);
+    documentCount++;
+    if (inputDocs.size() >= batchSize) {
+      flush();
+    }
+  }
+
+  /** Sends any buffered documents to Solr and empties the buffer. */
+  private void flush() throws IOException {
+    if (!inputDocs.isEmpty()) {
+      try {
+        LOG.info("Adding " + inputDocs.size() + " documents");
+        solr.add(inputDocs);
+      } catch (final SolrServerException e) {
+        throw makeIOException(e);
+      }
+      inputDocs.clear();
+    }
+  }
+
+  /** Flushes buffered documents and logs how many deletions were issued. */
+  @Override
+  public void close() throws IOException {
+    flush();
+    // Report deletions whether or not documents were flushed above.
+    if (numDeletes > 0) {
+      LOG.info("Deleted " + numDeletes + " documents");
+    }
+  }
+
+  @Override
+  public Configuration getConf() {
+    return config;
+  }
+
+  /** Stores the configuration; throws RuntimeException when the Solr URL is unset. */
+  @Override
+  public void setConf(Configuration conf) {
+    config = conf;
+    if (conf.get(SolrConstants.SERVER_URL) == null) {
+      final String message = "Missing SOLR URL. Should be set via -D "
+          + SolrConstants.SERVER_URL + "\n" + describe();
+      LOG.error(message);
+      throw new RuntimeException(message);
+    }
+  }
+
+  /** Deletes the document with the given id, but only when deletion is enabled. */
+  @Override
+  public void delete(String key) throws IOException {
+    if (delete) {
+      try {
+        solr.deleteById(key);
+        numDeletes++;
+      } catch (final SolrServerException e) {
+        throw makeIOException(e);
+      }
+    }
+  }
+
+  /** Updating is implemented as writing the document again. */
+  @Override
+  public void update(NutchDocument doc) throws IOException {
+    write(doc);
+  }
+
+  /** Commits on the Solr server and logs the running document total. */
+  @Override
+  public void commit() throws IOException {
+    try {
+      solr.commit();
+      LOG.info("Total " + documentCount + (documentCount > 1 ? " documents are " : " document is ") + "added.");
+    } catch (SolrServerException e) {
+      throw makeIOException(e);
+    }
+  }
+
+  /** Wraps a Solr exception in an {@link IOException}, keeping it as the cause. */
+  public static IOException makeIOException(SolrServerException e) {
+    return new IOException(e);
+  }
+
+  /** Lists the configuration properties understood by this writer. */
+  @Override
+  public String describe() {
+    StringBuilder sb = new StringBuilder("SOLRIndexWriter\n");
+    sb.append("\t").append(SolrConstants.SERVER_URL)
+        .append(" : URL of the SOLR instance (mandatory)\n");
+    sb.append("\t").append(SolrConstants.COMMIT_SIZE)
+        .append(" : buffer size when sending to SOLR (default 1000)\n");
+    sb.append("\t").append(SolrConstants.MAPPING_FILE)
+        .append(" : name of the mapping file for fields (default solrindex-mapping.xml)\n");
+    sb.append("\t").append(SolrConstants.USE_AUTH)
+        .append(" : use authentication (default false)\n");
+    sb.append("\t").append(SolrConstants.USERNAME)
+        .append(" : username for authentication\n");
+    sb.append("\t").append(SolrConstants.PASSWORD)
+        .append(" : password for authentication\n");
+    return sb.toString();
+  }
+}
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java
new file mode 100644
index 0000000..139011a
--- /dev/null
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.ObjectCache;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/** Loads the Solr index mapping file: field renames, copy fields and the unique key. */
+public class SolrMappingReader {
+  public static Logger LOG = LoggerFactory.getLogger(SolrMappingReader.class);
+
+  private Configuration conf;
+
+  /** Source field name to destination field name, from {@code field} elements. */
+  private Map<String, String> keyMap = new HashMap<String, String>();
+  /** Source field name to copy destination, from {@code copyField} elements. */
+  private Map<String, String> copyMap = new HashMap<String, String>();
+  private String uniqueKey = "id";
+
+  /** Returns the reader cached for {@code conf}, creating and caching it on first use. */
+  public static synchronized SolrMappingReader getInstance(Configuration conf) {
+    ObjectCache cache = ObjectCache.get(conf);
+    SolrMappingReader instance = (SolrMappingReader)cache.getObject(SolrMappingReader.class.getName());
+    if (instance == null) {
+      instance = new SolrMappingReader(conf);
+      cache.setObject(SolrMappingReader.class.getName(), instance);
+    }
+    return instance;
+  }
+
+  /** Creates a reader and immediately parses the mapping file named in {@code conf}. */
+  protected SolrMappingReader(Configuration conf) {
+    this.conf = conf;
+    parseMapping();
+  }
+
+  /** Parses the mapping file; on failure a warning is logged and defaults are kept. */
+  private void parseMapping() {
+    String mappingFile = conf.get(SolrConstants.MAPPING_FILE, "solrindex-mapping.xml");
+    InputStream ssInputStream = conf.getConfResourceAsInputStream(mappingFile);
+    if (ssInputStream == null) {
+      LOG.warn("Solr index mapping file not found: " + mappingFile);
+      return;
+    }
+    try {
+      DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+      Document document = builder.parse(new InputSource(ssInputStream));
+      Element rootElement = document.getDocumentElement();
+      readPairs(rootElement.getElementsByTagName("field"), keyMap);
+      readPairs(rootElement.getElementsByTagName("copyField"), copyMap);
+      NodeList uniqueKeyItem = rootElement.getElementsByTagName("uniqueKey");
+      if (uniqueKeyItem.getLength() > 1) {
+        LOG.warn("More than one unique key definitions found in solr index mapping, using default 'id'");
+        uniqueKey = "id";
+      } else if (uniqueKeyItem.getLength() == 0) {
+        LOG.warn("No unique key definition found in solr index mapping using, default 'id'");
+      } else if (uniqueKeyItem.item(0).getFirstChild() != null) {
+        // An empty <uniqueKey/> has no child node; keep the default instead of failing.
+        uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
+      }
+    } catch (SAXException e) {
+      LOG.warn("Could not parse " + mappingFile, e);
+    } catch (IOException e) {
+      LOG.warn("Could not read " + mappingFile, e);
+    } catch (ParserConfigurationException e) {
+      LOG.warn("Could not create an XML parser", e);
+    } finally {
+      try {
+        ssInputStream.close();
+      } catch (IOException ignored) {
+        // Closing a stream that was only read from; nothing to recover.
+      }
+    }
+  }
+
+  /** Stores the source/dest attribute pair of every element of {@code nodes} in {@code target}. */
+  private static void readPairs(NodeList nodes, Map<String, String> target) {
+    for (int i = 0; i < nodes.getLength(); i++) {
+      Element element = (Element) nodes.item(i);
+      LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
+      target.put(element.getAttribute("source"), element.getAttribute("dest"));
+    }
+  }
+
+  public Map<String, String> getKeyMap() {
+    return keyMap;
+  }
+
+  public Map<String, String> getCopyMap() {
+    return copyMap;
+  }
+
+  /** Unique key field name; "id" unless the mapping file defines exactly one. */
+  public String getUniqueKey() {
+    return uniqueKey;
+  }
+
+  /** Returns the copy-field destination for {@code key}, or {@code key} itself if none. */
+  public String hasCopy(String key) {
+    return copyMap.containsKey(key) ? copyMap.get(key) : key;
+  }
+
+  /** Returns the mapped field name for {@code key}, or {@code key} itself if unmapped. */
+  public String mapKey(String key) throws IOException {
+    return keyMap.containsKey(key) ? keyMap.get(key) : key;
+  }
+
+  /** Same lookup as {@link #hasCopy(String)}, kept for callers of this signature. */
+  public String mapCopyKey(String key) throws IOException {
+    return copyMap.containsKey(key) ? copyMap.get(key) : key;
+  }
+}
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
new file mode 100644
index 0000000..a276d47
--- /dev/null
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
@@ -0,0 +1,62 @@
+package org.apache.nutch.indexwriter.solr;
+
+
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.params.HttpClientParams;
+import org.apache.http.params.HttpParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+
+import java.net.MalformedURLException;
+
+/** Helpers for creating the Solr client and for sanitizing field values. */
+public class SolrUtils {
+  public static Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
+
+  /**
+   * Builds an {@link HttpSolrServer} for the URL in {@link SolrConstants#SERVER_URL},
+   * registering credentials when {@link SolrConstants#USE_AUTH} is true.
+   */
+  public static HttpSolrServer getHttpSolrServer(Configuration job) throws MalformedURLException {
+    DefaultHttpClient client = new DefaultHttpClient();
+
+    // Check for username/password
+    if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
+      String username = job.get(SolrConstants.USERNAME);
+      LOG.info("Authenticating as: " + username);
+
+      // The credentials are offered to any host, port, realm and scheme.
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      client.getCredentialsProvider().setCredentials(scope, new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+
+      HttpParams params = client.getParams();
+      HttpClientParams.setAuthenticating(params, true);
+      client.setParams(params);
+    }
+
+    return new HttpSolrServer(job.get(SolrConstants.SERVER_URL), client);
+  }
+
+  /**
+   * Removes Unicode noncharacters and control characters below U+0020 (except
+   * tab, line feed and carriage return) from {@code input}.
+   */
+  public static String stripNonCharCodepoints(String input) {
+    StringBuilder retval = new StringBuilder(input.length());
+    // Walk code points, not chars, so U+nFFFE/U+nFFFF above the BMP are recognized.
+    for (int i = 0; i < input.length(); ) {
+      final int cp = input.codePointAt(i);
+      i += Character.charCount(cp);
+      boolean nonChar = (cp & 0xfffe) == 0xfffe || (cp >= 0xfdd0 && cp <= 0xfdef);
+      boolean control = cp < 0x20 && cp != 0x9 && cp != 0xa && cp != 0xd;
+      if (!nonChar && !control) {
+        retval.appendCodePoint(cp);
+      }
+    }
+    return retval.toString();
+  }
+}
\ No newline at end of file
diff --git a/src/plugin/nutch-extensionpoints/plugin.xml b/src/plugin/nutch-extensionpoints/plugin.xml
index 43fe045..d567f82 100644
--- a/src/plugin/nutch-extensionpoints/plugin.xml
+++ b/src/plugin/nutch-extensionpoints/plugin.xml
@@ -56,4 +56,7 @@
id="org.apache.nutch.scoring.ScoringFilter"
name="Nutch Scoring"/>
+