Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml (revision 826600)
+++ conf/nutch-default.xml (working copy)
@@ -1277,4 +1277,13 @@
+
+
+ solrindex.schema.file
+ solrindex-schema.xml
+
+ Default file for mapping default Nutch fields to Solr schema fields
+
+
+
Index: conf/solrindex-schema.xml
===================================================================
--- conf/solrindex-schema.xml (revision 0)
+++ conf/solrindex-schema.xml (revision 0)
@@ -0,0 +1,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id_url
+
\ No newline at end of file
Index: src/java/org/apache/nutch/searcher/SolrSearchBean.java
===================================================================
--- src/java/org/apache/nutch/searcher/SolrSearchBean.java (revision 826600)
+++ src/java/org/apache/nutch/searcher/SolrSearchBean.java (working copy)
@@ -35,6 +35,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ToStringUtils;
+import org.apache.nutch.indexer.solr.SolrSchemaReader;
import org.apache.nutch.indexer.solr.SolrWriter;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
@@ -52,6 +53,8 @@
private final SolrServer solr;
private final QueryFilters filters;
+
+ private static String searchUID = new SolrSchemaReader().getUniqueKey();
public SolrSearchBean(Configuration conf, String solrServer)
throws IOException {
@@ -72,14 +75,16 @@
final BooleanQuery bQuery = filters.filter(query);
final SolrQuery solrQuery = new SolrQuery(stringify(bQuery));
-
+
+
+
solrQuery.setRows(numHits);
if (sortField == null) {
- solrQuery.setFields(dedupField, "score", "id");
+ solrQuery.setFields(dedupField, "score", searchUID);
sortField = "score";
} else {
- solrQuery.setFields(dedupField, sortField, "id");
+ solrQuery.setFields(dedupField, sortField, searchUID);
solrQuery.setSortField(sortField, reverse ? ORDER.asc : ORDER.desc);
}
@@ -113,7 +118,7 @@
final String dedupValue = (String) solrDoc.getFirstValue(dedupField);
- final String uniqueKey = (String )solrDoc.getFirstValue("id");
+ final String uniqueKey = (String )solrDoc.getFirstValue(searchUID);
hitArr[i] = new Hit(uniqueKey, sortValue, dedupValue);
}
@@ -124,7 +129,7 @@
public HitDetails getDetails(Hit hit) throws IOException {
QueryResponse response;
try {
- response = solr.query(new SolrQuery("id:\"" + hit.getUniqueKey() + "\""));
+ response = solr.query(new SolrQuery(searchUID + ":\"" + hit.getUniqueKey() + "\""));
} catch (final SolrServerException e) {
throw SolrWriter.makeIOException(e);
}
@@ -141,7 +146,7 @@
final StringBuilder buf = new StringBuilder();
buf.append("(");
for (final Hit hit : hits) {
- buf.append(" id:\"");
+ buf.append(" " + searchUID + ":\"");
buf.append(hit.getUniqueKey());
buf.append("\"");
}
@@ -169,7 +174,7 @@
new HashMap(hits.length);
for (final SolrDocument solrDoc : docList) {
final HitDetails details = buildDetails(solrDoc);
- detailsMap.put(details.getValue("id"), details);
+ detailsMap.put(details.getValue(searchUID), details);
}
final HitDetails[] detailsArr = new HitDetails[hits.length];
Index: src/java/org/apache/nutch/searcher/NutchBean.java
===================================================================
--- src/java/org/apache/nutch/searcher/NutchBean.java (revision 826600)
+++ src/java/org/apache/nutch/searcher/NutchBean.java (working copy)
@@ -93,7 +93,6 @@
final Path luceneConfig = new Path(dir, "search-servers.txt");
final Path solrConfig = new Path(dir, "solr-servers.txt");
final Path segmentConfig = new Path(dir, "segment-servers.txt");
-
if (fs.exists(luceneConfig) || fs.exists(solrConfig)) {
searchBean = new DistributedSearchBean(conf, luceneConfig, solrConfig);
} else {
Index: src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java (revision 0)
+++ src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java (revision 0)
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer.solr;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Reads the Solr index mapping file named by the {@code solrindex.schema.file}
+ * property. {@code field} elements rename Nutch fields, {@code copyField}
+ * elements name an additional destination field and {@code uniqueKey} names the
+ * Solr unique key (default {@code id}). The parsed state is static and shared;
+ * every constructor call re-reads the file.
+ */
+public class SolrSchemaReader {
+
+  /** our log stream */
+  public static final Log LOG = LogFactory.getLog(SolrSchemaReader.class);
+
+  /** The property name of the solr index schema location */
+  private static final String SS_FILE_SCHEMA = "solrindex.schema.file";
+  /** Unique key used when the schema file does not define a usable one */
+  private static final String DEFAULT_UNIQUE_KEY = "id";
+  private static Configuration conf = NutchConfiguration.create();
+
+  private static Map<String, String> keyMap = new HashMap<String, String>();
+  private static Map<String, String> copyMap = new HashMap<String, String>();
+  private static String uniqueKey = DEFAULT_UNIQUE_KEY;
+
+  /** Creates a reader and (re)parses the configured schema file. */
+  public SolrSchemaReader() {
+    parseSolrindexSchema();
+  }
+
+  /**
+   * Parses the mapping file and publishes the field map, copy map and unique key.
+   * If the file is not configured, not found or unparsable, a warning is logged
+   * and the previously published state is kept.
+   */
+  public static void parseSolrindexSchema() {
+    String schemaFile = conf.get(SS_FILE_SCHEMA);
+    // Guard first: a null name or a null stream would otherwise fail inside the parser.
+    InputStream in = (schemaFile == null) ? null : conf.getConfResourceAsInputStream(schemaFile);
+    if (in == null) {
+      LOG.warn("Solr index schema '" + schemaFile + "' not found, keeping current mappings");
+      return;
+    }
+    try {
+      DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+      Element root = builder.parse(new InputSource(in)).getDocumentElement();
+      Map<String, String> keys = readPairs(root, "field", "sourceColName", "column");
+      Map<String, String> copies = readPairs(root, "copyField", "source", "dest");
+      String key = DEFAULT_UNIQUE_KEY;
+      NodeList uniqueKeys = root.getElementsByTagName("uniqueKey");
+      if (uniqueKeys.getLength() > 1) {
+        LOG.warn("More than one unique key definitions found in solr index schema using default 'id'");
+      } else if (uniqueKeys.getLength() == 0) {
+        LOG.warn("No unique key definition found in solr index schema using default 'id'");
+      } else {
+        // getTextContent() copes with an empty uniqueKey element, for which
+        // getFirstChild() is null and dereferencing it would throw.
+        String text = uniqueKeys.item(0).getTextContent();
+        if (text != null && text.trim().length() > 0) {
+          key = text.trim();
+        } else {
+          LOG.warn("Empty unique key definition found in solr index schema using default 'id'");
+        }
+      }
+      // Publish only after a complete parse so a failure never leaves partial state.
+      keyMap = keys;
+      copyMap = copies;
+      uniqueKey = key;
+    } catch (SAXException e) {
+      LOG.warn("Cannot parse solr index schema " + schemaFile, e);
+    } catch (IOException e) {
+      LOG.warn("Cannot read solr index schema " + schemaFile, e);
+    } catch (ParserConfigurationException e) {
+      LOG.warn("Cannot create XML parser for solr index schema " + schemaFile, e);
+    } finally {
+      try {
+        in.close();
+      } catch (IOException ignored) {
+        // closing a read-only config stream failed; nothing to recover
+      }
+    }
+  }
+
+  /** Collects fromAttr -> toAttr for every element named tag below root. */
+  private static Map<String, String> readPairs(Element root, String tag, String fromAttr, String toAttr) {
+    Map<String, String> pairs = new HashMap<String, String>();
+    NodeList nodes = root.getElementsByTagName(tag);
+    for (int i = 0; i < nodes.getLength(); i++) {
+      Element element = (Element) nodes.item(i);
+      LOG.info("source: " + element.getAttribute(fromAttr) + " dest: " + element.getAttribute(toAttr));
+      pairs.put(element.getAttribute(fromAttr), element.getAttribute(toAttr));
+    }
+    return pairs;
+  }
+
+  /** @return mapping from Nutch field name to Solr field name */
+  public Map<String, String> getKeyMap() {
+    return keyMap;
+  }
+
+  /** @return mapping from Nutch field name to copyField destination */
+  public Map<String, String> getCopyMap() {
+    return copyMap;
+  }
+
+  /** @return name of the Solr unique key field */
+  public String getUniqueKey() {
+    return uniqueKey;
+  }
+
+  /** Returns the copyField destination for key, or key itself when none is mapped. */
+  public String hasCopy(String key) {
+    String dest = copyMap.get(key);
+    return dest != null ? dest : key;
+  }
+
+  /** Returns the Solr field name mapped to key, or key itself when none is mapped. */
+  public String schemaTranslateKey(String key) throws IOException {
+    String mapped = keyMap.get(key);
+    return mapped != null ? mapped : key;
+  }
+
+  /**
+   * Same lookup as hasCopy; the throws clause is kept for existing callers.
+   * An unmapped key is returned as the same instance (SolrWriter compares by reference).
+   */
+  public String schemaTranslateCopyMap(String key) throws IOException {
+    return hasCopy(key);
+  }
+}
Index: src/java/org/apache/nutch/indexer/solr/SolrWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrWriter.java (revision 826600)
+++ src/java/org/apache/nutch/indexer/solr/SolrWriter.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.mapred.JobConf;
@@ -32,6 +33,7 @@
public class SolrWriter implements NutchIndexWriter {
private SolrServer solr;
+ public static SolrSchemaReader solrSchema = new SolrSchemaReader();
private final List inputDocs =
new ArrayList();
@@ -48,7 +50,11 @@
final SolrInputDocument inputDoc = new SolrInputDocument();
for(final Entry> e : doc) {
for (final String val : e.getValue()) {
- inputDoc.addField(e.getKey(), val);
+ inputDoc.addField(solrSchema.schemaTranslateKey(e.getKey()), val);
+ String sCopy=solrSchema.schemaTranslateCopyMap(e.getKey());
+ if(sCopy!=e.getKey()){
+ inputDoc.addField(sCopy, val);
+ }
}
}
inputDoc.setDocumentBoost(doc.getScore());