Index: conf/nutch-default.xml
===================================================================
--- conf/nutch-default.xml (revision 825432)
+++ conf/nutch-default.xml (working copy)
@@ -1277,4 +1277,13 @@
+
+<property>
+  <name>solrindex.schema.file</name>
+  <value>solrindex-schema.xml</value>
+  <description>
+  Default file that maps Nutch document fields to Solr schema fields.
+  </description>
+</property>
+
Index: src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java (revision 0)
+++ src/java/org/apache/nutch/indexer/solr/SolrSchemaReader.java (revision 0)
@@ -0,0 +1,64 @@
+package org.apache.nutch.indexer.solr;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class SolrSchemaReader {
+
+ /** our log stream */
+ public static Log LOG = LogFactory.getLog(SolrSchemaReader.class);
+
+ /** The property name of the parse-plugins location */
+ private static final String SS_FILE_SCHEMA = "solrindex.schema.file";
+ private static Configuration conf = NutchConfiguration.create();
+
+ public static Map parseSolrindexSchema() {
+ Map map = new HashMap();
+
+ InputStream ssInputStream = null;
+ ssInputStream = conf.getConfResourceAsInputStream(conf.get(SS_FILE_SCHEMA));
+ InputSource inputSource = new InputSource(ssInputStream);
+ try {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ Document document = builder.parse(inputSource);
+ Element rootElement = document.getDocumentElement();
+ NodeList nodelist = rootElement.getElementsByTagName("field");
+ if (nodelist.getLength() > 0) {
+ for (int i = 0; i < nodelist.getLength(); i++) {
+ Element element = (Element) nodelist.item(i);
+ LOG.debug("source: " + element.getAttribute("sourceColName") + " dest: " + element.getAttribute("column"));
+ map.put(element.getAttribute("sourceColName"), element.getAttribute("column"));
+ }
+ }
+ } catch (MalformedURLException e) {
+ LOG.warn(e.toString());
+ } catch (SAXException e) {
+ LOG.warn(e.toString());
+ } catch (IOException e) {
+ LOG.warn(e.toString());
+ } catch (ParserConfigurationException e) {
+ LOG.warn(e.toString());
+ }
+ return map;
+ }
+
+
+}
Index: src/java/org/apache/nutch/indexer/solr/SolrWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrWriter.java (revision 825432)
+++ src/java/org/apache/nutch/indexer/solr/SolrWriter.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.mapred.JobConf;
@@ -32,6 +33,7 @@
public class SolrWriter implements NutchIndexWriter {
private SolrServer solr;
+ public static Map map = SolrSchemaReader.parseSolrindexSchema();
private final List inputDocs =
new ArrayList();
@@ -48,7 +50,7 @@
final SolrInputDocument inputDoc = new SolrInputDocument();
for(final Entry> e : doc) {
for (final String val : e.getValue()) {
- inputDoc.addField(e.getKey(), val);
+ inputDoc.addField(schemaTranslate(e.getKey()), val);
}
}
inputDoc.setDocumentBoost(doc.getScore());
@@ -81,4 +83,10 @@
return ioe;
}
+ public static String schemaTranslate(String key) throws IOException { // maps a Nutch field name to its Solr field name; NOTE(review): never throws IOException — consider dropping the throws clause
+ if(map.containsKey(key)){ // a mapping exists for this Nutch field
+ key = (String) map.get(key); // substitute the configured Solr column name
+ }
+ return key; // unmapped fields pass through unchanged
+ }
}