Index: docker/cassandra/README.md
===================================================================
--- docker/cassandra/README.md	(revision 1704713)
+++ docker/cassandra/README.md	(working copy)
@@ -1,13 +1,11 @@
-#Apache Nutch 2.x with Cassandra on Docker
+Apache Nutch 2.x with Cassandra on Docker
 =======================
 
-This project is 3 Docker containers running Apache Nutch 2.x configured with Cassandra storage.
+This project contains 3 Docker containers running Apache Nutch 2.x configured with [Apache Cassandra](http://cassandra.apache.org) storage.
 
-Due to the lack of integration information between Nutch 2.x / Cassandra, Mohamed Meabed (@Meabed) developed these docker containers with configuration and integration between them.
-
 This is project is fully operational but its still experimental, any feedback, suggestions should be directed to dev@nutch.apache.org and contribution(s) will be highly appreciated! 
 
-##Usage notes:
+## Usage
 
 1. Build the images and start the containers " NOTE: for Mac OS running boot2docker, Please read the Notes section Below ". 
 
Index: docker/cassandra/bin/build.sh
===================================================================
--- docker/cassandra/bin/build.sh	(revision 1704713)
+++ docker/cassandra/bin/build.sh	(working copy)
@@ -1,8 +1,23 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 B_DIR="`pwd`/"
 docker pull meabed/debian-jdk
 
 #
-docker build -t "meabed/nutch:2.3" $B_DIR/nutch/
-docker build -t "meabed/cassandra" $B_DIR/cassandra/
+docker build -t "apache/nutch:2.x" "$B_DIR/nutch/"
+docker build -t "apache/cassandra" "$B_DIR/cassandra/"
Index: docker/cassandra/bin/ipof.sh
===================================================================
--- docker/cassandra/bin/ipof.sh	(revision 1704713)
+++ docker/cassandra/bin/ipof.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 CONTAINER=$1
 docker inspect --format '{{ .NetworkSettings.IPAddress }}' $CONTAINER
Index: docker/cassandra/bin/nodes.sh
===================================================================
--- docker/cassandra/bin/nodes.sh	(revision 1704713)
+++ docker/cassandra/bin/nodes.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 function isRunning {
     id=$(docker ps -a | grep $1 | awk '{print $1}')
Index: docker/cassandra/bin/restart.sh
===================================================================
--- docker/cassandra/bin/restart.sh	(revision 1704713)
+++ docker/cassandra/bin/restart.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 B_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 
Index: docker/cassandra/bin/start.sh
===================================================================
--- docker/cassandra/bin/start.sh	(revision 1704713)
+++ docker/cassandra/bin/start.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 B_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 DOCKER_DATA_FOLDER=$B_DIR/docker-data
@@ -8,7 +23,7 @@
 source "$B_DIR/nodes.sh"
 source "$B_DIR/stop.sh"
 
-cassandraId=$(docker run -d -P -v $DOCKER_DATA_FOLDER:/data:rw --name $cassandraNodeName meabed/cassandra)
+cassandraId=$(docker run -d -P -v "$DOCKER_DATA_FOLDER:/data:rw" --name "$cassandraNodeName" apache/cassandra)
 cassandraIP=$("$B_DIR"/ipof.sh $cassandraId)
 
 # -p 9200:9200
@@ -15,4 +30,5 @@
 # http://dockerhost:9200/_plugin/kopf/
 # http://dockerhost:9200/_plugin/HQ/
 
-docker run -d -p 8899:8899 -P -e CASSANDRA_NODE_NAME=$cassandraNodeName -it --link $cassandraNodeName:$cassandraNodeName -v $DOCKER_DATA_FOLDER:/data:rw --name $nutchNodeName meabed/nutch:2.3
+docker run -d -p 8899:8899 -P -e CASSANDRA_NODE_NAME="$cassandraNodeName" -it --link "$cassandraNodeName:$cassandraNodeName" -v "$DOCKER_DATA_FOLDER:/data:rw" --name "$nutchNodeName" apache/nutch:2.x
+# apache/nutch2cassandra
Index: docker/cassandra/bin/stop.sh
===================================================================
--- docker/cassandra/bin/stop.sh	(revision 1704713)
+++ docker/cassandra/bin/stop.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 B_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 source "$B_DIR/nodes.sh"
Index: docker/cassandra/cassandra/Dockerfile
===================================================================
--- docker/cassandra/cassandra/Dockerfile	(revision 1704713)
+++ docker/cassandra/cassandra/Dockerfile	(working copy)
@@ -1,7 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
 #
-# Cassandra
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # meabed/debian-jdk
-# docker build -t meabed/cassandra:latest .
+# docker build -t apache/cassandra:latest .
 #
 # sudo sysctl -w vm.max_map_count=2621444
 # sudo su
@@ -13,7 +26,7 @@
 # ulimit -c unlimited
 
 FROM meabed/debian-jdk
-MAINTAINER Mohamed Meabed "mo.meabed@gmail.com"
+MAINTAINER Nutch Developers "dev@nutch.apache.org"
 
 USER root
 ENV DEBIAN_FRONTEND noninteractive
@@ -20,7 +33,7 @@
 
 
 # ADD DataStax sources
-RUN echo "deb http://debian.datastax.com/community stable main" | tee -a /etc/apt/sources.list.d/cassandra.sources.list
+RUN echo "deb http://debian.datastax.com/community 2.1 main" | tee -a /etc/apt/sources.list.d/cassandra.sources.list
 RUN curl -L http://debian.datastax.com/debian/repo_key | apt-key add -
 
 RUN apt-get update
Index: docker/cassandra/cassandra/bootstrap.sh
===================================================================
--- docker/cassandra/cassandra/bootstrap.sh	(revision 1704713)
+++ docker/cassandra/cassandra/bootstrap.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 export PATH=$PATH:/usr/local/sbin/
 export PATH=$PATH:/usr/sbin/
Index: docker/cassandra/nutch/Dockerfile
===================================================================
--- docker/cassandra/nutch/Dockerfile	(revision 1704713)
+++ docker/cassandra/nutch/Dockerfile	(working copy)
@@ -1,30 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
 #
-# Nutch
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # meabed/debian-jdk
-# docker build -t meabed/nutch:latest .
+# docker build -t apache/nutch:2.x .
 #
 
 FROM meabed/debian-jdk
-MAINTAINER Mohamed Meabed "mo.meabed@gmail.com"
+MAINTAINER Nutch Developers "dev@nutch.apache.org"
 
 USER root
 ENV DEBIAN_FRONTEND noninteractive
 
-ENV NUTCH_VERSION 2.3
-
 #ant
-RUN apt-get install -y ant
+RUN apt-get update && apt-get install -y ant subversion --fix-missing
 
 #Download nutch
 
-RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://archive.apache.org/dist/nutch/$NUTCH_VERSION/apache-nutch-$NUTCH_VERSION-src.tar.gz"
-RUN cd /opt && tar xvfz /opt/downloads/apache-nutch-$NUTCH_VERSION-src.tar.gz
-#WORKDIR /opt/apache-nutch-$NUTCH_VERSION
-ENV NUTCH_ROOT /opt/apache-nutch-$NUTCH_VERSION
+RUN mkdir -p /opt/downloads && cd /opt/downloads && svn co http://svn.apache.org/repos/asf/nutch/branches/2.x apache-nutch-2.x
+# Link the checkout to the NUTCH_ROOT path; a standalone "RUN cd /opt" has no effect on later instructions.
+RUN ln -s /opt/downloads/apache-nutch-2.x /opt/apache-nutch-2.x
+ENV NUTCH_ROOT /opt/apache-nutch-2.x
 ENV HOME /root
 
 #Nutch-default
-# RUN sed -i '/^  <name>http.agent.name<\/name>$/{$!{N;s/^  <name>http.agent.name<\/name>\n  <value><\/value>$/  <name>http.agent.name<\/name>\n  <value>iData Bot<\/value>/;ty;P;D;:y}}' $NUTCH_ROOT/conf/nutch-default.xml
+# RUN sed -i '/^  <name>http.agent.name<\/name>$/{$!{N;s/^  <name>http.agent.name<\/name>\n  <value><\/value>$/  <name>http.agent.name<\/name>\n  <value>Nutch 2.X Cassandra Docker<\/value>/;ty;P;D;:y}}' $NUTCH_ROOT/conf/nutch-default.xml
 
 RUN vim -c 'g/name="gora-cassandra"/+1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
 RUN vim -c 'g/name="gora-cassandra"/-1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
@@ -39,15 +50,13 @@
 
 #Modification and compilation again
 
-ADD plugin/nutch2-index-html/src/plugin/ $NUTCH_ROOT/src/plugin/
-RUN sed  -i '/dir="index-more" target="deploy".*/ s/.*/&\n     <ant dir="index-html" target="deploy"\/>/' $NUTCH_ROOT/src/plugin/build.xml
-RUN sed  -i '/dir="index-more" target="clean".*/ s/.*/&\n     <ant dir="index-html" target="clean"\/>/' $NUTCH_ROOT/src/plugin/build.xml
+#ADD plugin/nutch2-index-html/src/plugin/ $NUTCH_ROOT/src/plugin/
+#RUN sed  -i '/dir="index-more" target="deploy".*/ s/.*/&\n     <ant dir="index-html" target="deploy"\/>/' $NUTCH_ROOT/src/plugin/build.xml
+#RUN sed  -i '/dir="index-more" target="clean".*/ s/.*/&\n     <ant dir="index-html" target="clean"\/>/' $NUTCH_ROOT/src/plugin/build.xml
+#RUN cd $NUTCH_ROOT && ant runtime
 
+RUN ln -s /opt/apache-nutch-2.x/runtime/local /opt/nutch
 
-RUN cd $NUTCH_ROOT && ant runtime
-
-RUN ln -s /opt/apache-nutch-$NUTCH_VERSION/runtime/local /opt/nutch
-
 ENV NUTCH_HOME /opt/nutch
 
 # urls folder we will use in crawling $NUTCH_HOME/bin/crawl urls crawlId(test01) elasticsearch_node_name(iData) iteration(1)
@@ -57,7 +66,7 @@
 ADD testUrls $NUTCH_HOME/testUrls
 
 # Adding rawcontent that hold html of the page field in index to elasticsearch
-RUN sed  -i '/field name="date" type.*/ s/.*/&\n\n        <field name="rawcontent" type="text" sstored="true" indexed="true" multiValued="false"\/>\n/' $NUTCH_HOME/conf/schema.xml
+#RUN sed  -i '/field name="date" type.*/ s/.*/&\n\n        <field name="rawcontent" type="text" stored="true" indexed="true" multiValued="false"\/>\n/' $NUTCH_HOME/conf/schema.xml
 
 # remove nutche-site.xml default file to replace it by our configuration
 RUN rm $NUTCH_HOME/conf/nutch-site.xml
@@ -66,10 +75,6 @@
 # Port that nutchserver will use
 ENV NUTCHSERVER_PORT 8899
 
-#RUN cd $NUTCH_HOME && ls -al
-
-#RUN mkdir -p /opt/nutch/urls && cd /opt/crawl
-
 ADD bootstrap.sh /etc/bootstrap.sh
 RUN chown root:root /etc/bootstrap.sh
 RUN chmod 700 /etc/bootstrap.sh
Index: docker/cassandra/nutch/bootstrap.sh
===================================================================
--- docker/cassandra/nutch/bootstrap.sh	(revision 1704713)
+++ docker/cassandra/nutch/bootstrap.sh	(working copy)
@@ -1,4 +1,19 @@
 #!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 export PATH=$PATH:/usr/local/sbin/
 export PATH=$PATH:/usr/sbin/
Index: docker/cassandra/nutch/config/nutch-site.xml
===================================================================
--- docker/cassandra/nutch/config/nutch-site.xml	(revision 1704713)
+++ docker/cassandra/nutch/config/nutch-site.xml	(working copy)
@@ -1,6 +1,22 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
 
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 <configuration>
 
     <property>
@@ -20,16 +36,8 @@
         <value>0.0.1</value>
     </property>
     <property>
-        <name>http.agent.url</name>
-        <value>http://www.google.com</value>
-    </property>
-    <property>
-        <name>http.agent.email</name>
-        <value>mo.meabed@gmail.com</value>
-    </property>
-    <property>
         <name>http.content.limit</name>
-        <value>1000000</value>
+        <value>-1</value>
     </property>
     <property>
         <name>storage.data.store.class</name>
@@ -37,35 +45,6 @@
         <description>Default class for storing data</description>
     </property>
     <property>
-        <name>fetcher.server.delay</name>
-        <value>2.0</value>
-        <description>The number of seconds the fetcher will delay between
-            successive requests to the same server.
-        </description>
-    </property>
-    <property>
-        <name>indexer.max.title.length</name>
-        <value>300</value>
-        <description>The maximum number of characters of a title that are indexed. A value of -1 disables this check.
-            Used by index-basic.
-        </description>
-    </property>
-    <property>
-        <name>db.ignore.external.links</name>
-        <value>true</value>
-        <description>If true, outlinks leading from a page to external hosts
-            will be ignored. This is an effective way to limit the crawl to include
-            only initially injected hosts, without creating complex URLFilters.
-        </description>
-    </property>
-    <property>
-        <name>fetcher.parse</name>
-        <value>true</value>
-        <description>If true, fetcher will parse content. NOTE: previous releases would
-            default to true. Since 2.0 this is set to false as a safer default.
-        </description>
-    </property>
-    <property>
         <name>plugin.includes</name>
         <value>protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor|more|html)|urlnormalizer-(pass|regex|basic)|scoring-opic|protocol-httpclient|language-identifier|indexer-solr</value>
         <description>Regular expression naming plugin directory names to
Index: docker/cassandra/nutch/testUrls/seed.txt
===================================================================
--- docker/cassandra/nutch/testUrls/seed.txt	(revision 1704713)
+++ docker/cassandra/nutch/testUrls/seed.txt	(working copy)
@@ -1 +1,16 @@
-http://www.google.com
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+http://nutch.apache.org
