cdh docker environment

2026-05-24 09:38:26 +00:00 · 2016-09-22 16:55:23 +09:00 · 2016-09-22 16:55:23 +09:00 · a7b5b2d9c8
commit a7b5b2d9c8
parent f2a5c594dc
8 changed files with 179 additions and 0 deletions
--- a/docs/_includes/themes/zeppelin/_navigation.html
+++ b/docs/_includes/themes/zeppelin/_navigation.html
@ -107,6 +107,7 @@
                <li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-standalone-mode">Zeppelin on Spark Cluster Mode (Standalone)</a></li>
                <li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-yarn-mode">Zeppelin on Spark Cluster Mode (YARN)</a></li>
                <li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-mesos-mode">Zeppelin on Spark Cluster Mode (Mesos)</a></li>
+                <li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#zeppelin-on-cdh">Zeppelin on CDH</a></li>
                <li role="separator" class="divider"></li>
                <li class="title"><span><b>Contibute</b><span></li>
                <li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
--- a/docs/assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png
+++ b/docs/assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png
--- a/docs/index.md
+++ b/docs/index.md
@ -172,6 +172,7 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
  * [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode)
  * [Zeppelin on Spark Cluster Mode (YARN via Docker)](./install/spark_cluster_mode.html#spark-on-yarn-mode)
  * [Zeppelin on Spark Cluster Mode (Mesos via Docker)](./install/spark_cluster_mode.html#spark-on-mesos-mode)
+  * [Zeppelin on CDH (via Docker)](./install/spark_cluster_mode.html#zeppelin-on-cdh)
 * Contribute
  * [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html)
  * [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html)
--- a/docs/install/spark_cluster_mode.md
+++ b/docs/install/spark_cluster_mode.md
@ -202,3 +202,78 @@ After running a single paragraph with Spark interpreter in Zeppelin, browse `htt

 <img src="../assets/themes/zeppelin/img/docs-img/mesos_frameworks.png" />

+
+## Zeppelin on CDH
+Cloudera officially provides a CDH Docker container, which is described [here](https://www.cloudera.com/documentation/enterprise/5-6-x/topics/quickstart_docker_container.html).
+You can easily build a CDH Docker environment by following that [guide](https://www.cloudera.com/documentation/enterprise/5-6-x/topics/quickstart_docker_container.html).
+
+
+### 1. Run docker
+
+```
+docker run -it \
+ -p 80:80 \
+ -p 4040:4040 \
+ -p 8020:8020 \
+ -p 8022:8022 \
+ -p 8030:8030 \
+ -p 8032:8032 \
+ -p 8033:8033 \
+ -p 8040:8040 \
+ -p 8042:8042 \
+ -p 8088:8088 \
+ -p 8480:8480 \
+ -p 8485:8485 \
+ -p 8888:8888 \
+ -p 9083:9083 \
+ -p 10002:10002 \
+ -p 10020:10020 \
+ -p 10033:10033 \
+ -p 18088:18088 \
+ -p 19888:19888 \
+ -p 25000:25000 \
+ -p 25010:25010 \
+ -p 25020:25020 \
+ -p 50010:50010 \
+ -p 50020:50020 \
+ -p 50070:50070 \
+ -p 50075:50075 \
+ -h quickstart.cloudera --privileged=true \
+ cloudera/quickstart:latest /usr/bin/docker-quickstart;
+```
+
+### 2. Verify running CDH.
+
+You can check each application's web UI at the following URLs.
+```
+# HADOOP
+http://localhost:50070/dfshealth.html#tab-overview
+
+# HIVE
+http://localhost:10002/hiveserver2.jsp
+
+# YARN Application
+http://localhost:8088/cluster
+
+# Spark history server
+http://localhost:18088
+```
+
+### 3. Configure Spark interpreter in Zeppelin
+Set the following configurations in `conf/zeppelin-env.sh`.
+
+```
+export MASTER=yarn-client
+export HADOOP_CONF_DIR=[your_hadoop_conf_path]
+export SPARK_HOME=[your_spark_home_path]
+```
+
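+`HADOOP_CONF_DIR` (the Hadoop configuration path) must point to a directory containing the Hadoop client configuration files; sample files for this environment are provided in `/scripts/docker/spark-cluster-managers/cdh`.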
+
+Don't forget to set the Spark `master` property to `yarn-client` on the Zeppelin **Interpreters** setting page, as shown below.
+
+<img src="../assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png" />
+
+### 4. Run Zeppelin with Spark interpreter
+After running a single paragraph with the Spark interpreter in Zeppelin, browse to `http://<hostname>:8088/cluster/apps` and check whether the Zeppelin application is running.
+
+<img src="../assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png" />
--- a/scripts/docker/spark-cluster-managers/cdh/core-site.xml
+++ b/scripts/docker/spark-cluster-managers/cdh/core-site.xml
@ -0,0 +1,6 @@
+<configuration> <!-- Hadoop core client settings for the CDH docker environment -->
+  <property>
+    <name>fs.defaultFS</name> <!-- URI of the default file system (HDFS on port 8020) -->
+    <value>hdfs://0.0.0.0:8020</value> <!-- NOTE(review): 0.0.0.0 presumably relies on port 8020 being published on the local host; confirm -->
+  </property>
+</configuration>
--- a/scripts/docker/spark-cluster-managers/cdh/hdfs-site.xml
+++ b/scripts/docker/spark-cluster-managers/cdh/hdfs-site.xml
@ -0,0 +1,64 @@
+<configuration> <!-- HDFS settings for the CDH docker environment -->
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+
+  <!-- dfs.datanode.data.dir is the current name of the deprecated dfs.data.dir key. -->
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>/data/hdfs</value>
+    <final>true</final>
+  </property>
+  <!-- dfs.permissions.enabled is the current name of the deprecated dfs.permissions key. -->
+  <property>
+    <name>dfs.permissions.enabled</name>
+    <value>false</value>
+  </property>
+
+  <!-- Use hostnames, not IP addresses, when connecting to DataNodes. -->
+  <property>
+    <name>dfs.client.use.datanode.hostname</name>
+    <value>true</value>
+    <description>Whether clients should use datanode hostnames when
+      connecting to datanodes.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.datanode.use.datanode.hostname</name>
+    <value>true</value>
+    <description>Whether datanodes should use datanode hostnames when
+      connecting to other datanodes for data transfer.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.datanode.address</name>
+    <value>0.0.0.0:50010</value>
+    <description>
+      The address where the datanode server will listen to.
+      If the port is 0 then the server will start on a free port.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.datanode.http.address</name>
+    <value>0.0.0.0:50075</value>
+    <description>
+      The datanode http server address and port.
+      If the port is 0 then the server will start on a free port.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.datanode.ipc.address</name>
+    <value>0.0.0.0:50020</value>
+    <description>
+      The datanode ipc server address and port.
+      If the port is 0 then the server will start on a free port.
+    </description>
+  </property>
+
+</configuration>
+
--- a/scripts/docker/spark-cluster-managers/cdh/mapred-site.xml
+++ b/scripts/docker/spark-cluster-managers/cdh/mapred-site.xml
@ -0,0 +1,6 @@
+<configuration> <!-- MapReduce client settings for the CDH docker environment -->
+  <property>
+    <name>mapreduce.framework.name</name> <!-- selects the runtime framework for MapReduce jobs -->
+    <value>yarn</value>
+  </property>
+</configuration>
--- a/scripts/docker/spark-cluster-managers/cdh/yarn-site.xml
+++ b/scripts/docker/spark-cluster-managers/cdh/yarn-site.xml
@ -0,0 +1,26 @@
+<configuration> <!-- YARN client settings for the CDH docker environment -->
+  <property> <!-- ResourceManager scheduler interface -->
+    <name>yarn.resourcemanager.scheduler.address</name>
+    <value>0.0.0.0:8030</value>
+  </property>
+  <property> <!-- ResourceManager application-submission interface -->
+    <name>yarn.resourcemanager.address</name>
+    <value>0.0.0.0:8032</value>
+  </property>
+  <property> <!-- ResourceManager web UI -->
+    <name>yarn.resourcemanager.webapp.address</name>
+    <value>0.0.0.0:8088</value>
+  </property>
+  <property> <!-- ResourceManager resource-tracker interface -->
+    <name>yarn.resourcemanager.resource-tracker.address</name>
+    <value>0.0.0.0:8031</value>
+  </property>
+  <property> <!-- ResourceManager admin interface -->
+    <name>yarn.resourcemanager.admin.address</name>
+    <value>0.0.0.0:8033</value>
+  </property>
+  <property> <!-- NOTE(review): confirm these /usr/local/hadoop paths exist in the CDH image -->
+    <name>yarn.application.classpath</name>
+    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*, /usr/local/hadoop/share/spark/*</value>
+  </property>
+</configuration>