cdh docker environment

This commit is contained in:
astroshim 2016-09-22 16:55:23 +09:00
parent f2a5c594dc
commit a7b5b2d9c8
8 changed files with 179 additions and 0 deletions

View file

@ -107,6 +107,7 @@
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-standalone-mode">Zeppelin on Spark Cluster Mode (Standalone)</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-yarn-mode">Zeppelin on Spark Cluster Mode (YARN)</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-mesos-mode">Zeppelin on Spark Cluster Mode (Mesos)</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#zeppelin-on-cdh">Zeppelin on CDH</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Contribute</b></span></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

View file

@ -172,6 +172,7 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode)
* [Zeppelin on Spark Cluster Mode (YARN via Docker)](./install/spark_cluster_mode.html#spark-on-yarn-mode)
* [Zeppelin on Spark Cluster Mode (Mesos via Docker)](./install/spark_cluster_mode.html#spark-on-mesos-mode)
* [Zeppelin on CDH (via Docker)](./install/spark_cluster_mode.html#zeppelin-on-cdh)
* Contribute
* [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html)
* [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html)

View file

@ -202,3 +202,78 @@ After running a single paragraph with Spark interpreter in Zeppelin, browse `htt
<img src="../assets/themes/zeppelin/img/docs-img/mesos_frameworks.png" />
## Zeppelin on CDH
Cloudera officially provides a Docker container for its QuickStart environment [here](https://www.cloudera.com/documentation/enterprise/5-6-x/topics/quickstart_docker_container.html).
You can easily build a CDH Docker environment by following that [link](https://www.cloudera.com/documentation/enterprise/5-6-x/topics/quickstart_docker_container.html).
### 1. Run docker
```
# Start the Cloudera QuickStart container with the hostname "quickstart.cloudera"
# and publish the service ports used in this guide on the host, e.g.
# 8020 (HDFS), 8088 (YARN web UI), 10002 (HiveServer2 web UI),
# 18088 (Spark history server) and 50070 (HDFS web UI).
# The image is pulled from Docker Hub automatically if it is not present locally.
docker run -it \
-p 80:80 \
-p 4040:4040 \
-p 8020:8020 \
-p 8022:8022 \
-p 8030:8030 \
-p 8032:8032 \
-p 8033:8033 \
-p 8040:8040 \
-p 8042:8042 \
-p 8088:8088 \
-p 8480:8480 \
-p 8485:8485 \
-p 8888:8888 \
-p 9083:9083 \
-p 10002:10002 \
-p 10020:10020 \
-p 10033:10033 \
-p 18088:18088 \
-p 19888:19888 \
-p 25000:25000 \
-p 25010:25010 \
-p 25020:25020 \
-p 50010:50010 \
-p 50020:50020 \
-p 50070:50070 \
-p 50075:50075 \
-h quickstart.cloudera --privileged=true \
cloudera/quickstart:latest /usr/bin/docker-quickstart;
```
### 2. Verify running CDH.
You can check each application's web UI at the following URLs.
```
# HADOOP
http://localhost:50070/dfshealth.html#tab-overview
# HIVE
http://localhost:10002/hiveserver2.jsp
# YARN Application
http://localhost:8088/cluster
# Spark history server
http://localhost:18088
```
### 3. Configure Spark interpreter in Zeppelin
Set the following configurations in `conf/zeppelin-env.sh`.
```
export MASTER=yarn-client
export HADOOP_CONF_DIR=[your_hadoop_conf_path]
export SPARK_HOME=[your_spark_home_path]
```
`HADOOP_CONF_DIR` (the Hadoop configuration path) is defined in `/scripts/docker/spark-cluster-managers/cdh/hdfs_conf`.
Don't forget to set the Spark `master` to `yarn-client` on the Zeppelin **Interpreters** setting page, as shown below.
<img src="../assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png" />
### 4. Run Zeppelin with Spark interpreter
After running a single paragraph with the Spark interpreter in Zeppelin, browse to `http://<hostname>:8088/cluster/apps` and check whether the Zeppelin application is running properly.
<img src="../assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png" />

View file

@ -0,0 +1,6 @@
<!-- Hadoop filesystem settings used when connecting to the CDH container. -->
<configuration>
  <!-- Default filesystem URI: HDFS on port 8020. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://0.0.0.0:8020</value>
  </property>
</configuration>

View file

@ -0,0 +1,64 @@
<!--
  HDFS settings for the CDH docker environment.
  The deprecated keys dfs.data.dir and dfs.permissions are written with their
  current names (dfs.datanode.data.dir and dfs.permissions.enabled); the values
  are unchanged.
-->
<configuration>
  <!-- Keep a single replica of each block. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>

  <!-- Local directory for datanode block storage; marked final so it cannot be overridden. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/hdfs</value>
    <final>true</final>
  </property>

  <!-- Disable HDFS permission checking. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>

  <property>
    <name>dfs.client.use.datanode.hostname</name>
    <value>true</value>
    <description>Whether clients should use datanode hostnames when
      connecting to datanodes.
    </description>
  </property>

  <property>
    <name>dfs.datanode.use.datanode.hostname</name>
    <value>true</value>
    <description>Whether datanodes should use datanode hostnames when
      connecting to other datanodes for data transfer.
    </description>
  </property>

  <property>
    <name>dfs.datanode.address</name>
    <value>0.0.0.0:50010</value>
    <description>
      The address where the datanode server will listen to.
      If the port is 0 then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
    <description>
      The datanode http server address and port.
      If the port is 0 then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:50020</value>
    <description>
      The datanode ipc server address and port.
      If the port is 0 then the server will start on a free port.
    </description>
  </property>
</configuration>

View file

@ -0,0 +1,6 @@
<!-- MapReduce settings for the CDH docker environment. -->
<configuration>
  <!-- Select YARN as the MapReduce execution framework. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

View file

@ -0,0 +1,26 @@
<!-- YARN ResourceManager endpoints and application classpath for the CDH docker environment. -->
<configuration>
  <!-- ResourceManager scheduler interface. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>0.0.0.0:8030</value>
  </property>

  <!-- ResourceManager applications interface. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>0.0.0.0:8032</value>
  </property>

  <!-- ResourceManager web UI. -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>0.0.0.0:8088</value>
  </property>

  <!-- ResourceManager resource-tracker interface. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>0.0.0.0:8031</value>
  </property>

  <!-- ResourceManager admin interface. -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>0.0.0.0:8033</value>
  </property>

  <!--
    Classpath entries for YARN applications.
    NOTE(review): these paths assume Hadoop is installed under /usr/local/hadoop;
    confirm they match the layout inside the CDH container.
  -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*, /usr/local/hadoop/share/spark/*</value>
  </property>
</configuration>