Merge pull request #2 from apache/master

update from original
This commit is contained in:
Fouad 2016-05-29 17:39:08 +03:00
commit 2586651ab9
319 changed files with 20479 additions and 2006 deletions

10
.gitignore vendored
View file

@ -12,15 +12,20 @@
spark/derby.log
spark/metastore_db
spark-1.*-bin-hadoop*
.spark-dist
zeppelin-server/derby.log
lens/lens-cli-hist.log
# conf file
conf/zeppelin-env.sh
conf/zeppelin-env.cmd
conf/zeppelin-site.xml
conf/keystore
conf/truststore
conf/interpreter.json
conf/notebook-authorization.json
conf/shiro.ini
# other generated files
spark/dependency-reduced-pom.xml
@ -35,6 +40,8 @@ zeppelin-web/bower_components
**nbproject/
**node/
#R
/r/lib/
# project level
/logs/
@ -80,6 +87,9 @@ Thumbs.db
target/
**/target/
# Generated by Jekyll
docs/_site/
*~
\#*\#
/.emacs.desktop

View file

@ -15,19 +15,34 @@
language: java
sudo: false
cache:
directories:
- .spark-dist
addons:
apt:
sources:
- r-packages-precise
packages:
- r-base-dev
- r-cran-evaluate
- r-cran-base64enc
matrix:
include:
# Test all modules
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.4.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
@ -41,11 +56,16 @@ matrix:
- jdk: "oraclejdk7"
env: SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.0
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
env: TEST_SELENIUM="true" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
before_install:
- "ls -la .spark-dist"
- mkdir -p ~/R
- echo 'R_LIBS=~/R' > ~/.Renviron
- R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
- export R_LIBS='~/R'
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@ -56,19 +76,23 @@ before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
- ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
- tail conf/zeppelin-env.sh
script:
- mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
after_success:
- echo "Travis exited with ${TRAVIS_TEST_RESULT}"
after_failure:
- echo "Travis exited with ${TRAVIS_TEST_RESULT}"
- cat target/rat.txt
- cat zeppelin-server/target/rat.txt
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.log
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out
- cat zeppelin-web/npm-debug.log
after_script:
- ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER
notifications:
slack:
secure: dtIkPwlf5uTun19p9TtPEAFmrLOMK2COE8TL9m8LXX/N2WzJaKYvAnovMObEV6KEgK2oZ+72Cke7eBI+Hp4FmHZ2B7mQI/PNCfRZthI3cc3zVmMd25yvLH9AlCRa2bC6R885z2copvzaoZtLBkHnPa8bUrUkbmRp40qkDPQpgO4=

14
LICENSE
View file

@ -244,4 +244,16 @@ Apache licenses
The following components are provided under the Apache License. See project link for details.
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
========================================================================
BSD 3-Clause licenses
========================================================================
The following components are provided under the BSD 3-Clause license. See file headers and project links for details.
(BSD 3 Clause) portions of rscala 1.0.6 (https://dahl.byu.edu/software/rscala/) - https://cran.r-project.org/web/packages/rscala/index.html
r/R/rzeppelin/R/{common.R, globals.R,protocol.R,rServer.R,scalaInterpreter.R,zzz.R }
r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/{Package.scala, RClient.scala}
(BSD 3 Clause) portions of Scala (http://www.scala-lang.org/download) - http://www.scala-lang.org/download/#License
r/src/main/scala/scala/Console.scala

145
README.md
View file

@ -1,4 +1,4 @@
#Zeppelin
#Zeppelin
**Documentation:** [User Guide](http://zeppelin.incubator.apache.org/docs/latest/index.html)<br/>
**Mailing Lists:** [User and Dev mailing list](http://zeppelin.incubator.apache.org/community.html)<br/>
@ -18,15 +18,16 @@ Core feature:
To know more about Zeppelin, visit our web site [http://zeppelin.incubator.apache.org](http://zeppelin.incubator.apache.org)
## Requirements
* Git
* Java 1.7
* Tested on Mac OSX, Ubuntu 14.X, CentOS 6.X
* Tested on Mac OSX, Ubuntu 14.X, CentOS 6.X, Windows 7 Pro SP1
* Maven (if you want to build from the source code)
* Node.js Package Manager
* Node.js Package Manager (npm, downloaded by Maven during build phase)
## Getting Started
### Before Build
If you don't have requirements prepared, install it.
If you don't have requirements prepared, install it.
(The installation method may vary according to your environment, example is for Ubuntu.)
```
@ -35,17 +36,52 @@ sudo apt-get install git
sudo apt-get install openjdk-7-jdk
sudo apt-get install npm
sudo apt-get install libfontconfig
```
# install maven
#### Proxy settings (optional)
If you are behind a corporate Proxy with NTLM authentication you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/) .
Before build start, run these commands from shell.
```
export http_proxy=http://localhost:3128
export https_proxy=http://localhost:3128
export HTTP_PROXY=http://localhost:3128
export HTTPS_PROXY=http://localhost:3128
npm config set proxy http://localhost:3128
npm config set https-proxy http://localhost:3128
npm config set registry "http://registry.npmjs.org/"
npm config set strict-ssl false
npm cache clean
git config --global http.proxy http://localhost:3128
git config --global https.proxy http://localhost:3128
git config --global url."http://".insteadOf git://
```
After build is complete, run these commands to cleanup.
```
npm config rm proxy
npm config rm https-proxy
git config --global --unset http.proxy
git config --global --unset https.proxy
git config --global --unset url."http://".insteadOf
```
_Notes:_
- If you are on Windows replace `export` with `set` to set env variables
- Replace `localhost:3128` with standard pattern `http://user:pwd@host:port`
- Git configuration is needed because Bower uses it for fetching from GitHub
#### Install maven
```
wget http://www.eu.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
sudo tar -zxf apache-maven-3.3.3-bin.tar.gz -C /usr/local/
sudo ln -s /usr/local/apache-maven-3.3.3/bin/mvn /usr/local/bin/mvn
```
_Notes:_
_Notes:_
- Ensure node is installed by running `node --version`
- Ensure maven is running version 3.1.x or higher with `mvn -version`
- Configure maven to use more memory than usual by ```export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"```
- Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"`
### Build
If you want to build Zeppelin from the source, please first clone this repository, then:
@ -61,7 +97,7 @@ Each Interpreter requires different Options.
To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options:
##### -Pspark-[version]
##### `-Pspark-[version]`
Set spark major version
@ -84,7 +120,7 @@ Available profiles are
minor version can be adjusted by `-Dspark.version=x.x.x`
##### -Phadoop-[version]
##### `-Phadoop-[version]`
set hadoop major version
@ -101,25 +137,32 @@ Available profiles are
minor version can be adjusted by `-Dhadoop.version=x.x.x`
##### -Pyarn (optional)
##### `-Pyarn` (optional)
enable YARN support for local mode
> YARN for local mode is not supported for Spark v1.5.0 or higher. Set SPARK_HOME instead.
> YARN for local mode is not supported for Spark v1.5.0 or higher. Set `SPARK_HOME` instead.
##### -Ppyspark (optional)
##### `-Ppyspark` (optional)
enable PySpark support for local mode
enable [PySpark](http://spark.apache.org/docs/latest/api/python/) support for local mode.
##### `-Pr` (optional)
##### -Pvendor-repo (optional)
enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
##### `-Psparkr` (optional)
another [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration as well as local mode support.
##### `-Pvendor-repo` (optional)
enable 3rd party vendor repository (cloudera)
##### -Pmapr[version] (optional)
##### `-Pmapr[version]` (optional)
For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions
of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (-Pspark-1.2, -Pspark-1.3, etc.) as needed. For Hive, check the hive/pom.xml and adjust the version installed as well. The correct Maven
For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.2`, `-Pspark-1.3`, etc.) as needed.
For Hive, check the hive/pom.xml and adjust the version installed as well. The correct Maven
artifacts can be found for every version of MapR at http://doc.mapr.com
Available profiles are
@ -129,12 +172,13 @@ Available profiles are
-Pmapr40
-Pmapr41
-Pmapr50
-Pmapr51
```
Here're some examples:
```
```sh
# basic build
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark
@ -151,13 +195,13 @@ mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
#### Ignite Interpreter
```
```sh
mvn clean package -Dignite.version=1.1.0-incubating -DskipTests
```
#### Scalding Interpreter
```
```sh
mvn clean package -Pscalding -DskipTests
```
@ -168,67 +212,80 @@ If you wish to configure Zeppelin option (like port number), configure the follo
./conf/zeppelin-env.sh
./conf/zeppelin-site.xml
```
(You can copy ```./conf/zeppelin-env.sh.template``` into ```./conf/zeppelin-env.sh```.
Same for ```zeppelin-site.xml```.)
(You can copy `./conf/zeppelin-env.sh.template` into `./conf/zeppelin-env.sh`.
Same for `zeppelin-site.xml`.)
#### Setting SPARK_HOME and HADOOP_HOME
Without SPARK_HOME and HADOOP_HOME, Zeppelin uses embedded Spark and Hadoop binaries that you have specified with mvn build option.
If you want to use system provided Spark and Hadoop, export SPARK_HOME and HADOOP_HOME in zeppelin-env.sh
Without `SPARK_HOME` and `HADOOP_HOME`, Zeppelin uses embedded Spark and Hadoop binaries that you have specified with mvn build option.
If you want to use system provided Spark and Hadoop, export `SPARK_HOME` and `HADOOP_HOME` in `zeppelin-env.sh`.
You can use any supported version of spark without rebuilding Zeppelin.
```
```sh
# ./conf/zeppelin-env.sh
export SPARK_HOME=...
export HADOOP_HOME=...
```
#### External cluster configuration
Mesos
# ./conf/zeppelin-env.sh
export MASTER=mesos://...
export ZEPPELIN_JAVA_OPTS="-Dspark.executor.uri=/path/to/spark-*.tgz" or SPARK_HOME="/path/to/spark_home"
export MESOS_NATIVE_LIBRARY=/path/to/libmesos.so
```sh
# ./conf/zeppelin-env.sh
export MASTER=mesos://...
export ZEPPELIN_JAVA_OPTS="-Dspark.executor.uri=/path/to/spark-*.tgz" or SPARK_HOME="/path/to/spark_home"
export MESOS_NATIVE_LIBRARY=/path/to/libmesos.so
```
If you set `SPARK_HOME`, you should deploy spark binary on the same location to all worker nodes. And if you set `spark.executor.uri`, every worker can read that file on its node.
Yarn
# ./conf/zeppelin-env.sh
export SPARK_HOME=/path/to/spark_dir
```sh
# ./conf/zeppelin-env.sh
export SPARK_HOME=/path/to/spark_dir
```
### Run
./bin/zeppelin-daemon.sh start
browse localhost:8080 in your browser.
```sh
./bin/zeppelin-daemon.sh start
```
And browse [localhost:8080](http://localhost:8080) in your browser.
For configuration details check __./conf__ subdirectory.
For configuration details check __`./conf`__ subdirectory.
### Package
To package the final distribution including the compressed archive, run:
mvn clean package -Pbuild-distr
```sh
mvn clean package -Pbuild-distr
```
To build a distribution with specific profiles, run:
mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark
```sh
mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark
```
The profiles `-Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark` can be adjusted if you wish to build to a specific spark versions, or omit support such as `yarn`.
The archive is generated under _zeppelin-distribution/target_ directory
The archive is generated under _`zeppelin-distribution/target`_ directory
###Run end-to-end tests
Zeppelin comes with a set of end-to-end acceptance tests driving headless selenium browser
#assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override)
mvn verify
#or take care of starting\stoping zeppelin-server from packaged _zeppelin-distribuion/target_
mvn verify -P using-packaged-distr
```sh
# assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override)
mvn verify
# or take care of starting/stopping zeppelin-server from packaged zeppelin-distribution/target
mvn verify -P using-packaged-distr
```
[![Analytics](https://ga-beacon.appspot.com/UA-45176241-4/apache/incubator-zeppelin/README.md?pixel)](https://github.com/igrigorik/ga-beacon)

112
bin/common.cmd Normal file
View file

@ -0,0 +1,112 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Shared environment setup. Every variable below is only given a default
REM when it is not already defined, so values set by the caller win.

REM ZEPPELIN_HOME defaults to the parent of the directory containing this script.
if not defined ZEPPELIN_HOME (
for %%d in ("%~dp0..") do (
set ZEPPELIN_HOME=%%~fd
)
)
REM Conf, log, notebook and pid directories default to subdirectories of ZEPPELIN_HOME.
if not defined ZEPPELIN_CONF_DIR (
set ZEPPELIN_CONF_DIR=%ZEPPELIN_HOME%\conf
)
if not defined ZEPPELIN_LOG_DIR (
set ZEPPELIN_LOG_DIR=%ZEPPELIN_HOME%\logs
)
if not defined ZEPPELIN_NOTEBOOK_DIR (
set ZEPPELIN_NOTEBOOK_DIR=%ZEPPELIN_HOME%\notebook
)
if not defined ZEPPELIN_PID_DIR (
set ZEPPELIN_PID_DIR=%ZEPPELIN_HOME%\run
)
REM ZEPPELIN_WAR: use the zeppelin-web\dist directory when it exists; otherwise
REM take a zeppelin-web*.war file found directly under ZEPPELIN_HOME (if several
REM match, the last one enumerated by the loop is kept).
if not defined ZEPPELIN_WAR (
if exist "%ZEPPELIN_HOME%\zeppelin-web\dist" (
set ZEPPELIN_WAR=%ZEPPELIN_HOME%\zeppelin-web\dist
) else (
for %%d in ("%ZEPPELIN_HOME%\zeppelin-web*.war") do (
set ZEPPELIN_WAR=%%d
)
)
)
if not defined ZEPPELIN_INTERPRETER_DIR (
set ZEPPELIN_INTERPRETER_DIR=%ZEPPELIN_HOME%\interpreter
)
REM Run the optional user environment script. It is called after the directory
REM defaults above and before the classpath/JVM defaults below.
if exist "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd" (
call "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd"
)
REM Append the (quoted) conf directory to ZEPPELIN_CLASSPATH.
if not defined ZEPPELIN_CLASSPATH (
set ZEPPELIN_CLASSPATH="%ZEPPELIN_CONF_DIR%"
) else (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_CONF_DIR%"
)
if not defined ZEPPELIN_ENCODING (
set ZEPPELIN_ENCODING=UTF-8
)
if not defined ZEPPELIN_MEM (
set ZEPPELIN_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m
)
REM ZEPPELIN_JAVA_OPTS itself is extended with the encoding and memory flags
REM (it is modified in place, not just copied into JAVA_OPTS).
if not defined ZEPPELIN_JAVA_OPTS (
set ZEPPELIN_JAVA_OPTS=-Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM%
) else (
set ZEPPELIN_JAVA_OPTS=%ZEPPELIN_JAVA_OPTS% -Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM%
)
REM JAVA_OPTS: any existing value is kept and the Zeppelin options are appended.
if not defined JAVA_OPTS (
set JAVA_OPTS=%ZEPPELIN_JAVA_OPTS%
) else (
set JAVA_OPTS=%JAVA_OPTS% %ZEPPELIN_JAVA_OPTS%
)
REM Interpreter-process JVM options default to the server ones.
REM NOTE(review): in that default case ZEPPELIN_JAVA_OPTS already carries
REM -Dfile.encoding, so JAVA_INTP_OPTS below ends up with the flag twice.
if not defined ZEPPELIN_INTP_JAVA_OPTS (
set ZEPPELIN_INTP_JAVA_OPTS=%ZEPPELIN_JAVA_OPTS%
)
if not defined ZEPPELIN_INTP_MEM (
set ZEPPELIN_INTP_MEM=%ZEPPELIN_MEM%
)
set JAVA_INTP_OPTS=%ZEPPELIN_INTP_JAVA_OPTS% -Dfile.encoding=%ZEPPELIN_ENCODING%
REM Use the java found on PATH unless JAVA_HOME points at a specific installation.
if not defined JAVA_HOME (
set ZEPPELIN_RUNNER=java
) else (
set ZEPPELIN_RUNNER=%JAVA_HOME%\bin\java
)
REM Identity string used in log file names by the launcher scripts.
if not defined ZEPPELIN_IDENT_STRING (
set ZEPPELIN_IDENT_STRING=%USERNAME%
)
if not defined DEBUG (
set DEBUG=0
)
REM Relative path of the script used to launch remote interpreter processes.
if not defined ZEPPELIN_INTERPRETER_REMOTE_RUNNER (
set ZEPPELIN_INTERPRETER_REMOTE_RUNNER=bin\interpreter.cmd
)
exit /b

View file

@ -81,6 +81,18 @@ function addJarInDir(){
fi
}
ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils
function getZeppelinVersion(){
if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes"
fi
addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib"
CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
$ZEPPELIN_RUNNER -cp $CLASSPATH $ZEPPELIN_COMMANDLINE_MAIN -v
exit 0
}
# Text encoding for
# read/write job into files,
# receiving/displaying query/result.
@ -93,6 +105,7 @@ if [[ -z "$ZEPPELIN_MEM" ]]; then
fi
JAVA_OPTS+=" ${ZEPPELIN_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING} ${ZEPPELIN_MEM}"
JAVA_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties"
export JAVA_OPTS
# jvm options for interpreter process
@ -104,7 +117,8 @@ if [[ -z "${ZEPPELIN_INTP_MEM}" ]]; then
export ZEPPELIN_INTP_MEM="${ZEPPELIN_MEM}"
fi
JAVA_INTP_OPTS+=" ${ZEPPELIN_INTP_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING}"
JAVA_INTP_OPTS="${ZEPPELIN_INTP_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING}"
JAVA_INTP_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties"
export JAVA_INTP_OPTS

38
bin/functions.cmd Normal file
View file

@ -0,0 +1,38 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Subroutine dispatcher: the first argument is the label to jump to and the
REM second argument is the directory that subroutine operates on.
REM With no arguments the script does nothing.
if not "%1"=="" goto %1
exit /b
REM ADDEACHJARINDIR <dir>
REM Prepends every *.jar located directly in <dir> to ZEPPELIN_CLASSPATH, one
REM quoted entry per jar. Relies on !VAR! expansion; this script does not
REM enable delayed expansion itself, so the caller must have done so.
:ADDEACHJARINDIR
for %%d in ("%~2\*.jar") do (
set ZEPPELIN_CLASSPATH="%%d";!ZEPPELIN_CLASSPATH!
)
exit /b
REM ADDEACHJARINDIRRECURSIVE <dir>
REM Same as ADDEACHJARINDIR, but walks <dir> and all of its subdirectories.
:ADDEACHJARINDIRRECURSIVE
for /r "%~2" %%d in (*.jar) do (
set ZEPPELIN_CLASSPATH="%%d";!ZEPPELIN_CLASSPATH!
)
exit /b
REM ADDJARINDIR <dir>
REM If <dir> exists, prepends the single wildcard entry "<dir>\*" to
REM ZEPPELIN_CLASSPATH instead of listing jars individually.
:ADDJARINDIR
if exist "%~2" (
set ZEPPELIN_CLASSPATH="%~2\*";%ZEPPELIN_CLASSPATH%
)
exit /b

136
bin/interpreter.cmd Normal file
View file

@ -0,0 +1,136 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Launches a remote interpreter process.
REM Options: -p <port>  -d <interpreter dir>  -l <local interpreter repo>  -h (usage)
setlocal enableextensions enabledelayedexpansion
REM Directory containing this script (%~dp0 ends with a backslash).
set bin=%~dp0
REM Argument loop: inspects %1 and shifts by one each pass, so option values
REM are themselves examined as potential flags on the following pass.
:loop
if "%~1"=="" goto cont
if /I "%~1"=="-h" goto usage
REM -d sets both the interpreter directory and its id (the directory's base name).
if /I "%~1"=="-d" (
set INTERPRETER_DIR=%~2
set INTERPRETER_ID=%~n2
)
if /I "%~1"=="-p" set PORT=%~2
if /I "%~1"=="-l" set LOCAL_INTERPRETER_REPO=%~2
shift
goto loop
:cont
REM Port and interpreter directory are mandatory.
if "%PORT%"=="" goto usage
if "%INTERPRETER_DIR%"=="" goto usage
call "%bin%\common.cmd"
REM Use compiled classes from a source checkout when present; otherwise fall
REM back to the zeppelin-interpreter jar under lib. The jar name is set inside
REM the block, hence the !VAR! (delayed) expansion.
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes"
) else (
for %%d in ("%ZEPPELIN_HOME%\lib\zeppelin-interpreter*.jar") do (
set ZEPPELIN_INTERPRETER_JAR=%%d
)
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"!ZEPPELIN_INTERPRETER_JAR!"
)
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%"
set HOSTNAME=%COMPUTERNAME%
set ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
REM One log file per interpreter id, user identity and host.
set ZEPPELIN_LOGFILE=%ZEPPELIN_LOG_DIR%\zeppelin-interpreter-%INTERPRETER_ID%-%ZEPPELIN_IDENT_STRING%-%HOSTNAME%.log
if not exist "%ZEPPELIN_LOG_DIR%" (
echo Log dir doesn't exist, create %ZEPPELIN_LOG_DIR%
mkdir "%ZEPPELIN_LOG_DIR%"
)
REM Spark-specific environment.
REM   SPARK_HOME set:   launch through spark-submit with the zeppelin-spark jar
REM                     as the application jar, and point PYTHONPATH at the
REM                     Spark installation's python sources and py4j.
REM   SPARK_HOME unset: run the embedded Spark; add the Hadoop classpath, the
REM                     interpreter's dep jars, the bundled pyspark/py4j
REM                     archives and the Hadoop conf directory.
REM
REM The whole if/else is one parenthesized block, so %VAR% references are
REM expanded once, when the block is parsed. Any variable assigned inside the
REM block and read later in the same block must therefore use !VAR! (delayed
REM expansion). This applies to ZEPPELIN_CLASSPATH (modified here and by
REM functions.cmd) and to HADOOP_CONF_DIR (possibly derived here).
if /I "%INTERPRETER_ID%"=="spark" (
    if defined SPARK_HOME (
        set SPARK_SUBMIT=%SPARK_HOME%\bin\spark-submit.cmd
        for %%d in ("%ZEPPELIN_HOME%\interpreter\spark\zeppelin-spark*.jar") do (
            set SPARK_APP_JAR=%%d
        )
        REM The classpath is replaced (not extended) by the application jar.
        set ZEPPELIN_CLASSPATH="!SPARK_APP_JAR!"

        for %%d in ("%SPARK_HOME%\python\lib\py4j-*-src.zip") do (
            set py4j=%%d
        )

        if not defined PYTHONPATH (
            set PYTHONPATH=!py4j!;%SPARK_HOME%\python
        ) else (
            set PYTHONPATH=!py4j!;%SPARK_HOME%\python;%PYTHONPATH%
        )
    ) else (
        if defined HADOOP_HOME if exist "%HADOOP_HOME%\bin\hadoop.cmd" (
            for /f "tokens=*" %%d in ('"%HADOOP_HOME%\bin\hadoop.cmd" classpath') do (
                set LOCAL_HADOOP_CLASSPATH=%%d
            )
            set ZEPPELIN_CLASSPATH=!LOCAL_HADOOP_CLASSPATH!;!ZEPPELIN_CLASSPATH!
        )

        call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%\dep"

        for %%d in ("%ZEPPELIN_HOME%\interpreter\spark\pyspark\py4j-*-src.zip") do (
            set py4j=%%d
        )

        set PYSPARKPATH=%ZEPPELIN_HOME%\interpreter\spark\pyspark\pyspark.zip;!py4j!

        if not defined PYTHONPATH (
            set PYTHONPATH=!PYSPARKPATH!
        ) else (
            set PYTHONPATH=%PYTHONPATH%;!PYSPARKPATH!
        )

        set PYSPARKPATH=

        if defined HADOOP_HOME if not defined HADOOP_CONF_DIR (
            if exist "%HADOOP_HOME%\etc\hadoop" (
                set HADOOP_CONF_DIR=%HADOOP_HOME%\etc\hadoop
            )
        )

        REM Delayed expansion is required on both variables: with %VAR% the
        REM HADOOP_CONF_DIR derived just above was never seen, and the
        REM classpath reverted to its value from before this block, dropping
        REM the Hadoop classpath and dep jars added earlier.
        if exist "!HADOOP_CONF_DIR!" (
            set ZEPPELIN_CLASSPATH=!ZEPPELIN_CLASSPATH!;"!HADOOP_CONF_DIR!"
        )
    )
)
REM Add the local interpreter repository (-l); a no-op when the directory does not exist.
call "%bin%\functions.cmd" ADDJARINDIR "%LOCAL_INTERPRETER_REPO%"
REM Final classpath: overrides, when defined, are placed in front.
if not defined ZEPPELIN_CLASSPATH_OVERRIDES (
set CLASSPATH=%ZEPPELIN_CLASSPATH%
) else (
set CLASSPATH=%ZEPPELIN_CLASSPATH_OVERRIDES%;%ZEPPELIN_CLASSPATH%
)
REM Launch via spark-submit when SPARK_SUBMIT was set, otherwise start the JVM
REM directly. JAVA_INTP_OPTS is extended inside the block, so the extended
REM value is read back with !VAR!.
REM NOTE(review): the direct launch passes ZEPPELIN_CLASSPATH_OVERRIDES again
REM in front of CLASSPATH, which already contains it; when it is undefined the
REM -cp value starts with an empty entry.
REM NOTE(review): --jars receives the ';'-separated CLASSPATH - confirm that
REM spark-submit accepts this form.
if defined SPARK_SUBMIT (
set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file='%ZEPPELIN_LOGFILE%'
"%SPARK_SUBMIT%" --class %ZEPPELIN_SERVER% --jars %CLASSPATH% --driver-java-options "!JAVA_INTP_OPTS!" %SPARK_SUBMIT_OPTIONS% "%SPARK_APP_JAR%" %PORT%
) else (
set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file="%ZEPPELIN_LOGFILE%"
"%ZEPPELIN_RUNNER%" !JAVA_INTP_OPTS! %ZEPPELIN_INTP_MEM% -cp %ZEPPELIN_CLASSPATH_OVERRIDES%;%CLASSPATH% %ZEPPELIN_SERVER% %PORT%
)
exit /b
REM Reached via "goto usage" for -h or when the port or interpreter dir is missing.
:usage
echo Usage: %~n0 -p ^<port^> -d ^<interpreter dir to load^> -l ^<local interpreter repo dir to load^>

View file

@ -19,12 +19,11 @@
bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "${bin}">/dev/null; pwd)
function usage() {
echo "usage) $0 -p <port> -d <interpreter dir to load> -l <local interpreter repo dir to load>"
}
while getopts "hp:d:l:" o; do
while getopts "hp:d:l:v" o; do
case ${o} in
h)
usage
@ -39,6 +38,10 @@ while getopts "hp:d:l:" o; do
l)
LOCAL_INTERPRETER_REPO=${OPTARG}
;;
v)
. "${bin}/common.sh"
getZeppelinVersion
;;
esac
done
@ -82,7 +85,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
@ -129,6 +132,14 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
if [[ -n "${HBASE_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_CONF_DIR}"
elif [[ -n "${HBASE_HOME}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_HOME}/conf"
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
fi
addJarInDir "${LOCAL_INTERPRETER_REPO}"

View file

@ -19,7 +19,9 @@
# description: Start and stop daemon script for.
#
USAGE="Usage: zeppelin-daemon.sh [--config <conf-dir>] {start|stop|upstart|restart|reload|status}"
USAGE="-e Usage: zeppelin-daemon.sh\n\t
[--config <conf-dir>] {start|stop|upstart|restart|reload|status}\n\t
[--version | -v]"
if [[ "$1" == "--config" ]]; then
shift
@ -258,6 +260,9 @@ case "${1}" in
status)
find_zeppelin_process
;;
-v | --version)
getZeppelinVersion
;;
*)
echo ${USAGE}
esac

91
bin/zeppelin.cmd Normal file
View file

@ -0,0 +1,91 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Starts the Zeppelin server in the foreground.
REM Usage: zeppelin.cmd [--config <conf-dir>]
setlocal enableextensions enabledelayedexpansion
REM Directory containing this script (%~dp0 ends with a backslash).
set bin=%~dp0
if not "%1"=="--config" goto MAIN
REM --config <conf-dir>: resolve the directory to a full path and use it as
REM ZEPPELIN_CONF_DIR.
REM NOTE(review): "if not exist" only checks that the path exists, not that it
REM is a directory as the error message says.
:SET_CONFIG
shift
set conf_dir=%~f1
shift
if not exist "%conf_dir%" (
echo ERROR: %conf_dir% is not a directory
echo Usage: %~n0 [--config ^<conf-dir^>]
exit /b 1
) else (
set ZEPPELIN_CONF_DIR=%conf_dir%
)
:MAIN
REM Load the shared defaults (ZEPPELIN_HOME, directories, JVM options, runner).
call "%bin%\common.cmd"
set HOSTNAME=%COMPUTERNAME%
set ZEPPELIN_LOGFILE=%ZEPPELIN_LOG_DIR%\zeppelin-%ZEPPELIN_IDENT_STRING%-%HOSTNAME%.log
set ZEPPELIN_SERVER=org.apache.zeppelin.server.ZeppelinServer
set JAVA_OPTS=%JAVA_OPTS% -Dzeppelin.log.file="%ZEPPELIN_LOGFILE%"
REM When running from a source checkout, put the compiled classes of each
REM module on the classpath.
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes"
)
if exist "%ZEPPELIN_HOME%\zeppelin-zengine\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-zengine\target\classes"
)
if exist "%ZEPPELIN_HOME%\zeppelin-server\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-server\target\classes"
)
REM Add a "<dir>\*" wildcard entry for every jar directory that exists, covering
REM both the packaged layout (home, lib) and the source layout (target\lib).
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-zengine\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-server\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-web\target\lib"
REM Compose the final CLASSPATH:
REM   [ZEPPELIN_CLASSPATH_OVERRIDES;][existing CLASSPATH;]ZEPPELIN_CLASSPATH
REM The two steps are separate statements on purpose: %CLASSPATH% in the
REM second one is expanded after the first has run, so it sees the value
REM computed there. Previously the second step rebuilt CLASSPATH from
REM ZEPPELIN_CLASSPATH alone, which silently discarded any CLASSPATH that was
REM set in the environment and made the first step dead code.
if not defined CLASSPATH (
    set CLASSPATH=%ZEPPELIN_CLASSPATH%
) else (
    set CLASSPATH=%CLASSPATH%;%ZEPPELIN_CLASSPATH%
)

REM Overrides go in front so their classes take precedence.
if defined ZEPPELIN_CLASSPATH_OVERRIDES (
    set CLASSPATH=%ZEPPELIN_CLASSPATH_OVERRIDES%;%CLASSPATH%
)
REM Create the log, pid and notebook directories on first start.
REM The paths are quoted in the existence checks so that locations containing
REM spaces (e.g. under "C:\Program Files") are tested as a single path, the
REM same way interpreter.cmd does it.
if not exist "%ZEPPELIN_LOG_DIR%" (
    echo Log dir doesn't exist, create %ZEPPELIN_LOG_DIR%
    mkdir "%ZEPPELIN_LOG_DIR%"
)

if not exist "%ZEPPELIN_PID_DIR%" (
    echo Pid dir doesn't exist, create %ZEPPELIN_PID_DIR%
    mkdir "%ZEPPELIN_PID_DIR%"
)

if not exist "%ZEPPELIN_NOTEBOOK_DIR%" (
    echo Notebook dir doesn't exist, create %ZEPPELIN_NOTEBOOK_DIR%
    mkdir "%ZEPPELIN_NOTEBOOK_DIR%"
)

REM Start the server in the foreground. %* forwards the script's arguments
REM as they were given; wrapping it in quotes would merge them into a single
REM argument (or pass one empty argument when there are none).
REM Note that %* is not affected by the earlier "shift" calls.
"%ZEPPELIN_RUNNER%" %JAVA_OPTS% -cp %CLASSPATH% %ZEPPELIN_SERVER% %*

View file

@ -39,6 +39,10 @@ bin=$(cd "${bin}">/dev/null; pwd)
. "${bin}/common.sh"
if [ "$1" == "--version" ] || [ "$1" == "-v" ]; then
getZeppelinVersion
fi
HOSTNAME=$(hostname)
ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log"
LOG="${ZEPPELIN_LOG_DIR}/zeppelin-cli-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.out"
@ -83,4 +87,4 @@ if [[ ! -d "${ZEPPELIN_NOTEBOOK_DIR}" ]]; then
$(mkdir -p "${ZEPPELIN_NOTEBOOK_DIR}")
fi
$(exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_SERVER "$@")
exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_SERVER "$@"

View file

@ -302,10 +302,10 @@ public class InterpreterLogicTest {
}
private <A> scala.collection.immutable.List<A> toScalaList(java.util.List<A> list) {
return scala.collection.JavaConversions.asScalaIterable(list).toList();
return scala.collection.JavaConversions.collectionAsScalaIterable(list).toList();
}
private <A> java.util.List<A> toJavaList(scala.collection.immutable.List<A> list){
return scala.collection.JavaConversions.asJavaList(list);
return scala.collection.JavaConversions.seqAsJavaList(list);
}
}

View file

@ -29,11 +29,16 @@ user3 = password4, role2
#ldapRealm.userDnTemplate = cn={0},cn=engg,ou=testdomain,dc=testdomain,dc=com
#ldapRealm.contextFactory.url = ldap://ldaphost:389
#ldapRealm.contextFactory.authenticationMechanism = SIMPLE
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager
securityManager.sessionManager = $sessionManager
# 86,400,000 milliseconds = 24 hours
securityManager.sessionManager.globalSessionTimeout = 86400000
shiro.loginUrl = /api/login
[urls]
# anon means the access is anonymous.
# authcBasic means Basic Auth Security
# To enforce security, comment the line below and uncomment the next one
/api/version = anon
/** = anon
#/** = authcBasic
#/** = authc

View file

@ -0,0 +1,70 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM set JAVA_HOME=
REM set MASTER= REM Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
REM set ZEPPELIN_JAVA_OPTS REM Additional jvm options. for example, set ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
REM set ZEPPELIN_MEM REM Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m
REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options. Default = ZEPPELIN_MEM
REM set ZEPPELIN_INTP_JAVA_OPTS REM zeppelin interpreter process jvm options. Default = ZEPPELIN_JAVA_OPTS
REM set ZEPPELIN_LOG_DIR REM Where log files are stored. PWD by default.
REM set ZEPPELIN_PID_DIR REM The pid files are stored. /tmp by default.
REM set ZEPPELIN_WAR_TEMPDIR REM The location of jetty temporary directory.
REM set ZEPPELIN_NOTEBOOK_DIR REM Where notebook saved
REM set ZEPPELIN_NOTEBOOK_HOMESCREEN REM Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
REM set ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE REM hide homescreen notebook from list when this value set to "true". default "false"
REM set ZEPPELIN_NOTEBOOK_S3_BUCKET REM Bucket where notebook saved
REM set ZEPPELIN_NOTEBOOK_S3_USER REM User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. $USER by default.
REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0.
REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading
REM set ZEPPELIN_NOTEBOOK_STORAGE REM Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
REM Spark interpreter configuration
REM Use provided spark installation
REM defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
REM
REM set SPARK_HOME REM (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
REM set SPARK_SUBMIT_OPTIONS REM (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
REM set SPARK_APP_NAME REM (optional) The name of spark application.
REM Use embedded spark binaries
REM without SPARK_HOME defined, Zeppelin is still able to run spark interpreter process using embedded spark binaries.
REM however, it is not encouraged when you can define SPARK_HOME
REM
REM Options read in YARN client mode
REM set HADOOP_CONF_DIR REM yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
REM Pyspark (supported with Spark 1.2.1 and above)
REM To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
REM set PYSPARK_PYTHON REM path to the python command. must be the same path on the driver(Zeppelin) and all workers.
REM set PYTHONPATH
REM Spark interpreter options
REM
REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext if set true. true by default.
REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of SparkSQL result to display. 1000 by default.
REM ZeppelinHub connection configuration
REM
REM set ZEPPELINHUB_API_ADDRESS REM Refers to the address of the ZeppelinHub service in use
REM set ZEPPELINHUB_API_TOKEN REM Refers to the Zeppelin instance token of the user
REM set ZEPPELINHUB_USER_KEY REM Optional, when using Zeppelin with authentication.

View file

@ -24,17 +24,18 @@
# export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. Default = ZEPPELIN_JAVA_OPTS
# export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default.
# export ZEPPELIN_PID_DIR # The pid files are stored. /tmp by default.
# export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default.
# export ZEPPELIN_WAR_TEMPDIR # The location of jetty temporary directory.
# export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved
# export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
# export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
#### Spark interpreter configuration ####
@ -61,4 +62,17 @@
# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
# export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of SparkSQL result to display. 1000 by default.
# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
#### HBase interpreter configuration ####
## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
# export HBASE_HOME= # (required) Directory under which HBase scripts and configuration are located
# export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
#### ZeppelinHub connection configuration ####
# export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use
# export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user
# export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication.

View file

@ -62,7 +62,8 @@
</property>
<!-- If used S3 to storage the notebooks, it is necessary the following folder structure bucketname/username/notebook/ -->
<!-- Amazon S3 notebook storage -->
<!-- Creates the following directory structure: s3://{bucket}/{username}/{notebook-id}/note.json -->
<!--
<property>
<name>zeppelin.notebook.s3.user</name>
@ -76,6 +77,12 @@
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.s3.endpoint</name>
<value>s3.amazonaws.com</value>
<description>endpoint for s3 bucket</description>
</property>
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.S3NotebookRepo</value>
@ -83,12 +90,36 @@
</property>
-->
<!-- Additionally, encryption is supported for notebook data stored in S3 -->
<!-- Use the AWS KMS to encrypt data -->
<!-- If used, the EC2 role assigned to the EMR cluster must have rights to use the given key -->
<!-- See https://aws.amazon.com/kms/ and http://docs.aws.amazon.com/kms/latest/developerguide/concepts.html -->
<!--
<property>
<name>zeppelin.notebook.s3.kmsKeyID</name>
<value>AWS-KMS-Key-UUID</value>
<description>AWS KMS key ID used to encrypt notebook data in S3</description>
</property>
-->
<!-- Use a custom encryption materials provider to encrypt data -->
<!-- No configuration is given to the provider, so you must use system properties or another means to configure -->
<!-- See https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html -->
<!--
<property>
<name>zeppelin.notebook.s3.encryptionMaterialsProvider</name>
<value>provider implementation class name</value>
<description>Custom encryption materials provider used to encrypt notebook data in S3</description>
</property>
-->
<!-- If using Azure for storage use the following settings -->
<!--
<property>
<name>zeppelin.notebook.azure.user</name>
<value>user</value>
<description>optional user name for Azure folder structure</description>
<name>zeppelin.notebook.azure.connectionString</name>
<value>DefaultEndpointsProtocol=https;AccountName=<accountName>;AccountKey=<accountKey></value>
<description>Azure account credentials</description>
</property>
<property>
@ -98,9 +129,9 @@
</property>
<property>
<name>zeppelin.notebook.azure.connectionString</name>
<value>DefaultEndpointsProtocol=https;AccountName=<accountName>;AccountKey=<accountKey></value>
<description>share name for notebook storage</description>
<name>zeppelin.notebook.azure.user</name>
<value>user</value>
<description>optional user name for Azure folder structure</description>
</property>
<property>
@ -110,7 +141,7 @@
</property>
-->
<!-- For versioning your local norebook storage using Git repository
<!-- For versioning your local notebook storage using Git repository
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
@ -118,6 +149,15 @@
</property>
-->
<!-- For connecting your Zeppelin with ZeppelinHub -->
<!--
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo</value>
<description>two notebook persistence layers (local + ZeppelinHub)</description>
</property>
-->
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
@ -138,7 +178,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter</value>
<description>Comma separated interpreter configurations. The first interpreter becomes the default</description>
</property>
@ -219,5 +259,11 @@
<description>Anonymous user allowed by default</description>
</property>
<property>
<name>zeppelin.websocket.max.text.message.size</name>
<value>1024000</value>
<description>Size in characters of the maximum text message to be received by websocket. Defaults to 1024000</description>
</property>
</configuration>

View file

@ -48,7 +48,7 @@ JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "moon")
# ASF JIRA password
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "00000")
GITHUB_BASE = "https://github.com/apache/incubator-zeppelin/pulls"
GITHUB_BASE = "https://github.com/apache/incubator-zeppelin/pull"
GITHUB_API_BASE = "https://api.github.com/repos/apache/incubator-zeppelin"
JIRA_BASE = "https://issues.apache.org/jira/browse"
JIRA_API_BASE = "https://issues.apache.org/jira"

View file

@ -1,6 +1,6 @@
# This is the default format.
# This is the default format.
# For more see: http://jekyllrb.com/docs/permalinks/
permalink: /:categories/:year/:month/:day/:title
permalink: /:categories/:year/:month/:day/:title
exclude: [".rvmrc", ".rbenv-version", "README.md", "Rakefile", "changelog.md", "vendor", "node_modules", "scss"]
pygments: true
@ -9,7 +9,7 @@ redcarpet:
extensions: ["tables"]
encoding: utf-8
# Themes are encouraged to use these universal variables
# Themes are encouraged to use these universal variables
# so be sure to set them if your theme uses them.
#
title : Apache Zeppelin (incubating)
@ -24,7 +24,7 @@ author :
ZEPPELIN_VERSION : 0.6.0-incubating-SNAPSHOT
# The production_url is only used when full-domain names are needed
# such as sitemap.txt
# such as sitemap.txt
# Most places will/should use BASE_PATH to make the urls
#
# If you have set a CNAME (pages.github.com) set your custom domain here.
@ -42,11 +42,11 @@ JB :
# however this value will be dynamically changed depending on your deployment situation.
#
# CNAME (http://yourcustomdomain.com)
# DO NOT SET BASE_PATH
# DO NOT SET BASE_PATH
# (urls will be prefixed with "/" and work relatively)
#
# GitHub Pages (http://username.github.io)
# DO NOT SET BASE_PATH
# DO NOT SET BASE_PATH
# (urls will be prefixed with "/" and work relatively)
#
# GitHub Project Pages (http://username.github.io/project-name)
@ -65,7 +65,7 @@ JB :
# ex: [BASE_PATH]/assets/themes/[THEME-NAME]
#
# Override this by defining an absolute path to assets here.
# ex:
# ex:
# http://s3.amazonaws.com/yoursite/themes/watermelon
# /assets
#
@ -97,42 +97,41 @@ JB :
num_posts: 5
width: 580
colorscheme: light
# Settings for analytics helper
# Set 'provider' to the analytics provider you want to use.
# Set 'provider' to false to turn analytics off globally.
#
#
analytics :
provider : google_universal
google_classic :
google_classic :
tracking_id : 'UA-45176241-2'
google_universal :
google_universal :
tracking_id : 'UA-45176241-5'
domain : 'zeppelin.incubator.apache.org'
getclicky :
site_id :
site_id :
mixpanel :
token : '_MIXPANEL_TOKEN_'
piwik :
baseURL : 'myserver.tld/piwik' # Piwik installation address (without protocol)
idsite : '1' # the id of the site on Piwik
# Settings for sharing helper.
# Settings for sharing helper.
# Sharing is for things like tweet, plusone, like, reddit buttons etc.
# Set 'provider' to the sharing provider you want to use.
# Set 'provider' to false to turn sharing off globally.
#
sharing :
provider : false
# Settings for all other include helpers can be defined by creating
# Settings for all other include helpers can be defined by creating
# a hash with key named for the given helper. ex:
#
# pages_list :
# provider : "custom"
# provider : "custom"
#
# Setting any helper's provider to 'custom' will bypass the helper code
# and include your custom code. Your custom file must be defined at:
# ./_includes/custom/[HELPER]
# where [HELPER] is the name of the helper you are overriding.

View file

@ -31,6 +31,8 @@
<!-- li><span><b>Tutorial</b><span></li -->
<li><a href="{{BASE_PATH}}/tutorial/tutorial.html">Tutorial</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/ui_layout/zeppelin_layout.html">UI Layout</a></li>
<li role="separator" class="divider"></li>
<!-- li><span><b>Guide</b><span></li -->
<li><a href="{{BASE_PATH}}/manual/dynamicform.html">Dynamic Form</a></li>
<li><a href="{{BASE_PATH}}/manual/publish.html">Publish your Paragraph</a></li>
@ -41,21 +43,24 @@
<ul class="dropdown-menu">
<li><a href="{{BASE_PATH}}/manual/interpreters.html">Overview</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
<li><a href="{{BASE_PATH}}/interpreter/geode.html">Geode</a></li>
<li><a href="{{BASE_PATH}}/interpreter/hbase.html">HBase</a></li>
<li><a href="{{BASE_PATH}}/interpreter/hdfs.html">HDFS</a></li>
<li><a href="{{BASE_PATH}}/interpreter/hive.html">Hive</a></li>
<li><a href="{{BASE_PATH}}/interpreter/ignite.html">Ignite</a></li>
<li><a href="{{BASE_PATH}}/interpreter/jdbc.html">JDBC</a></li>
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, hawq</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
<li><a href="{{BASE_PATH}}/interpreter/scalding.html">Scalding</a></li>
<li><a href="{{BASE_PATH}}/pleasecontribute.html">Shell</a></li>
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/pleasecontribute.html">Tajo</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/manual/dynamicinterpreterload.html">Dynamic Interpreter Loading</a></li>
@ -68,7 +73,8 @@
<li><a href="{{BASE_PATH}}/displaysystem/display.html">Text</a></li>
<li><a href="{{BASE_PATH}}/displaysystem/display.html#html">Html</a></li>
<li><a href="{{BASE_PATH}}/displaysystem/table.html">Table</a></li>
<li><a href="{{BASE_PATH}}/displaysystem/angular.html">Angular</a></li>
<li><a href="{{BASE_PATH}}/displaysystem/back-end-angular.html">Angular (backend API)</a></li>
<li><a href="{{BASE_PATH}}/displaysystem/front-end-angular.html">Angular (frontend API)</a></li>
</ul>
</li>
<li>
@ -80,6 +86,8 @@
<!-- li><span><b>Notebook Storage</b><span></li -->
<li><a href="{{BASE_PATH}}/storage/storage.html#Git">Git Storage</a></li>
<li><a href="{{BASE_PATH}}/storage/storage.html#S3">S3 Storage</a></li>
<li><a href="{{BASE_PATH}}/storage/storage.html#Azure">Azure Storage</a></li>
<li><a href="{{BASE_PATH}}/storage/storage.html#ZeppelinHub">ZeppelinHub Storage</a></li>
<li role="separator" class="divider"></li>
<!-- li><span><b>REST API</b><span></li -->
<li><a href="{{BASE_PATH}}/rest-api/rest-interpreter.html">Interpreter API</a></li>
@ -88,7 +96,8 @@
<li role="separator" class="divider"></li>
<!-- li><span><b>Security</b><span></li -->
<li><a href="{{BASE_PATH}}/security/overview.html">Security Overview</a></li>
<li><a href="{{BASE_PATH}}/security/authentication.html">Authentication</a></li>
<li><a href="{{BASE_PATH}}/security/authentication.html">Authentication for NGINX</a></li>
<li><a href="{{BASE_PATH}}/security/shiroauthentication.html">Shiro Authentication</a></li>
<li><a href="{{BASE_PATH}}/security/notebook_authorization.html">Notebook Authorization</a></li>
<li><a href="{{BASE_PATH}}/security/interpreter_authorization.html">Interpreter Authorization</a></li>
<li role="separator" class="divider"></li>
@ -96,12 +105,9 @@
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
<li><a href="{{BASE_PATH}}/development/howtocontribute.html">How to contribute (code)</a></li>
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
<li role="separator" class="divider"></li>
<!-- li><span><b>Shiro Security</b><span></li -->
<li><a href="{{BASE_PATH}}/manual/shiroauthentication.html">Shiro Authentication</a></li>
</ul>
</li>
</ul>
</nav><!--/.navbar-collapse -->
</div>
</div>
</div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

After

Width:  |  Height:  |  Size: 245 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 343 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 589 KiB

View file

@ -99,9 +99,7 @@ function viewSolution() {
// A script to fix internal hash links because we have an overlapping top bar.
// Based on https://github.com/twitter/bootstrap/issues/193#issuecomment-2281510
function maybeScrollToHash() {
console.log("HERE");
if (window.location.hash && $(window.location.hash).length) {
console.log("HERE2", $(window.location.hash), $(window.location.hash).offset().top);
var newTop = $(window.location.hash).offset().top - 57;
$(window).scrollTop(newTop);
}
@ -117,5 +115,5 @@ $(function() {
// Scroll now too in case we had opened the page on a hash, but wait a bit because some browsers
// will try to do *their* initial scroll after running the onReady handler.
$(window).load(function() { setTimeout(function() { maybeScrollToHash(); }, 25); });
});
$(window).load(function() { setTimeout(function() { maybeScrollToHash(); }, 25); });
});

View file

@ -4,7 +4,7 @@ title : Atom Feed
---
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>{{ site.title }}</title>
<link href="{{ site.production_url }}/{{ site.atom_path }}" rel="self"/>
<link href="{{ site.production_url }}"/>
@ -24,5 +24,5 @@ title : Atom Feed
<content type="html">{{ post.content | xml_escape }}</content>
</entry>
{% endfor %}
</feed>
</feed>

View file

@ -12,20 +12,20 @@ Apache Zeppelin (incubating) is an [Apache2 License](http://www.apache.org/licen
Any contributions to Zeppelin (Source code, Documents, Image, Website) means you agree with license all your contributions as Apache2 License.
## Setting up
Here are some tools you will need to build and test Zeppelin.
Here are some tools you will need to build and test Zeppelin.
#### Software Configuration Management ( SCM )
Since Zeppelin uses Git for it's SCM system, you need git client installed in your development machine.
Since Zeppelin uses Git for its SCM system, you need a git client installed on your development machine.
#### Integrated Development Environment ( IDE )
You are free to use whatever IDE you prefer, or your favorite command line editor.
You are free to use whatever IDE you prefer, or your favorite command line editor.
### Build Tools
To build the code, install
* Oracle Java 7
* Apache Maven

View file

@ -22,7 +22,7 @@ limitations under the License.
### What is Zeppelin Interpreter
Zeppelin Interpreter is a language backend. For example to use scala code in Zeppelin, you need scala interpreter.
Every Interpreter belongs to an InterpreterGroup.
Every Interpreter belongs to an InterpreterGroup.
Interpreters in the same InterpreterGroup can reference each other. For example, SparkSqlInterpreter can reference SparkInterpreter to get SparkContext from it while they're in the same group.
<img class="img-responsive" style="width:50%; border: 1px solid #ecf0f1;" height="auto" src="/assets/themes/zeppelin/img/interpreter.png" />

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Angular Display System"
description: ""
title: "Angular (backend API)"
description: "Angular (backend API)"
group: display
---
<!--
@ -20,7 +20,7 @@ limitations under the License.
{% include JB/setup %}
## Angular Display System in Zeppelin
## Back-end Angular API in Zeppelin
Angular display system treats output as a view template for [AngularJS](https://angularjs.org/).
It compiles templates and displays them inside of Zeppelin.
@ -92,12 +92,12 @@ When the button is clicked, you'll see both `run` and `numWatched` are increment
<img src="/assets/themes/zeppelin/img/screenshots/display_angular3.png" width="60%" />
## Let's make it Simpler and more Intuitive
In this section, we will introduce a simpler and more intuitive way of using **Angular Display System** in Zeppelin.
In this section, we will introduce a simpler and more intuitive way of using **Angular Display System** in Zeppelin.
### How can we use it?
Here are some usages.
Here are some usages.
#### Import
#### Import
##### - In notebook scope
```scala
@ -141,11 +141,11 @@ import AngularElem._
<div></div>.model("myModel").display
// bind model with initial value
<div></div>.model("myModel", initialValue).display
<div></div>.model("myModel", initialValue).display
```
#### Interact with Model
```scala
```scala
// read model
AngularModel("myModel")()
@ -155,7 +155,7 @@ AngularModel("myModel", "newValue")
<br/>
### Example: Basic Usage
Using the above basic usages, you can apply them like below examples.
Using the above basic usages, you can apply them like below examples.
#### Display Elements
@ -195,7 +195,7 @@ AngularModel("myModel", "New value")
### Example: String Converter
Using below example, you can convert the lowercase string to uppercase.
{% raw %}
```scala
// clear previously created angular object.
@ -215,5 +215,3 @@ val button = <div class="btn btn-success btn-sm">Convert</div>.onClick{() =>
{% endraw %}
<img src="../assets/themes/zeppelin/img/docs-img/string-converter-angular.gif" width="70%">

View file

@ -0,0 +1,159 @@
---
layout: page
title: "Angular (frontend API)"
description: "Angular (frontend API)"
group: display
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
## Front-end Angular API in Zeppelin
In addition to the back-end API to handle Angular objects binding, Zeppelin also exposes a simple AngularJS **`z`** object on the front-end side to expose the same capabilities.
This **`z`** object is accessible in the Angular isolated scope for each paragraph.
<br />
### Bind / Unbind Variables
Through the **`z`** object, you can bind / unbind variables to the **AngularJS view**.
Bind a value to an angular object and a **mandatory** target paragraph:
```html
%angular
<form class="form-inline">
<div class="form-group">
<label for="superheroId">Super Hero: </label>
<input type="text" class="form-control" id="superheroId" placeholder="Superhero name ..." ng-model="superhero"></input>
</div>
<button type="submit" class="btn btn-primary" ng-click="z.angularBind('superhero',superhero,'20160222-232336_1472609686')"> Bind</button>
</form>
```
<img src="../assets/themes/zeppelin/img/screenshots/z_angularBind.gif" />
<hr/>
Unbind/remove a value from angular object and a **mandatory** target paragraph:
```html
%angular
<form class="form-inline">
<button type="submit" class="btn btn-primary" ng-click="z.angularUnbind('superhero','20160222-232336_1472609686')"> UnBind</button>
</form>
```
<img src="../assets/themes/zeppelin/img/screenshots/z_angularUnbind.gif" />
The signatures of the **`z.angularBind() / z.angularUnbind()`** functions are:
```javascript
z.angularBind(angularObjectName, angularObjectValue, paragraphId);
z.angularUnbind(angularObjectName, paragraphId);
```
All the parameters are mandatory.
<br />
### Run Paragraph
You can also trigger paragraph execution by calling the **`z.runParagraph()`** function and passing the appropriate paragraphId:
```html
%angular
<form class="form-inline">
<div class="form-group">
<label for="paragraphId">Paragraph Id: </label>
<input type="text" class="form-control" id="paragraphId" placeholder="Paragraph Id ..." ng-model="paragraph"></input>
</div>
<button type="submit" class="btn btn-primary" ng-click="z.runParagraph(paragraph)"> Run Paragraph</button>
</form>
```
<img src="../assets/themes/zeppelin/img/screenshots/z_runParagraph.gif" />
<br />
### Overriding dynamic form with Angular Object
The front-end Angular Interaction API has been designed to offer richer form capabilities and variable binding. With the existing **Dynamic Form** system you can already create input text, select and checkbox forms but the choice is rather limited and the look & feel cannot be changed.
The idea is to create a custom form using plain HTML/AngularJS code and bind actions on this form to push/remove Angular variables to targeted paragraphs using this new API.
Consequently if you use the **Dynamic Form** syntax in a paragraph and there is a bound Angular object having the same name as the _${formName}_, the Angular object will have higher priority and the **Dynamic Form** will not be displayed. Example:
<img src="../assets/themes/zeppelin/img/screenshots/z_angularJs_overriding_dynamic_form.gif" />
<br />
### Feature matrix comparison
How does the front-end AngularJS API compare to the back-end API? Below is a comparison matrix for both APIs:
<table>
<thead>
<tr>
<th>Actions</th>
<th>Front-end API</th>
<th>Back-end API</th>
</tr>
</thead>
<tbody>
<tr>
<td>Initiate binding</td>
<td>z.angularBind(var, initialValue, paragraphId)</td>
<td>z.angularBind(var, initialValue)</td>
</tr>
<tr>
<td>Update value</td>
<td>same to ordinary angularjs scope variable, or z.angularBind(var, newValue, paragraphId)</td>
<td>z.angularBind(var, newValue)</td>
</tr>
<tr>
<td>Watching value</td>
<td>same to ordinary angularjs scope variable</td>
<td>z.angularWatch(var, (oldVal, newVal) => ...)</td>
</tr>
<tr>
<td>Destroy binding</td>
<td>z.angularUnbind(var, paragraphId)</td>
<td>z.angularUnbind(var)</td>
</tr>
<tr>
<td>Executing Paragraph</td>
<td>z.runParagraph(paragraphId)</td>
<td>z.run(paragraphId)</td>
</tr>
</tbody>
</table>
Both APIs are pretty similar, except for value watching where it is done naturally by AngularJS internals on the front-end and by user custom watcher functions in the back-end.
There is also a slight difference in terms of scope. Front-end API limits the Angular object binding to a paragraph scope whereas back-end API allows you to bind an Angular object at the global or note scope. This restriction has been designed purposely to avoid Angular object leaks and scope pollution.

View file

@ -76,7 +76,7 @@ Some basic charts are already included in Zeppelin. Visualizations are not limit
#### Pivot chart
With simple drag and drop Zeppelin aggeregates the values and display them in pivot chart. You can easily create chart with multiple aggregated values including sum, count, average, min, max.
With simple drag and drop, Zeppelin aggregates the values and displays them in a pivot chart. You can easily create a chart with multiple aggregated values including sum, count, average, min, max.
<div class="row">
<div class="col-md-8">
@ -123,4 +123,4 @@ Join the [Mailing list](./community.html) and report issues on our [Issue tracke
<br />
### Undergoing Incubation
Apache Zeppelin is an effort undergoing [incubation](https://incubator.apache.org/index.html) at The Apache Software Foundation (ASF), sponsored by the Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
Apache Zeppelin is an effort undergoing [incubation](https://incubator.apache.org/index.html) at The Apache Software Foundation (ASF), sponsored by the Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.

View file

@ -45,7 +45,7 @@ If you don't have requirements prepared, please check instructions in [README.md
<a name="zeppelin-configuration"> </a>
## Zeppelin Configuration
You can configure Zeppelin with both **environment variables** in `conf/zeppelin-env.sh` and **java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will be used priorly.
You can configure Zeppelin with both **environment variables** in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows) and **Java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will take priority.
<table class="table-configuration">
<tr>
@ -186,6 +186,24 @@ You can configure Zeppelin with both **environment variables** in `conf/zeppelin
<td>user</td>
<td>A user name of S3 bucket<br />i.e. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_ENDPOINT</td>
<td>zeppelin.notebook.s3.endpoint</td>
<td>s3.amazonaws.com</td>
<td>Endpoint for the bucket</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID</td>
<td>zeppelin.notebook.s3.kmsKeyID</td>
<td></td>
<td>AWS KMS Key ID to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_EMP</td>
<td>zeppelin.notebook.s3.encryptionMaterialsProvider</td>
<td></td>
<td>Class name of a custom S3 encryption materials provider implementation to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING</td>
<td>zeppelin.notebook.azure.connectionString</td>
@ -225,10 +243,16 @@ You can configure Zeppelin with both **environment variables** in `conf/zeppelin
<td>interpreter</td>
<td>Zeppelin interpreter directory</td>
</tr>
<tr>
<td>ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE</td>
<td>zeppelin.websocket.max.text.message.size</td>
<td>1024000</td>
<td>Size in characters of the maximum text message to be received by websocket.</td>
</tr>
</table>
You may also need to configure individual interpreters. If so, please check the **Interpreter** section in the Zeppelin documentation.
[Spark Interpreter for Apache Zeppelin](../interpreter/spark.html) will be a good example.
[Spark Interpreter for Apache Zeppelin](../interpreter/spark.html) will be a good example.
## Zeppelin Start / Stop
#### Start Zeppelin
@ -248,9 +272,9 @@ bin/zeppelin-daemon.sh stop
Zeppelin can auto start as a service with an init script, such as services managed by upstart.
The following is an example upstart script to be saved as `/etc/init/zeppelin.conf`
The following is an example upstart script to be saved as `/etc/init/zeppelin.conf`
This example has been tested with Ubuntu Linux.
This also allows the service to be managed with commands such as
This also allows the service to be managed with commands such as
`sudo service zeppelin start`
`sudo service zeppelin stop`
@ -278,4 +302,8 @@ chdir /usr/share/zeppelin
exec bin/zeppelin-daemon.sh upstart
```
#### Running on Windows
```
bin\zeppelin.cmd
```

View file

@ -41,4 +41,4 @@ So, copying `notebook` and `conf` directory should be enough.
```
bin/zeppelin-daemon.sh start
```
```

View file

@ -21,16 +21,18 @@ limitations under the License.
## Vagrant Virtual Machine for Apache Zeppelin
Apache Zeppelin distribution includes a scripts directory
`scripts/vagrant/zeppelin-dev`
This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source. For pyspark users, this script also includes several helpful [Python Libraries](#pythonextras).
This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source.
For PySpark users, this script includes several helpful [Python Libraries](#python-extras).
For SparkR users, this script includes several helpful [R Libraries](#r-extras).
####Installing the required components to launch a virtual machine.
This script requires three applications, [Ansible](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip "Ansible"), [Vagrant](http://www.vagrantup.com/downloads "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems.
This script requires three applications, [Ansible](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip "Ansible"), [Vagrant](http://www.vagrantup.com "Vagrant") and [Virtual Box](https://www.virtualbox.org/ "Virtual Box"). All of these applications are freely available as Open Source projects and extremely easy to set up on most operating systems.
### Create a Zeppelin Ready VM in 4 Steps (5 on Windows)
@ -38,11 +40,11 @@ If you are running Windows and don't yet have python installed, [install Python
1. Download and Install Vagrant: [Vagrant Downloads](http://www.vagrantup.com/downloads)
2. Install Ansible: [Ansible Python pip install](http://docs.ansible.com/ansible/intro_installation.html#latest-releases-via-pip)
```
sudo easy_install pip
sudo pip install ansible
ansible --version
ansible --version
```
After that, please check whether it reports **ansible version 1.9.2 or higher**.
@ -68,7 +70,7 @@ Cloning the project again may seem counter intuitive, since this script likley o
Synced folders enable Vagrant to sync a folder on the host machine to the guest machine, allowing you to continue working on your project's files on your host machine, but use the resources in the guest machine to compile or run your project. _[(1) Synced Folder Description from Vagrant Up](https://docs.vagrantup.com/v2/synced-folders/index.html)_
By default, Vagrant will share your project directory (the directory with the Vagrantfile) to `/vagrant`. Which means you should be able to build within the guest machine after you
By default, Vagrant will share your project directory (the directory with the Vagrantfile) to `/vagrant`. Which means you should be able to build within the guest machine after you
`cd /vagrant/incubator-zeppelin`
@ -92,23 +94,24 @@ The virtual machine consists of:
- libfontconfig to avoid PhantomJS missing dependency issues
- openjdk-7-jdk
- Python addons: pip, matplotlib, scipy, numpy, pandas
- [R](https://www.r-project.org/) and R Packages required to run the R Interpreter and the related R tutorial notebook, including: Knitr, devtools, repr, rCharts, ggplot2, googleVis, mplot, htmltools, base64enc, data.table
### How to build & run Zeppelin
This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine.
This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR:
```
cd /incubator-zeppelin
mvn clean package -Pspark-1.5 -Ppyspark -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests
mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests
./bin/zeppelin-daemon.sh start
```
On your host machine browse to `http://localhost:8080/`
If you [turned off port forwarding](#tweakvm) in the `Vagrantfile` browse to `http://192.168.51.52:8080`
If you [turned off port forwarding](#tweaking-the-virtual-machine) in the `Vagrantfile` browse to `http://192.168.51.52:8080`
### [Tweaking the Virtual Machine](id:tweakvm)
### Tweaking the Virtual Machine
If you plan to run this virtual machine alongside other Vagrant images, you may wish to bind the virtual machine to a specific IP address, and not use port forwarding from your local host.
@ -123,7 +126,7 @@ config.vm.network "private_network", ip: "192.168.51.52"
This approach is typically required when running other virtual machines that discover each other directly by IP address, such as Spark Masters and Slaves as well as Cassandra Nodes, Elasticsearch Nodes, and other Spark data sources. You may wish to launch nodes in virtual machines with IP addresses in a subnet that works for your local network, such as: 192.168.51.53, 192.168.51.54, 192.168.51.55, etc.
### [Python Extras](id:pythonextras)
### Python Extras
With Zeppelin running, **Numpy**, **SciPy**, **Pandas** and **Matplotlib** will be available. Create a pyspark notebook, and try the below code.
@ -135,7 +138,7 @@ import scipy
import pandas
import matplotlib
print "numpy " + numpy.__version__
print "numpy " + numpy.__version__
print "scipy " + scipy.__version__
print "pandas " + pandas.__version__
print "matplotlib " + matplotlib.__version__
@ -173,12 +176,9 @@ plt.xlabel('Performance')
plt.title('How fast do you want to go today?')
show(plt)
```
```
### R Extras
With zeppelin running, an R Tutorial notebook will be available. The R packages required to run the examples and graphs in this tutorial notebook were installed by this virtual machine.
The installed R Packages include: Knitr, devtools, repr, rCharts, ggplot2, googleVis, mplot, htmltools, base64enc, data.table

View file

@ -25,16 +25,16 @@ This page describes how to pre-configure a bare metal node, configure Zeppelin a
## Prepare Node
### Zeppelin user (Optional)
This step is optional, however its nice to run Zeppelin under its own user. In case you do not like to use Zeppelin (hope not) the user could be deleted along with all the pacakges that were installed for Zeppelin, Zeppelin binary itself and associated directories.
This step is optional; however, it's nice to run Zeppelin under its own user. In case you do not like to use Zeppelin (hope not), the user could be deleted along with all the packages that were installed for Zeppelin, the Zeppelin binary itself and associated directories.
Create a zeppelin user and switch to zeppelin user or if zeppelin user is already created then login as zeppelin.
```bash
useradd zeppelin
su - zeppelin
su - zeppelin
whoami
```
Assuming a zeppelin user is created then running whoami command must return
Assuming a zeppelin user is created then running whoami command must return
```bash
zeppelin
@ -48,7 +48,7 @@ Its assumed in the rest of the document that zeppelin user is indeed created and
* Java 1.7
* Hadoop client
* Spark
* Internet connection is required.
* Internet connection is required.
It's assumed that the node has CentOS 6.x installed on it, although any Linux distribution should work fine.
@ -83,7 +83,7 @@ This document assumes that Zeppelin is located under `/home/zeppelin/incubator-z
Zeppelin configuration needs to be modified to connect to YARN cluster. Create a copy of zeppelin environment shell script.
```bash
cp /home/zeppelin/incubator-zeppelin/conf/zeppelin-env.sh.template /home/zeppelin/incubator-zeppelin/conf/zeppelin-env.sh
cp /home/zeppelin/incubator-zeppelin/conf/zeppelin-env.sh.template /home/zeppelin/incubator-zeppelin/conf/zeppelin-env.sh
```
Set the following properties
@ -127,7 +127,7 @@ Zeppelin supports Hive interpreter and hence copy hive-site.xml that should be p
cp /etc/hive/conf/hive-site.xml /home/zeppelin/incubator-zeppelin/conf
```
Once Zeppelin server has started successfully, visit http://[zeppelin-server-host-name]:8080 with your web browser. Click on Interpreter tab next to Notebook dropdown. Look for Hive configurations and set them appropriately. By default hive.hiveserver2.url will be pointing to localhost and hive.hiveserver2.password/hive.hiveserver2.user are set to hive/hive. Set them as per Hive installation on YARN cluster.
Once Zeppelin server has started successfully, visit http://[zeppelin-server-host-name]:8080 with your web browser. Click on Interpreter tab next to Notebook dropdown. Look for Hive configurations and set them appropriately. By default hive.hiveserver2.url will be pointing to localhost and hive.hiveserver2.password/hive.hiveserver2.user are set to hive/hive. Set them as per Hive installation on YARN cluster.
Click on Save button. Once these configurations are updated, Zeppelin will prompt you to restart the interpreter. Accept the prompt and the interpreter will reload the configurations.
### Spark
@ -161,7 +161,7 @@ Click on Save button. Once these configurations are updated, Zeppelin will promp
Spark & Hive notebooks can be written with Zeppelin now. The resulting Spark & Hive jobs will run on configured YARN cluster.
## Debug
Zeppelin does not emit any kind of error messages on web interface when notebook/paragrah is run. If a paragraph fails it only displays ERROR. The reason for failure needs to be looked into log files which is present in logs directory under zeppelin installation base directory. Zeppelin creates a log file for each kind of interpreter.
Zeppelin does not emit any kind of error messages on the web interface when a notebook/paragraph is run. If a paragraph fails it only displays ERROR. The reason for the failure needs to be looked up in the log files, which are present in the logs directory under the Zeppelin installation base directory. Zeppelin creates a log file for each kind of interpreter.
```bash
[zeppelin@zeppelin-3529 logs]$ pwd
@ -172,5 +172,5 @@ total 844
-rw-rw-r-- 1 zeppelin zeppelin 625050 Aug 3 16:05 zeppelin-interpreter-spark-zeppelin-zeppelin-3529.log
-rw-rw-r-- 1 zeppelin zeppelin 200394 Aug 3 21:15 zeppelin-zeppelin-zeppelin-3529.log
-rw-rw-r-- 1 zeppelin zeppelin 16162 Aug 3 14:03 zeppelin-zeppelin-zeppelin-3529.out
[zeppelin@zeppelin-3529 logs]$
[zeppelin@zeppelin-3529 logs]$
```

View file

@ -74,7 +74,7 @@ The **Alluxio** interpreter accepts the following commands.
<tr>
<td>copyFromLocal</td>
<td>copyFromLocal "source path" "remote path"</td>
<td>Copy the specified file specified by "source path" to the path specified by "remote path".
<td>Copy the file specified by "source path" to the path specified by "remote path".
This command will fail if "remote path" already exists.</td>
</tr>
<tr>
@ -230,4 +230,4 @@ Following steps are performed:
<center>
![Alluxio Interpreter Example](../assets/themes/zeppelin/img/docs-img/alluxio-example.png)
</center>
</center>

View file

@ -94,7 +94,7 @@ With the `search` command, you can send a search query to Elasticsearch. There a
* You can provide a JSON-formatted query, that is exactly what you provide when you use the REST API of Elasticsearch.
* See [Elasticsearch search API reference document](https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html) for more details about the content of the search queries.
* You can also provide the content of a `query_string`.
* This is a shortcut to a query like that: `{ "query": { "query_string": { "query": "__HERE YOUR QUERY__", "analyze_wildcard": true } } }`
* This is a shortcut to a query like that: `{ "query": { "query_string": { "query": "__HERE YOUR QUERY__", "analyze_wildcard": true } } }`
* See [Elasticsearch query string syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) for more details about the content of such a query.
```bash
@ -119,10 +119,10 @@ Examples:
```bash
%elasticsearch
search / { "query": { "match_all": { } } }
%elasticsearch
search /logs { "query": { "query_string": { "query": "request.method:GET AND status:200" } } }
%elasticsearch
search /logs { "aggs": {
"content_length_stats": {
@ -130,7 +130,7 @@ Examples:
"field": "content_length"
}
}
} }
} }
```
* With query_string elements:
@ -138,7 +138,7 @@ Examples:
```bash
%elasticsearch
search /logs request.method:GET AND status:200
%elasticsearch
search /logs (404 AND (POST OR DELETE))
```
@ -178,6 +178,9 @@ Examples:
* With a JSON query:
![Elasticsearch - Search with query](../assets/themes/zeppelin/img/docs-img/elasticsearch-search-json-query-table.png)
* With a JSON query containing a `fields` parameter (for filtering the fields in the response): in this case, all the field values in the response are arrays, so, after flattening the result, the format of all the field names is `field_name[x]`
![Elasticsearch - Search with query and a fields param](../assets/themes/zeppelin/img/docs-img/elasticsearch-query-with-fields-param.png)
* With a query string:
![Elasticsearch - Search with query string](../assets/themes/zeppelin/img/docs-img/elasticsearch-query-string.png)

View file

@ -33,7 +33,7 @@ At the "Interpreters" menu, you have to create a new Flink interpreter and provi
</tr>
</table>
For more information about Flink configuration, you can find it [here](https://ci.apache.org/projects/flink/flink-docs-release-0.10/setup/config.html).
For more information about Flink configuration, you can find it [here](https://ci.apache.org/projects/flink/flink-docs-release-1.0/setup/config.html).
## How to test it's working
For example, you can use the [Zeppelin notebook](https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL05GTGFicy96ZXBwZWxpbi1ub3RlYm9va3MvbWFzdGVyL25vdGVib29rcy8yQVFFREs1UEMvbm90ZS5qc29u) from Till Rohrmann's presentation [Interactive data analysis with Apache Flink](http://www.slideshare.net/tillrohrmann/data-analysis-49806564) for Apache Flink Meetup.

View file

@ -10,7 +10,18 @@ group: manual
[HBase Shell](http://hbase.apache.org/book.html#shell) is a JRuby IRB client for Apache HBase. This interpreter provides all capabilities of Apache HBase shell within Apache Zeppelin. The interpreter assumes that Apache HBase client software has been installed and it can connect to the Apache HBase cluster from the machine where Apache Zeppelin is installed.
To get started with HBase, please see [HBase Quickstart](https://hbase.apache.org/book.html#quickstart)
> Note: currently only HBase 1.0.x releases are supported.
## HBase release supported
By default, Zeppelin is built against HBase 1.0.x releases. To work with HBase 1.1.x releases, use the following build command:
```bash
# HBase 1.1.4
mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0
```
To work with HBase 1.2.0+, use the following build command:
```bash
# HBase 1.2.0
mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0
```
## Configuration
@ -23,7 +34,7 @@ To get start with HBase, please see [HBase Quickstart](https://hbase.apache.org/
<tr>
<td>hbase.home</td>
<td>/usr/lib/hbase</td>
<td>Installation directory of Hbase</td>
<td>Installation directory of HBase, defaults to HBASE_HOME in environment</td>
</tr>
<tr>
<td>hbase.ruby.sources</td>
@ -31,12 +42,31 @@ To get start with HBase, please see [HBase Quickstart](https://hbase.apache.org/
<td>Path to Ruby scripts relative to 'hbase.home'</td>
</tr>
<tr>
<td>hbase.test.mode</td>
<td>zeppelin.hbase.test.mode</td>
<td>false</td>
<td>Disable checks for unit and manual tests</td>
</tr>
</table>
If you want to connect to HBase running on a cluster, you'll need to follow the next step.
### Export HBASE_HOME
In **conf/zeppelin-env.sh**, export `HBASE_HOME` environment variable with your HBase installation path. This ensures `hbase-site.xml` can be loaded.
for example
```bash
export HBASE_HOME=/usr/lib/hbase
```
or, when running with CDH
```bash
export HBASE_HOME="/opt/cloudera/parcels/CDH/lib/hbase"
```
You can optionally export `HBASE_CONF_DIR` instead of `HBASE_HOME` should you have custom HBase configurations.
## Enabling the HBase Shell Interpreter
In a notebook, to enable the **HBase Shell** interpreter, click the **Gear** icon and select **HBase Shell**.

56
docs/interpreter/hdfs.md Normal file
View file

@ -0,0 +1,56 @@
---
layout: page
title: "HDFS File System Interpreter"
description: ""
group: manual
---
{% include JB/setup %}
## HDFS File System Interpreter for Apache Zeppelin
[Hadoop File System](http://hadoop.apache.org/) is a distributed, fault-tolerant file system that is part of the Hadoop project and is often used as storage for distributed processing engines like [Hadoop MapReduce](http://hadoop.apache.org/) and [Apache Spark](http://spark.apache.org/) or underlying file systems like [Alluxio](http://www.alluxio.org/).
## Configuration
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>hdfs.url</td>
<td>http://localhost:50070/webhdfs/v1/</td>
<td>The URL for WebHDFS</td>
</tr>
<tr>
<td>hdfs.user</td>
<td>hdfs</td>
<td>The WebHDFS user</td>
</tr>
<tr>
<td>hdfs.maxlength</td>
<td>1000</td>
<td>Maximum number of lines of results fetched</td>
</tr>
</table>
<br/>
This interpreter connects to HDFS using the HTTP WebHDFS interface.
It supports the basic shell file commands applied to HDFS; currently, it only supports browsing.
* You can use <i>ls [PATH]</i> and <i>ls -l [PATH]</i> to list a directory. If the path is missing, then the current directory is listed. <i>ls </i> supports a <i>-h</i> flag for human readable file sizes.
* You can use <i>cd [PATH]</i> to change your current directory by giving a relative or an absolute path.
* You can invoke <i>pwd</i> to see your current directory.
> **Tip :** Use ( Ctrl + . ) for autocompletion.
### Create Interpreter
In a notebook, to enable the **HDFS** interpreter, click the **Gear** icon and select **HDFS**.
#### WebHDFS REST API
You can confirm that you're able to access the WebHDFS API by running a curl command against the WebHDFS end point provided to the interpreter.
Here is an example:
$> curl "http://localhost:50070/webhdfs/v1/?op=LISTSTATUS"

View file

@ -44,7 +44,7 @@ The [Apache Hive](https://hive.apache.org/) ™ data warehouse software facilita
<tr>
<td>${prefix}.driver</td>
<td></td>
<td>Driver class path of <code>%hive(${prefix})</code> </td>
<td>Driver class path of <code>%hive(${prefix})</code> </td>
</tr>
<tr>
<td>${prefix}.url</td>
@ -93,9 +93,9 @@ You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html)
```sql
%hive
SELECT ${group_by}, count(*) as count
FROM retail_demo.order_lineitems_pxf
GROUP BY ${group_by=product_id,product_id|product_name|customer_id|store_id}
ORDER BY count ${order=DESC,DESC|ASC}
SELECT ${group_by}, count(*) as count
FROM retail_demo.order_lineitems_pxf
GROUP BY ${group_by=product_id,product_id|product_name|customer_id|store_id}
ORDER BY count ${order=DESC,DESC|ASC}
LIMIT ${limit=10};
```

View file

@ -18,17 +18,17 @@ You can use Zeppelin to retrieve distributed data from cache using Ignite SQL in
### Installing and Running Ignite example
In order to use Ignite interpreters, you may install Apache Ignite in some simple steps:
1. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. You can find ignite version in Zepplin at the pom.xml which is placed under `path/to/your-Zeppelin/ignite/pom.xml` ( Of course, in Zeppelin source release ). Please check `ignite.version` .<br>Currently, Zeppelin provides ignite only in Zeppelin source release. So, if you download Zeppelin binary release( `zeppelin-0.5.0-incubating-bin-spark-xxx-hadoop-xx` ), you can not use ignite interpreter on Zeppelin. We are planning to include ignite in a future binary release.
2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided <dest_dir>/apache-ignite-fabric-1.2.0-incubating-bin/pom.xml file into your favourite IDE, such as Eclipse.
1. Download Ignite [source release](https://ignite.apache.org/download.html#sources) or [binary release](https://ignite.apache.org/download.html#binaries) whatever you want. But you must download Ignite as the same version of Zeppelin's. If it is not, you can't use scala code on Zeppelin. You can find ignite version in Zeppelin at the pom.xml which is placed under `path/to/your-Zeppelin/ignite/pom.xml` ( Of course, in Zeppelin source release ). Please check `ignite.version` .<br>Currently, Zeppelin provides ignite only in Zeppelin source release. So, if you download Zeppelin binary release( `zeppelin-0.5.0-incubating-bin-spark-xxx-hadoop-xx` ), you can not use ignite interpreter on Zeppelin. We are planning to include ignite in a future binary release.
2. Examples are shipped as a separate Maven project, so to start running you simply need to import provided <dest_dir>/apache-ignite-fabric-1.2.0-incubating-bin/pom.xml file into your favourite IDE, such as Eclipse.
* In case of Eclipse, Eclipse -> File -> Import -> Existing Maven Projects
* Set examples directory path to Eclipse and select the pom.xml.
* Then start `org.apache.ignite.examples.ExampleNodeStartup` (or whatever you want) to run at least one or more ignite node. When you run example code, you may notice that the number of node is increase one by one.
* Then start `org.apache.ignite.examples.ExampleNodeStartup` (or whatever you want) to run one or more Ignite nodes. When you run the example code, you may notice that the number of nodes increases one by one.
> **Tip. If you want to run Ignite examples on the cli not IDE, you can export executable Jar file from IDE. Then run it by using below command.**
```
$ nohup java -jar </path/to/your Jar file name>
```
$ nohup java -jar </path/to/your Jar file name>
```
### Configuring Ignite Interpreter
@ -78,17 +78,17 @@ For more interpreter binding information see [here](http://zeppelin.incubator.ap
### How to use Ignite SQL interpreter
In order to execute SQL query, use ` %ignite.ignitesql ` prefix. <br>
Supposing you are running `org.apache.ignite.examples.streaming.wordcount.StreamWords`, then you can use "words" cache( Of course you have to specify this cache name to the Ignite interpreter setting section `ignite.jdbc.url` of Zeppelin ).
Supposing you are running `org.apache.ignite.examples.streaming.wordcount.StreamWords`, then you can use "words" cache( Of course you have to specify this cache name to the Ignite interpreter setting section `ignite.jdbc.url` of Zeppelin ).
For example, you can select top 10 words in the words cache using the following query
```
%ignite.ignitesql
select _val, count(_val) as cnt from String group by _val order by cnt desc limit 10
```
```
%ignite.ignitesql
select _val, count(_val) as cnt from String group by _val order by cnt desc limit 10
```
![IgniteSql on Zeppelin](../assets/themes/zeppelin/img/docs-img/ignite-sql-example.png)
As long as your Ignite version and Zeppelin Ignite version is same, you can also use scala code. Please check the Zeppelin Ignite version before you download your own Ignite.
As long as your Ignite version and the Zeppelin Ignite version are the same, you can also use Scala code. Please check the Zeppelin Ignite version before you download your own Ignite.
```
%ignite

View file

@ -17,7 +17,7 @@ group: manual
In order to use Lens interpreters, you may install Apache Lens in some simple steps:
1. Download Lens for latest version from [the ASF](http://www.apache.org/dyn/closer.lua/lens/2.3-beta). Or the older release can be found [in the Archives](http://archive.apache.org/dist/lens/).
2. Before running Lens, you have to set HIVE_HOME and HADOOP_HOME. If you want to get more information about this, please refer to [here](http://lens.apache.org/lenshome/install-and-run.html#Installation). Lens also provides Pseudo Distributed mode. [Lens pseudo-distributed setup](http://lens.apache.org/lenshome/pseudo-distributed-setup.html) is done by using [docker](https://www.docker.com/). Hive server and hadoop daemons are run as separate processes in lens pseudo-distributed setup.
2. Before running Lens, you have to set HIVE_HOME and HADOOP_HOME. If you want to get more information about this, please refer to [here](http://lens.apache.org/lenshome/install-and-run.html#Installation). Lens also provides Pseudo Distributed mode. [Lens pseudo-distributed setup](http://lens.apache.org/lenshome/pseudo-distributed-setup.html) is done by using [docker](https://www.docker.com/). Hive server and hadoop daemons are run as separate processes in lens pseudo-distributed setup.
3. Now, you can start lens server (or stop).
```
@ -77,16 +77,16 @@ At the "Interpreters" menu, you can edit Lens interpreter or create new one. Zep
![Apache Lens Interpreter Setting](../assets/themes/zeppelin/img/docs-img/lens-interpreter-setting.png)
### Interpreter Bindging for Zeppelin Notebook
### Interpreter Binding for Zeppelin Notebook
After configuring Lens interpreter, create your own notebook, then you can bind interpreters like below image.
![Zeppelin Notebook Interpreter Biding](../assets/themes/zeppelin/img/docs-img/lens-interpreter-binding.png)
![Zeppelin Notebook Interpreter Binding](../assets/themes/zeppelin/img/docs-img/lens-interpreter-binding.png)
For more interpreter binding information see [here](http://zeppelin.incubator.apache.org/docs/manual/interpreters.html).
### How to use
You can analyze your data by using [OLAP Cube](http://lens.apache.org/user/olap-cube.html) [QL](http://lens.apache.org/user/cli.html) which is a high level SQL like language to query and describe data sets organized in data cubes.
You may experience OLAP Cube like this [Video tutorial](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples).
### How to use
You can analyze your data by using [OLAP Cube](http://lens.apache.org/user/olap-cube.html) [QL](http://lens.apache.org/user/cli.html) which is a high level SQL like language to query and describe data sets organized in data cubes.
You may experience OLAP Cube like this [Video tutorial](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples).
As you can see in this video, they are using Lens Client Shell(./bin/lens-cli.sh). All of these functions also can be used on Zeppelin by using Lens interpreter.
<li> Create and Use(Switch) Databases.
@ -105,7 +105,7 @@ use newDb
create storage your/path/to/lens/client/examples/resources/db-storage.xml
```
<li> Create Dimensions, Show fields and join-chains of them.
<li> Create Dimensions, Show fields and join-chains of them.
```
create dimension your/path/to/lens/client/examples/resources/customer.xml
@ -121,8 +121,8 @@ dimension show joinchains customer
<li> Create Cubes, Show fields and join-chains of them.
```
create cube your/path/to/lens/client/examples/resources/sales-cube.xml
```
create cube your/path/to/lens/client/examples/resources/sales-cube.xml
```
```
@ -133,7 +133,7 @@ cube show fields sales
cube show joinchains sales
```
<li> Create Dimtables and Fact.
<li> Create Dimtables and Fact.
```
create dimtable your/path/to/lens/client/examples/resources/customer_table.xml
@ -163,7 +163,7 @@ query execute cube select customer_city_name, product_details.description, produ
These are just examples that provided in advance by Lens. If you want to explore whole tutorials of Lens, see the [tutorial video](https://cwiki.apache.org/confluence/display/LENS/2015/07/13/20+Minute+video+demo+of+Apache+Lens+through+examples).
### Lens UI Service
### Lens UI Service
Lens also provides web UI service. Once the server starts up, you can open the service on http://serverhost:19999/index.html and browse. You may also check the structure that you made and use query easily here.
![Lens UI Servive](../assets/themes/zeppelin/img/docs-img/lens-ui-service.png)
![Lens UI Service](../assets/themes/zeppelin/img/docs-img/lens-ui-service.png)

107
docs/interpreter/livy.md Normal file
View file

@ -0,0 +1,107 @@
---
layout: page
title: "Livy Interpreter"
description: ""
group: manual
---
{% include JB/setup %}
## Livy Interpreter for Apache Zeppelin
Livy is an open source REST interface for interacting with Spark from anywhere. It supports executing snippets of code or programs in a Spark context that runs locally or in YARN.
* Interactive Scala, Python and R shells
* Batch submissions in Scala, Java, Python
* Multi users can share the same server (impersonation support)
* Can be used for submitting jobs from anywhere with REST
* Does not require any code change to your programs
### Requirements
Additional requirements for the Livy interpreter are:
* Spark 1.3 or above.
* Livy server.
### Configuration
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.livy.master</td>
<td>local[*]</td>
<td>Spark master uri. ex) spark://masterhost:7077</td>
</tr>
<tr>
<td>zeppelin.livy.url</td>
<td>http://localhost:8998</td>
<td>URL where livy server is running</td>
</tr>
<tr>
<td>zeppelin.livy.spark.maxResult</td>
<td>1000</td>
<td>Maximum number of SparkSQL results to display.</td>
</tr>
</table>
## How to use
Basically, you can use the following interpreters:
**spark**
```
%livy.spark
sc.version
```
**pyspark**
```
%livy.pyspark
print "1"
```
**sparkR**
```
%livy.sparkr
hello <- function( name ) {
sprintf( "Hello, %s", name );
}
hello("livy")
```
## Impersonation
When Zeppelin server is running with authentication enabled, this interpreter utilizes Livy's user impersonation feature, i.e. it sends an extra parameter when creating and running a session ("proxyUser": "${loggedInUser}"). This is particularly useful when multiple users are sharing a Notebook server.
### Apply Zeppelin Dynamic Forms
You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html). You can use both the `text input` and `select form` parameterization features.
```
%livy.pyspark
print "${group_by=product_id,product_id|product_name|customer_id|store_id}"
```
## FAQ
Livy debugging: If you see any of these in error console
> Connect to livyhost:8998 [livyhost/127.0.0.1, livyhost/0:0:0:0:0:0:0:1] failed: Connection refused
Looks like the livy server is not up yet or the config is wrong
> Exception: Session not found, Livy server would have restarted, or lost session.
The session would have timed out, you may need to restart the interpreter.
> Blacklisted configuration values in session config: spark.master
edit `conf/spark-blacklist.conf` file in livy server and comment out `#spark.master` line.

137
docs/interpreter/r.md Normal file
View file

@ -0,0 +1,137 @@
---
layout: page
title: "R Interpreter"
description: ""
group: manual
---
{% include JB/setup %}
## R Interpreter
This is the Apache (incubating) Zeppelin project, with the addition of support for the R programming language and R-spark integration.
### Requirements
Additional requirements for the R interpreter are:
* R 3.1 or later (earlier versions may work, but have not been tested)
* The `evaluate` R package.
For full R support, you will also need the following R packages:
* `knitr`
* `repr` -- available with `devtools::install_github("IRkernel/repr")`
* `htmltools` -- required for some interactive plotting
* `base64enc` -- required to view R base plots
### Configuration
To run Zeppelin with the R Interpreter, the SPARK_HOME environment variable must be set. The best way to do this is by editing `conf/zeppelin-env.sh`.
If it is not set, the R Interpreter will not be able to interface with Spark.
You should also copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.
### Using the R Interpreter
By default, the R Interpreter appears as two Zeppelin Interpreters, `%r` and `%knitr`.
`%r` will behave like an ordinary REPL. You can execute commands as in the CLI.
[![2+2](screenshots/repl2plus2.png)](screenshots/repl2plus2.png)
R base plotting is fully supported
[![replhist](screenshots/replhist.png)](screenshots/replhist.png)
If you return a data.frame, Zeppelin will attempt to display it using Zeppelin's built-in visualizations.
[![replhist](screenshots/replhead.png)](screenshots/replhead.png)
`%knitr` interfaces directly against `knitr`, with chunk options on the first line:
[![knitgeo](screenshots/knitgeo.png)](screenshots/knitgeo.png)
[![knitstock](screenshots/knitstock.png)](screenshots/knitstock.png)
[![knitmotion](screenshots/knitmotion.png)](screenshots/knitmotion.png)
The two interpreters share the same environment. If you define a variable from `%r`, it will be within-scope if you then make a call using `knitr`.
### Using SparkR & Moving Between Languages
If `SPARK_HOME` is set, the `SparkR` package will be loaded automatically:
[![sparkrfaithful](screenshots/sparkrfaithful.png)](screenshots/sparkrfaithful.png)
The Spark Context and SQL Context are created and injected into the local environment automatically as `sc` and `sql`.
The same contexts are shared with the `%spark`, `%sql` and `%pyspark` interpreters:
[![backtoscala](screenshots/backtoscala.png)](screenshots/backtoscala.png)
You can also make an ordinary R variable accessible in scala and Python:
[![varr1](screenshots/varr1.png)](screenshots/varr1.png)
And vice versa:
[![varscala](screenshots/varscala.png)](screenshots/varscala.png)
[![varr2](screenshots/varr2.png)](screenshots/varr2.png)
### Caveats & Troubleshooting
* Almost all issues with the R interpreter turned out to be caused by an incorrectly set `SPARK_HOME`. The R interpreter must load a version of the `SparkR` package that matches the running version of Spark, and it does this by searching `SPARK_HOME`. If Zeppelin isn't configured to interface with Spark in `SPARK_HOME`, the R interpreter will not be able to connect to Spark.
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
* Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.
* If you return a data.frame (for instance, from calling `head()`) from the `%spark.r` interpreter, it will be parsed by Zeppelin's built-in data visualization system.
* Why `knitr` Instead of `rmarkdown`? Why no `htmlwidgets`? In order to support `htmlwidgets`, which has indirect dependencies, `rmarkdown` uses `pandoc`, which requires writing to and reading from disc. This makes it many times slower than `knitr`, which can operate entirely in RAM.
* Why no `ggvis` or `shiny`? Supporting `shiny` would require integrating a reverse-proxy into Zeppelin, which is a task.
* Mac OS X & case-insensitive filesystem. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, maven can unintentionally delete the install directory because `r` and `R` become the same subdirectory.
* Error `unable to start device X11` with the repl interpreter. Check your shell login scripts to see if they are adjusting the `DISPLAY` environment variable. This is common on some operating systems as a workaround for ssh issues, but can interfere with R plotting.
* akka Library Version or `TTransport` errors. This can happen if you try to run Zeppelin with a SPARK_HOME that has a version of Spark other than the one specified with `-Pspark-1.x` when Zeppelin was compiled.
## R Interpreter for Apache Zeppelin
[R](https://www.r-project.org) is a free software environment for statistical computing and graphics.
To run R code and visualize plots in Apache Zeppelin, you will need R on your master node (or your dev laptop).
+ For Centos: `yum install R R-devel libcurl-devel openssl-devel`
+ For Ubuntu: `apt-get install r-base`
Validate your installation with a simple R command:
```
R -e "print(1+1)"
```
To enjoy plots, install additional libraries with:
```
+ devtools with `R -e "install.packages('devtools', repos = 'http://cran.us.r-project.org')"`
+ knitr with `R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org')"`
+ ggplot2 with `R -e "install.packages('ggplot2', repos = 'http://cran.us.r-project.org')"`
+ Other visualization libraries: `R -e "install.packages(c('devtools','mplot', 'googleVis'), repos = 'http://cran.us.r-project.org'); require(devtools); install_github('ramnathv/rCharts')"`
```
We recommend you to also install the following optional R libraries for happy data analytics:
+ glmnet
+ pROC
+ data.table
+ caret
+ sqldf
+ wordcloud

View file

@ -72,6 +72,6 @@ If you click on the icon for the pie chart, you should be able to see a chart li
![Scalding - Pie - Chart](../assets/themes/zeppelin/img/docs-img/scalding-pie.png)
### Current Status & Future Work
The current implementation of the Scalding interpreter does not support canceling jobs, or fine-grained progress updates.
The current implementation of the Scalding interpreter does not support canceling jobs, or fine-grained progress updates.
The pre-configured Scalding interpreter only supports Scalding in local mode. Hadoop mode for Scalding is currently unsupported, and will be future work (contributions welcome!).

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -8,7 +8,7 @@ group: manual
## Spark Interpreter for Apache Zeppelin
[Apache Spark](http://spark.apache.org) is supported in Zeppelin with
[Apache Spark](http://spark.apache.org) is supported in Zeppelin with
Spark Interpreter group, which consists of five interpreters.
<table class="table-configuration">
@ -20,17 +20,22 @@ Spark Interpreter group, which consisted of 4 interpreters.
<tr>
<td>%spark</td>
<td>SparkInterpreter</td>
<td>Creates SparkContext and provides scala environment</td>
<td>Creates a SparkContext and provides a scala environment</td>
</tr>
<tr>
<td>%pyspark</td>
<td>PySparkInterpreter</td>
<td>Provides python environment</td>
<td>Provides a python environment</td>
</tr>
<tr>
<td>%r</td>
<td>SparkRInterpreter</td>
<td>Provides an R environment with SparkR support</td>
</tr>
<tr>
<td>%sql</td>
<td>SparkSQLInterpreter</td>
<td>Provides SQL environment</td>
<td>Provides a SQL environment</td>
</tr>
<tr>
<td>%dep</td>
@ -40,6 +45,74 @@ Spark Interpreter group, which consisted of 4 interpreters.
</table>
## Configuration
The Spark interpreter can be configured with properties provided by Zeppelin.
You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to [Spark Available Properties](http://spark.apache.org/docs/latest/configuration.html#available-properties).
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>args</td>
<td></td>
<td>Spark commandline args</td>
</tr>
<tr>
<td>master</td>
<td>local[*]</td>
<td>Spark master uri. <br/> ex) spark://masterhost:7077</td>
</tr>
<tr>
<td>spark.app.name</td>
<td>Zeppelin</td>
<td>The name of spark application.</td>
</tr>
<tr>
<td>spark.cores.max</td>
<td></td>
<td>Total number of cores to use. <br/> Empty value uses all available cores.</td>
</tr>
<tr>
<td>spark.executor.memory </td>
<td>512m</td>
<td>Executor memory per worker instance. <br/> ex) 512m, 32g</td>
</tr>
<tr>
<td>zeppelin.dep.additionalRemoteRepository</td>
<td>spark-packages, <br/> http://dl.bintray.com/spark-packages/maven, <br/> false;</td>
<td>A list of `id,remote-repository-URL,is-snapshot;` <br/> for each remote repository.</td>
</tr>
<tr>
<td>zeppelin.dep.localrepo</td>
<td>local-repo</td>
<td>Local repository for dependency loader</td>
</tr>
<tr>
<td>zeppelin.pyspark.python</td>
<td>python</td>
<td>Python command to run pyspark with</td>
</tr>
<tr>
<td>zeppelin.spark.concurrentSQL</td>
<td>false</td>
<td>Execute multiple SQL concurrently if set true.</td>
</tr>
<tr>
<td>zeppelin.spark.maxResult</td>
<td>1000</td>
<td>Maximum number of SparkSQL results to display.</td>
</tr>
<tr>
<td>zeppelin.spark.printREPLOutput</td>
<td>true</td>
<td>Print REPL output</td>
</tr>
<tr>
<td>zeppelin.spark.useHiveContext</td>
<td>true</td>
<td>Use HiveContext instead of SQLContext if it is true.</td>
</tr>
</table>
Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps.
### 1. Export SPARK_HOME
@ -58,6 +131,8 @@ export HADOOP_CONF_DIR=/usr/lib/hadoop
export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0"
```
For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. For more details please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems)
### 2. Set master in Interpreter menu
After starting Zeppelin, go to the **Interpreter** menu and edit the **master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.
@ -203,13 +278,13 @@ z.put("objName", myObject)
%pyspark
myObject = z.get("objName")
{% endhighlight %}
</div>
</div>
### Form Creation
ZeppelinContext provides functions for creating forms.
ZeppelinContext provides functions for creating forms.
In scala and python environments, you can create forms programmatically.
<div class="codetabs">
<div data-lang="scala" markdown="1">
@ -236,13 +311,13 @@ z.select("formName", "option1", Seq(("option1", "option1DisplayName"),
{% highlight python %}
%pyspark
# Create text input form
# Create text input form
z.input("formName")
# Create text input form with default value
# Create text input form with default value
z.input("formName", "defaultValue")
# Create select form
# Create select form
z.select("formName", [("option1", "option1DisplayName"),
("option2", "option2DisplayName")])
@ -250,7 +325,7 @@ z.select("formName", [("option1", "option1DisplayName"),
z.select("formName", [("option1", "option1DisplayName"),
("option2", "option2DisplayName")], "option1")
{% endhighlight %}
</div>
</div>
@ -264,12 +339,13 @@ select * from ${table=defaultTableName} where text like '%${search}%'
To learn more about dynamic form, checkout [Dynamic Form](../manual/dynamicform.html).
### Separate Interpreter for each note
### Interpreter setting option
Interpreter setting can choose one of the 'shared', 'scoped', or 'isolated' options. In 'scoped' mode (experimental), the Spark interpreter creates a separate scala compiler for each notebook but shares a single SparkContext. In 'isolated' mode, it creates a separate SparkContext for each notebook.
In 'Separate Interpreter for each note' mode, SparkInterpreter creates scala compiler per each notebook. However it still shares the single SparkContext.
## Setting up Zeppelin with Kerberos
Logical setup with Zeppelin, Kerberos Distribution Center (KDC), and Spark on YARN:
Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:
<img src="../assets/themes/zeppelin/img/docs-img/kdc_zeppelin.png">
@ -289,5 +365,3 @@ This is to make the server communicate with KDC.
> **NOTE:** If you do not have access to the above spark-defaults.conf file, optionally, you may add the lines to the Spark Interpreter through the Interpreter tab in the Zeppelin UI.
4. That's it. Play with Zeppelin !

View file

@ -1,6 +1,6 @@
---
layout: page
title: "Dependnecy Management"
title: "Dependency Management"
description: ""
group: manual
---
@ -71,4 +71,3 @@ When your code requires external library, instead of doing download/copy/restart
</ol>
</div>
</div>

View file

@ -34,12 +34,12 @@ To create text input form, use _${formName}_ templates.
for example
<img src="/assets/themes/zeppelin/img/screenshots/form_input.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_input.png" />
Also you can provide default value, using _${formName=defaultValue}_.
<img src="/assets/themes/zeppelin/img/screenshots/form_input_default.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_input_default.png" />
#### Select form
@ -48,21 +48,21 @@ To create select form, use _${formName=defaultValue,option1|option2...}_
for example
<img src="/assets/themes/zeppelin/img/screenshots/form_select.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_select.png" />
Also you can separate option's display name and value, using _${formName=defaultValue,option1(DisplayName)|option2(DisplayName)...}_
<img src="/assets/themes/zeppelin/img/screenshots/form_select_displayname.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_select_displayname.png" />
#### Checkbox form
For multi-selection, you can create a checkbox form using _${checkbox:formName=defaultValue1|defaultValue2...,option1|option2...}_. The variable will be substituted by a comma-separated string based on the selected items. For example:
<img src="/assets/themes/zeppelin/img/screenshots/form_checkbox.png">
<img src="../assets/themes/zeppelin/img/screenshots/form_checkbox.png">
Besides, you can specify the delimiter using _${checkbox(delimiter):formName=...}_:
<img src="/assets/themes/zeppelin/img/screenshots/form_checkbox_delimiter.png">
<img src="../assets/themes/zeppelin/img/screenshots/form_checkbox_delimiter.png">
### Creates Programmatically
@ -89,7 +89,7 @@ print("Hello "+z.input("name"))
</div>
</div>
<img src="/assets/themes/zeppelin/img/screenshots/form_input_prog.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_input_prog.png" />
####Text input form with default value
<div class="codetabs">
@ -110,7 +110,7 @@ print("Hello "+z.input("name", "sun"))
</div>
</div>
<img src="/assets/themes/zeppelin/img/screenshots/form_input_default_prog.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_input_default_prog.png" />
####Select form
<div class="codetabs">
@ -143,7 +143,7 @@ print("Hello "+z.select("day", [("1","mon"),
</div>
</div>
<img src="/assets/themes/zeppelin/img/screenshots/form_select_prog.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_select_prog.png" />
#### Checkbox form
<div class="codetabs">
@ -166,4 +166,4 @@ print("Hello "+ " and ".join(z.checkbox("fruit", options, ["apple"])))
</div>
</div>
<img src="/assets/themes/zeppelin/img/screenshots/form_checkbox_prog.png" />
<img src="../assets/themes/zeppelin/img/screenshots/form_checkbox_prog.png" />

View file

@ -29,16 +29,18 @@ Zeppelin Interpreter is a plug-in which enables Zeppelin users to use a specific
When you click the ```+Create``` button in the interpreter page, the interpreter drop-down list box will show all the available interpreters on your server.
<img src="/assets/themes/zeppelin/img/screenshots/interpreter_create.png">
<img src="../assets/themes/zeppelin/img/screenshots/interpreter_create.png">
## What is Zeppelin Interpreter Setting?
Zeppelin interpreter setting is the configuration of a given interpreter on Zeppelin server. For example, the properties are required for hive JDBC interpreter to connect to the Hive server.
Zeppelin interpreter setting is the configuration of a given interpreter on Zeppelin server. For example, the properties are required for hive JDBC interpreter to connect to the Hive server.
<img src="/assets/themes/zeppelin/img/screenshots/interpreter_setting.png">
<img src="../assets/themes/zeppelin/img/screenshots/interpreter_setting.png">
Each notebook can be binded to multiple Interpreter Settings using setting icon on upper right corner of the notebook.
Properties are exported as environment variables when the property name consists of upper-case characters, numbers and underscores ([A-Z_0-9]). Otherwise, properties are set as JVM properties.
<img src="/assets/themes/zeppelin/img/screenshots/interpreter_binding.png" width="800px">
Each notebook can be bound to multiple Interpreter Settings using setting icon on upper right corner of the notebook.
<img src="../assets/themes/zeppelin/img/screenshots/interpreter_binding.png" width="800px">
@ -49,13 +51,12 @@ By default, every interpreter is belonged to a single group, but the group might
Technically, Zeppelin interpreters from the same group are running in the same JVM. For more information about this, please checkout [here](../development/writingzeppelininterpreter.html).
Each interpreter belongs to a single group, and the interpreters in a group are registered together. All of their properties are listed in the interpreter setting, as in the image below.
<img src="/assets/themes/zeppelin/img/screenshots/interpreter_setting_spark.png">
<img src="../assets/themes/zeppelin/img/screenshots/interpreter_setting_spark.png">
## Interpreter binding mode
Each Interpreter Setting can choose one of two different interpreter binding mode.
Shared mode (default) and 'Separate Interpreter for each note' mode. In shared mode, every notebook binded to the Interpreter Setting will share the single Interpreter instance. In 'Separate Interpreter for each note' mode, each notebook will create new Interpreter instance. Therefore each notebook will have fresh new Interpreter environment.
<img src="/assets/themes/zeppelin/img/screenshots/interpreter_persession.png" width="400px">
Each Interpreter Setting can choose one of the 'shared', 'scoped', or 'isolated' interpreter binding modes.
In 'shared' mode, every notebook bound to the Interpreter Setting will share a single Interpreter instance. In 'scoped' mode, each notebook will create a new Interpreter instance in the same interpreter process. In 'isolated' mode, each notebook will create a new Interpreter process.
<img src="../assets/themes/zeppelin/img/screenshots/interpreter_persession.png" width="400px">

View file

@ -21,66 +21,66 @@ limitations under the License.
## Customize your zeppelin homepage
Zeppelin allows you to use one of the notebooks you create as your zeppelin Homepage.
With that you can brand your zeppelin installation,
With that you can brand your zeppelin installation,
adjust the instructions to your users' needs and even translate them to other languages.
<br />
### How to set a notebook as your zeppelin homepage
The process for creating your homepage is very simple as shown below:
1. Create a notebook using zeppelin
2. Set the notebook id in the config file
3. Restart zeppelin
<br />
#### Create a notebook using zeppelin
Create a new notebook using zeppelin,
you can use ```%md``` interpreter for markdown content or any other interpreter you like.
You can also use the display system to generate [text](../displaysystem/display.html),
You can also use the display system to generate [text](../displaysystem/display.html),
[html](../displaysystem/display.html#html),[table](../displaysystem/table.html) or
[angular](../displaysystem/angular.html)
Run (shift+Enter) the notebook and see the output. Optionally, change the notebook view to report to hide
Run (shift+Enter) the notebook and see the output. Optionally, change the notebook view to report to hide
the code sections.
<br />
#### Set the notebook id in the config file
To set the notebook id in the config file you should copy it from the last word in the notebook url
To set the notebook id in the config file you should copy it from the last word in the notebook url
for example
<img src="/assets/themes/zeppelin/img/screenshots/homepage_notebook_id.png" />
Set the notebook id to the ```ZEPPELIN_NOTEBOOK_HOMESCREEN``` environment variable
or ```zeppelin.notebook.homescreen``` property.
You can also set the ```ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE``` environment variable
Set the notebook id to the ```ZEPPELIN_NOTEBOOK_HOMESCREEN``` environment variable
or ```zeppelin.notebook.homescreen``` property.
You can also set the ```ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE``` environment variable
or ```zeppelin.notebook.homescreen.hide``` property to hide the new notebook from the notebook list.
<br />
#### Restart zeppelin
Restart your zeppelin server
```
./bin/zeppelin-deamon stop
./bin/zeppelin-daemon.sh stop
./bin/zeppelin-daemon.sh start
```
####That's it! Open your browser and navigate to zeppelin and see your customized homepage...
<br />
### Show notebooks list in your custom homepage
If you want to display the list of notebooks on your custom zeppelin homepage all
If you want to display the list of notebooks on your custom zeppelin homepage all
you need to do is use our %angular support.
<br />
Add the following code to a paragraph in your home page and run it... voilà! You have your notebooks list.
```javascript
println(
"""%angular
"""%angular
<div class="col-md-4" ng-controller="HomeCtrl as home">
<h4>Notebooks</h4>
<div>
@ -88,22 +88,22 @@ you need to do is use our %angular support.
<i style="font-size: 15px;" class="icon-notebook"></i> Create new note</a></h5>
<ul style="list-style-type: none;">
<li ng-repeat="note in home.notes.list track by $index"><i style="font-size: 10px;" class="icon-doc"></i>
<a style="text-decoration: none;" href="#/notebook/{{note.id}}">{{note.name || 'Note ' + note.id}}</a>
<a style="text-decoration: none;" href="#/notebook/{{note.id}}">{{noteName(note)}}</a>
</li>
</ul>
</div>
</div>
""")
```
After running the notebook you will see output similar to this one:
<img src="/assets/themes/zeppelin/img/screenshots/homepage_notebook_list.png" />
The main trick here relies on linking the ```<div>``` to the controller:
```javascript
<div class="col-md-4" ng-controller="HomeCtrl as home">
```
Once we have ```home``` as our controller variable in our ```<div></div>```
we can use ```home.notes.list``` to get access to the notebook list.
we can use ```home.notes.list``` to get access to the notebook list.

View file

@ -21,18 +21,18 @@ limitations under the License.
## Zeppelin REST API
Zeppelin provides several REST API's for interaction and remote activation of zeppelin functionality.
All REST API are available starting with the following endpoint ```http://[zeppelin-server]:[zeppelin-port]/api```
Note that zeppein REST API receive or return JSON objects, it it recommended you install some JSON viewers such as
Note that the Zeppelin REST APIs receive and return JSON objects, so it is recommended that you install a JSON viewer such as
[JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc)
If you work with zeppelin and find a need for an additional REST API please [file an issue or send us mail](../../community.html)
If you work with zeppelin and find a need for an additional REST API please [file an issue or send us mail](../../community.html)
<br />
### Configuration REST API list
<table class="table-configuration">
<col width="200">
<tr>
@ -41,7 +41,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```GET``` method return all key/value pair of configurations on the server.<br/>
<td>This ```GET``` method returns all key/value pairs of configurations on the server.<br/>
Note: For security reason, some pairs would not be shown.</td>
</tr>
<tr>
@ -79,7 +79,7 @@ limitations under the License.
"zeppelin.server.context.path":"/",
"zeppelin.ssl.keystore.type":"JKS",
"zeppelin.ssl.truststore.path":"truststore",
"zeppelin.interpreters":"org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter",
"zeppelin.interpreters":"org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter",
"zeppelin.ssl":"false",
"zeppelin.notebook.autoInterpreterBinding":"true",
"zeppelin.notebook.homescreen":"",
@ -94,9 +94,9 @@ limitations under the License.
</td>
</tr>
</table>
<br/>
<table class="table-configuration">
<col width="200">
<tr>
@ -105,7 +105,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```GET``` method return all prefix matched key/value pair of configurations on the server.<br/>
<td>This ```GET``` method returns all prefix-matched key/value pairs of configurations on the server.<br/>
Note: For security reason, some pairs would not be shown.</td>
</tr>
<tr>

View file

@ -21,18 +21,18 @@ limitations under the License.
## Zeppelin REST API
Zeppelin provides several REST API's for interaction and remote activation of zeppelin functionality.
All REST API are available starting with the following endpoint `http://[zeppelin-server]:[zeppelin-port]/api`.
Note that zeppein REST API receive or return JSON objects, it it recommended you install some JSON viewers such as
Note that the Zeppelin REST APIs receive and return JSON objects, so it is recommended that you install a JSON viewer such as
[JSON View](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc).
If you work with zeppelin and find a need for an additional REST API, please [file an issue or send us mail](http://zeppelin.incubator.apache.org/community.html).
If you work with zeppelin and find a need for an additional REST API, please [file an issue or send us mail](http://zeppelin.incubator.apache.org/community.html).
<br />
## Interpreter REST API List
The role of registered interpreters, settings and interpreters group are described in [here](../manual/interpreters.html).
### 1. List of Registered Interpreters & Interpreter Settings
<table class="table-configuration">
@ -106,9 +106,9 @@ limitations under the License.
</td>
</tr>
</table>
<br/>
<table class="table-configuration">
<col width="200">
<tr>
@ -268,8 +268,8 @@ limitations under the License.
</td>
</tr>
</table>
<br/>
### 3. Update an Interpreter Setting
@ -354,7 +354,7 @@ limitations under the License.
</tr>
</table>
<br/>
### 4. Delete an Interpreter Setting
@ -388,9 +388,9 @@ limitations under the License.
</tr>
</table>
<br/>
### 5. Restart an Interpreter
### 5. Restart an Interpreter
<table class="table-configuration">
<col width="200">

View file

@ -21,20 +21,20 @@ limitations under the License.
## Zeppelin REST API
Zeppelin provides several REST APIs for interaction and remote activation of zeppelin functionality.
All REST APIs are available starting with the following endpoint ```http://[zeppelin-server]:[zeppelin-port]/api```
Note that zeppelin REST APIs receive or return JSON objects, it is recommended for you to install some JSON viewers
such as [JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc)
If you work with zeppelin and find a need for an additional REST API please [file an issue or send us mail](../../community.html)
If you work with zeppelin and find a need for an additional REST API please [file an issue or send us mail](../../community.html)
<br />
### Notebook REST API list
Notebooks REST API supports the following operations: List, Create, Get, Delete, Clone, Run, Export, Import as detailed in the following table
Notebooks REST API supports the following operations: List, Create, Get, Delete, Clone, Run, Export, Import as detailed in the following table
<table class="table-configuration">
<col width="200">
<tr>
@ -64,7 +64,7 @@ limitations under the License.
<td><pre>{"status":"OK","message":"","body":[{"name":"Homepage","id":"2AV4WUEMK"},{"name":"Zeppelin Tutorial","id":"2A94M5J1Z"}]}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -99,7 +99,7 @@ limitations under the License.
<td> sample JSON input (with initial paragraphs) </td>
<td><pre>
{
"name": "name of new notebook",
"name": "name of new notebook",
"paragraphs": [
{
"title": "paragraph title1",
@ -118,7 +118,7 @@ limitations under the License.
<td><pre>{"status": "CREATED","message": "","body": "2AZPHY918"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -220,7 +220,7 @@ limitations under the License.
</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -251,9 +251,9 @@ limitations under the License.
<td><pre>{"status":"OK","message":""}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
<col width="200">
<tr>
@ -262,7 +262,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```POST``` method clones a notebook by the given id and create a new notebook using the given name
<td>This ```POST``` method clones a notebook by the given id and creates a new notebook using the given name
or default name if none given.
The body field of the returned JSON contains the new notebook id.
</td>
@ -288,7 +288,7 @@ limitations under the License.
<td><pre>{"status": "CREATED","message": "","body": "2AZPHY918"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -319,7 +319,7 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -330,7 +330,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```DELETE``` method stops all paragraph in the given notebook id.
<td>This ```DELETE``` method stops all paragraphs in the notebook with the given id.
</td>
</tr>
<tr>
@ -350,9 +350,9 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<br/>
<table class="table-configuration">
@ -363,7 +363,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```GET``` method gets all paragraph status by the given notebook id.
<td>This ```GET``` method gets all paragraph status by the given notebook id.
The body field of the returned JSON contains an array composed of the paragraph id, paragraph status, paragraph finish date, and paragraph start date.
</td>
</tr>
@ -384,7 +384,7 @@ limitations under the License.
<td><pre>{"status":"OK","body":[{"id":"20151121-212654_766735423","status":"FINISHED","finished":"Tue Nov 24 14:21:40 KST 2015","started":"Tue Nov 24 14:21:39 KST 2015"},{"progress":"1","id":"20151121-212657_730976687","status":"RUNNING","finished":"Tue Nov 24 14:21:35 KST 2015","started":"Tue Nov 24 14:21:40 KST 2015"}]}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -395,7 +395,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```POST``` method runs the paragraph by given notebook and paragraph id.
<td>This ```POST``` method runs the paragraph by given notebook and paragraph id.
</td>
</tr>
<tr>
@ -427,7 +427,7 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -438,7 +438,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```DELETE``` method stops the paragraph by given notebook and paragraph id.
<td>This ```DELETE``` method stops the paragraph by given notebook and paragraph id.
</td>
</tr>
<tr>
@ -458,7 +458,7 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -469,7 +469,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```POST``` method adds cron job by the given notebook id.
<td>This ```POST``` method adds cron job by the given notebook id.
</td>
</tr>
<tr>
@ -493,7 +493,7 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -504,7 +504,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```DELETE``` method removes cron job by the given notebook id.
<td>This ```DELETE``` method removes cron job by the given notebook id.
</td>
</tr>
<tr>
@ -524,7 +524,7 @@ limitations under the License.
<td><pre>{"status":"OK"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -535,7 +535,7 @@ limitations under the License.
</tr>
<tr>
<td>Description</td>
<td>This ```GET``` method gets cron job expression of given notebook id.
<td>This ```GET``` method gets cron job expression of given notebook id.
The body field of the returned JSON contains the cron expression.
</td>
</tr>
@ -585,7 +585,7 @@ limitations under the License.
<td><pre>{"status":"OK", "body": [{"id":"<noteId>/paragraph/<paragraphId>", "name":"Notebook Name", "snippet":"", "text":""}]}</pre></td>
</tr>
</table>
<br/>
@ -616,16 +616,16 @@ limitations under the License.
<tr>
<td> sample JSON input (add to the last) </td>
<td><pre>
{
"title": "Paragraph insert revised",
"text": "%spark\nprintln(\"Paragraph insert revised\")"
{
"title": "Paragraph insert revised",
"text": "%spark\nprintln(\"Paragraph insert revised\")"
}</pre></td>
</tr>
<tr>
<td> sample JSON input (add to specific index) </td>
<td><pre>
{
"title": "Paragraph insert revised",
{
"title": "Paragraph insert revised",
"text": "%spark\nprintln(\"Paragraph insert revised\")",
"index": 0
}
@ -636,7 +636,7 @@ limitations under the License.
<td><pre>{"status": "CREATED","message": "","body": "20151218-100330_1754029574"}</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -709,7 +709,7 @@ limitations under the License.
</pre></td>
</tr>
</table>
<br/>
<table class="table-configuration">
@ -740,7 +740,7 @@ limitations under the License.
<td><pre>{"status":"OK","message":""}</pre></td>
</tr>
</table>
<br/>
@ -772,9 +772,9 @@ limitations under the License.
<td><pre>{"status":"OK","message":""}</pre></td>
</tr>
</table>
<table class="table-configuration">
<col width="200">
<tr>
@ -826,7 +826,7 @@ limitations under the License.
}</pre></td>
</tr>
</table>
<table class="table-configuration">
<col width="200">
<tr>
@ -881,4 +881,4 @@ limitations under the License.
<td><pre>{"status": "CREATED","message": "","body": "2AZPHY918"}</pre></td>
</tr>
</tr>
</table>
</table>

View file

@ -42,4 +42,4 @@ limitations under the License.
</div>
<div class="col-md-3">
</div>
</div>
</div>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Authentication"
description: "Authentication"
title: "Authentication for NGINX"
description: "Authentication for NGINX"
group: security
---
<!--
@ -17,15 +17,114 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Authentication
# Authentication for NGINX
Authentication is company-specific.
Authentication is company-specific.
One option is to use [Basic Access Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication)
### HTTP Basic Authentication using NGINX
> **Quote from Wikipedia:** NGINX is a web server. It can act as a reverse proxy server for HTTP, HTTPS, SMTP, POP3, and IMAP protocols, as well as a load balancer and an HTTP cache.
So you can use NGINX server as proxy server to serve HTTP Basic Authentication as a separate process along with Zeppelin server.
Here are instructions for setting up NGINX as a front-end authentication server with Zeppelin running behind it.
These instructions are based on Ubuntu 14.04 LTS but may work on other operating systems with a few configuration changes.
1. Install NGINX server on your server instance
You can install the NGINX server on the same machine where Zeppelin is installed, or on a separate machine dedicated to serving as the proxy server.
```
$ apt-get install nginx
```
*Important: On versions of NGINX older than 1.3.13, proxying for WebSocket may not fully work. Please use the latest version of NGINX. See: [NGINX documentation](https://www.nginx.com/blog/websocket-nginx/)*
1. Setup init script in NGINX
In most cases, the NGINX configuration is located under `/etc/nginx/sites-available`. Create your own configuration or add your existing configuration at `/etc/nginx/sites-available`.
```
$ cd /etc/nginx/sites-available
$ touch my-zeppelin-auth-setting
```
Now add this script into the `my-zeppelin-auth-setting` file. You can comment out the `optional` lines if you want to serve Zeppelin over plain HTTP on port 80.
```
upstream zeppelin {
server [YOUR-ZEPPELIN-SERVER-IP]:[YOUR-ZEPPELIN-SERVER-PORT]; # For security, It is highly recommended to make this address/port as non-public accessible
}
# Zeppelin Website
server {
listen [YOUR-ZEPPELIN-WEB-SERVER-PORT];
listen 443 ssl; # optional, to serve HTTPS connection
server_name [YOUR-ZEPPELIN-SERVER-HOST]; # for example: zeppelin.mycompany.com
ssl_certificate [PATH-TO-YOUR-CERT-FILE]; # optional, to serve HTTPS connection
ssl_certificate_key [PATH-TO-YOUR-CERT-KEY-FILE]; # optional, to serve HTTPS connection
if ($ssl_protocol = "") {
rewrite ^ https://$host$request_uri? permanent; # optional, to force use of HTTPS
}
location / { # For regular webserver support
proxy_pass http://zeppelin;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $http_host;
proxy_set_header X-NginX-Proxy true;
proxy_redirect off;
auth_basic "Restricted";
auth_basic_user_file /etc/nginx/.htpasswd;
}
location /ws { # For websocket support
proxy_pass http://zeppelin;
proxy_http_version 1.1;
proxy_set_header Upgrade websocket;
proxy_set_header Connection upgrade;
proxy_read_timeout 86400;
}
}
```
Then make a symbolic link to this file from `/etc/nginx/sites-enabled/` to enable configuration above when NGINX reloads.
```
$ ln -s /etc/nginx/sites-available/my-zeppelin-auth-setting /etc/nginx/sites-enabled/my-zeppelin-auth-setting
```
1. Setup user credential into `.htpasswd` file and restart server
Now you need to set up the `.htpasswd` file, which holds the list of authenticated user credentials for the NGINX server.
```
$ cd /etc/nginx
$ htpasswd -c .htpasswd [YOUR-ID]
$ NEW passwd: [YOUR-PASSWORD]
$ RE-type new passwd: [YOUR-PASSWORD-AGAIN]
```
Or you can use your own apache `.htpasswd` files in other location for setting up property: `auth_basic_user_file`
Restart NGINX server.
```
$ service nginx restart
```
Then check that HTTP Basic Authentication works in a browser. If you see the regular basic auth popup and are able to log in with the credentials you entered into `.htpasswd`, you are good to go.
1. More security consideration
* Using HTTPS connection with Basic Authentication is highly recommended since basic auth without encryption may expose your important credential information over the network.
* Using the [Shiro Security feature built into Zeppelin](https://github.com/apache/incubator-zeppelin/blob/master/SECURITY-README.md) is recommended if you prefer an all-in-one solution for authentication, but NGINX may provide an ad-hoc solution for reusing authentication served by your system's NGINX server, or for cases where you need to separate authentication from the Zeppelin server.
* It is recommended to isolate direct connection to Zeppelin server from public internet or external services to secure your zeppelin instance from unexpected attack or problems caused by public zone.
### Another option
Another option is to have an authentication server that can verify user credentials in an LDAP server.
If an incoming request to the Zeppelin server does not have a cookie with user information encrypted with the authentication server public key, the user
is redirected to the authentication server. Once the user is verified, the authentication server redirects the browser to a specific
URL in the Zeppelin server which sets the authentication cookie in the browser.
The end result is that all requests to the Zeppelin
web server have the authentication cookie which contains user and groups information.
is redirected to the authentication server. Once the user is verified, the authentication server redirects the browser to a specific URL in the Zeppelin server which sets the authentication cookie in the browser.
The end result is that all requests to the Zeppelin web server have the authentication cookie which contains user and groups information.

View file

@ -33,5 +33,5 @@ Before executing a Note operation, it checks if the user and the groups associat
operation, it checks if the user and the groups have at least one entity that belongs to the reader entities.
To initialize and modify note permissions, we provide UI like "Interpreter binding". The user inputs comma separated entities for owners, readers and writers.
We execute a rest api call with this information. In the backend we get the user information for the connection and allow the operation if the user and groups
We execute a rest api call with this information. In the backend we get the user information for the connection and allow the operation if the user and groups
associated with the current user have at least one entity that belongs to owner entities for the note.

View file

@ -2,7 +2,7 @@
layout: page
title: "Shiro Security for Apache Zeppelin"
description: ""
group: manual
group: security
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
@ -20,7 +20,7 @@ limitations under the License.
{% include JB/setup %}
# Shiro authentication for Apache Zeppelin
[Apache Shiro](http://shiro.apache.org/) is a powerful and easy-to-use Java security framework that performs authentication, authorization, cryptography, and session management. In this documentation, we will explain step by step how Shiro works for Zeppelin notebook authentication.
[Apache Shiro](http://shiro.apache.org/) is a powerful and easy-to-use Java security framework that performs authentication, authorization, cryptography, and session management. In this documentation, we will explain step by step how Shiro works for Zeppelin notebook authentication.
When you connect to Apache Zeppelin, you will be asked to enter your credentials. Once you are logged in, you have access to all notes, including other users' notes.
@ -28,7 +28,7 @@ When you connect to Apache Zeppelin, you will be asked to enter your credentials
You can setup **Zeppelin notebook authentication** in some simple steps.
####1. Secure the HTTP channel
To secure the HTTP channel, you have to change both **anon** and **authcBasic** settings in `conf/shiro.ini`. In here, **anon** means "the access is anonymous" and **authcBasic** means "basic auth security".
To secure the HTTP channel, you have to change both **anon** and **authcBasic** settings in `conf/shiro.ini`. In here, **anon** means "the access is anonymous" and **authcBasic** means "basic auth security".
The default status of them is
@ -36,7 +36,7 @@ The default status of them is
/** = anon
#/** = authcBasic
```
Deactivate the line "/** = anon" and activate the line "/** = authcBasic" in `conf/shiro.ini` file.
Deactivate the line "/** = anon" and activate the line "/** = authcBasic" in `conf/shiro.ini` file.
```
#/** = anon
@ -49,24 +49,24 @@ For the further information about `shiro.ini` file format, please refer to [Shi
Set the property **zeppelin.anonymous.allowed** to **false** in `conf/zeppelin-site.xml`. If you don't have this file yet, just copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`.
####3. Start Zeppelin
```
bin/zeppelin-daemon.sh start (or restart)
```
Then you can browse Zeppelin at [http://localhost:8080](http://localhost:8080).
####4. Login
Finally, you can login using one of the below **username/password** combinations.
Finally, you can login using one of the below **username/password** combinations.
<center><img src="../assets/themes/zeppelin/img/docs-img/zeppelin-login.png" width="40%" height="40%"></center>
<center><img src="../assets/themes/zeppelin/img/docs-img/zeppelin-login.png"></center>
```
admin = password1
user1 = password2
user2 = password3
```
Those combinations are defined in the `conf/shiro.ini` file.
> **NOTE :** This documentation is originally from [SECURITY-README.md](https://github.com/apache/incubator-zeppelin/blob/master/SECURITY-README.md).
> **NOTE :** This documentation is originally from [SECURITY-README.md](https://github.com/apache/incubator-zeppelin/blob/master/SECURITY-README.md).

View file

@ -5,4 +5,4 @@ title : Sitemap
{% for page in site.pages %}
{{site.production_url}}{{ page.url }}{% endfor %}
{% for post in site.posts %}
{{site.production_url}}{{ post.url }}{% endfor %}
{{site.production_url}}{{ post.url }}{% endfor %}

View file

@ -20,15 +20,17 @@ limitations under the License.
### Notebook Storage
Zeppelin has a pluggable notebook storage mechanism controlled by `zeppelin.notebook.storage` configuration option with multiple implementations.
There are few Notebook storages avaialble for a use out of the box:
There are a few notebook storage systems available for use out of the box:
- (default) all notes are saved in the notebook folder in your local File System - `VFSNotebookRepo`
- there is also an option to version it using local Git repository - `GitNotebookRepo`
- another option is Amazon S3 service - `S3NotebookRepo`
- use local file system and version it using local Git repository - `GitNotebookRepo`
- storage using Amazon S3 service - `S3NotebookRepo`
- storage using Azure service - `AzureNotebookRepo`
Multiple storages can be used at the same time by providing a comma-separated list of the calss-names in the confiruration.
Multiple storage systems can be used at the same time by providing a comma-separated list of the class-names in the configuration.
By default, only first two of them will be automatically kept in sync by Zeppelin.
</br>
#### Notebook Storage in local Git repository <a name="Git"></a>
To enable versioning for all your local notebooks through a standard Git repository - uncomment the next property in `zeppelin-site.xml` in order to use GitNotebookRepo class:
@ -42,44 +44,46 @@ To enable versioning for all your local notebooks though a standard Git reposito
```
</br>
#### Notebook Storage in S3 <a name="S3"></a>
For notebook storage in S3 you need the AWS credentials, for this there are three options, the enviroment variable ```AWS_ACCESS_KEY_ID``` and ```AWS_ACCESS_SECRET_KEY```, credentials file in the folder .aws in you home and IAM role for your instance. For complete the need steps is necessary:
Notebooks may be stored in S3, and optionally encrypted. The [``DefaultAWSCredentialsProviderChain``](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) credentials provider is used for credentials and checks the following:
- The ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables
- The ``aws.accessKeyId`` and ``aws.secretKey`` Java System properties
- Credential profiles file at the default location (````~/.aws/credentials````) used by the AWS CLI
- Instance profile credentials delivered through the Amazon EC2 metadata service
</br>
you need the following folder structure on S3
The following folder structure will be created in S3:
```
bucket_name/
username/
notebook/
s3://bucket_name/username/notebook-id/
```
set the enviroment variable in the file **zeppelin-env.sh**:
Configure by setting environment variables in the file **zeppelin-env.sh**:
```
export ZEPPELIN_NOTEBOOK_S3_BUCKET=bucket_name
export ZEPPELIN_NOTEBOOK_S3_USER=username
```
in the file **zeppelin-site.xml** uncommet and complete the next property:
Or using the file **zeppelin-site.xml** uncomment and complete the S3 settings:
```
<!--If used S3 to storage, it is necessary the following folder structure bucket_name/username/notebook/-->
<property>
<name>zeppelin.notebook.s3.user</name>
<value>username</value>
<description>user name for s3 folder structure</description>
</property>
<property>
<name>zeppelin.notebook.s3.bucket</name>
<value>bucket_name</value>
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.s3.user</name>
<value>username</value>
<description>user name for s3 folder structure</description>
</property>
```
uncomment the next property for use S3NotebookRepo class:
Uncomment the next property to use the S3NotebookRepo class:
```
<property>
@ -89,7 +93,7 @@ uncomment the next property for use S3NotebookRepo class:
</property>
```
comment the next property:
Comment out the next property to disable local notebook storage (the default):
```
<property>
@ -97,4 +101,134 @@ comment the next property:
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
```
#### Data Encryption in S3
##### AWS KMS encryption keys
To use an [AWS KMS](https://aws.amazon.com/kms/) encryption key to encrypt notebooks, set the following environment variable in the file **zeppelin-env.sh**:
```
export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID=kms-key-id
```
Or using the following setting in **zeppelin-site.xml**:
```
<property>
<name>zeppelin.notebook.s3.kmsKeyID</name>
<value>AWS-KMS-Key-UUID</value>
<description>AWS KMS key ID used to encrypt notebook data in S3</description>
</property>
```
##### Custom Encryption Materials Provider class
You may use a custom [``EncryptionMaterialsProvider``](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html) class as long as it is available in the classpath and able to initialize itself from system properties or another mechanism. To use this, set the following environment variable in the file **zeppelin-env.sh**:
```
export ZEPPELIN_NOTEBOOK_S3_EMP=class-name
```
Or using the following setting in **zeppelin-site.xml**:
```
<property>
<name>zeppelin.notebook.s3.encryptionMaterialsProvider</name>
<value>provider implementation class name</value>
<description>Custom encryption materials provider used to encrypt notebook data in S3</description>
</property>
```
</br>
#### Notebook Storage in Azure <a name="Azure"></a>
Using `AzureNotebookRepo` you can connect your Zeppelin with your Azure account for notebook storage.
</br>
First of all, input your `AccountName`, `AccountKey`, and `Share Name` in the file **zeppelin-site.xml** by uncommenting and completing the following properties:
```
<property>
<name>zeppelin.notebook.azure.connectionString</name>
<value>DefaultEndpointsProtocol=https;AccountName=<accountName>;AccountKey=<accountKey></value>
<description>Azure account credentials</description>
</property>
<property>
<name>zeppelin.notebook.azure.share</name>
<value>zeppelin</value>
<description>share name for notebook storage</description>
</property>
```
Secondly, you can initialize `AzureNotebookRepo` class in the file **zeppelin-site.xml** by commenting the next property:
```
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
```
and uncommenting:
```
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.AzureNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
```
In case you want to use simultaneously your local storage with Azure storage use the following property instead:
```
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.AzureNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
```
Optionally, you can specify the Azure folder structure name in the file **zeppelin-site.xml** by uncommenting the next property:
```
<property>
<name>zeppelin.notebook.azure.user</name>
<value>user</value>
<description>optional user name for Azure folder structure</description>
</property>
```
</br>
#### Notebook Storage in ZeppelinHub <a name="ZeppelinHub"></a>
ZeppelinHub storage layer allows out of the box connection of Zeppelin instance with your ZeppelinHub account. First of all, you need to either uncomment the following property in **zeppelin-site.xml**:
```
<!-- For connecting your Zeppelin with ZeppelinHub -->
<!--
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo</value>
<description>two notebook persistence layers (local + ZeppelinHub)</description>
</property>
-->
```
or set the environment variable in the file **zeppelin-env.sh**:
```
export ZEPPELIN_NOTEBOOK_STORAGE="org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo"
```
Secondly, you need to set the environment variables in the file **zeppelin-env.sh**:
```
export ZEPPELINHUB_API_TOKEN="ZeppelinHub token"
export ZEPPELINHUB_API_ADDRESS="address of ZeppelinHub service (e.g. https://www.zeppelinhub.com)"
```
You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).

View file

@ -0,0 +1,136 @@
---
layout: page
title: "Zeppelin UI Layout"
description: "Description of Zeppelin UI Layout"
group: ui_layout
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
## Home Page
The first time you connect to Zeppelin, you'll land at the main page similar to the below screen capture
<img src="../assets/themes/zeppelin/img/ui-img/homepage.png" />
On the left of the page are listed all existing notes. Those notes are stored by default in the `$ZEPPELIN_HOME/notebook` folder.
You can filter them by name using the input text form. You can also create a new note, refresh the list of existing notes
(in case you manually copy them into the `$ZEPPELIN_HOME/notebook` folder) and import a note
<img src="../assets/themes/zeppelin/img/ui-img/notes_management.png" />
When clicking on the `Import Note` link, a new dialog opens. From there you can import your note from local disk or from a remote location
if you provide the URL.
<img src="../assets/themes/zeppelin/img/ui-img/note_import_dialog.png" />
By default, the name of the imported note is the same as the original note but you can override it by providing a new name
<br />
## Menus
### 1. Notebook
The `Notebook` menu proposes almost the same features as the note management section in the home page. From the drop-down menu you can:
1. Open a selected note
2. Filter notes by name
3. Create a new note
<img src="../assets/themes/zeppelin/img/ui-img/notebook_menu.png" />
### 2. Interpreter
In this menu you can:
1. Configure existing **interpreter instance**
2. Add/remove **interpreter instances**
<img src="../assets/themes/zeppelin/img/ui-img/interpreter_menu.png" />
### 3. Configuration
This menu displays all the Zeppelin configurations that are set in the config file `$ZEPPELIN_HOME/conf/zeppelin-site.xml`
<img src="../assets/themes/zeppelin/img/ui-img/configuration_menu.png" />
<br />
## Note Layout
Each Zeppelin note is composed of 1 .. N paragraphs. The note can be viewed as a paragraph container.
<img src="../assets/themes/zeppelin/img/ui-img/note_paragraph_layout.png" />
### Paragraph
Each paragraph consists of 2 sections: `code section` where you put your source code and `result section` where you can see the result of the code execution.
<img src="../assets/themes/zeppelin/img/ui-img/paragraph_layout.png" />
On the top-right corner of each paragraph there are some commands to:
* execute the paragraph code
* hide/show `code section`
* hide/show `result section`
* configure the paragraph
To configure the paragraph, just click on the gear icon:
<img src="../assets/themes/zeppelin/img/ui-img/paragraph_configuration_dialog.png" />
From this dialog, you can (in descending order):
* find the **paragraph id** ( **20150924-163507_134879501** )
* control paragraph width. Since Zeppelin is using the grid system of **Twitter Bootstrap**, each paragraph width can be changed from 1 to 12
* move the paragraph 1 level up
* move the paragraph 1 level down
* create a new paragraph
* change paragraph title
* show/hide line number in the `code section`
* disable the run button for this paragraph
* export the current paragraph as an **iframe** and open the **iframe** in a new window
* clear the `result section`
* delete the current paragraph
### Note toolbar
At the top of the note, you can find a toolbar which exposes command buttons as well as configuration, security and display options
<img src="../assets/themes/zeppelin/img/ui-img/note_toolbar.png" />
On the far right is displayed the note name, just click on it to reveal the input form and update it
In the middle of the toolbar you can find the command buttons:
* execute all the paragraphs **sequentially**, in their display order
* hide/show `code section` of all paragraphs
* hide/show `result section` of all paragraphs
* clear the `result section` of all paragraphs
* clone the current note
* export the current note to a JSON file. _Please note that the `code section` and `result section` of all paragraphs will be exported. If you have heavy data in the `result section` of some paragraphs, it is recommended to clean them before exporting._
* commit the current note content
* delete the note
* schedule the execution of **all paragraphs** using a CRON syntax
<img src="../assets/themes/zeppelin/img/ui-img/note_commands.png" />
On the right of the note tool bar you can find configuration icons:
* display all the keyboard shortcuts
* configure the interpreters binding to the current note
* configure the note permissions
* switch the note display mode between `default`, `simple` and `report`
<img src="../assets/themes/zeppelin/img/ui-img/note_configuration.png" />

View file

@ -36,7 +36,7 @@
<properties>
<elasticsearch.version>2.1.0</elasticsearch.version>
<guava.version>18.0</guava.version>
<json-flattener.version>0.1.1</json-flattener.version>
<json-flattener.version>0.1.6</json-flattener.version>
</properties>
<dependencies>

View file

@ -17,10 +17,21 @@
package org.apache.zeppelin.elasticsearch;
import com.github.wnameless.json.flattener.JsonFlattener;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParseException;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
@ -39,6 +50,7 @@ import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
@ -48,9 +60,10 @@ import org.elasticsearch.search.aggregations.metrics.InternalMetricsAggregation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetAddress;
import java.util.*;
import com.github.wnameless.json.flattener.JsonFlattener;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParseException;
/**
@ -80,7 +93,9 @@ public class ElasticsearchInterpreter extends Interpreter {
private static final List<String> COMMANDS = Arrays.asList(
"count", "delete", "get", "help", "index", "search");
private static final Pattern FIELD_NAME_PATTERN = Pattern.compile("\\[\\\\\"(.+)\\\\\"\\](.*)");
public static final String ELASTICSEARCH_HOST = "elasticsearch.host";
public static final String ELASTICSEARCH_PORT = "elasticsearch.port";
@ -141,7 +156,7 @@ public class ElasticsearchInterpreter extends Interpreter {
@Override
public InterpreterResult interpret(String cmd, InterpreterContext interpreterContext) {
logger.info("Run Elasticsearch command '" + cmd + "'");
if (StringUtils.isEmpty(cmd) || StringUtils.isEmpty(cmd.trim())) {
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
}
@ -260,15 +275,15 @@ public class ElasticsearchInterpreter extends Interpreter {
/**
* Processes a "get" request.
*
*
* @param urlItems Items of the URL
* @return Result of the get request, it contains a JSON-formatted string
*/
private InterpreterResult processGet(String[] urlItems) {
if (urlItems.length != 3
|| StringUtils.isEmpty(urlItems[0])
|| StringUtils.isEmpty(urlItems[1])
if (urlItems.length != 3
|| StringUtils.isEmpty(urlItems[0])
|| StringUtils.isEmpty(urlItems[1])
|| StringUtils.isEmpty(urlItems[2])) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type/id)");
@ -285,13 +300,13 @@ public class ElasticsearchInterpreter extends Interpreter {
InterpreterResult.Type.TEXT,
json);
}
return new InterpreterResult(InterpreterResult.Code.ERROR, "Document not found");
}
/**
* Processes a "count" request.
*
*
* @param urlItems Items of the URL
* @param data May contains the JSON of the request
* @return Result of the count request, it contains the total hits
@ -313,7 +328,7 @@ public class ElasticsearchInterpreter extends Interpreter {
/**
* Processes a "search" request.
*
*
* @param urlItems Items of the URL
* @param data May contains the JSON of the request
* @param size Limit of result set
@ -325,7 +340,7 @@ public class ElasticsearchInterpreter extends Interpreter {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index1,index2,.../type1,type2,...)");
}
final SearchResponse response = searchData(urlItems, data, size);
return buildResponseMessage(response);
@ -333,18 +348,18 @@ public class ElasticsearchInterpreter extends Interpreter {
/**
* Processes a "index" request.
*
*
* @param urlItems Items of the URL
* @param data JSON to be indexed
* @return Result of the index request, it contains the id of the document
*/
private InterpreterResult processIndex(String[] urlItems, String data) {
if (urlItems.length < 2 || urlItems.length > 3) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type or /index/type/id)");
}
final IndexResponse response = client
.prepareIndex(urlItems[0], urlItems[1], urlItems.length == 2 ? null : urlItems[2])
.setSource(data)
@ -358,15 +373,15 @@ public class ElasticsearchInterpreter extends Interpreter {
/**
* Processes a "delete" request.
*
*
* @param urlItems Items of the URL
* @return Result of the delete request, it contains the id of the deleted document
*/
private InterpreterResult processDelete(String[] urlItems) {
if (urlItems.length != 3
|| StringUtils.isEmpty(urlItems[0])
|| StringUtils.isEmpty(urlItems[1])
if (urlItems.length != 3
|| StringUtils.isEmpty(urlItems[0])
|| StringUtils.isEmpty(urlItems[1])
|| StringUtils.isEmpty(urlItems[2])) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type/id)");
@ -375,23 +390,23 @@ public class ElasticsearchInterpreter extends Interpreter {
final DeleteResponse response = client
.prepareDelete(urlItems[0], urlItems[1], urlItems[2])
.get();
if (response.isFound()) {
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TEXT,
response.getId());
}
return new InterpreterResult(InterpreterResult.Code.ERROR, "Document not found");
}
private SearchResponse searchData(String[] urlItems, String query, int size) {
final SearchRequestBuilder reqBuilder = new SearchRequestBuilder(
client, SearchAction.INSTANCE);
reqBuilder.setIndices();
if (urlItems.length >= 1) {
reqBuilder.setIndices(StringUtils.split(urlItems[0], ","));
}
@ -452,18 +467,42 @@ public class ElasticsearchInterpreter extends Interpreter {
}
private String buildSearchHitsResponseMessage(SearchHit[] hits) {
if (hits == null || hits.length == 0) {
return "";
}
//First : get all the keys in order to build an ordered list of the values for each hit
//
final Map<String, Object> hitFields = new HashMap<>();
final List<Map<String, Object>> flattenHits = new LinkedList<>();
final Set<String> keys = new TreeSet<>();
for (SearchHit hit : hits) {
final String json = hit.getSourceAsString();
final Map<String, Object> flattenMap = JsonFlattener.flattenAsMap(json);
// Fields can be found either in _source, or in fields (it depends on the query)
//
String json = hit.getSourceAsString();
if (json == null) {
hitFields.clear();
for (SearchHitField hitField : hit.getFields().values()) {
hitFields.put(hitField.getName(), hitField.getValues());
}
json = gson.toJson(hitFields);
}
final Map<String, Object> flattenJsonMap = JsonFlattener.flattenAsMap(json);
final Map<String, Object> flattenMap = new HashMap<>();
for (Iterator<String> iter = flattenJsonMap.keySet().iterator(); iter.hasNext(); ) {
// Replace keys that match a format like that : [\"keyname\"][0]
final String fieldName = iter.next();
final Matcher fieldNameMatcher = FIELD_NAME_PATTERN.matcher(fieldName);
if (fieldNameMatcher.matches()) {
flattenMap.put(fieldNameMatcher.group(1) + fieldNameMatcher.group(2),
flattenJsonMap.get(fieldName));
}
else {
flattenMap.put(fieldName, flattenJsonMap.get(fieldName));
}
}
flattenHits.add(flattenMap);
for (String key : flattenMap.keySet()) {

View file

@ -17,6 +17,15 @@
package org.apache.zeppelin.elasticsearch;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.Properties;
import java.util.UUID;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
@ -29,21 +38,12 @@ import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.Properties;
import java.util.UUID;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.junit.Assert.assertEquals;
public class ElasticsearchInterpreterTest {
private static Client elsClient;
private static Node elsNode;
private static ElasticsearchInterpreter interpreter;
private static final String[] METHODS = { "GET", "PUT", "DELETE", "POST" };
private static final int[] STATUS = { 200, 404, 500, 403 };
@ -75,7 +75,7 @@ public class ElasticsearchInterpreterTest {
.field("type", "integer")
.endObject()
.endObject().endObject().endObject()).get();
for (int i = 0; i < 50; i++) {
elsClient.prepareIndex("logs", "http", "" + i)
.setRefresh(true)
@ -100,7 +100,7 @@ public class ElasticsearchInterpreterTest {
interpreter = new ElasticsearchInterpreter(props);
interpreter.open();
}
@AfterClass
public static void clean() {
if (interpreter != null) {
@ -116,41 +116,44 @@ public class ElasticsearchInterpreterTest {
elsNode.close();
}
}
@Test
public void testCount() {
InterpreterResult res = interpreter.interpret("count /unknown", null);
assertEquals(Code.ERROR, res.code());
res = interpreter.interpret("count /logs", null);
assertEquals("50", res.message());
}
@Test
public void testGet() {
InterpreterResult res = interpreter.interpret("get /logs/http/unknown", null);
assertEquals(Code.ERROR, res.code());
res = interpreter.interpret("get /logs/http/10", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test
public void testSearch() {
InterpreterResult res = interpreter.interpret("size 10\nsearch /logs *", null);
assertEquals(Code.SUCCESS, res.code());
res = interpreter.interpret("search /logs {{{hello}}}", null);
assertEquals(Code.ERROR, res.code());
res = interpreter.interpret("search /logs { \"query\": { \"match\": { \"status\": 500 } } }", null);
assertEquals(Code.SUCCESS, res.code());
res = interpreter.interpret("search /logs status:404", null);
assertEquals(Code.SUCCESS, res.code());
assertEquals(Code.SUCCESS, res.code());
res = interpreter.interpret("search /logs { \"fields\": [ \"date\", \"request.headers\" ], \"query\": { \"match\": { \"status\": 500 } } }", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test
@ -177,23 +180,23 @@ public class ElasticsearchInterpreterTest {
" { \"terms\" : { \"field\" : \"status\" } } } }", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test
public void testIndex() {
InterpreterResult res = interpreter.interpret("index /logs { \"date\": \"" + new Date() + "\", \"method\": \"PUT\", \"status\": \"500\" }", null);
assertEquals(Code.ERROR, res.code());
res = interpreter.interpret("index /logs/http { \"date\": \"2015-12-06T14:54:23.368Z\", \"method\": \"PUT\", \"status\": \"500\" }", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test
public void testDelete() {
InterpreterResult res = interpreter.interpret("delete /logs/http/unknown", null);
assertEquals(Code.ERROR, res.code());
res = interpreter.interpret("delete /logs/http/11", null);
assertEquals("11", res.message());
}

146
file/pom.xml Normal file
View file

@ -0,0 +1,146 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.6.0-incubating-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-file</artifactId>
<packaging>jar</packaging>
<version>0.6.0-incubating-SNAPSHOT</version>
<name>Zeppelin: File System Interpreters</name>
<url>http://www.apache.org</url>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.ws.rs</groupId>
<artifactId>javax.ws.rs-api</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-common</artifactId>
<version>2.22.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18.1</version>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/file</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/file</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<!--<includeScope>runtime</includeScope>-->
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,171 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.file;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
/**
 * File interpreter for Zeppelin.
 *
 * <p>Provides a small shell-like command set ({@code cd}, {@code ls}, {@code pwd}) on top of
 * an abstract file system. Command parsing and current-directory tracking live here;
 * subclasses supply {@link #listAll(String)} and {@link #isDirectory(String)}.
 */
public abstract class FileInterpreter extends Interpreter {
  Logger logger = LoggerFactory.getLogger(FileInterpreter.class);

  // Current working directory; updated by "cd" and used to resolve relative paths.
  String currentDir = null;

  // Parsed form of the command most recently passed to interpret().
  CommandArgs args = null;

  public FileInterpreter(Properties property) {
    super(property);
    currentDir = "/";
  }

  /**
   * Handling the arguments of the command.
   *
   * <p>The raw input is split on whitespace: the first token is the command, tokens starting
   * with {@code -} contribute one flag per following character, and all other tokens are
   * collected as positional arguments.
   */
  public class CommandArgs {
    public String input = null;
    public String command = null;
    public ArrayList<String> args = null;
    public HashSet<Character> flags = null;

    public CommandArgs(String cmd) {
      input = cmd;
      args = new ArrayList<>();
      flags = new HashSet<>();
    }

    // Classifies one token as either a group of flags ("-lh") or a positional argument.
    private void parseArg(String arg) {
      if (arg.charAt(0) == '-') {
        // Start after the leading '-' so the dash itself is not recorded as a flag.
        for (int i = 1; i < arg.length(); i++) {
          flags.add(arg.charAt(i));
        }
      } else {
        args.add(arg);
      }
    }

    /**
     * Tokenizes {@code input} and fills {@code command}, {@code args} and {@code flags}.
     * Does nothing when the input is null; leaves {@code command} null when the input
     * contains no tokens.
     */
    public void parseArgs() {
      if (input == null) {
        return;
      }
      StringTokenizer st = new StringTokenizer(input);
      if (st.hasMoreTokens()) {
        command = st.nextToken();
        while (st.hasMoreTokens()) {
          parseArg(st.nextToken());
        }
      }
    }
  }

  // Functions that each file system implementation must override

  /**
   * Lists the entry or entries found at the given path.
   *
   * @param path normalized path to list
   * @return text shown to the user as the result of {@code ls}
   */
  public abstract String listAll(String path);

  /**
   * Tells whether the given path is a directory.
   *
   * @param path normalized path to check
   * @return true when the path is a directory
   */
  public abstract boolean isDirectory(String path);

  /**
   * Combines the current directory with a user supplied path and normalizes the result,
   * which takes care of components such as {@code ..} and {@code .}.
   *
   * @param argument absolute path, or path relative to the current directory
   * @return normalized path as a string
   */
  protected String getNewPath(String argument){
    Path arg = Paths.get(argument);
    Path ret = arg.isAbsolute() ? arg : Paths.get(currentDir, argument);
    return ret.normalize().toString();
  }

  /**
   * Parses and executes one command uniformly across all file systems.
   *
   * @param cmd raw command line, e.g. {@code ls -l /tmp}
   * @param contextInterpreter interpreter context (not used here)
   * @return SUCCESS with the command output, or ERROR with a message when the command is
   *         missing, unknown, or fails
   */
  @Override
  public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
    logger.info("Run File command '" + cmd + "'");

    args = new CommandArgs(cmd);
    args.parseArgs();

    if (args.command == null) {
      logger.info("Error: No command");
      return new InterpreterResult(Code.ERROR, Type.TEXT, "No command");
    }

    // Simple parsing of the command
    if (args.command.equals("cd")) {
      // Without an argument "cd" re-validates the current directory.
      String newPath = !args.args.isEmpty() ? getNewPath(args.args.get(0)) : currentDir;
      if (!isDirectory(newPath)) {
        return new InterpreterResult(Code.ERROR, Type.TEXT, newPath + ": No such directory");
      }
      currentDir = newPath;
      return new InterpreterResult(Code.SUCCESS, Type.TEXT, "OK");
    } else if (args.command.equals("ls")) {
      String newPath = !args.args.isEmpty() ? getNewPath(args.args.get(0)) : currentDir;
      try {
        String results = listAll(newPath);
        return new InterpreterResult(Code.SUCCESS, Type.TEXT, results);
      } catch (Exception e) {
        logger.error("Error listing files in path " + newPath, e);
        return new InterpreterResult(Code.ERROR, Type.TEXT, e.getMessage());
      }
    } else if (args.command.equals("pwd")) {
      return new InterpreterResult(Code.SUCCESS, Type.TEXT, currentDir);
    } else {
      return new InterpreterResult(Code.ERROR, Type.TEXT, "Unknown command");
    }
  }

  // Commands are not cancellable; nothing to do.
  @Override
  public void cancel(InterpreterContext context) {
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  // Progress is not tracked; always reports 0.
  @Override
  public int getProgress(InterpreterContext context) {
    return 0;
  }

  // One FIFO scheduler per interpreter instance, keyed by class name plus hashCode.
  @Override
  public Scheduler getScheduler() {
    return SchedulerFactory.singleton().createOrGetFIFOScheduler(
        FileInterpreter.class.getName() + this.hashCode());
  }

  // No completion at this level; file system implementations may override.
  @Override
  public List<String> completion(String buf, int cursor) {
    return null;
  }
}

View file

@ -0,0 +1,156 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.file;
import java.net.URL;
import java.net.HttpURLConnection;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import javax.ws.rs.core.UriBuilder;
import org.slf4j.Logger;
/**
 * Definition and HTTP invocation methods for all WebHDFS commands
 *
 * <p>Each supported operation is described by an {@link Op}; {@link #runCommand} builds the
 * request URI from the base URL, the path and the operation, issues the request and returns
 * the response body as a string.
 */
public class HDFSCommand {

  /**
   * Type of HTTP request
   */
  public enum HttpType {
    GET,
    PUT
  }

  /**
   * Definition of WebHDFS operator
   */
  public class Op {
    public String op;
    public HttpType cmd;
    public int minArgs;

    public Op(String op, HttpType cmd, int minArgs) {
      this.op = op;
      this.cmd = cmd;
      this.minArgs = minArgs;
    }
  }

  /**
   * Definition of argument to an operator
   */
  public class Arg {
    public String key;
    public String value;

    public Arg(String key, String value) {
      this.key = key;
      this.value = value;
    }
  }

  // How to connect to WebHDFS
  String url = null;
  // NOTE(review): stored but never added to the request built in runCommand - confirm intent.
  String user = null;
  // Used by runCommand both as the per-line length limit and as the line-count limit.
  int maxLength = 0;
  Logger logger;

  // Define all the commands available
  public Op getFileStatus = new Op("GETFILESTATUS", HttpType.GET, 0);
  public Op listStatus = new Op("LISTSTATUS", HttpType.GET, 0);

  public HDFSCommand(String url, String user, Logger logger, int maxLength) {
    super();
    this.url = url;
    this.user = user;
    this.maxLength = maxLength;
    this.logger = logger;
  }

  /**
   * Validates the inputs of a command.
   *
   * @param op operation to run; must not be null
   * @param path path the operation applies to; must not be null
   * @param args operation arguments; when {@code op.minArgs > 0} there must be exactly
   *             {@code op.minArgs} of them
   * @return null when the inputs are valid, otherwise a description of the inputs received
   */
  public String checkArgs(Op op, String path, Arg[] args) throws Exception {
    if (op == null ||
        path == null ||
        (op.minArgs > 0 &&
            (args == null ||
             args.length != op.minArgs)))
    {
      StringBuilder description = new StringBuilder();
      if (op != null) {
        description.append(op.op).append("\n");
      }
      if (path != null) {
        description.append(path).append("\n");
      }
      if (args != null) {
        // Spell out each argument; concatenating the array itself would only print its
        // identity hash, which is useless in the error log.
        for (Arg a : args) {
          description.append(a == null ? "null" : a.key + "=" + a.value).append(' ');
        }
        description.append("\n");
      }
      return description.toString();
    }
    return null;
  }

  /**
   * The operator that runs all commands.
   *
   * @param op operation to run
   * @param path path appended to the base URL
   * @param args optional query parameters, may be null
   * @return the response body for GET operations, {@code "ERROR: BAD ARGS"} when the inputs
   *         fail {@link #checkArgs}, or null for operations that are not GET
   * @throws Exception when the URI cannot be built or the HTTP exchange fails
   */
  public String runCommand(Op op, String path, Arg[] args) throws Exception {

    // Check arguments
    String error = checkArgs(op, path, args);
    if (error != null) {
      logger.error("Bad arguments to command: " + error);
      return "ERROR: BAD ARGS";
    }

    // Build URI
    UriBuilder builder = UriBuilder
        .fromPath(url)
        .path(path)
        .queryParam("op", op.op);

    if (args != null) {
      for (Arg a : args) {
        builder = builder.queryParam(a.key, a.value);
      }
    }
    java.net.URI uri = builder.build();

    // Connect and get response string
    URL hdfsUrl = uri.toURL();
    HttpURLConnection con = (HttpURLConnection) hdfsUrl.openConnection();

    if (op.cmd == HttpType.GET) {
      con.setRequestMethod("GET");
      // getResponseCode() is what actually triggers the request, so log before calling it.
      logger.info("Sending 'GET' request to URL : " + hdfsUrl);
      int responseCode = con.getResponseCode();
      logger.info("Response Code : " + responseCode);

      StringBuilder response = new StringBuilder();
      // try-with-resources closes the stream even when reading fails part-way through.
      // The JSON body is decoded as UTF-8 instead of the platform default charset.
      try (BufferedReader in = new BufferedReader(
          new InputStreamReader(
              con.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
        String inputLine;
        int linesRead = 0;
        while ((inputLine = in.readLine()) != null) {
          // Lines of maxLength characters or more are dropped, not truncated.
          if (inputLine.length() < maxLength) {
            response.append(inputLine);
          }
          linesRead++;
          if (linesRead >= maxLength) {
            break;
          }
        }
      }
      return response.toString();
    }
    return null;
  }
}

View file

@ -0,0 +1,330 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.file;
import java.text.SimpleDateFormat;
import java.util.*;
import com.google.gson.Gson;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
/**
 * HDFS implementation of File interpreter for Zeppelin.
 *
 * <p>Talks to WebHDFS through {@link HDFSCommand} and maps the JSON responses onto the
 * nested status classes below using Gson.
 */
public class HDFSFileInterpreter extends FileInterpreter {
  static final String HDFS_URL = "hdfs.url";
  static final String HDFS_USER = "hdfs.user";
  static final String HDFS_MAXLENGTH = "hdfs.maxlength";

  static {
    Interpreter.register(
        "hdfs",
        "file",
        HDFSFileInterpreter.class.getName(),
        new InterpreterPropertyBuilder()
            .add(HDFS_URL, "http://localhost:50070/webhdfs/v1/", "The URL for WebHDFS")
            .add(HDFS_USER, "hdfs", "The WebHDFS user")
            .add(HDFS_MAXLENGTH, "1000", "Maximum number of lines of results fetched").build());
  }

  // When non-null, listAll and isDirectory short-circuit instead of contacting WebHDFS.
  Exception exceptionOnConnect = null;
  HDFSCommand cmd = null;
  Gson gson = null;

  /**
   * Creates the WebHDFS command runner and the JSON parser from the interpreter properties.
   * Called from the constructor.
   */
  public void prepare() {
    String userName = getProperty(HDFS_USER);
    String hdfsUrl = getProperty(HDFS_URL);
    int i = Integer.parseInt(getProperty(HDFS_MAXLENGTH));
    cmd = new HDFSCommand(hdfsUrl, userName, logger, i);
    gson = new Gson();
  }

  public HDFSFileInterpreter(Properties property){
    super(property);
    prepare();
  }

  /**
   * Status of one file
   *
   * matches returned JSON
   */
  public class OneFileStatus {
    public long accessTime;
    public int blockSize;
    public int childrenNum;
    public int fileId;
    public String group;
    public long length;
    public long modificationTime;
    public String owner;
    public String pathSuffix;
    public String permission;
    public int replication;
    public int storagePolicy;
    public String type;

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("\nAccessTime = ").append(accessTime);
      sb.append("\nBlockSize = ").append(blockSize);
      sb.append("\nChildrenNum = ").append(childrenNum);
      sb.append("\nFileId = ").append(fileId);
      sb.append("\nGroup = ").append(group);
      sb.append("\nLength = ").append(length);
      sb.append("\nModificationTime = ").append(modificationTime);
      sb.append("\nOwner = ").append(owner);
      sb.append("\nPathSuffix = ").append(pathSuffix);
      sb.append("\nPermission = ").append(permission);
      sb.append("\nReplication = ").append(replication);
      sb.append("\nStoragePolicy = ").append(storagePolicy);
      sb.append("\nType = ").append(type);
      return sb.toString();
    }
  }

  /**
   * Wrapper around the status of a single file
   *
   * matches returned JSON
   */
  public class SingleFileStatus {
    public OneFileStatus FileStatus;
  }

  /**
   * Array of the statuses of all files in a directory
   *
   * matches returned JSON
   */
  public class MultiFileStatus {
    public OneFileStatus[] FileStatus;
  }

  /**
   * Wrapper around the statuses of all files in a directory
   *
   * matches returned JSON
   */
  public class AllFileStatus {
    public MultiFileStatus FileStatuses;
  }

  // tests whether we're able to connect to HDFS
  // NOTE(review): isDirectory catches every Exception itself and returns false, so the
  // catch below (and therefore exceptionOnConnect) is not reached through that call.
  private void testConnection() {
    try {
      if (isDirectory("/")) {
        logger.info("Successfully created WebHDFS connection");
      }
    } catch (Exception e) {
      logger.error("testConnection: Cannot open WebHDFS connection. Bad URL: " + "/", e);
      exceptionOnConnect = e;
    }
  }

  @Override
  public void open() {
    testConnection();
  }

  @Override
  public void close() {
  }

  // Fetches the raw LISTSTATUS response for a directory.
  private String listDir(String path) throws Exception {
    return cmd.runCommand(cmd.listStatus, path, null);
  }

  // Renders the permission string as "drwxr-xr-x" style text.
  private String listPermission(OneFileStatus fs){
    StringBuilder sb = new StringBuilder();
    sb.append(fs.type.equalsIgnoreCase("Directory") ? 'd' : '-');
    // Parsing with radix 16 places each digit of the permission string in its own nibble,
    // so the r/w/x bits of each digit can be tested with the 4/2/1 bit of that nibble.
    int p = Integer.parseInt(fs.permission, 16);
    sb.append(((p & 0x400) == 0) ? '-' : 'r');
    sb.append(((p & 0x200) == 0) ? '-' : 'w');
    sb.append(((p & 0x100) == 0) ? '-' : 'x');
    sb.append(((p & 0x40) == 0) ? '-' : 'r');
    sb.append(((p & 0x20) == 0) ? '-' : 'w');
    sb.append(((p & 0x10) == 0) ? '-' : 'x');
    sb.append(((p & 0x4) == 0) ? '-' : 'r');
    sb.append(((p & 0x2) == 0) ? '-' : 'w');
    sb.append(((p & 0x1) == 0) ? '-' : 'x');
    return sb.toString();
  }

  // Formats the modification time. ListOne appends a "GMT" label to this value, so the
  // formatter is pinned to GMT rather than the JVM default time zone.
  private String listDate(OneFileStatus fs) {
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));
    return format.format(new Date(fs.modificationTime));
  }

  // Renders one entry: long format when the 'l' flag of the current command is set
  // (with human readable sizes when 'h' is also set), otherwise just the path suffix.
  private String ListOne(String path, OneFileStatus fs) {
    if (args.flags.contains('l')) {
      StringBuilder sb = new StringBuilder();
      sb.append(listPermission(fs) + "\t");
      sb.append(((fs.replication == 0) ? "-" : fs.replication) + "\t ");
      sb.append(fs.owner + "\t");
      sb.append(fs.group + "\t");
      if (args.flags.contains('h')){ //human readable
        sb.append(humanReadableByteCount(fs.length) + "\t\t");
      } else {
        sb.append(fs.length + "\t");
      }
      sb.append(listDate(fs) + "GMT\t");
      // A one-character path is treated as the root, which already ends the path with '/'.
      sb.append((path.length() == 1) ? path + fs.pathSuffix : path + '/' + fs.pathSuffix);
      return sb.toString();
    }
    return fs.pathSuffix;
  }

  // Converts a byte count to a short string using 1024-based units (B, KB, MB, ...).
  private String humanReadableByteCount(long bytes) {
    int unit = 1024;
    if (bytes < unit) {
      return bytes + " B";
    }
    int exp = (int) (Math.log(bytes) / Math.log(unit));
    String pre = "KMGTPE".charAt(exp - 1) + "";
    return String.format("%.1f %sB", bytes / Math.pow(unit, exp), pre);
  }

  /**
   * Lists a single file.
   *
   * @param filePath path of the file
   * @return the rendered entry, or "No such File or directory" when the status cannot be
   *         obtained
   */
  public String listFile(String filePath) {
    try {
      String str = cmd.runCommand(cmd.getFileStatus, filePath, null);
      SingleFileStatus sfs = gson.fromJson(str, SingleFileStatus.class);
      // Check the nested status explicitly instead of relying on a caught NPE.
      if (sfs != null && sfs.FileStatus != null) {
        return ListOne(filePath, sfs.FileStatus);
      }
    } catch (Exception e) {
      logger.error("listFile: " + filePath, e);
    }
    return "No such File or directory";
  }

  /**
   * Lists a directory (one entry per line) or, when the path is not a directory, a file.
   *
   * @param path path to list
   * @return the rendered listing, or an error text when the connection check failed
   * @throws InterpreterException when listing fails
   */
  @Override
  public String listAll(String path) {
    if (exceptionOnConnect != null) {
      return "Error connecting to provided endpoint.";
    }
    try {
      //see if directory.
      if (!isDirectory(path)) {
        return listFile(path);
      }
      StringBuilder all = new StringBuilder();
      String sfs = listDir(path);
      if (sfs != null) {
        AllFileStatus allFiles = gson.fromJson(sfs, AllFileStatus.class);
        if (allFiles != null &&
            allFiles.FileStatuses != null &&
            allFiles.FileStatuses.FileStatus != null)
        {
          for (OneFileStatus fs : allFiles.FileStatuses.FileStatus) {
            all.append(ListOne(path, fs)).append('\n');
          }
        }
      }
      return all.toString();
    } catch (Exception e) {
      logger.error("listall: listDir " + path, e);
      throw new InterpreterException("Could not find file or directory:\t" + path);
    }
  }

  /**
   * Tells whether the path is a directory according to its GETFILESTATUS response.
   *
   * @param path path to check
   * @return true only when a status was obtained and its type is "DIRECTORY"; false on any
   *         failure or when the connection check failed
   */
  @Override
  public boolean isDirectory(String path) {
    if (exceptionOnConnect != null) {
      return false;
    }
    try {
      String str = cmd.runCommand(cmd.getFileStatus, path, null);
      SingleFileStatus sfs = gson.fromJson(str, SingleFileStatus.class);
      if (sfs != null && sfs.FileStatus != null) {
        return "DIRECTORY".equals(sfs.FileStatus.type);
      }
    } catch (Exception e) {
      logger.error("IsDirectory: " + path, e);
    }
    return false;
  }

  /**
   * Suggests completions for a partially typed command line.
   *
   * @param buf text typed so far
   * @param cursor cursor position (only logged)
   * @return command names while the command itself is being typed, remaining file-name text
   *         while a path is being typed, or null when nothing can be suggested
   */
  @Override
  public List<String> completion(String buf, int cursor) {
    logger.info("Completion request at position\t" + cursor + " in string " + buf);
    final List<String> suggestions = new ArrayList<>();
    if (StringUtils.isEmpty(buf)) {
      suggestions.add("ls");
      suggestions.add("cd");
      suggestions.add("pwd");
      return suggestions;
    }

    //part of a command == no spaces
    if (buf.split(" ").length == 1){
      if ("cd".contains(buf)) {
        suggestions.add("cd");
      }
      if ("ls".contains(buf)) {
        suggestions.add("ls");
      }
      if ("pwd".contains(buf)) {
        suggestions.add("pwd");
      }
      return suggestions;
    }

    // last word will contain the path we're working with.
    String lastToken = buf.substring(buf.lastIndexOf(" ") + 1);
    if (lastToken.startsWith("-")) { //flag not path
      return null;
    }

    String localPath = ""; //all things before the last '/'
    String unfinished = lastToken; //unfinished filenames or directories
    if (lastToken.contains("/")) {
      localPath = lastToken.substring(0, lastToken.lastIndexOf('/') + 1);
      unfinished = lastToken.substring(lastToken.lastIndexOf('/') + 1);
    }
    String globalPath = getNewPath(localPath); //adjust for cwd

    if (isDirectory(globalPath)){
      try {
        String fileStatusString = listDir(globalPath);
        if (fileStatusString != null) {
          AllFileStatus allFiles = gson.fromJson(fileStatusString, AllFileStatus.class);

          if (allFiles != null &&
              allFiles.FileStatuses != null &&
              allFiles.FileStatuses.FileStatus != null)
          {
            //only suggest the text after the last .
            String beforeLastPeriod = unfinished.substring(0, unfinished.lastIndexOf('.') + 1);
            for (OneFileStatus fs : allFiles.FileStatuses.FileStatus) {
              // The suggestion is cut from the front of pathSuffix, so only entries that
              // really start with the typed text may be offered.
              if (fs.pathSuffix.startsWith(unfinished)) {
                String suggestedFinish = fs.pathSuffix.substring(beforeLastPeriod.length());
                suggestions.add(suggestedFinish);
              }
            }
            return suggestions;
          }
        }
      } catch (Exception e) {
        logger.error("listall: listDir " + globalPath, e);
        return null;
      }
    } else {
      logger.info("path is not a directory.  No values suggested.");
    }

    //Error in string.
    return null;
  }
}

View file

@ -0,0 +1,209 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.file;
import com.google.gson.Gson;
import junit.framework.TestCase;
import static org.junit.Assert.*;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.Test;
import org.slf4j.Logger;
import java.util.HashMap;
import java.util.Properties;
import java.lang.Override;
import java.lang.String;
/**
 * Tests the HDFS file interpreter by running pre-determined commands against a
 * mock file system.
 */
public class HDFSFileInterpreterTest extends TestCase {

  /**
   * Walks through ls / cd / pwd and checks that relative paths, flags and the
   * current directory are handled consistently.
   */
  @Test
  public void test() {
    HDFSFileInterpreter t = new MockHDFSFileInterpreter(new Properties());
    t.open();

    // We have info for /, /user, /tmp, /mr-history/done

    // Ensure
    // 1. ls -l works
    // 2. paths (. and ..) are correctly handled
    // 3. flags and arguments to commands are correctly handled
    InterpreterResult result1 = t.interpret("ls -l /", null);
    assertEquals(InterpreterResult.Type.TEXT, result1.type());

    InterpreterResult result2 = t.interpret("ls -l /./user/..", null);
    assertEquals(InterpreterResult.Type.TEXT, result2.type());
    assertEquals(result1.message(), result2.message());

    // Ensure you can do cd and after that the ls uses current directory correctly
    InterpreterResult result3 = t.interpret("cd user", null);
    assertEquals(InterpreterResult.Type.TEXT, result3.type());
    assertEquals("OK", result3.message());

    InterpreterResult result4 = t.interpret("ls", null);
    assertEquals(InterpreterResult.Type.TEXT, result4.type());

    InterpreterResult result5 = t.interpret("ls /user", null);
    assertEquals(InterpreterResult.Type.TEXT, result5.type());
    assertEquals(result4.message(), result5.message());

    // Ensure pwd works correctly
    InterpreterResult result6 = t.interpret("pwd", null);
    assertEquals(InterpreterResult.Type.TEXT, result6.type());
    assertEquals("/user", result6.message());

    // Move a couple of levels and check we're in the right place
    InterpreterResult result7 = t.interpret("cd ../mr-history/done", null);
    assertEquals(InterpreterResult.Type.TEXT, result7.type());
    assertEquals("OK", result7.message());

    InterpreterResult result8 = t.interpret("ls -l ", null);
    assertEquals(InterpreterResult.Type.TEXT, result8.type());

    InterpreterResult result9 = t.interpret("ls -l /mr-history/done", null);
    assertEquals(InterpreterResult.Type.TEXT, result9.type());
    assertEquals(result8.message(), result9.message());

    InterpreterResult result10 = t.interpret("cd ../..", null);
    assertEquals(InterpreterResult.Type.TEXT, result10.type());
    // Check the outcome of this cd itself, not the earlier one stored in result7.
    assertEquals("OK", result10.message());

    InterpreterResult result11 = t.interpret("ls -l ", null);
    assertEquals(InterpreterResult.Type.TEXT, result11.type());

    // we should be back to first result after all this navigation
    assertEquals(result1.message(), result11.message());

    t.close();
  }
}
/**
 * Store command results from curl against a real file system.
 *
 * Responses are kept in a map keyed by "&lt;path&gt;?op=&lt;OPERATION&gt;" and handed back
 * verbatim by {@link #get(String)}.
 */
class MockFileSystem {
  HashMap<String, String> mfs = new HashMap<>();

  /** Registers the canned LISTSTATUS responses for /, /user, /tmp and /mr-history/done. */
  void addListStatusData() {
    // Every listing response opens with the same JSON envelope.
    final String header = "{\"FileStatuses\":{\"FileStatus\":[\n";

    mfs.put("/?op=LISTSTATUS",
        header +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16389,\"group\":\"hadoop\",\"length\":0,\"modificationTime\":1438548219672,\"owner\":\"yarn\",\"pathSuffix\":\"app-logs\",\"permission\":\"777\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16395,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438548030045,\"owner\":\"hdfs\",\"pathSuffix\":\"hdp\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16390,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438547985336,\"owner\":\"mapred\",\"pathSuffix\":\"mapred\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":2,\"fileId\":16392,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438547985346,\"owner\":\"hdfs\",\"pathSuffix\":\"mr-history\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16400,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438548089725,\"owner\":\"hdfs\",\"pathSuffix\":\"system\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16386,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438548150089,\"owner\":\"hdfs\",\"pathSuffix\":\"tmp\",\"permission\":\"777\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"},\n" +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16387,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438547921792,\"owner\":\"hdfs\",\"pathSuffix\":\"user\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}\n" +
        "]}}");

    mfs.put("/user?op=LISTSTATUS",
        header +
        " {\"accessTime\":0,\"blockSize\":0,\"childrenNum\":4,\"fileId\":16388,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1441253161263,\"owner\":\"ambari-qa\",\"pathSuffix\":\"ambari-qa\",\"permission\":\"770\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}\n" +
        " ]}}");

    mfs.put("/tmp?op=LISTSTATUS",
        header +
        " {\"accessTime\":1441253097489,\"blockSize\":134217728,\"childrenNum\":0,\"fileId\":16400,\"group\":\"hdfs\",\"length\":1645,\"modificationTime\":1441253097517,\"owner\":\"hdfs\",\"pathSuffix\":\"ida8c06540_date040315\",\"permission\":\"755\",\"replication\":3,\"storagePolicy\":0,\"type\":\"FILE\"}\n" +
        " ]}}");

    mfs.put("/mr-history/done?op=LISTSTATUS",
        header +
        "{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16433,\"group\":\"hadoop\",\"length\":0,\"modificationTime\":1441253197481,\"owner\":\"mapred\",\"pathSuffix\":\"2015\",\"permission\":\"770\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}\n" +
        "]}}");
  }

  /** Registers the canned GETFILESTATUS responses for /, /user, /tmp and /mr-history/done. */
  void addGetFileStatusData() {
    mfs.put("/?op=GETFILESTATUS",
        "{\"FileStatus\":{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":7,\"fileId\":16385,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1438548089725,\"owner\":\"hdfs\",\"pathSuffix\":\"\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}}");
    mfs.put("/user?op=GETFILESTATUS",
        "{\"FileStatus\":{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16387,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1441253043188,\"owner\":\"hdfs\",\"pathSuffix\":\"\",\"permission\":\"755\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}}");
    mfs.put("/tmp?op=GETFILESTATUS",
        "{\"FileStatus\":{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16386,\"group\":\"hdfs\",\"length\":0,\"modificationTime\":1441253097489,\"owner\":\"hdfs\",\"pathSuffix\":\"\",\"permission\":\"777\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}}");
    mfs.put("/mr-history/done?op=GETFILESTATUS",
        "{\"FileStatus\":{\"accessTime\":0,\"blockSize\":0,\"childrenNum\":1,\"fileId\":16393,\"group\":\"hadoop\",\"length\":0,\"modificationTime\":1441253197480,\"owner\":\"mapred\",\"pathSuffix\":\"\",\"permission\":\"777\",\"replication\":0,\"storagePolicy\":0,\"type\":\"DIRECTORY\"}}");
  }

  /**
   * Loads the canned responses belonging to {@code op}; operations other than
   * LISTSTATUS and GETFILESTATUS add nothing.
   */
  public void addMockData(HDFSCommand.Op op) {
    if (op.op.equals("LISTSTATUS")) {
      addListStatusData();
      return;
    }
    if (op.op.equals("GETFILESTATUS")) {
      addGetFileStatusData();
    }
    // any other operation: do nothing
  }

  /** Returns the stored response for {@code key}, or {@code null} if none was registered. */
  public String get(String key) {
    return mfs.get(key);
  }
}
/**
 * Run commands against mock file system that simulates webhdfs responses.
 */
class MockHDFSCommand extends HDFSCommand {
  MockFileSystem fs;

  /** Builds the command helper and preloads the mock responses for both operations used. */
  public MockHDFSCommand(String url, String user, Logger logger) {
    super(url, user, logger, 1000);
    fs = new MockFileSystem();
    fs.addMockData(getFileStatus);
    fs.addMockData(listStatus);
  }

  /**
   * Looks the request up in the mock file system instead of issuing it.
   * The lookup key is "path?op=OP" followed by "&amp;key=value" for each argument.
   * The argument check must report no error, otherwise the assertion fails.
   */
  @Override
  public String runCommand(Op op, String path, Arg[] args) throws Exception {
    assertNull(checkArgs(op, path, args));

    final StringBuilder key = new StringBuilder(path + "?op=" + op.op);
    if (args != null) {
      for (Arg arg : args) {
        key.append("&" + arg.key + "=" + arg.value);
      }
    }
    return fs.get(key.toString());
  }
}
/**
 * Mock Interpreter - uses Mock HDFS command.
 */
class MockHDFSFileInterpreter extends HDFSFileInterpreter {

  public MockHDFSFileInterpreter(Properties property) {
    super(property);
  }

  /**
   * Installs the mock command helper and a fresh Gson instance, so commands run
   * against the mock file system instead of WebHDFS.
   */
  @Override
  public void prepare() {
    cmd = new MockHDFSCommand("", "", logger);
    gson = new Gson();
  }
}

View file

@ -35,7 +35,7 @@
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<flink.version>0.10.0</flink.version>
<flink.version>1.0.0</flink.version>
<flink.akka.version>2.3.7</flink.akka.version>
<flink.scala.binary.version>2.10</flink.scala.binary.version>
<flink.scala.version>2.10.4</flink.scala.version>
@ -73,25 +73,25 @@
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<artifactId>flink-clients_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime</artifactId>
<artifactId>flink-runtime_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala</artifactId>
<artifactId>flink-scala_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala-shell</artifactId>
<artifactId>flink-scala-shell_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>

View file

@ -249,12 +249,34 @@ public class FlinkInterpreter extends Interpreter {
Code r = null;
String incomplete = "";
boolean inComment = false;
for (int l = 0; l < linesToRun.length; l++) {
final String s = linesToRun[l];
// check if next line starts with "." (but not ".." or "./") it is treated as an invocation
if (l + 1 < linesToRun.length) {
String nextLine = linesToRun[l + 1].trim();
if (nextLine.startsWith(".") && !nextLine.startsWith("..") && !nextLine.startsWith("./")) {
boolean continuation = false;
if (nextLine.isEmpty()
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
continuation = true;
} else if (!inComment && nextLine.startsWith("/*")) {
inComment = true;
continuation = true;
} else if (inComment && nextLine.lastIndexOf("*/") >= 0) {
inComment = false;
continuation = true;
} else if (nextLine.length() > 1
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
continuation = true;
} else if (inComment) {
continuation = true;
}
if (continuation) {
incomplete += s + "\n";
continue;
}

View file

@ -49,6 +49,22 @@ public class FlinkInterpreterTest {
flink.destroy();
}
/** A second line beginning with "." must be interpreted successfully. */
@Test
public void testNextLineInvocation() {
  final InterpreterResult result = flink.interpret("\"123\"\n.toInt", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
/** A block comment between the expression and its ".method" line must not break interpretation. */
@Test
public void testNextLineComments() {
  final InterpreterResult result =
      flink.interpret("\"123\"\n/*comment here\n*/.toInt", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
/**
 * A class followed by a comment, a blank line and its companion object must be
 * interpreted successfully.
 */
@Test
public void testNextLineCompanionObject() {
  final String code = "class Counter {\nvar value: Long = 0\n}\n // comment\n\n object Counter {\n def apply(x: Long) = new Counter()\n}";
  final InterpreterResult result = flink.interpret(code, context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
@Test
public void testSimpleStatement() {
InterpreterResult result = flink.interpret("val a=1", context);
@ -63,12 +79,6 @@ public class FlinkInterpreterTest {
assertEquals("1", result.message());
}
@Test
public void testNextlineInvoke() {
InterpreterResult result = flink.interpret("\"123\"\n .toInt", context);
assertEquals("res0: Int = 123\n", result.message());
}
@Test
public void testWordCount() {
flink.interpret("val text = env.fromElements(\"To be or not to be\")", context);

View file

@ -37,21 +37,20 @@ import java.util.List;
import java.util.Properties;
/**
* Support for Hbase Shell. All the commands documented here
* Support for HBase Shell. All the commands documented here
* http://hbase.apache.org/book.html#shell is supported.
*
* Requirements:
* Hbase Shell should be installed on the same machine. To be more specific, the following dir.
* HBase Shell should be installed on the same machine. To be more specific, the following dir.
* should be available: https://github.com/apache/hbase/tree/master/hbase-shell/src/main/ruby
* Hbase Shell should be able to connect to the Hbase cluster from terminal. This makes sure
* HBase Shell should be able to connect to the HBase cluster from terminal. This makes sure
* that the client is configured properly.
*
* The interpreter takes 3 config parameters:
* hbase.home: Root dir. where hbase is installed. Default is /usr/lib/hbase/
* hbase.home: Root directory where HBase is installed. Default is /usr/lib/hbase/
* hbase.ruby.sources: Dir where shell ruby code is installed.
* Path is relative to hbase.home. Default: lib/ruby
* hbase.irb.load: (Testing only) Default is true.
* Whether to load irb in the interpreter.
* zeppelin.hbase.test.mode: (Testing only) Disable checks for unit and manual tests. Default: false
*/
public class HbaseInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(HbaseInterpreter.class);
@ -62,11 +61,13 @@ public class HbaseInterpreter extends Interpreter {
static {
Interpreter.register("hbase", "hbase", HbaseInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add("hbase.home", "/usr/lib/hbase/", "Installation dir. of Hbase")
.add("hbase.home",
getSystemDefault("HBASE_HOME", "hbase.home", "/usr/lib/hbase/"),
"Installation directory of HBase")
.add("hbase.ruby.sources", "lib/ruby",
"Path to Ruby scripts relative to 'hbase.home'")
.add("hbase.test.mode", "false", "Disable checks for unit and manual tests")
.build());
.add("zeppelin.hbase.test.mode", "false", "Disable checks for unit and manual tests")
.build());
}
public HbaseInterpreter(Properties property) {
@ -79,7 +80,7 @@ public class HbaseInterpreter extends Interpreter {
this.writer = new StringWriter();
scriptingContainer.setOutput(this.writer);
if (!Boolean.parseBoolean(getProperty("hbase.test.mode"))) {
if (!Boolean.parseBoolean(getProperty("zeppelin.hbase.test.mode"))) {
String hbase_home = getProperty("hbase.home");
String ruby_src = getProperty("hbase.ruby.sources");
Path abs_ruby_src = Paths.get(hbase_home, ruby_src).toAbsolutePath();
@ -89,7 +90,7 @@ public class HbaseInterpreter extends Interpreter {
File f = abs_ruby_src.toFile();
if (!f.exists() || !f.isDirectory()) {
throw new InterpreterException("hbase ruby sources is not available at '" + abs_ruby_src
throw new InterpreterException("HBase ruby sources is not available at '" + abs_ruby_src
+ "'");
}
@ -155,4 +156,24 @@ public class HbaseInterpreter extends Interpreter {
return null;
}
/**
 * Resolves a default value by checking, in order, an environment variable, a
 * JVM system property, and finally a fixed fallback.
 *
 * @param envName environment variable to consult; skipped when null or empty
 * @param propertyName system property to consult; skipped when null or empty
 * @param defaultValue value returned when neither source provides one
 * @return the first non-null value found, otherwise {@code defaultValue}
 */
private static String getSystemDefault(
    String envName,
    String propertyName,
    String defaultValue) {
  String resolved = null;
  if (envName != null && envName.length() > 0) {
    resolved = System.getenv().get(envName);
  }
  // The property is only consulted when the environment did not supply a value.
  if (resolved == null && propertyName != null && propertyName.length() > 0) {
    resolved = System.getProperty(propertyName);
  }
  return resolved != null ? resolved : defaultValue;
}
}

View file

@ -40,7 +40,7 @@ public class HbaseInterpreterTest {
Properties properties = new Properties();
properties.put("hbase.home", "");
properties.put("hbase.ruby.sources", "");
properties.put("hbase.test.mode", "true");
properties.put("zeppelin.hbase.test.mode", "true");
hbaseInterpreter = new HbaseInterpreter(properties);
hbaseInterpreter.open();
@ -72,4 +72,4 @@ public class HbaseInterpreterTest {
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertEquals("(NameError) undefined local variable or method `joke' for main:Object", result.message());
}
}
}

Some files were not shown because too many files have changed in this diff Show more