Merge branch 'master' into master

This commit is contained in:
Bruno Bonnin 2017-01-16 09:31:13 +01:00 committed by GitHub
commit 5a96ae0af3
339 changed files with 21778 additions and 12111 deletions

7
.gitignore vendored
View file

@ -30,6 +30,7 @@ conf/interpreter.json
conf/notebook-authorization.json
conf/shiro.ini
conf/credentials.json
conf/helium.json
# other generated files
spark/dependency-reduced-pom.xml
@ -39,11 +40,11 @@ reports
zeppelin-web/node_modules
zeppelin-web/dist
zeppelin-web/.tmp
zeppelin-web/src/fonts/Roboto*
zeppelin-web/src/fonts/Source-Code-Pro*
zeppelin-web/src/fonts/Patua-One*
zeppelin-web/.sass-cache
zeppelin-web/npm-debug.log
zeppelin-web/yarn-error.log
zeppelin-web/bower_components
zeppelin-web/yarn.lock
**nbproject/
**node/

View file

@ -33,8 +33,6 @@ addons:
- r-packages-precise
packages:
- r-base-dev
- r-cran-evaluate
- r-cran-base64enc
matrix:
include:
@ -44,15 +42,19 @@ matrix:
# Test all modules with spark 2.0.2 and scala 2.11
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
env: SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test all modules with spark 2.1.0 and scala 2.11
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Pbeam -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
env: SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Pbeam -Phelium-dev -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test all modules with scala 2.11
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
env: SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.6 -Ppyspark -Psparkr -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
@ -64,7 +66,7 @@ matrix:
# Test selenium with spark module for 1.6.3
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Phadoop-2.6 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pspark-1.6 -Phadoop-2.6 -Ppyspark -Phelium-dev -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
# Test python/pyspark with python 2
- jdk: "oraclejdk7"
@ -92,10 +94,8 @@ install:
before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
- if [[ -n $LIVY_VER ]]; then travis_retry ./testing/downloadLivy.sh $LIVY_VER; fi
- ./testing/setupLivy.sh
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
- if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-server-$LIVY_VER; fi
- if [[ -n $LIVY_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi
- tail conf/zeppelin-env.sh
script:
@ -106,8 +106,7 @@ after_success:
after_failure:
- echo "Travis exited with ${TRAVIS_TEST_RESULT}"
- cat target/rat.txt
- cat zeppelin-server/target/rat.txt
- find . -name rat.txt | xargs cat
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.log
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out
- cat zeppelin-web/npm-debug.log

View file

@ -221,7 +221,8 @@ The following components are provided under the Open Font License. See project l
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(OFL 1.1) Font Awesome v4.2.0 (http://fortawesome.github.io/Font-Awesome/) - http://scripts.sil.org/OFL
(OFL 1.1) Patua One Font (see licenses/LICENSE-patuaOne-font)
(OFL 1.1) Source Code Pro Font (see licenses/LICENSE-source_code_pro-font)
========================================================================
MIT licenses
@ -253,6 +254,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Software under ./bigquery/* was developed at Google (http://www.google.com/). Licensed under the Apache v2.0 License.
(Apache 2.0) Roboto Font (https://github.com/google/roboto/)
========================================================================
BSD 3-Clause licenses

View file

@ -16,12 +16,7 @@ limitations under the License.
To connect to Zeppelin, users will be asked to enter their credentials. Once logged, a user has access to all notes including other users notes.
This is a first step toward full security as implemented by this pull request (https://github.com/apache/zeppelin/pull/53).
# Security setup
1. Secure the HTTP channel: Comment the line "/** = anon" and uncomment the line "/** = authc" in the file conf/shiro.ini. Read more about he shiro.ini file format at the following URL http://shiro.apache.org/configuration.html#Configuration-INISections.
2. Secure the Websocket channel : Set to property "zeppelin.anonymous.allowed" to "false" in the file conf/zeppelin-site.xml. You can start by renaming conf/zeppelin-site.xml.template to conf/zeppelin-site.xml
3. Start Zeppelin : bin/zeppelin.sh
4. point your browser to http://localhost:8080
5. Login using one of the user/password combinations defined in the conf/shiro.ini file.
Please check [Shiro authentication in Apache Zeppelin](https://zeppelin.apache.org/docs/snapshot/security/shiroauthentication.html) on our official website for more detailed information (e.g. how to set up security, how to configure user groups and permissions, etc.).
# Implementation notes
## Vocabulary

View file

@ -25,11 +25,11 @@ bower.json
In the override section at the bottom, include the Highlightjs stylesheet (eg. styles/github.css)
For the selected Ace Editor theme script, include it in the override section. (eg. src-noconflict/theme-github.js)
(bower will automatically add the appropriate .js and .css in app/index.html)
```
```diff
"src-noconflict/mode-sql.js",
"src-noconflict/mode-markdown.js",
"src-noconflict/keybinding-emacs.js",
"src-noconflict/ext-language_tools.js",
"src-noconflict/ext-language_tools.js",
+ "src-noconflict/theme-github.js"],
"version": "1.1.8",
"name": "ace-builds"
@ -48,13 +48,13 @@ Highlight.js style - depends on the style, a few themes have jpg - if so, one mu
### Example - change Ace Editor theme to monokai
app/scripts/controllers/paragraph.js
```
```diff
- $scope.editor.setTheme('ace/theme/github');
+ $scope.editor.setTheme('ace/theme/monokai');
```
bower.json
```
```diff
- "src-noconflict/theme-github.js"],
+ "src-noconflict/theme-monokai.js"],
```

View file

@ -171,7 +171,7 @@ elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
@ -187,19 +187,26 @@ addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]]; then
INTERPRETER_RUN_COMMAND=${ZEPPELIN_IMPERSONATE_RUN_CMD}" '"
if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then
INTERPRETER_RUN_COMMAND+=" source "${ZEPPELIN_CONF_DIR}'/zeppelin-env.sh;'
suid="$(id -u ${ZEPPELIN_IMPERSONATE_USER})"
if [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then
INTERPRETER_RUN_COMMAND=${ZEPPELIN_IMPERSONATE_RUN_CMD}" '"
if [[ -f "${ZEPPELIN_CONF_DIR}/zeppelin-env.sh" ]]; then
INTERPRETER_RUN_COMMAND+=" source "${ZEPPELIN_CONF_DIR}'/zeppelin-env.sh;'
fi
fi
fi
if [[ -n "${SPARK_SUBMIT}" ]]; then
INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT}`
if [[ -n "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ "$ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER" != "false" ]]; then
INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} --proxy-user ${ZEPPELIN_IMPERSONATE_USER} ${SPARK_APP_JAR} ${PORT}`
else
INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT}`
fi
else
INTERPRETER_RUN_COMMAND+=' '` echo ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} `
fi
if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]]; then
if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then
INTERPRETER_RUN_COMMAND+="'"
fi

View file

@ -84,9 +84,4 @@ if not exist %ZEPPELIN_PID_DIR% (
mkdir "%ZEPPELIN_PID_DIR%"
)
if not exist %ZEPPELIN_NOTEBOOK_DIR% (
echo Notebook dir doesn't exist, create %ZEPPELIN_NOTEBOOK_DIR%
mkdir "%ZEPPELIN_NOTEBOOK_DIR%"
)
"%ZEPPELIN_RUNNER%" %JAVA_OPTS% -cp %CLASSPATH% %ZEPPELIN_SERVER% "%*"

View file

@ -83,9 +83,4 @@ if [[ ! -d "${ZEPPELIN_PID_DIR}" ]]; then
$(mkdir -p "${ZEPPELIN_PID_DIR}")
fi
if [[ ! -d "${ZEPPELIN_NOTEBOOK_DIR}" ]]; then
echo "Pid dir doesn't exist, create ${ZEPPELIN_NOTEBOOK_DIR}"
$(mkdir -p "${ZEPPELIN_NOTEBOOK_DIR}")
fi
exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_SERVER "$@"

View file

@ -1,15 +0,0 @@
## Enabling SSL
Enabling SSL requires a few changes. The first is to set zeppelin.ssl to true. If you'll like to use client side certificate authentication as well, then set zeppelin.ssl.client.auth to true too.
Information how about to generate certificates and a keystore can be found [here](https://wiki.eclipse.org/Jetty/Howto/Configure_SSL).
A condensed example can be found in the top answer to this [StackOverflow post](http://stackoverflow.com/questions/4008837/configure-ssl-on-jetty).
The keystore holds the private key and certificate on the server end. The trustore holds the trusted client certificates. Be sure that the path and password for these two stores are correctly configured in the password fields below. They can be obfuscated using the Jetty password tool. After Maven pulls in all the dependency to build Zeppelin, one of the Jetty jars contain the Password tool. Invoke this command from the Zeppelin home build directory with the appropriate version, user, and password.
```
java -cp ./zeppelin-server/target/lib/jetty-all-server-<version>.jar org.eclipse.jetty.util.security.Password <user> <password>
```
If you are using a self-signed, a certificate signed by an untrusted CA, or if client authentication is enabled, then the client must have a browser create exceptions for both the normal HTTPS port and WebSocket port. This can by done by trying to establish an HTTPS connection to both ports in a browser (i.e. if the ports are 443 and 8443, then visit https://127.0.0.1:443 and https://127.0.0.1:8443). This step can be skipped if the server certificate is signed by a trusted CA and client auth is disabled.

View file

@ -31,6 +31,9 @@ REM set ZEPPELIN_NOTEBOOK_HOMESCREEN REM Id of notebook to be displayed in home
REM set ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE REM hide homescreen notebook from list when this value set to "true". default "false"
REM set ZEPPELIN_NOTEBOOK_S3_BUCKET REM Bucket where notebook saved
REM set ZEPPELIN_NOTEBOOK_S3_USER REM User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
REM set ZEPPELIN_NOTEBOOK_S3_ENDPOINT REM Endpoint of the bucket
REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID REM AWS KMS key ID
REM set ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION REM AWS KMS key region
REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. $USER by default.
REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0.
REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading

View file

@ -33,6 +33,8 @@
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID
# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region
# export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
@ -80,4 +82,7 @@
# export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use
# export ZEPPELINHUB_API_TOKEN # Refers to the Zeppelin instance token of the user
# export ZEPPELINHUB_USER_KEY # Optional, when using Zeppelin with authentication.
#### Zeppelin impersonation configuration
# export ZEPPELIN_IMPERSONATE_CMD # Optional, when users want to run the interpreter as the end web user. e.g.) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER #Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled

View file

@ -108,6 +108,16 @@
</property>
-->
<!-- provide region of your KMS key -->
<!-- See http://docs.aws.amazon.com/general/latest/gr/rande.html#kms_region for region codes names -->
<!--
<property>
<name>zeppelin.notebook.s3.kmsKeyRegion</name>
<value>us-east-1</value>
<description>AWS KMS key region in your AWS account</description>
</property>
-->
<!-- Use a custom encryption materials provider to encrypt data -->
<!-- No configuration is given to the provider, so you must use system properties or another means to configure -->
<!-- See https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html -->
@ -147,11 +157,11 @@
</property>
-->
<!-- For versioning your local notebook storage using Git repository
<!-- Notebook storage layer using local file system
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
<description>notebook persistence layer implementation</description>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
<description>local notebook persistence layer implementation</description>
</property>
-->
@ -159,15 +169,15 @@
<!--
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo</value>
<description>two notebook persistence layers (local + ZeppelinHub)</description>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo, org.apache.zeppelin.notebook.repo.zeppelinhub.ZeppelinHubRepo</value>
<description>two notebook persistence layers (versioned local + ZeppelinHub)</description>
</property>
-->
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
<value>org.apache.zeppelin.notebook.repo.GitNotebookRepo</value>
<description>versioned notebook persistence layer implementation</description>
</property>
<property>
@ -190,7 +200,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.python.PythonInterpreterPandasSql,org.apache.zeppelin.python.PythonCondaInterpreter,org.apache.zeppelin.python.PythonDockerInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter,org.apache.zeppelin.scio.ScioInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.python.PythonInterpreterPandasSql,org.apache.zeppelin.python.PythonCondaInterpreter,org.apache.zeppelin.python.PythonDockerInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivyPySpark3Interpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter,org.apache.zeppelin.scio.ScioInterpreter</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>

View file

@ -42,6 +42,7 @@ done
RELEASE_VERSION="$1"
GIT_TAG="$2"
SCALA_VERSION="2.11"
function build_docker_base() {
# build base image
@ -80,7 +81,7 @@ function make_binary_release() {
cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
./dev/change_scala_version.sh 2.11
./dev/change_scala_version.sh "${SCALA_VERSION}"
echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
if [[ $? -ne 0 ]]; then
@ -123,8 +124,8 @@ function make_binary_release() {
build_docker_base
git_clone
make_source_package
make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11"
make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell,!bigquery"
make_binary_release all "-Pspark-2.0 -Phadoop-2.6 -Pyarn -Ppyspark -Psparkr -Pscala-${SCALA_VERSION}"
make_binary_release netinst "-Pspark-2.0 -Phadoop-2.6 -Pyarn -Ppyspark -Psparkr -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"

View file

@ -44,7 +44,7 @@ NC='\033[0m' # No Color
RELEASE_VERSION="$1"
GIT_TAG="$2"
PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.0 -Phadoop-2.6 -Pyarn -Ppyspark -Psparkr -Pr"
PROJECT_OPTIONS="-pl !zeppelin-distribution"
NEXUS_STAGING="https://repository.apache.org/service/local/staging"
NEXUS_PROFILE="153446d1ac37c4"

View file

@ -22,7 +22,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Getting Started</b><span></li>
<li><a href="{{BASE_PATH}}/install/install.html">Install</a></li>
<li><a href="{{BASE_PATH}}/install/install.html#apache-zeppelin-configuration">Configuration</a></li>
<li><a href="{{BASE_PATH}}/install/configuration.html">Configuration</a></li>
<li><a href="{{BASE_PATH}}/quickstart/explorezeppelinui.html">Explore Zeppelin UI</a></li>
<li><a href="{{BASE_PATH}}/quickstart/tutorial.html">Tutorial</a></li>
<li role="separator" class="divider"></li>
@ -47,6 +47,7 @@
<!--<li><a href="{{BASE_PATH}}/manual/dynamicinterpreterload.html">Dynamic Interpreter Loading</a></li>-->
<li><a href="{{BASE_PATH}}/manual/dependencymanagement.html">Interpreter Dependency Management</a></li>
<li><a href="{{BASE_PATH}}/manual/userimpersonation.html">Interpreter User Impersonation</a></li>
<li><a href="{{BASE_PATH}}/manual/interpreterexechooks.html">Interpreter Execution Hooks (Experimental)</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Available Interpreters</b><span></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
@ -102,6 +103,7 @@
<li><a href="{{BASE_PATH}}/rest-api/rest-notebook.html">Notebook API</a></li>
<li><a href="{{BASE_PATH}}/rest-api/rest-configuration.html">Configuration API</a></li>
<li><a href="{{BASE_PATH}}/rest-api/rest-credential.html">Credential API</a></li>
<li><a href="{{BASE_PATH}}/rest-api/rest-helium.html">Helium API</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Security</b><span></li>
<li><a href="{{BASE_PATH}}/security/shiroauthentication.html">Shiro Authentication</a></li>
@ -117,7 +119,8 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Contribute</b><span></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelinapplication.html">Writing Zeppelin Application (Experimental)</a></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelinvisualization.html">Writing Zeppelin Visualization (Experimental)</a></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelinapplication.html">Writing Zeppelin Application (Experimental)</a></li>
<li><a href="{{BASE_PATH}}/development/howtocontribute.html">How to contribute (code)</a></li>
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
</ul>

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 173 KiB

View file

@ -60,7 +60,7 @@ You can check example applications under [./zeppelin-examples](https://github.co
In the development mode, you can run your Application in your IDE as a normal java application and see the result inside of Zeppelin notebook.
`org.apache.zeppelin.interpreter.dev.ZeppelinApplicationDevServer` can run Zeppelin Application in development mode.
`org.apache.zeppelin.helium.ZeppelinApplicationDevServer` can run Zeppelin Application in development mode.
```java
@ -73,7 +73,7 @@ public static void main(String[] args) throws Exception {
// run application in development mode with given resource
// in this case, Clock.class.getName() will be the application class name
ZeppelinApplicationDevServer devServer = new ZeppelinApplicationDevServer(
org.apache.zeppelin.helium.ZeppelinApplicationDevServer devServer = new org.apache.zeppelin.helium.ZeppelinApplicationDevServer(
Clock.class.getName(), pool.getAll());
// start development mode

View file

@ -0,0 +1,212 @@
---
layout: page
title: "Writing a new Visualization (Experimental)"
description: "Apache Zeppelin Visualization is a pluggable package that can be loaded and unloaded at runtime through the Helium framework in Zeppelin. A Visualization is a JavaScript npm package, and users can use it just like any other built-in visualization in a notebook."
group: development
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Writing a new Visualization (Experimental)
<div id="toc"></div>
## What is Apache Zeppelin Visualization
Apache Zeppelin Visualization is a pluggable package that can be loaded and unloaded at runtime through the Helium framework in Zeppelin. A Visualization is a JavaScript npm package, and users can use it just like any other built-in visualization in a notebook.
## How it works
#### 1. Load Helium package files from registry
Zeppelin needs to know what Visualization packages are available. Zeppelin searches for _Helium package files_ in the local registry (by default, the `helium/` directory).
A _Helium package file_ provides information such as name, artifact, and so on. It's similar to _package.json_ in an npm package.
Here's an example `helium/zeppelin-example-horizontalbar.json`
```
{
"type" : "VISUALIZATION",
"name" : "zeppelin_horizontalbar",
"description" : "Horizontal Bar chart (example)",
"artifact" : "./zeppelin-examples/zeppelin-example-horizontalbar",
"license" : "Apache-2.0",
"icon" : "<i class='fa fa-bar-chart rotate90flipX'></i>"
}
```
Check [Create helium package file](#3-create-helium-package-file) section to learn about it.
#### 2. Enable packages
Once Zeppelin loads _Helium package files_ from local registry, available packages are displayed in Helium menu.
Click 'enable' button.
<img class="img-responsive" style="width:70%" src="../assets/themes/zeppelin/img/docs-img/writing_visualization_helium_menu.png" />
#### 3. Create and load visualization bundle on the fly
Once a Visualization package is enabled, [HeliumVisualizationFactory](https://github.com/apache/zeppelin/blob/master/zeppelin-zengine/src/main/java/org/apache/zeppelin/helium/HeliumVisualizationFactory.java) creates a js bundle. The js bundle is served by the `helium/visualization/load` REST API endpoint.
#### 4. Run visualization
Zeppelin shows an additional button for each loaded Visualization.
Users can use it just like any other built-in visualization.
<img class="img-responsive" style="width:70%" src="../assets/themes/zeppelin/img/docs-img/writing_visualization_example.png" />
## Write new Visualization
#### 1. Create a npm package
Create a [package.json](https://docs.npmjs.com/files/package.json) in your new Visualization directory. Normally, you can add any dependencies in package.json; however, a Zeppelin Visualization package only allows two dependencies: [zeppelin-vis](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization) and [zeppelin-tabledata](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/tabledata).
Here's an example
```
{
"name": "zeppelin_horizontalbar",
"description" : "Horizontal Bar chart",
"version": "1.0.0",
"main": "horizontalbar",
"author": "",
"license": "Apache-2.0",
"dependencies": {
"zeppelin-tabledata": "*",
"zeppelin-vis": "*"
}
}
```
#### 2. Create your own visualization
To create your own visualization, you need to create a js file, import the [Visualization](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js) class from the [zeppelin-vis](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization) package, and extend the class. The [zeppelin-tabledata](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/tabledata) package provides some useful transformations, like pivot, that you can use in your visualization (you can create your own transformation, too).
In the [Visualization](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js) class, there are several methods that you need to override and implement. Here's a simple visualization that just prints `Hello world`.
```
import Visualization from 'zeppelin-vis'
import PassthroughTransformation from 'zeppelin-tabledata/passthrough'
export default class helloworld extends Visualization {
constructor(targetEl, config) {
super(targetEl, config)
this.passthrough = new PassthroughTransformation(config);
}
render(tableData) {
this.targetEl.html('Hello world!')
}
getTransformation() {
return this.passthrough
}
}
```
To learn more about `Visualization` class, check [visualization.js](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/app/visualization/visualization.js).
You can check complete visualization package example [here](https://github.com/apache/zeppelin/tree/master/zeppelin-examples/zeppelin-example-horizontalbar).
Zeppelin's built-in visualization uses the same API, so you can check [built-in visualizations](https://github.com/apache/zeppelin/tree/master/zeppelin-web/src/app/visualization/builtins) as additional examples.
#### 3. Create __Helium package file__
__Helium Package file__ is a json file that provides information about the application.
Json file contains the following information
```
{
"type" : "VISUALIZATION",
"name" : "zeppelin_horizontalbar",
"description" : "Horizontal Bar chart (example)",
"license" : "Apache-2.0",
"artifact" : "./zeppelin-examples/zeppelin-example-horizontalbar",
"icon" : "<i class='fa fa-bar-chart rotate90flipX'></i>"
}
```
##### type
When you're creating a visualization, 'type' should be 'VISUALIZATION'.
Check [application](./writingzeppelinapplication.html) type if you're interested in the other types of package.
##### name
Name of visualization. Should be unique. Allows `[A-Za-z0-9_]`.
##### description
A short description about visualization.
##### artifact
Location of the visualization npm package. Supports an npm package with a version or a local filesystem path.
e.g.
When artifact exists in npm repository
```
artifact: "my-visualization@1.0.0"
```
When artifact exists in local file system
```
artifact: "/path/to/my/visualization"
```
##### license
License information.
e.g.
```
license: "Apache-2.0"
```
##### icon
Icon to be used in visualization select button. String in this field will be rendered as a HTML tag.
e.g.
```
icon: "<i class='fa fa-coffee'></i>"
```
#### 4. Run in dev mode
Place your __Helium package file__ in local registry (ZEPPELIN_HOME/helium).
Run Zeppelin. And then run zeppelin-web in visualization dev mode.
```
cd zeppelin-web
yarn run visdev
```
You can browse localhost:9000. Every time you refresh your browser, Zeppelin will rebuild your visualization and reload changes.

View file

@ -125,7 +125,7 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* Getting Started
* [Quick Start](./install/install.html) for basic instructions on installing Apache Zeppelin
* [Configuration](./install/install.html#apache-zeppelin-configuration) lists for Apache Zeppelin
* [Configuration](./install/configuration.html) lists for Apache Zeppelin
* [Explore Apache Zeppelin UI](./quickstart/explorezeppelinui.html): basic components of Apache Zeppelin home
* [Tutorial](./quickstart/tutorial.html): a short walk-through tutorial that uses Apache Spark backend
* Basic Feature Guide
@ -143,6 +143,7 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Interpreter Installation](./manual/interpreterinstallation.html): Install not only community managed interpreters but also 3rd party interpreters
* [Interpreter Dependency Management](./manual/dependencymanagement.html) when you include external libraries to interpreter
* [Interpreter User Impersonation](./manual/userimpersonation.html) when you want to run interpreter as end user
* [Interpreter Execution Hooks](./manual/interpreterexechooks.html) to specify additional code to be executed by an interpreter at pre and post-paragraph code execution
* Available Interpreters: currently, about 20 interpreters are available in Apache Zeppelin.
####Display System

View file

@ -205,7 +205,7 @@ mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
Ignite Interpreter
```bash
mvn clean package -Dignite.version=1.6.0 -DskipTests
mvn clean package -Dignite.version=1.8.0 -DskipTests
```
Scalding Interpreter

View file

@ -0,0 +1,388 @@
---
layout: page
title: "Apache Zeppelin Configuration"
description: "This page will guide you to configure Apache Zeppelin using either environment variables or Java properties. Also, you can configure SSL for Zeppelin."
group: install
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Apache Zeppelin Configuration
<div id="toc"></div>
## Zeppelin Properties
There are two locations you can configure Apache Zeppelin.
* **Environment variables** can be defined in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows).
* **Java properties** can be defined in `conf/zeppelin-site.xml`.
If both are defined, then the **environment variables** will take priority.
<table class="table-configuration">
<tr>
<th>zeppelin-env.sh</th>
<th>zeppelin-site.xml</th>
<th>Default value</th>
<th class="col-md-4">Description</th>
</tr>
<tr>
<td>ZEPPELIN_PORT</td>
<td>zeppelin.server.port</td>
<td>8080</td>
<td>Zeppelin server port</td>
</tr>
<tr>
<td>ZEPPELIN_SSL_PORT</td>
<td>zeppelin.server.ssl.port</td>
<td>8443</td>
<td>Zeppelin Server ssl port (used when ssl environment/property is set to true)</td>
</tr>
<tr>
<td>ZEPPELIN_MEM</td>
<td>N/A</td>
<td>-Xmx1024m -XX:MaxPermSize=512m</td>
<td>JVM mem options</td>
</tr>
<tr>
<td>ZEPPELIN_INTP_MEM</td>
<td>N/A</td>
<td>ZEPPELIN_MEM</td>
<td>JVM mem options for interpreter process</td>
</tr>
<tr>
<td>ZEPPELIN_JAVA_OPTS</td>
<td>N/A</td>
<td></td>
<td>JVM options</td>
</tr>
<tr>
<td>ZEPPELIN_ALLOWED_ORIGINS</td>
<td>zeppelin.server.allowed.origins</td>
<td>*</td>
<td>Enables a way to specify a ',' separated list of allowed origins for REST and websockets. <br /> e.g. http://localhost:8080 </td>
</tr>
<tr>
<td>N/A</td>
<td>zeppelin.anonymous.allowed</td>
<td>true</td>
<td>The anonymous user is allowed by default.</td>
</tr>
<tr>
<td>ZEPPELIN_SERVER_CONTEXT_PATH</td>
<td>zeppelin.server.context.path</td>
<td>/</td>
<td>Context path of the web application</td>
</tr>
<tr>
<td>ZEPPELIN_SSL</td>
<td>zeppelin.ssl</td>
<td>false</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_CLIENT_AUTH</td>
<td>zeppelin.ssl.client.auth</td>
<td>false</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_PATH</td>
<td>zeppelin.ssl.keystore.path</td>
<td>keystore</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_TYPE</td>
<td>zeppelin.ssl.keystore.type</td>
<td>JKS</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_PASSWORD</td>
<td>zeppelin.ssl.keystore.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEY_MANAGER_PASSWORD</td>
<td>zeppelin.ssl.key.manager.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_PATH</td>
<td>zeppelin.ssl.truststore.path</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_TYPE</td>
<td>zeppelin.ssl.truststore.type</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_PASSWORD</td>
<td>zeppelin.ssl.truststore.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN</td>
<td>zeppelin.notebook.homescreen</td>
<td></td>
<td>Display note IDs on the Apache Zeppelin homescreen <br />e.g. 2A94M5J1Z</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE</td>
<td>zeppelin.notebook.homescreen.hide</td>
<td>false</td>
<td>Hide the note ID set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For the further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
</tr>
<tr>
<td>ZEPPELIN_WAR_TEMPDIR</td>
<td>zeppelin.war.tempdir</td>
<td>webapps</td>
<td>Location of the jetty temporary directory</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_DIR</td>
<td>zeppelin.notebook.dir</td>
<td>notebook</td>
<td>The root directory where notebook directories are saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_BUCKET</td>
<td>zeppelin.notebook.s3.bucket</td>
<td>zeppelin</td>
<td>S3 Bucket where notebook files will be saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_USER</td>
<td>zeppelin.notebook.s3.user</td>
<td>user</td>
<td>User name of an S3 bucket<br />e.g. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_ENDPOINT</td>
<td>zeppelin.notebook.s3.endpoint</td>
<td>s3.amazonaws.com</td>
<td>Endpoint for the bucket</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID</td>
<td>zeppelin.notebook.s3.kmsKeyID</td>
<td></td>
<td>AWS KMS Key ID to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_EMP</td>
<td>zeppelin.notebook.s3.encryptionMaterialsProvider</td>
<td></td>
<td>Class name of a custom S3 encryption materials provider implementation to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING</td>
<td>zeppelin.notebook.azure.connectionString</td>
<td></td>
<td>The Azure storage account connection string<br />e.g. <br/><code>DefaultEndpointsProtocol=https;<br/>AccountName=&lt;accountName&gt;;<br/>AccountKey=&lt;accountKey&gt;</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_SHARE</td>
<td>zeppelin.notebook.azure.share</td>
<td>zeppelin</td>
<td>Azure Share where the notebook files will be saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_USER</td>
<td>zeppelin.notebook.azure.user</td>
<td>user</td>
<td>Optional user name of an Azure file share<br />e.g. <code>share/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_STORAGE</td>
<td>zeppelin.notebook.storage</td>
<td>org.apache.zeppelin.notebook.repo.GitNotebookRepo</td>
<td>Comma separated list of notebook storage locations</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC</td>
<td>zeppelin.notebook.one.way.sync</td>
<td>false</td>
<td>If there are multiple notebook storage locations, should we treat the first one as the only source of truth?</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_PUBLIC</td>
<td>zeppelin.notebook.public</td>
<td>true</td>
<td>Make notebook public (set only <code>owners</code>) by default when created/imported. If set to <code>false</code> will add <code>user</code> to <code>readers</code> and <code>writers</code> as well, making it private and invisible to other users unless permissions are granted.</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETERS</td>
<td>zeppelin.interpreters</td>
<td>org.apache.zeppelin.spark.SparkInterpreter,<br />org.apache.zeppelin.spark.PySparkInterpreter,<br />org.apache.zeppelin.spark.SparkSqlInterpreter,<br />org.apache.zeppelin.spark.DepInterpreter,<br />org.apache.zeppelin.markdown.Markdown,<br />org.apache.zeppelin.shell.ShellInterpreter,<br />
...
</td>
<td>
Comma separated interpreter configurations [Class] <br/><br />
<span style="font-style:italic; color: gray">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0.</span>
</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETER_DIR</td>
<td>zeppelin.interpreter.dir</td>
<td>interpreter</td>
<td>Interpreter directory</td>
</tr>
<tr>
<td>ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE</td>
<td>zeppelin.websocket.max.text.message.size</td>
<td>1024000</td>
<td>Size (in characters) of the maximum text message that can be received by websocket.</td>
</tr>
</table>
## SSL Configuration
Enabling SSL requires a few configuration changes. First, you need to create certificates and then update necessary configurations to enable server side SSL and/or client side certificate authentication.
### Creating and configuring the Certificates
Information about how to generate certificates and a keystore can be found [here](https://wiki.eclipse.org/Jetty/Howto/Configure_SSL).
A condensed example can be found in the top answer to this [StackOverflow post](http://stackoverflow.com/questions/4008837/configure-ssl-on-jetty).
The keystore holds the private key and certificate on the server end. The truststore holds the trusted client certificates. Be sure that the path and password for these two stores are correctly configured in the password fields below. They can be obfuscated using the Jetty password tool. After Maven pulls in all the dependencies to build Zeppelin, one of the Jetty jars contains the Password tool. Invoke this command from the Zeppelin home build directory with the appropriate version, user, and password.
```
java -cp ./zeppelin-server/target/lib/jetty-all-server-<version>.jar org.eclipse.jetty.util.security.Password <user> <password>
```
If you are using a self-signed certificate, a certificate signed by an untrusted CA, or if client authentication is enabled, then the client must have a browser create exceptions for both the normal HTTPS port and WebSocket port. This can be done by trying to establish an HTTPS connection to both ports in a browser (e.g. if the ports are 443 and 8443, then visit https://127.0.0.1:443 and https://127.0.0.1:8443). This step can be skipped if the server certificate is signed by a trusted CA and client auth is disabled.
### Configuring server side SSL
The following properties need to be updated in the `zeppelin-site.xml` in order to enable server side SSL.
```
<property>
<name>zeppelin.server.ssl.port</name>
<value>8443</value>
<description>Server ssl port. (used when ssl property is set to true)</description>
</property>
<property>
<name>zeppelin.ssl</name>
<value>true</value>
<description>Should SSL be used by the servers?</description>
</property>
<property>
<name>zeppelin.ssl.keystore.path</name>
<value>keystore</value>
<description>Path to keystore relative to Zeppelin configuration directory</description>
</property>
<property>
<name>zeppelin.ssl.keystore.type</name>
<value>JKS</value>
<description>The format of the given keystore (e.g. JKS or PKCS12)</description>
</property>
<property>
<name>zeppelin.ssl.keystore.password</name>
<value>change me</value>
<description>Keystore password. Can be obfuscated by the Jetty Password tool</description>
</property>
<property>
<name>zeppelin.ssl.key.manager.password</name>
<value>change me</value>
<description>Key Manager password. Defaults to keystore password. Can be obfuscated.</description>
</property>
```
### Enabling client side certificate authentication
The following properties need to be updated in the `zeppelin-site.xml` in order to enable client side certificate authentication.
```
<property>
<name>zeppelin.server.ssl.port</name>
<value>8443</value>
<description>Server ssl port. (used when ssl property is set to true)</description>
</property>
<property>
<name>zeppelin.ssl.client.auth</name>
<value>true</value>
<description>Should client authentication be used for SSL connections?</description>
</property>
<property>
<name>zeppelin.ssl.truststore.path</name>
<value>truststore</value>
<description>Path to truststore relative to Zeppelin configuration directory. Defaults to the keystore path</description>
</property>
<property>
<name>zeppelin.ssl.truststore.type</name>
<value>JKS</value>
<description>The format of the given truststore (e.g. JKS or PKCS12). Defaults to the same type as the keystore type</description>
</property>
<property>
<name>zeppelin.ssl.truststore.password</name>
<value>change me</value>
<description>Truststore password. Can be obfuscated by the Jetty Password tool. Defaults to the keystore password</description>
</property>
```
### Obfuscating Passwords using the Jetty Password Tool
Security best practices advise against using plain text passwords, and Jetty provides a password tool to help obfuscate the passwords used to access the KeyStore and TrustStore.
The Password tool documentation can be found [here](http://www.eclipse.org/jetty/documentation/current/configuring-security-secure-passwords.html).
After using the tool:
```
java -cp $ZEPPELIN_HOME/zeppelin-server/target/lib/jetty-util-9.2.15.v20160210.jar \
org.eclipse.jetty.util.security.Password \
password
2016-12-15 10:46:47.931:INFO::main: Logging initialized @101ms
password
OBF:1v2j1uum1xtv1zej1zer1xtn1uvk1v1v
MD5:5f4dcc3b5aa765d61d8327deb882cf99
```
update your configuration with the obfuscated password :
```
<property>
<name>zeppelin.ssl.keystore.password</name>
<value>OBF:1v2j1uum1xtv1zej1zer1xtn1uvk1v1v</value>
<description>Keystore password. Can be obfuscated by the Jetty Password tool</description>
</property>
```
**Note:** After updating these configurations, Zeppelin server needs to be restarted.

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Quick Start"
description: "This page will help you get started and will guide you through installing Apache Zeppelin, running it in the command line and configuring options."
description: "This page will help you get started and will guide you through installing Apache Zeppelin and running it in the command line."
group: install
---
<!--
@ -56,8 +56,9 @@ Two binary packages are available on the [Apache Zeppelin Download Page](http://
Unpack and follow [install additional interpreters](../manual/interpreterinstallation.html) to install interpreters. If you're unsure, just run `./bin/install-interpreter.sh --all` and install all interpreters.
## Starting Apache Zeppelin from the Command Line
#### Starting Apache Zeppelin
## Starting Apache Zeppelin
#### Starting Apache Zeppelin from the Command Line
On all unix like platforms:
@ -79,266 +80,6 @@ After Zeppelin has started successfully, go to [http://localhost:8080](http://lo
bin/zeppelin-daemon.sh stop
```
## Next Steps
Congratulations, you have successfully installed Apache Zeppelin! Here are few steps you might find useful:
#### New to Apache Zeppelin...
* For an in-depth overview, head to [Explore Apache Zeppelin UI](../quickstart/explorezeppelinui.html).
* And then, try run [tutorial](http://localhost:8080/#/notebook/2A94M5J1Z) notebook in your Zeppelin.
* And see how to change [configurations](#apache-zeppelin-configuration) like port number, etc.
#### Zeppelin with Apache Spark ...
* To know more about deep integration with [Apache Spark](http://spark.apache.org/), check [Spark Interpreter](../interpreter/spark.html).
#### Zeppelin with JDBC data sources ...
* Check [JDBC Interpreter](../interpreter/jdbc.html) to know more about configure and uses multiple JDBC data sources.
#### Zeppelin with Python ...
* Check [Python interpreter](../interpreter/python.html) to know more about Matplotlib, Pandas, Conda/Docker environment integration.
#### Multi-user environment ...
* Turn on [authentication](../security/shiroauthentication.html).
* Manage your [notebook permission](../security/notebook_authorization.html).
* For more informations, go to **More** -> **Security** section.
#### Other useful informations ...
* Learn how [Display System](../displaysystem/basicdisplaysystem.html) works.
* Use [Service Manager](#start-apache-zeppelin-with-a-service-manager) to start Zeppelin.
* If you're using previous version please see [Upgrade Zeppelin version](./upgrade.html).
## Apache Zeppelin Configuration
You can configure Apache Zeppelin with either **environment variables** in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows) or **Java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will take priority.
<table class="table-configuration">
<tr>
<th>zeppelin-env.sh</th>
<th>zeppelin-site.xml</th>
<th>Default value</th>
<th class="col-md-4">Description</th>
</tr>
<tr>
<td>ZEPPELIN_PORT</td>
<td>zeppelin.server.port</td>
<td>8080</td>
<td>Zeppelin server port</td>
</tr>
<tr>
<td>ZEPPELIN_SSL_PORT</td>
<td>zeppelin.server.ssl.port</td>
<td>8443</td>
<td>Zeppelin Server ssl port (used when ssl environment/property is set to true)</td>
</tr>
<tr>
<td>ZEPPELIN_MEM</td>
<td>N/A</td>
<td>-Xmx1024m -XX:MaxPermSize=512m</td>
<td>JVM mem options</td>
</tr>
<tr>
<td>ZEPPELIN_INTP_MEM</td>
<td>N/A</td>
<td>ZEPPELIN_MEM</td>
<td>JVM mem options for interpreter process</td>
</tr>
<tr>
<td>ZEPPELIN_JAVA_OPTS</td>
<td>N/A</td>
<td></td>
<td>JVM options</td>
</tr>
<tr>
<td>ZEPPELIN_ALLOWED_ORIGINS</td>
<td>zeppelin.server.allowed.origins</td>
<td>*</td>
<td>Enables a way to specify a ',' separated list of allowed origins for REST and websockets. <br /> i.e. http://localhost:8080 </td>
</tr>
<tr>
<td>N/A</td>
<td>zeppelin.anonymous.allowed</td>
<td>true</td>
<td>The anonymous user is allowed by default.</td>
</tr>
<tr>
<td>ZEPPELIN_SERVER_CONTEXT_PATH</td>
<td>zeppelin.server.context.path</td>
<td>/</td>
<td>Context path of the web application</td>
</tr>
<tr>
<td>ZEPPELIN_SSL</td>
<td>zeppelin.ssl</td>
<td>false</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_CLIENT_AUTH</td>
<td>zeppelin.ssl.client.auth</td>
<td>false</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_PATH</td>
<td>zeppelin.ssl.keystore.path</td>
<td>keystore</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_TYPE</td>
<td>zeppelin.ssl.keystore.type</td>
<td>JKS</td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEYSTORE_PASSWORD</td>
<td>zeppelin.ssl.keystore.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_KEY_MANAGER_PASSWORD</td>
<td>zeppelin.ssl.key.manager.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_PATH</td>
<td>zeppelin.ssl.truststore.path</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_TYPE</td>
<td>zeppelin.ssl.truststore.type</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_SSL_TRUSTSTORE_PASSWORD</td>
<td>zeppelin.ssl.truststore.password</td>
<td></td>
<td></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN</td>
<td>zeppelin.notebook.homescreen</td>
<td></td>
<td>Display note IDs on the Apache Zeppelin homescreen <br />i.e. 2A94M5J1Z</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE</td>
<td>zeppelin.notebook.homescreen.hide</td>
<td>false</td>
<td>Hide the note ID set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For the further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
</tr>
<tr>
<td>ZEPPELIN_WAR_TEMPDIR</td>
<td>zeppelin.war.tempdir</td>
<td>webapps</td>
<td>Location of the jetty temporary directory</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_DIR</td>
<td>zeppelin.notebook.dir</td>
<td>notebook</td>
<td>The root directory where notebook directories are saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_BUCKET</td>
<td>zeppelin.notebook.s3.bucket</td>
<td>zeppelin</td>
<td>S3 Bucket where notebook files will be saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_USER</td>
<td>zeppelin.notebook.s3.user</td>
<td>user</td>
<td>User name of an S3 bucket<br />i.e. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_ENDPOINT</td>
<td>zeppelin.notebook.s3.endpoint</td>
<td>s3.amazonaws.com</td>
<td>Endpoint for the bucket</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID</td>
<td>zeppelin.notebook.s3.kmsKeyID</td>
<td></td>
<td>AWS KMS Key ID to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_EMP</td>
<td>zeppelin.notebook.s3.encryptionMaterialsProvider</td>
<td></td>
<td>Class name of a custom S3 encryption materials provider implementation to use for encrypting data in S3 (optional)</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING</td>
<td>zeppelin.notebook.azure.connectionString</td>
<td></td>
<td>The Azure storage account connection string<br />i.e. <br/><code>DefaultEndpointsProtocol=https;<br/>AccountName=&lt;accountName&gt;;<br/>AccountKey=&lt;accountKey&gt;</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_SHARE</td>
<td>zeppelin.notebook.azure.share</td>
<td>zeppelin</td>
<td>Azure Share where the notebook files will be saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_USER</td>
<td>zeppelin.notebook.azure.user</td>
<td>user</td>
<td>Optional user name of an Azure file share<br />i.e. <code>share/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_STORAGE</td>
<td>zeppelin.notebook.storage</td>
<td>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</td>
<td>Comma separated list of notebook storage locations</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC</td>
<td>zeppelin.notebook.one.way.sync</td>
<td>false</td>
<td>If there are multiple notebook storage locations, should we treat the first one as the only source of truth?</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_PUBLIC</td>
<td>zeppelin.notebook.public</td>
<td>true</td>
<td>Make notebook public (set only `owners`) by default when created/imported. If set to `false` will add `user` to `readers` and `writers` as well, making it private and invisible to other users unless permissions are granted.</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETERS</td>
<td>zeppelin.interpreters</td>
<description></description>
<td>org.apache.zeppelin.spark.SparkInterpreter,<br />org.apache.zeppelin.spark.PySparkInterpreter,<br />org.apache.zeppelin.spark.SparkSqlInterpreter,<br />org.apache.zeppelin.spark.DepInterpreter,<br />org.apache.zeppelin.markdown.Markdown,<br />org.apache.zeppelin.shell.ShellInterpreter,<br />
...
</td>
<td>
Comma separated interpreter configurations [Class] <br/>
<span style="font-style:italic">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0 on.</span>
</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETER_DIR</td>
<td>zeppelin.interpreter.dir</td>
<td>interpreter</td>
<td>Interpreter directory</td>
</tr>
<tr>
<td>ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE</td>
<td>zeppelin.websocket.max.text.message.size</td>
<td>1024000</td>
<td>Size (in characters) of the maximum text message that can be received by websocket.</td>
</tr>
</table>
#### Start Apache Zeppelin with a service manager
> **Note :** The below description was written based on Ubuntu Linux.
@ -381,6 +122,37 @@ exec bin/zeppelin-daemon.sh upstart
```
## Building from Source
## Next Steps
Congratulations, you have successfully installed Apache Zeppelin! Here are a few steps you might find useful:
#### New to Apache Zeppelin...
* For an in-depth overview, head to [Explore Apache Zeppelin UI](../quickstart/explorezeppelinui.html).
* And then, try running the [tutorial](http://localhost:8080/#/notebook/2A94M5J1Z) notebook in your Zeppelin.
* And see how to change [configurations](./configuration.html) like port number, etc.
#### Zeppelin with Apache Spark ...
* To know more about deep integration with [Apache Spark](http://spark.apache.org/), check [Spark Interpreter](../interpreter/spark.html).
#### Zeppelin with JDBC data sources ...
* Check [JDBC Interpreter](../interpreter/jdbc.html) to learn more about configuring and using multiple JDBC data sources.
#### Zeppelin with Python ...
* Check [Python interpreter](../interpreter/python.html) to know more about Matplotlib, Pandas, Conda/Docker environment integration.
#### Multi-user environment ...
* Turn on [authentication](../security/shiroauthentication.html).
* Manage your [notebook permission](../security/notebook_authorization.html).
* For more information, go to the **More** -> **Security** section.
#### Other useful informations ...
* Learn how [Display System](../displaysystem/basicdisplaysystem.html) works.
* Use [Service Manager](#start-apache-zeppelin-with-a-service-manager) to start Zeppelin.
* If you're using previous version please see [Upgrade Zeppelin version](./upgrade.html).
## Building Apache Zeppelin from Source
If you want to build from source instead of using binary package, follow the instructions [here](./build.html).

View file

@ -208,3 +208,30 @@ Don't forget to set Spark `master` as `mesos://127.0.1.1:5050` in Zeppelin **Int
After running a single paragraph with Spark interpreter in Zeppelin, browse `http://<hostname>:5050/#/frameworks` and check Zeppelin application is running well or not.
<img src="../assets/themes/zeppelin/img/docs-img/mesos_frameworks.png" />
### Troubleshooting for Spark on Mesos
- If you have a problem with the hostname, use the `--add-host` option when executing `docker run`
```
## use `--add-host=moby:127.0.0.1` option to resolve
## since docker container couldn't resolve `moby`
: java.net.UnknownHostException: moby: moby: Name or service not known
at java.net.InetAddress.getLocalHost(InetAddress.java:1496)
at org.apache.spark.util.Utils$.findLocalInetAddress(Utils.scala:789)
at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress$lzycompute(Utils.scala:782)
at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress(Utils.scala:782)
```
- If you have a problem with the Mesos master, try `mesos://127.0.0.1` instead of `mesos://127.0.1.1`
```
I0103 20:17:22.329269 340 sched.cpp:330] New master detected at master@127.0.1.1:5050
I0103 20:17:22.330749 340 sched.cpp:341] No credentials provided. Attempting to register without authentication
W0103 20:17:22.333531 340 sched.cpp:736] Ignoring framework registered message because it was sentfrom 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:24.040252 339 sched.cpp:736] Ignoring framework registered message because it was sentfrom 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:26.150250 339 sched.cpp:736] Ignoring framework registered message because it was sentfrom 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:26.737604 339 sched.cpp:736] Ignoring framework registered message because it was sentfrom 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
W0103 20:17:35.241714 336 sched.cpp:736] Ignoring framework registered message because it was sentfrom 'master@127.0.0.1:5050' instead of the leading master 'master@127.0.1.1:5050'
```

View file

@ -54,4 +54,5 @@ So, copying `notebook` and `conf` directory should be enough.
- Usage of `ZEPPELIN_PORT` is not supported in ssl mode. Instead use `ZEPPELIN_SSL_PORT` to configure the ssl port. Value from `ZEPPELIN_PORT` is used only when `ZEPPELIN_SSL` is set to `false`.
- The support on Spark 1.1.x to 1.3.x is deprecated.
- From 0.7, we use `pegdown` as the `markdown.parser.type` option for the `%md` interpreter. Rendered markdown might be different from what you expected.
- From 0.7 note.json format has been changed to support multiple outputs in a paragraph. Zeppelin will automatically convert old format to new format. 0.6 or lower version can read new note.json format but output will not be displayed. For the detail, see [ZEPPELIN-212](http://issues.apache.org/jira/browse/ZEPPELIN-212) and [pullrequest](https://github.com/apache/zeppelin/pull/1658).
- From 0.7 note.json format has been changed to support multiple outputs in a paragraph. Zeppelin will automatically convert old format to new format. 0.6 or lower version can read new note.json format but output will not be displayed. For the detail, see [ZEPPELIN-212](http://issues.apache.org/jira/browse/ZEPPELIN-212) and [pull request](https://github.com/apache/zeppelin/pull/1658).
- From 0.7, the note storage layer will use `GitNotebookRepo` by default instead of the `VFSNotebookRepo` storage layer. `GitNotebookRepo` is an extension of `VFSNotebookRepo` with versioning capabilities on top of it.

View file

@ -53,7 +53,7 @@ At the "Interpreters" menu, you have to create a new Flink interpreter and provi
For more information about Flink configuration, you can find it [here](https://ci.apache.org/projects/flink/flink-docs-release-1.0/setup/config.html).
## How to test it's working
In example, by using the [Zeppelin notebook](https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL05GTGFicy96ZXBwZWxpbi1ub3RlYm9va3MvbWFzdGVyL25vdGVib29rcy8yQVFFREs1UEMvbm90ZS5qc29u) is from Till Rohrmann's presentation [Interactive data analysis with Apache Flink](http://www.slideshare.net/tillrohrmann/data-analysis-49806564) for Apache Flink Meetup.
You can find an example of Flink usage in the Zeppelin Tutorial folder or try the following word count example, by using the [Zeppelin notebook](https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL05GTGFicy96ZXBwZWxpbi1ub3RlYm9va3MvbWFzdGVyL25vdGVib29rcy8yQVFFREs1UEMvbm90ZS5qc29u) from Till Rohrmann's presentation [Interactive data analysis with Apache Flink](http://www.slideshare.net/tillrohrmann/data-analysis-49806564) for Apache Flink Meetup.
```
%sh
@ -63,7 +63,7 @@ wget http://www.gutenberg.org/ebooks/10.txt.utf-8
{% highlight scala %}
%flink
case class WordCount(word: String, frequency: Int)
val bible:DataSet[String] = env.readTextFile("10.txt.utf-8")
val bible:DataSet[String] = benv.readTextFile("10.txt.utf-8")
val partialCounts: DataSet[WordCount] = bible.flatMap{
line =>
"""\b\w+\b""".r.findAllIn(line).map(word => WordCount(word, 1))

View file

@ -40,9 +40,9 @@ Additional requirements for the Livy interpreter are:
## Configuration
We added some common configurations for spark, and you can set any configuration you want.
This link contains all spark configurations: http://spark.apache.org/docs/latest/configuration.html#available-properties.
You can find all Spark configurations [here](http://spark.apache.org/docs/latest/configuration.html#available-properties).
And instead of starting property with `spark.` it should be replaced with `livy.spark.`.
Example: `spark.master` to `livy.spark.master`
Example: `spark.driver.memory` to `livy.spark.driver.memory`
<table class="table-configuration">
<tr>
@ -50,11 +50,6 @@ Example: `spark.master` to `livy.spark.master`
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>livy.spark.master</td>
<td>local[*]</td>
<td>Spark master uri. ex) spark://masterhost:7077</td>
</tr>
<tr>
<td>zeppelin.livy.url</td>
<td>http://localhost:8998</td>
@ -127,6 +122,8 @@ Example: `spark.master` to `livy.spark.master`
</tr>
</table>
**We removed `livy.spark.master` in zeppelin-0.7, because we suggest that users use livy 0.3 with zeppelin-0.7. Livy 0.3 doesn't allow specifying `livy.spark.master`; it enforces yarn-cluster mode.**
## Adding External libraries
You can load dynamic libraries into the livy interpreter by setting the `livy.spark.jars.packages` property to a comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. The format for the coordinates should be groupId:artifactId:version.

View file

@ -26,6 +26,7 @@ group: manual
## Supported runtime mode
- Local
- MapReduce
- Tez_Local (Only Tez 0.7 is supported)
- Tez (Only Tez 0.7 is supported)
## How to use
@ -40,6 +41,10 @@ group: manual
HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
- Tez Local Mode
Nothing needs to be done for tez local mode
- Tez Mode
HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
@ -47,6 +52,8 @@ group: manual
### How to configure interpreter
At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default.
And you can set any pig properties here which will be passed to pig engine. (like tez.queue.name & mapred.job.queue.name).
Besides, we use paragraph title as job name if it exists, else use the last line of pig script. So you can use that to find app running in YARN RM UI.
<table class="table-configuration">
<tr>
@ -57,7 +64,7 @@ At the Interpreters menu, you have to create a new Pig interpreter. Pig interpre
<tr>
<td>zeppelin.pig.execType</td>
<td>mapreduce</td>
<td>Execution mode for pig runtime. local | mapreduce | tez </td>
<td>Execution mode for pig runtime. local | mapreduce | tez_local | tez </td>
</tr>
<tr>
<td>zeppelin.pig.includeJobStats</td>
@ -69,6 +76,16 @@ At the Interpreters menu, you have to create a new Pig interpreter. Pig interpre
<td>1000</td>
<td>max row number displayed in <code>%pig.query</code></td>
</tr>
<tr>
<td>tez.queue.name</td>
<td>default</td>
<td>queue name for tez engine</td>
</tr>
<tr>
<td>mapred.job.queue.name</td>
<td>default</td>
<td>queue name for mapreduce engine</td>
</tr>
</table>
### Example
@ -94,4 +111,6 @@ c = group b by Category;
foreach c generate group as category, COUNT($1) as count;
```
Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, and do different kinds of query based on the data of `%pig`.
Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, and do different kinds of query based on the data of `%pig`.
Besides, we recommend you to specify alias explicitly so that the visualization can display the column name correctly. Here, we name `COUNT($1)` as `count`, if you don't do this,
then we will name it using position, here we will use `col_1` to represent `COUNT($1)` if you don't specify alias for it. There's one pig tutorial note in zeppelin for your reference.

View file

@ -0,0 +1,81 @@
---
layout: page
title: "Interpreter Execution Hooks (Experimental)"
description: "Apache Zeppelin allows for users to specify additional code to be executed by an interpreter at pre and post-paragraph code execution."
group: manual
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Interpreter Execution Hooks (Experimental)
<div id="toc"></div>
## Overview
Apache Zeppelin allows for users to specify additional code to be executed by an interpreter at pre and post-paragraph code execution.
This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times.
Currently, this feature is only available for the spark and pyspark interpreters.
To specify your hook code, you may use `z.registerHook()`.
For example, enter the following into one paragraph:
```python
%pyspark
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
```
These calls will not take effect until the next time you run a paragraph.
In another paragraph, enter
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```
The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```
If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered.
To completely unregister a hook event, use `z.unregisterHook(eventCode)`.
Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
Finally, the hook registry is internally shared by other interpreters in the same group.
This would allow for hook code for one interpreter REPL to be set by another as follows:
```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, the interpreter REPL will become unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

View file

@ -108,7 +108,7 @@ You can also install 3rd party interpreters located in the maven repository by u
The above command will download maven artifact `groupId1:artifact1:version1` and all of its transitive dependencies into `interpreter/interpreter1` directory.
Once you have installed interpreters, you'll need to add interpreter class name into `zeppelin.interpreters` property in [configuration](../install/install.html#apache-zeppelin-configuration).
Once you have installed interpreters, you'll need to add interpreter class name into `zeppelin.interpreters` property in [configuration](../install/configuration.html).
And then restart Zeppelin, [create interpreter setting](../manual/interpreters.html#what-is-zeppelin-interpreter) and [bind it with your notebook](../manual/interpreters.html#what-is-zeppelin-interpreter-setting).

View file

@ -82,49 +82,3 @@ interpreter.start()
The above code will start interpreter thread inside your process. Once the interpreter is started you can configure zeppelin to connect to RemoteInterpreter by checking **Connect to existing process** checkbox and then provide **Host** and **Port** on which interpreter process is listening as shown in the image below:
<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">
## (Experimental) Interpreter Execution Hooks
Zeppelin allows for users to specify additional code to be executed by an interpreter at pre and post-paragraph code execution. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, you may use '`z.registerHook()`. For example, enter the following into one paragraph:
```python
%pyspark
z.registerHook("post_exec", "print 'This code should be executed before the parapgraph code!'")
z.registerHook("pre_exec", "print 'This code should be executed after the paragraph code!'")
```
These calls will not take into effect until the next time you run a paragraph. In another paragraph, enter
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```
The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```
If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
```
```
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
Finally, the hook registry is internally shared by other interpreters in the same group. This would allow for hook code for one interpreter REPL to be set by another as follows:
```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, this will cause the interpreter REPL to become unable to execute any code pass the pre-execute stage making it impossible for direct calls to `z.unregisterHook()` to take into effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

View file

@ -43,10 +43,10 @@ cat ~/.ssh/id_rsa.pub | ssh user1@localhost 'cat >> .ssh/authorized_keys'
```
export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
```
* Start zeppelin server.
<hr>
<div class="row">
<div class="col-md-12">
@ -57,13 +57,13 @@ export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c
<a data-lightbox="compiler" href="../assets/themes/zeppelin/img/screenshots/user-impersonation.gif">
<img class="img-responsive" src="../assets/themes/zeppelin/img/screenshots/user-impersonation.gif" />
</a>
</div>
</div>
<hr>
* Go to interpreter setting page, and enable "User Impersonate" in any of the interpreters (in my example it's the shell interpreter)
* Test with a simple paragraph
```
@ -71,3 +71,5 @@ export ZEPPELIN_IMPERSONATE_CMD='sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c
whoami
```
Note that usage of "User Impersonate" option will enable Spark interpreter to use `--proxy-user` option with current user by default. If you want to disable `--proxy-user` option, then refer to `ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER` variable in `conf/zeppelin-env.sh`

View file

@ -118,14 +118,16 @@ cd zeppelin
Package Zeppelin.
```
mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.2
mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.3 -Pscala-2.10
```
`-DskipTests` skips build tests- you're not developing (yet), so you don't need to do tests, the clone version *should* build.
`-Pspark-1.6` tells maven to build a Zeppelin with Spark 1.6. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.
`-Dflink.version=1.1.2` tells maven specifically to build Zeppelin with Flink version 1.1.2.
`-Dflink.version=1.1.3` tells maven specifically to build Zeppelin with Flink version 1.1.3.
`-Pscala-2.10` tells maven to build with Scala v2.10.
**Note:** You may wish to include additional build flags such as `-Ppyspark` or `-Psparkr`. See [the build section of github for more details](https://github.com/apache/zeppelin#build).
@ -162,7 +164,7 @@ Create a new notebook named "Flink Test" and copy and paste the following code.
%flink // let Zeppelin know what interpreter to use.
val text = env.fromElements("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
val text = benv.fromElements("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
"Butane in my veins and I'm out to cut the junkie",
"With the plastic eyeballs, spray paint the vegetables",
"Dog food stalls with the beefcake pantyhose",
@ -252,16 +254,16 @@ Building from source is recommended where possible, for simplicity in this tuto
To download the Flink Binary use `wget`
```bash
wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.0.3/flink-1.0.3-bin-hadoop24-scala_2.10.tgz"
tar -xzvf flink-1.0.3-bin-hadoop24-scala_2.10.tgz
wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.1.3/flink-1.1.3-bin-hadoop24-scala_2.10.tgz"
tar -xzvf flink-1.1.3-bin-hadoop24-scala_2.10.tgz
```
This will download Flink 1.0.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version.
This will download Flink 1.1.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version.
Start the Flink Cluster.
```bash
flink-1.0.3/bin/start-cluster.sh
flink-1.1.3/bin/start-cluster.sh
```
###### Building From source
@ -270,13 +272,13 @@ If you wish to build Flink from source, the following will be instructive. Note
See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions.
Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.0, and build.
Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.1.3-rc2, and build.
```
cd $HOME
git clone https://github.com/apache/flink.git
cd flink
git checkout release-1.0
git checkout release-1.1.3-rc2
mvn clean install -DskipTests
```
@ -297,8 +299,8 @@ If no task managers are present, restart the Flink cluster with the following co
(if binaries)
```
flink-1.0.3/bin/stop-cluster.sh
flink-1.0.3/bin/start-cluster.sh
flink-1.1.3/bin/stop-cluster.sh
flink-1.1.3/bin/start-cluster.sh
```
@ -320,12 +322,12 @@ Using binaries is also
To download the Spark Binary use `wget`
```bash
wget "http://mirrors.koehn.com/apache/spark/spark-1.6.1/spark-1.6.1-bin-hadoop2.4.tgz"
tar -xzvf spark-1.6.1-bin-hadoop2.4.tgz
mv spark-1.6.1-bin-hadoop4.4 spark
wget "http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz"
tar -xzvf spark-1.6.3-bin-hadoop2.6.tgz
mv spark-1.6.3-bin-hadoop2.6 spark
```
This will download Spark 1.6.1, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.4` to your appropriate version.
This will download Spark 1.6.3, compatible with Hadoop 2.6. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.6` to your appropriate version.
###### Building From source
@ -335,7 +337,7 @@ See the [Spark Installation](https://github.com/apache/spark/blob/master/README.
Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-1.6, and build.
**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at
the time of writing. You are free to check out other version, just make sure you build Zeppelin against the correct version of Spark.
the time of writing. You are free to check out another version, just make sure you build Zeppelin against the correct version of Spark. However, if you use Spark 2.0, the word count example will need to be changed, as Spark 2.0 is not compatible with the following examples.
```

View file

@ -0,0 +1,378 @@
---
layout: page
title: "Apache Zeppelin Helium REST API"
description: "This page contains Apache Zeppelin Helium REST API information."
group: rest-api
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Apache Zeppelin Helium REST API
<div id="toc"></div>
## Overview
Apache Zeppelin provides several REST APIs for interaction and remote activation of zeppelin functionality.
All REST APIs are available starting with the following endpoint `http://[zeppelin-server]:[zeppelin-port]/api`.
Note that Apache Zeppelin REST APIs receive or return JSON objects, so it is recommended that you install a JSON viewer such as [JSONView](https://chrome.google.com/webstore/detail/jsonview/chklaanhfefbnpoihckbnefhakgolnmc).
If you work with Apache Zeppelin and find a need for an additional REST API, please [file an issue or send us an email](http://zeppelin.apache.org/community.html).
## Helium REST API List
### List of all available helium packages
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method returns all the available helium packages in configured registries.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/all```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td>Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<pre>
{
"status": "OK",
"message": "",
"body": {
"zeppelin.clock": [
{
"registry": "local",
"pkg": {
"type": "APPLICATION",
"name": "zeppelin.clock",
"description": "Clock (example)",
"artifact": "zeppelin-examples\/zeppelin-example-clock\/target\/zeppelin-example-clock-0.7.0-SNAPSHOT.jar",
"className": "org.apache.zeppelin.example.app.clock.Clock",
"resources": [
[
":java.util.Date"
]
],
"icon": "icon"
},
"enabled": false
}
],
"zeppelin-bubblechart": [
{
"registry": "local",
"pkg": {
"type": "VISUALIZATION",
"name": "zeppelin-bubblechart",
"description": "Animated bubble chart",
"artifact": ".\/..\/helium\/zeppelin-bubble",
"icon": "icon"
},
"enabled": true
},
{
"registry": "local",
"pkg": {
"type": "VISUALIZATION",
"name": "zeppelin-bubblechart",
"description": "Animated bubble chart",
"artifact": "zeppelin-bubblechart@0.0.2",
"icon": "icon"
},
"enabled": false
}
],
"zeppelin_horizontalbar": [
{
"registry": "local",
"pkg": {
"type": "VISUALIZATION",
"name": "zeppelin_horizontalbar",
"description": "Horizontal Bar chart (example)",
"artifact": ".\/zeppelin-examples\/zeppelin-example-horizontalbar",
"icon": "icon"
},
"enabled": true
}
]
}
}
</pre>
</td>
</tr>
</table>
<br/>
### Suggest Helium application
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method returns suggested helium application for the paragraph.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/suggest/[Note ID]/[Paragraph ID]```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td>Fail code</td>
<td>
404 if the note or paragraph does not exist <br />
500
</td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<pre>
{
"status": "OK",
"message": "",
"body": {
"available": [
{
"registry": "local",
"pkg": {
"type": "APPLICATION",
"name": "zeppelin.clock",
"description": "Clock (example)",
"artifact": "zeppelin-examples\/zeppelin-example-clock\/target\/zeppelin-example-clock-0.7.0-SNAPSHOT.jar",
"className": "org.apache.zeppelin.example.app.clock.Clock",
"resources": [
[
":java.util.Date"
]
],
"icon": "icon"
},
"enabled": true
}
]
}
}
</pre>
</td>
</tr>
</table>
<br/>
### Load helium Application on a paragraph
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method returns a helium Application id on success.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/load/[Note ID]/[Paragraph ID]```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td>Fail code</td>
<td>
404 if the note or paragraph does not exist <br/>
500 for any other errors
</td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<pre>
{
"status": "OK",
"message": "",
"body": "app_2C5FYRZ1E-20170108-040449_2068241472zeppelin_clock"
}
</pre>
</td>
</tr>
</table>
<br/>
### Load bundled visualization script
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method returns bundled helium visualization javascript. When refresh=true (optional) is provided, Zeppelin rebuilds the bundle; otherwise, it is served from cache.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizations/load[?refresh=true]```</td>
</tr>
<tr>
<td>Success code</td>
<td>200 response body is executable javascript</td>
</tr>
<tr>
<td>Fail code</td>
<td>
200 response body is an error message string that starts with ERROR:<br/>
</td>
</tr>
</table>
<br/>
### Enable package
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method enables a helium package. Needs artifact name in input payload</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/enable/[Package Name]```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td>Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample input</td>
<td>
<pre>
zeppelin-examples/zeppelin-example-clock/target/zeppelin-example-clock-0.7.0-SNAPSHOT.jar
</pre>
</td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<pre>
{"status":"OK"}
</pre>
</td>
</tr>
</table>
<br/>
### Disable package
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method disables a helium package.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/disable/[Package Name]```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td> Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<code>{"status":"OK"}</code>
</td>
</tr>
</table>
<br />
### Get visualization display order
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method returns display order of enabled visualization packages.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizationOrder```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td> Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<code>{"status":"OK","body":["zeppelin_horizontalbar","zeppelin-bubblechart"]}</code>
</td>
</tr>
</table>
<br />
### Set visualization display order
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method sets visualization packages display order.</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/helium/visualizationOrder```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td> Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample JSON input</td>
<td>
<code>["zeppelin-bubblechart", "zeppelin_horizontalbar"]</code>
</td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>
<code>{"status":"OK"}</code>
</td>
</tr>
</table>

File diff suppressed because it is too large Load diff

View file

@ -46,6 +46,8 @@ If someone who doesn't have **read** permission is trying to access the notebook
<center><img src="../assets/themes/zeppelin/img/docs-img/insufficient_privileges.png"></center>
By default, when you create a new note, the owner is the user who created it, and the readers/writers lists are empty, which means the note is shared publicly. If you don't want notes to be shared by default, you can set `zeppelin.notebook.public` to false in `zeppelin-site.xml`.
## How it works
In this section, we will explain the detail about how the notebook authorization works in backend side.

View file

@ -41,10 +41,10 @@ cp conf/shiro.ini.template conf/shiro.ini
For the further information about `shiro.ini` file format, please refer to [Shiro Configuration](http://shiro.apache.org/configuration.html#Configuration-INISections).
### 3. Secure the Websocket channel
### 2. Secure the Websocket channel
Set the property **zeppelin.anonymous.allowed** to **false** in `conf/zeppelin-site.xml`. If you don't have this file yet, just copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`.
### 4. Start Zeppelin
### 3. Start Zeppelin
```
bin/zeppelin-daemon.sh start (or restart)
@ -52,7 +52,7 @@ bin/zeppelin-daemon.sh start (or restart)
Then you can browse Zeppelin at [http://localhost:8080](http://localhost:8080).
### 5. Login
### 4. Login
Finally, you can login using one of the below **username/password** combinations.
<center><img src="../assets/themes/zeppelin/img/docs-img/zeppelin-login.png"></center>
@ -161,7 +161,7 @@ zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
securityManager.realms = $zeppelinHubRealm
```
> Note: ZeppelinHub is not releated to apache Zeppelin project.
> Note: ZeppelinHub is not related to the Apache Zeppelin project.
## Secure your Zeppelin information (optional)
By default, anyone who is defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
@ -180,7 +180,7 @@ In this case, only who have `admin` role can see **Interpreter Setting**, **Cred
If you want to grant this permission to other users, you can change **roles[ ]** as you defined at `[users]` section.
<br/>
> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file. This documentation is originally from [SECURITY-README.md](https://github.com/apache/zeppelin/blob/master/SECURITY-README.md).
> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file.
## Other authentication methods

View file

@ -130,6 +130,23 @@ Or using the following setting in **zeppelin-site.xml**:
</property>
```
In order to set custom KMS key region, set the following environment variable in the file **zeppelin-env.sh**:
```
export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION = kms-key-region
```
Or using the following setting in **zeppelin-site.xml**:
```
<property>
<name>zeppelin.notebook.s3.kmsKeyRegion</name>
<value>target-region</value>
<description>AWS KMS key region in your AWS account</description>
</property>
```
The format of `target-region` is described in more detail [here](http://docs.aws.amazon.com/general/latest/gr/rande.html#kms_region) in the second `Region` column (e.g. `us-east-1`).
#### Custom Encryption Materials Provider class
You may use a custom [``EncryptionMaterialsProvider``](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/EncryptionMaterialsProvider.html) class as long as it is available in the classpath and able to initialize itself from system properties or another mechanism. To use this, set the following environment variable in the file **zeppelin-env.sh**:
@ -238,4 +255,4 @@ export ZEPPELINHUB_API_TOKEN = ZeppelinHub token
export ZEPPELINHUB_API_ADDRESS = address of ZeppelinHub service (e.g. https://www.zeppelinhub.com)
```
You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).
You can get more information on generating `token` and using authentication on the corresponding [help page](http://help.zeppelinhub.com/zeppelin_integration/#add-a-new-zeppelin-instance-and-generate-a-token).

View file

@ -23,9 +23,7 @@ import java.util.*;
import com.google.gson.Gson;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
/**
@ -75,19 +73,19 @@ public class HDFSFileInterpreter extends FileInterpreter {
public String type;
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("\nAccessTime = " + accessTime);
sb.append("\nBlockSize = " + blockSize);
sb.append("\nChildrenNum = " + childrenNum);
sb.append("\nFileId = " + fileId);
sb.append("\nGroup = " + group);
sb.append("\nLength = " + length);
sb.append("\nModificationTime = " + modificationTime);
sb.append("\nOwner = " + owner);
sb.append("\nPathSuffix = " + pathSuffix);
sb.append("\nPermission = " + permission);
sb.append("\nReplication = " + replication);
sb.append("\nStoragePolicy = " + storagePolicy);
sb.append("\nType = " + type);
sb.append("\nAccessTime = ").append(accessTime);
sb.append("\nBlockSize = ").append(blockSize);
sb.append("\nChildrenNum = ").append(childrenNum);
sb.append("\nFileId = ").append(fileId);
sb.append("\nGroup = ").append(group);
sb.append("\nLength = ").append(length);
sb.append("\nModificationTime = ").append(modificationTime);
sb.append("\nOwner = ").append(owner);
sb.append("\nPathSuffix = ").append(pathSuffix);
sb.append("\nPermission = ").append(permission);
sb.append("\nReplication = ").append(replication);
sb.append("\nStoragePolicy = ").append(storagePolicy);
sb.append("\nType = ").append(type);
return sb.toString();
}
}
@ -162,7 +160,7 @@ public class HDFSFileInterpreter extends FileInterpreter {
private String listDate(OneFileStatus fs) {
return new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date(fs.modificationTime));
}
private String ListOne(String path, OneFileStatus fs) {
private String listOne(String path, OneFileStatus fs) {
if (args.flags.contains(new Character('l'))) {
StringBuilder sb = new StringBuilder();
sb.append(listPermission(fs) + "\t");
@ -194,7 +192,7 @@ public class HDFSFileInterpreter extends FileInterpreter {
String str = cmd.runCommand(cmd.getFileStatus, filePath, null);
SingleFileStatus sfs = gson.fromJson(str, SingleFileStatus.class);
if (sfs != null) {
return ListOne(filePath, sfs.FileStatus);
return listOne(filePath, sfs.FileStatus);
}
} catch (Exception e) {
logger.error("listFile: " + filePath, e);
@ -218,7 +216,7 @@ public class HDFSFileInterpreter extends FileInterpreter {
allFiles.FileStatuses.FileStatus != null)
{
for (OneFileStatus fs : allFiles.FileStatuses.FileStatus)
all = all + ListOne(path, fs) + '\n';
all = all + listOne(path, fs) + '\n';
}
}
return all;

89
helium-dev/pom.xml Normal file
View file

@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!-- Maven module for the Helium development interpreter; inherits from the zeppelin parent POM. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin</artifactId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>helium-dev</artifactId>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Helium development interpreter</name>
<dependencies>
<!-- Scope "provided": needed to compile, but not picked up by the runtime-scope
     copy-dependencies execution below. -->
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- During the package phase, fill the interpreter/helium-dev directory with this module's
     runtime dependencies and with the module's own artifact. -->
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<!-- Execution 1: copy the runtime-scope dependencies of this module. -->
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<!-- Two levels above the build directory; presumably <repository root>/interpreter/helium-dev
     when the build directory is helium-dev/target. TODO confirm. -->
<outputDirectory>${project.build.directory}/../../interpreter/helium-dev</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<!-- Execution 2: copy the artifact built by this module into the same directory. -->
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/helium-dev</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<!-- NOTE(review): includeScope looks like a copy-dependencies parameter; confirm whether it
     has any effect on the "copy" goal, which selects artifacts through artifactItems. -->
<includeScope>runtime</includeScope>
<artifactItems>
<!-- The single item is this module itself, addressed through its own coordinates. -->
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -14,7 +14,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.interpreter.dev;
package org.apache.zeppelin.helium;
import java.util.LinkedList;
import java.util.List;
@ -48,7 +49,7 @@ public class DevInterpreter extends Interpreter {
}
/**
* event handler for ZeppelinApplicationDevServer
* event handler for org.apache.zeppelin.helium.ZeppelinApplicationDevServer
*/
public static interface InterpreterEvent {
public InterpreterResult interpret(String st, InterpreterContext context);

View file

@ -14,23 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.interpreter.dev;
package org.apache.zeppelin.helium;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import com.google.gson.Gson;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.PatternLayout;
import org.apache.zeppelin.helium.*;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.apache.zeppelin.resource.ResourceSet;
import org.apache.zeppelin.resource.WellKnownResourceName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -47,7 +44,7 @@ public class ZeppelinApplicationDevServer extends ZeppelinDevServer {
public ZeppelinApplicationDevServer(final String className, ResourceSet resourceSet) throws
Exception {
this(ZeppelinDevServer.DEFAULT_TEST_INTERPRETER_PORT, className, resourceSet);
this(Constants.ZEPPELIN_INTERPRETER_DEFAUlT_PORT, className, resourceSet);
}
public ZeppelinApplicationDevServer(int port, String className, ResourceSet resourceSet) throws

View file

@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.interpreter.dev;
package org.apache.zeppelin.helium;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Properties;
import org.apache.thrift.TException;
import org.apache.zeppelin.helium.DevInterpreter.InterpreterEvent;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.dev.DevInterpreter.InterpreterEvent;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer;
import org.slf4j.Logger;
@ -34,11 +34,10 @@ import org.slf4j.LoggerFactory;
*/
public class ZeppelinDevServer extends
RemoteInterpreterServer implements InterpreterEvent, InterpreterOutputChangeListener {
final Logger logger = LoggerFactory.getLogger(ZeppelinDevServer.class);
public static final int DEFAULT_TEST_INTERPRETER_PORT = 29914;
private static final Logger logger = LoggerFactory.getLogger(ZeppelinDevServer.class);
DevInterpreter interpreter = null;
InterpreterOutput out;
private DevInterpreter interpreter = null;
private InterpreterOutput out;
public ZeppelinDevServer(int port) throws TException {
super(port);
}
@ -47,21 +46,21 @@ public class ZeppelinDevServer extends
protected Interpreter getInterpreter(String sessionKey, String className) throws TException {
synchronized (this) {
InterpreterGroup interpreterGroup = getInterpreterGroup();
if (interpreterGroup == null) {
if (interpreterGroup == null || interpreterGroup.isEmpty()) {
createInterpreter(
"dev",
sessionKey,
DevInterpreter.class.getName(),
new HashMap<String, String>(),
"anonymous");
Interpreter intp = super.getInterpreter(sessionKey, className);
interpreter = (DevInterpreter) (
((LazyOpenInterpreter) intp).getInnerInterpreter());
interpreter.setInterpreterEvent(this);
notify();
}
}
Interpreter intp = super.getInterpreter(sessionKey, className);
interpreter = (DevInterpreter) (
((LazyOpenInterpreter) intp).getInnerInterpreter());
interpreter.setInterpreterEvent(this);
return super.getInterpreter(sessionKey, className);
}

View file

@ -0,0 +1,19 @@
[
{
"group": "dev",
"name": "dev",
"className": "org.apache.zeppelin.helium.DevInterpreter",
"properties": {
"port": {
"envName": "PORT",
"propertyName": "port",
"defaultValue": "jdbc:postgresql://localhost:5432/",
"description": "The port for the Helium development interpreter."
}
},
"editor": {
"language": "helium",
"editOnDblClick": false
}
}
]

View file

@ -32,7 +32,7 @@
<name>Zeppelin: Apache Ignite interpreter</name>
<properties>
<ignite.version>1.7.0</ignite.version>
<ignite.version>1.8.0</ignite.version>
</properties>
<dependencies>

View file

@ -76,23 +76,6 @@ public class IgniteInterpreter extends Interpreter {
static final String IGNITE_CFG_URL = "ignite.config.url";
static {
Interpreter.register(
"ignite",
"ignite",
IgniteInterpreter.class.getName(),
true,
new InterpreterPropertyBuilder()
.add(IGNITE_ADDRESSES, "127.0.0.1:47500..47509",
"Coma separated list of addresses "
+ "(e.g. 127.0.0.1:47500 or 127.0.0.1:47500..47509)")
.add(IGNITE_CLIENT_MODE, "true", "Client mode. true or false")
.add(IGNITE_CFG_URL, "", "Configuration URL. Overrides all other settings.")
.add(IGNITE_PEER_CLASS_LOADING_ENABLED, "true",
"Peer class loading enabled. true or false")
.build());
}
private Logger logger = LoggerFactory.getLogger(IgniteInterpreter.class);
private Ignite ignite;
private ByteArrayOutputStream out;

View file

@ -57,17 +57,6 @@ public class IgniteSqlInterpreter extends Interpreter {
static final String IGNITE_JDBC_URL = "ignite.jdbc.url";
static {
Interpreter.register(
"ignitesql",
"ignite",
IgniteSqlInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(IGNITE_JDBC_URL,
"jdbc:ignite:cfg://default-ignite-jdbc.xml", "Ignite JDBC connection URL.")
.build());
}
private Logger logger = LoggerFactory.getLogger(IgniteSqlInterpreter.class);
private Connection conn;

View file

@ -0,0 +1,46 @@
[
{
"group": "ignite",
"name": "ignite",
"className": "org.apache.zeppelin.ignite.IgniteInterpreter",
"properties": {
"ignite.addresses": {
"envName": null,
"propertyName": "ignite.addresses",
"defaultValue": "127.0.0.1:47500..47509",
"description": "Comma separated list of addresses (e.g. 127.0.0.1:47500 or 127.0.0.1:47500..47509)"
},
"ignite.clientMode": {
"envName": null,
"propertyName": "ignite.clientMode",
"defaultValue": "true",
"description": "Client mode. true or false"
},
"ignite.config.url": {
"envName": null,
"propertyName": "ignite.config.url",
"defaultValue": "",
"description": "Configuration URL. Overrides all other settings."
},
"ignite.peerClassLoadingEnabled": {
"envName": null,
"propertyName": "ignite.peerClassLoadingEnabled",
"defaultValue": "true",
"description": "Peer class loading enabled. true or false"
}
}
},
{
"group": "ignite",
"name": "ignitesql",
"className": "org.apache.zeppelin.ignite.IgniteSqlInterpreter",
"properties": {
"ignite.jdbc.url": {
"envName": null,
"propertyName": "ignite.jdbc.url",
"defaultValue": "jdbc:ignite:cfg://default-ignite-jdbc.xml",
"description": "Ignite JDBC connection URL."
}
}
}
]

View file

@ -64,6 +64,7 @@ public class IgniteInterpreterTest {
props.setProperty(IgniteSqlInterpreter.IGNITE_JDBC_URL, "jdbc:ignite:cfg://cache=person@default-ignite-jdbc.xml");
props.setProperty(IgniteInterpreter.IGNITE_CLIENT_MODE, "false");
props.setProperty(IgniteInterpreter.IGNITE_PEER_CLASS_LOADING_ENABLED, "false");
props.setProperty(IgniteInterpreter.IGNITE_ADDRESSES, HOST);
intp = new IgniteInterpreter(props);
intp.open();

View file

@ -15,12 +15,26 @@
package org.apache.zeppelin.jdbc;
import static org.apache.commons.lang.StringUtils.containsIgnoreCase;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.PrivilegedExceptionAction;
import java.sql.*;
import java.util.*;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.dbcp2.ConnectionFactory;
import org.apache.commons.dbcp2.DriverManagerConnectionFactory;
@ -30,10 +44,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.ObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.jdbc.security.JDBCSecurityImpl;
@ -80,7 +91,7 @@ public class JDBCInterpreter extends Interpreter {
static final String JDBC_DEFAULT_PASSWORD_KEY = "default.password";
static final String COMMON_KEY = "common";
static final String MAX_LINE_KEY = "max_count";
static final String MAX_LINE_DEFAULT = "1000";
static final int MAX_LINE_DEFAULT = 1000;
static final String DEFAULT_KEY = "default";
static final String DRIVER_KEY = "driver";
@ -121,12 +132,14 @@ public class JDBCInterpreter extends Interpreter {
};
private static final List<InterpreterCompletion> NO_COMPLETION = new ArrayList<>();
private int maxLineResults;
/**
 * Creates the interpreter with empty user-configuration, SQL-completer and prefix-property
 * maps, and sets maxLineResults to MAX_LINE_DEFAULT.
 *
 * @param property interpreter properties, handed to the Interpreter super constructor
 */
public JDBCInterpreter(Properties property) {
super(property);
jdbcUserConfigurationsMap = new HashMap<>();
propertyKeySqlCompleterMap = new HashMap<>();
basePropretiesMap = new HashMap<>();
// start from the built-in default; no property has been inspected at this point
maxLineResults = MAX_LINE_DEFAULT;
}
public HashMap<String, Properties> getPropertiesMap() {
@ -146,9 +159,9 @@ public class JDBCInterpreter extends Interpreter {
prefixProperties = basePropretiesMap.get(keyValue[0]);
} else {
prefixProperties = new Properties();
basePropretiesMap.put(keyValue[0], prefixProperties);
basePropretiesMap.put(keyValue[0].trim(), prefixProperties);
}
prefixProperties.put(keyValue[1], property.getProperty(propertyKey));
prefixProperties.put(keyValue[1].trim(), property.getProperty(propertyKey));
}
}
@ -175,6 +188,14 @@ public class JDBCInterpreter extends Interpreter {
for (String propertyKey : basePropretiesMap.keySet()) {
propertyKeySqlCompleterMap.put(propertyKey, createSqlCompleter(null));
}
setMaxLineResults();
}
/**
 * Overrides maxLineResults with the MAX_LINE_KEY entry of the COMMON_KEY property group,
 * when both the group and the entry exist. Otherwise the current value is kept.
 *
 * @throws NumberFormatException if the configured value is not a valid integer
 */
private void setMaxLineResults() {
  if (!basePropretiesMap.containsKey(COMMON_KEY)) {
    return;
  }
  Properties commonProperties = basePropretiesMap.get(COMMON_KEY);
  if (commonProperties.containsKey(MAX_LINE_KEY)) {
    maxLineResults = Integer.parseInt(commonProperties.getProperty(MAX_LINE_KEY));
  }
}
private SqlCompleter createSqlCompleter(Connection jdbcConnection) {
@ -433,6 +454,57 @@ public class JDBCInterpreter extends Interpreter {
return updatedCount < 0 && columnCount <= 0 ? true : false;
}
/**
 * Splits a block of SQL text into individual statements on {@code ;}.
 *
 * <p>A semicolon does not split when it sits inside a single-quoted or double-quoted section,
 * or when it directly follows a backslash. A backslash escapes exactly the next character
 * (whatever it is), so an escaped quote neither opens nor closes a quoted section and a
 * backslash before an ordinary character has no lasting effect. A single quote inside a
 * double-quoted section (and vice versa) is treated as plain text.
 *
 * <p>Statement text is returned unmodified (no trimming, the terminating {@code ;} removed).
 * Statements that are empty or contain only whitespace, e.g. the newline after a final
 * {@code ;}, are skipped so they are never sent to the database.
 *
 * <p>Inspired from https://github.com/postgres/pgadmin3/blob/794527d97e2e3b01399954f3b79c8e2585b908dd/
 * pgadmin/dlg/dlgProperty.cpp#L999-L1045
 *
 * @param sql text that may hold several statements separated by {@code ;}
 * @return the non-blank statements in their original order; empty if there are none
 */
protected ArrayList<String> splitSqlQueries(String sql) {
  ArrayList<String> queries = new ArrayList<>();
  StringBuilder query = new StringBuilder();
  boolean escaped = false;
  boolean inSingleQuote = false;
  boolean inDoubleQuote = false;

  for (int i = 0; i < sql.length(); i++) {
    char c = sql.charAt(i);

    if (escaped) {
      // the previous character was a backslash: take this one literally, whatever it is
      escaped = false;
      query.append(c);
      continue;
    }

    if (c == ';' && !inSingleQuote && !inDoubleQuote) {
      // statement boundary: flush what has been collected, dropping blank statements
      if (!query.toString().trim().isEmpty()) {
        queries.add(query.toString());
      }
      query = new StringBuilder();
      continue;
    }

    if (c == '\\') {
      escaped = true;
    } else if (c == '\'' && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote;
    } else if (c == '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote;
    }
    query.append(c);
  }

  // text after the last separator (or the whole input when there is no separator)
  if (!query.toString().trim().isEmpty()) {
    queries.add(query.toString());
  }
  return queries;
}
private InterpreterResult executeSql(String propertyKey, String sql,
InterpreterContext interpreterContext) {
Connection connection;
@ -441,60 +513,68 @@ public class JDBCInterpreter extends Interpreter {
String paragraphId = interpreterContext.getParagraphId();
String user = interpreterContext.getAuthenticationInfo().getUser();
try {
String results = null;
connection = getConnection(propertyKey, interpreterContext);
InterpreterResult interpreterResult = new InterpreterResult(InterpreterResult.Code.SUCCESS);
try {
connection = getConnection(propertyKey, interpreterContext);
if (connection == null) {
return new InterpreterResult(Code.ERROR, "Prefix not found.");
}
statement = connection.createStatement();
if (statement == null) {
return new InterpreterResult(Code.ERROR, "Prefix not found.");
}
ArrayList<String> multipleSqlArray = splitSqlQueries(sql);
for (int i = 0; i < multipleSqlArray.size(); i++) {
String sqlToExecute = multipleSqlArray.get(i);
statement = connection.createStatement();
if (statement == null) {
return new InterpreterResult(Code.ERROR, "Prefix not found.");
}
try {
getJDBCConfiguration(user).saveStatement(paragraphId, statement);
try {
getJDBCConfiguration(user).saveStatement(paragraphId, statement);
boolean isResultSetAvailable = statement.execute(sql);
if (isResultSetAvailable) {
resultSet = statement.getResultSet();
boolean isResultSetAvailable = statement.execute(sqlToExecute);
if (isResultSetAvailable) {
resultSet = statement.getResultSet();
// Regards that the command is DDL.
if (isDDLCommand(statement.getUpdateCount(), resultSet.getMetaData().getColumnCount())) {
results = "Query executed successfully.";
// Regards that the command is DDL.
if (isDDLCommand(statement.getUpdateCount(),
resultSet.getMetaData().getColumnCount())) {
interpreterResult.add(InterpreterResult.Type.TEXT,
"Query executed successfully.");
} else {
interpreterResult.add(
getResults(resultSet, !containsIgnoreCase(sqlToExecute, EXPLAIN_PREDICATE)));
}
} else {
results = getResults(resultSet, !containsIgnoreCase(sql, EXPLAIN_PREDICATE));
// Response contains either an update count or there are no results.
int updateCount = statement.getUpdateCount();
interpreterResult.add(InterpreterResult.Type.TEXT,
"Query executed successfully. Affected rows : " +
updateCount);
}
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) { /*ignored*/ }
}
if (statement != null) {
try {
statement.close();
} catch (SQLException e) { /*ignored*/ }
}
} else {
// Response contains either an update count or there are no results.
int updateCount = statement.getUpdateCount();
results = "Query executed successfully. Affected rows : " + updateCount;
}
//In case user ran an insert/update/upsert statement
if (connection.getAutoCommit() != true) connection.commit();
} finally {
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) { /*ignored*/ }
}
if (statement != null) {
try {
statement.close();
} catch (SQLException e) { /*ignored*/ }
}
if (connection != null) {
try {
connection.close();
} catch (SQLException e) { /*ignored*/ }
}
getJDBCConfiguration(user).removeStatement(paragraphId);
}
return new InterpreterResult(Code.SUCCESS, results);
//In case user ran an insert/update/upsert statement
if (connection != null) {
try {
if (!connection.getAutoCommit()) {
connection.commit();
}
connection.close();
} catch (SQLException e) { /*ignored*/ }
}
getJDBCConfiguration(user).removeStatement(paragraphId);
} catch (Exception e) {
logger.error("Cannot run " + sql, e);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@ -507,9 +587,10 @@ public class JDBCInterpreter extends Interpreter {
} catch (SQLException e1) {
e1.printStackTrace();
}
return new InterpreterResult(Code.ERROR, errorMsg);
interpreterResult.add(errorMsg);
return new InterpreterResult(Code.ERROR, interpreterResult.message());
}
return interpreterResult;
}
/**
@ -600,8 +681,7 @@ public class JDBCInterpreter extends Interpreter {
}
public int getMaxResult() {
return Integer.valueOf(
basePropretiesMap.get(COMMON_KEY).getProperty(MAX_LINE_KEY, MAX_LINE_DEFAULT));
return maxLineResults;
}
boolean isConcurrentExecution() {

View file

@ -15,9 +15,6 @@
package org.apache.zeppelin.jdbc;
import static java.lang.String.format;
import static org.apache.zeppelin.interpreter.Interpreter.logger;
import static org.apache.zeppelin.interpreter.Interpreter.register;
import static org.apache.zeppelin.jdbc.JDBCInterpreter.DEFAULT_KEY;
import static org.apache.zeppelin.jdbc.JDBCInterpreter.DEFAULT_DRIVER;
import static org.apache.zeppelin.jdbc.JDBCInterpreter.DEFAULT_PASSWORD;
import static org.apache.zeppelin.jdbc.JDBCInterpreter.DEFAULT_USER;
@ -29,19 +26,17 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.*;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.jdbc.JDBCInterpreter;
import org.apache.zeppelin.scheduler.FIFOScheduler;
import org.apache.zeppelin.scheduler.ParallelScheduler;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.user.Credentials;
import org.apache.zeppelin.user.UserCredentials;
import org.apache.zeppelin.user.UsernamePassword;
import org.junit.Before;
@ -171,6 +166,49 @@ public class JDBCInterpreterTest extends BasicJDBCTestCaseAdapter {
assertEquals("ID\tNAME\na\ta_name\nb\tb_name\n", interpreterResult.message().get(0).getData());
}
/**
 * Checks that splitSqlQueries only splits on semicolons outside quoted sections: the input
 * holds four statements, three of which carry a ';' inside single or double quotes.
 */
@Test
public void testSplitSqlQuery() throws SQLException, IOException {
  JDBCInterpreter interpreter = new JDBCInterpreter(new Properties());
  interpreter.open();

  String script = "insert into test_table(id, name) values ('a', ';\"');" +
      "select * from test_table;" +
      "select * from test_table WHERE ID = \";'\";" +
      "select * from test_table WHERE ID = ';'";
  ArrayList<String> statements = interpreter.splitSqlQueries(script);

  assertEquals(4, statements.size());
  assertEquals("insert into test_table(id, name) values ('a', ';\"')", statements.get(0));
  assertEquals("select * from test_table", statements.get(1));
  assertEquals("select * from test_table WHERE ID = \";'\"", statements.get(2));
  assertEquals("select * from test_table WHERE ID = ';'", statements.get(3));
}
/**
 * Runs two semicolon-separated SELECT statements in one paragraph and expects one TABLE
 * message per statement: the full table first, then a header-only result.
 */
@Test
public void testSelectMultipleQuries() throws SQLException, IOException {
  // connection settings for the H2 test database
  Properties jdbcProperties = new Properties();
  jdbcProperties.setProperty("common.max_count", "1000");
  jdbcProperties.setProperty("common.max_retry", "3");
  jdbcProperties.setProperty("default.driver", "org.h2.Driver");
  jdbcProperties.setProperty("default.url", getJdbcConnection());
  jdbcProperties.setProperty("default.user", "");
  jdbcProperties.setProperty("default.password", "");

  JDBCInterpreter interpreter = new JDBCInterpreter(jdbcProperties);
  interpreter.open();

  String script = "select * from test_table;" +
      "select * from test_table WHERE ID = ';';";
  InterpreterResult result = interpreter.interpret(script, interpreterContext);

  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(2, result.message().size());

  // first statement: every row of test_table
  assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
  assertEquals("ID\tNAME\na\ta_name\nb\tb_name\nc\tnull\n", result.message().get(0).getData());

  // second statement: no matching row, header only
  assertEquals(InterpreterResult.Type.TABLE, result.message().get(1).getType());
  assertEquals("ID\tNAME\n", result.message().get(1).getData());
}
@Test
public void testSelectQueryWithNull() throws SQLException, IOException {
Properties properties = new Properties();

17
livy/README.md Normal file
View file

@ -0,0 +1,17 @@
# Overview
Livy interpreter for Apache Zeppelin
# Prerequisites
You can follow the instructions at [Livy Quick Start](http://livy.io/quickstart.html) to set up livy.
# Run Integration Tests
You can add integration tests to [LivyInterpreterIT.java](https://github.com/apache/zeppelin/blob/master/livy/src/test/java/org/apache/zeppelin/livy/LivyInterpreterIT.java).
You can run the integration tests either on Travis, where the environment is set up for you, or locally. To run them locally, download livy-0.2 and spark-1.5.2, then use the following
script to run the integration tests.
```bash
#!/usr/bin/env bash
export LIVY_HOME=<path_of_livy_0.2.0>
export SPARK_HOME=<path_of_spark-1.5.2>
mvn clean verify -pl livy -DfailIfNoTests=false -DskipRat
```

View file

@ -17,15 +17,26 @@
package org.apache.zeppelin.livy;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.annotations.SerializedName;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterUtils;
import org.apache.zeppelin.interpreter.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.security.kerberos.client.KerberosRestTemplate;
import org.springframework.web.client.HttpClientErrorException;
import org.springframework.web.client.RestTemplate;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Base class for livy interpreters.
@ -33,76 +44,85 @@ import java.util.Properties;
public abstract class BaseLivyInterprereter extends Interpreter {
protected static final Logger LOGGER = LoggerFactory.getLogger(BaseLivyInterprereter.class);
private static Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
// -1 means session is not created yet, valid sessionId start from 0
protected int sessionId = -1;
protected String appId;
protected String webUIAddress;
protected volatile SessionInfo sessionInfo;
private String livyURL;
private long sessionCreationTimeout;
protected boolean displayAppInfo;
protected LivyOutputStream out;
protected LivyHelper livyHelper;
private AtomicBoolean sessionExpired = new AtomicBoolean(false);
public BaseLivyInterprereter(Properties property) {
super(property);
this.out = new LivyOutputStream();
this.livyHelper = new LivyHelper(property);
this.livyURL = property.getProperty("zeppelin.livy.url");
this.sessionCreationTimeout = Long.parseLong(
property.getProperty("zeppelin.livy.create.session.timeout", 120 + ""));
}
public abstract String getSessionKind();
@Override
public void open() {
// TODO(zjffdu) move session creation here.
try {
initLivySession();
} catch (LivyException e) {
String msg = "Fail to create session, please check livy interpreter log and " +
"livy server log";
LOGGER.error(msg);
throw new RuntimeException(msg, e);
}
}
@Override
public void close() {
if (sessionId != -1) {
livyHelper.closeSession(sessionId);
// reset sessionId to -1
sessionId = -1;
if (sessionInfo != null) {
closeSession(sessionInfo.id);
// reset sessionInfo to null so that we won't close it twice.
sessionInfo = null;
}
}
protected void createSession(InterpreterContext context) throws Exception {
sessionId = livyHelper.createSession(context, getSessionKind());
protected void initLivySession() throws LivyException {
this.sessionInfo = createSession(getUserName(), getSessionKind());
if (displayAppInfo) {
this.appId = extractStatementResult(
livyHelper.interpret("sc.applicationId", context, sessionId).message().get(0).getData());
livyHelper.interpret(
if (sessionInfo.appId == null) {
// livy 0.2 don't return appId and sparkUiUrl in response so that we need to get it
// explicitly by ourselves.
sessionInfo.appId = extractStatementResult(
interpret("sc.applicationId", false, false).message()
.get(0).getData());
}
interpret(
"val webui=sc.getClass.getMethod(\"ui\").invoke(sc).asInstanceOf[Some[_]].get",
context, sessionId);
this.webUIAddress = extractStatementResult(
livyHelper.interpret(
"webui.getClass.getMethod(\"appUIAddress\").invoke(webui)",
context, sessionId).message().get(0).getData());
LOGGER.info("Create livy session with sessionId: {}, appId: {}, webUI: {}",
sessionId, appId, webUIAddress);
false, false);
if (StringUtils.isEmpty(sessionInfo.appInfo.get("sparkUiUrl"))) {
sessionInfo.webUIAddress = extractStatementResult(
interpret(
"webui.getClass.getMethod(\"appUIAddress\").invoke(webui)", false, false)
.message().get(0).getData());
} else {
sessionInfo.webUIAddress = sessionInfo.appInfo.get("sparkUiUrl");
}
LOGGER.info("Create livy session successfully with sessionId: {}, appId: {}, webUI: {}",
sessionInfo.id, sessionInfo.appId, sessionInfo.webUIAddress);
}
}
public SessionInfo getSessionInfo() {
return sessionInfo;
}
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
try {
// add synchronized, because LivySparkSQLInterperter will use ParallelScheduler
synchronized (this) {
if (sessionId == -1) {
try {
createSession(context);
} catch (Exception e) {
LOGGER.error("Exception while creating livy session", e);
return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());
}
}
}
if (StringUtils.isEmpty(st)) {
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "");
}
if (StringUtils.isEmpty(st)) {
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "");
}
return livyHelper.interpretInput(st, context, sessionId, out,
appId, webUIAddress, displayAppInfo);
} catch (Exception e) {
LOGGER.error("Exception in LivyInterpreter.", e);
try {
return interpret(st, this.displayAppInfo, true);
} catch (LivyException e) {
LOGGER.error("Fail to interpret:" + st, e);
return new InterpreterResult(InterpreterResult.Code.ERROR,
InterpreterUtils.getMostRelevantMessage(e));
}
@ -116,7 +136,7 @@ public abstract class BaseLivyInterprereter extends Interpreter {
* @param result
* @return
*/
private static String extractStatementResult(String result) {
private String extractStatementResult(String result) {
int pos = -1;
if ((pos = result.indexOf("=")) >= 0) {
return result.substring(pos + 1).trim();
@ -128,7 +148,7 @@ public abstract class BaseLivyInterprereter extends Interpreter {
@Override
public void cancel(InterpreterContext context) {
livyHelper.cancelHTTP(context.getParagraphId());
//TODO(zjffdu). Use livy cancel api which is available in livy 0.3
}
@Override
@ -140,4 +160,387 @@ public abstract class BaseLivyInterprereter extends Interpreter {
public int getProgress(InterpreterContext context) {
return 0;
}
/**
 * Creates a livy session with POST /sessions and polls it once per second until it is ready.
 *
 * <p>Every non-empty interpreter property whose name starts with "livy.spark." is sent as
 * session configuration with the leading "livy." removed from its name.
 *
 * @param user user the session is created for
 * @param kind session kind sent in the create request
 * @return the session info of the ready session
 * @throws LivyException if the session finishes before becoming ready, is not ready within
 *     sessionCreationTimeout seconds, the wait is interrupted, or any other error occurs
 */
private SessionInfo createSession(String user, String kind)
    throws LivyException {
  try {
    Map<String, String> conf = new HashMap<>();
    for (Map.Entry<Object, Object> entry : property.entrySet()) {
      String key = entry.getKey().toString();
      if (!key.startsWith("livy.spark.")) {
        continue;
      }
      String value = entry.getValue().toString();
      if (!value.isEmpty()) {
        // substring(5) drops the "livy." prefix
        conf.put(key.substring(5), value);
      }
    }

    CreateSessionRequest request = new CreateSessionRequest(kind, user, conf);
    SessionInfo session = SessionInfo.fromJson(
        callRestAPI("/sessions", "POST", request.toJson()));
    long start = System.currentTimeMillis();
    // pull the session status until it is ready, finished or timed out
    while (!session.isReady()) {
      LOGGER.info("Session {} is in state {}, appId {}", session.id, session.state,
          session.appId);
      if (session.isFinished()) {
        String msg = "Session " + session.id + " is finished, appId: " + session.appId
            + ", log: " + session.log;
        LOGGER.error(msg);
        throw new LivyException(msg);
      }
      if ((System.currentTimeMillis() - start) / 1000 > sessionCreationTimeout) {
        String msg = "The creation of session " + session.id + " is timeout within "
            + sessionCreationTimeout + " seconds, appId: " + session.appId
            + ", log: " + session.log;
        LOGGER.error(msg);
        throw new LivyException(msg);
      }
      Thread.sleep(1000);
      session = getSessionInfo(session.id);
    }
    return session;
  } catch (LivyException e) {
    // already a LivyException: propagate it unchanged instead of wrapping it a second time
    LOGGER.error("Error when creating livy session for user " + user, e);
    throw e;
  } catch (InterruptedException e) {
    // keep the interrupt visible to the caller; the catch would otherwise clear it for good
    Thread.currentThread().interrupt();
    LOGGER.error("Error when creating livy session for user " + user, e);
    throw new LivyException(e);
  } catch (Exception e) {
    LOGGER.error("Error when creating livy session for user " + user, e);
    throw new LivyException(e);
  }
}
/**
 * Fetches the current info of a session with GET /sessions/{sessionId}.
 *
 * @param sessionId id of the session to look up
 * @return the session info parsed from the JSON response
 * @throws LivyException if the REST call fails
 */
private SessionInfo getSessionInfo(int sessionId) throws LivyException {
  String json = callRestAPI("/sessions/" + sessionId, "GET");
  return SessionInfo.fromJson(json);
}
/**
 * Executes code as a livy statement and waits, polling once per second, until its result is
 * available.
 *
 * <p>If the session is reported as not found, a new session is initialised (once, even when
 * several threads notice the expiry) and the statement is submitted again.
 *
 * @param code code to execute
 * @param displayAppInfo passed through to the conversion of the statement result
 * @param appendSessionExpired whether to prepend a notice to the result when the session had
 *     expired and was re-created during this call
 * @return the interpreter result built from the statement output
 * @throws LivyException if a REST call fails, the session cannot be re-created, or the wait
 *     is interrupted
 */
public InterpreterResult interpret(String code, boolean displayAppInfo,
                                   boolean appendSessionExpired)
    throws LivyException {
  StatementInfo stmtInfo = null;
  // local flag, deliberately named differently from the sessionExpired field
  boolean sessionRecreated = false;
  try {
    stmtInfo = executeStatement(new ExecuteRequest(code));
  } catch (SessionNotFoundException e) {
    LOGGER.warn("Livy session {} is expired, new session will be created.", sessionInfo.id);
    sessionRecreated = true;
    // we don't want to create multiple sessions because it is possible to have multiple thread
    // to call this method, like LivySparkSQLInterpreter which use ParallelScheduler. So we need
    // to check session status again in this sync block
    synchronized (this) {
      if (isSessionExpired()) {
        initLivySession();
      }
    }
    stmtInfo = executeStatement(new ExecuteRequest(code));
  }

  // pull the statement status
  while (!stmtInfo.isAvailable()) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // restore the interrupt status so callers further up can still observe the interruption
      Thread.currentThread().interrupt();
      LOGGER.error("InterruptedException when pulling statement status.", e);
      throw new LivyException(e);
    }
    stmtInfo = getStatementInfo(stmtInfo.id);
  }

  InterpreterResult result = getResultFromStatementInfo(stmtInfo, displayAppInfo);
  if (appendSessionExpired) {
    return appendSessionExpire(result, sessionRecreated);
  }
  return result;
}
/**
 * Checks whether the current session is gone by looking it up on the server.
 *
 * @return {@code true} if the lookup fails with {@link SessionNotFoundException},
 *     {@code false} if the lookup succeeds
 * @throws LivyException any other failure of the lookup, propagated unchanged
 */
private boolean isSessionExpired() throws LivyException {
  boolean expired = false;
  try {
    getSessionInfo(sessionInfo.id);
  } catch (SessionNotFoundException e) {
    expired = true;
  }
  return expired;
}
/**
 * Prepends a red HTML warning about the re-created session to a result.
 *
 * @param result the result to decorate
 * @param sessionExpired whether the session was re-created while producing {@code result}
 * @return {@code result} itself when {@code sessionExpired} is false; otherwise a new result
 *     with the same code, the warning first, and then all messages of {@code result}
 */
private InterpreterResult appendSessionExpire(InterpreterResult result, boolean sessionExpired) {
  if (!sessionExpired) {
    return result;
  }
  String warning =
      "<font color=\"red\">Previous livy session is expired, new livy session is created. "
          + "Paragraphs that depend on this paragraph need to be re-executed!" + "</font>";
  InterpreterResult decorated = new InterpreterResult(result.code());
  decorated.add(InterpreterResult.Type.HTML, warning);
  for (InterpreterResultMessage original : result.message()) {
    decorated.add(original.getType(), original.getData());
  }
  return decorated;
}
/**
 * Converts a finished statement into an {@link InterpreterResult}.
 *
 * <p>Checked in this order: error output, table output
 * ({@code application/vnd.livy.table.v1+json}), PNG image output, then plain text. Plain text
 * starting with {@code <link}, {@code <script}, {@code <style} or {@code <div} is prefixed with
 * {@code %html}.
 *
 * @param stmtInfo the statement whose output is converted
 * @param displayAppInfo when true, plain-text output is wrapped in an {@code %angular} block
 *     followed by the Spark application id and web UI link of the current session
 * @return the resulting interpreter result
 */
private InterpreterResult getResultFromStatementInfo(StatementInfo stmtInfo,
                                                     boolean displayAppInfo) {
  if (stmtInfo.output.isError()) {
    return new InterpreterResult(InterpreterResult.Code.ERROR, stmtInfo.output.evalue);
  }

  //TODO(zjffdu) support other types of data (like json, image and etc)
  String text = stmtInfo.output.data.plain_text;

  // table magic output takes precedence over everything else
  if (stmtInfo.output.data.application_livy_table_json != null) {
    StringBuilder table = new StringBuilder();
    String separator = "";
    for (Map header : stmtInfo.output.data.application_livy_table_json.headers) {
      table.append(separator);
      table.append(header.get("name"));
      separator = "\t";
    }
    table.append("\n");
    for (List<Object> row : stmtInfo.output.data.application_livy_table_json.records) {
      table.append(StringUtils.join(row, "\t")).append("\n");
    }
    return new InterpreterResult(InterpreterResult.Code.SUCCESS,
        InterpreterResult.Type.TABLE, table.toString());
  }

  if (stmtInfo.output.data.image_png != null) {
    return new InterpreterResult(InterpreterResult.Code.SUCCESS,
        InterpreterResult.Type.IMG, stmtInfo.output.data.image_png);
  }

  if (text != null) {
    text = text.trim();
    boolean looksLikeHtml = text.startsWith("<link")
        || text.startsWith("<script")
        || text.startsWith("<style")
        || text.startsWith("<div");
    if (looksLikeHtml) {
      text = "%html " + text;
    }
  }

  if (!displayAppInfo) {
    return new InterpreterResult(InterpreterResult.Code.SUCCESS, text);
  }

  //TODO(zjffdu), use multiple InterpreterResult to display appInfo
  StringBuilder withAppInfo = new StringBuilder()
      .append("%angular ")
      .append("<pre><code>")
      .append(text)
      .append("</code></pre>")
      .append("<hr/>")
      .append("Spark Application Id:" + sessionInfo.appId + "<br/>")
      .append("Spark WebUI: <a href=" + sessionInfo.webUIAddress + ">"
          + sessionInfo.webUIAddress + "</a>");
  return new InterpreterResult(InterpreterResult.Code.SUCCESS, withAppInfo.toString());
}
/**
 * Submits a statement to the current session via
 * {@code POST /sessions/<id>/statements}.
 *
 * @param executeRequest the request whose JSON form is sent as the body
 * @return the statement info parsed from the response body
 * @throws LivyException if the REST call fails
 */
private StatementInfo executeStatement(ExecuteRequest executeRequest)
    throws LivyException {
  String statementsPath = "/sessions/" + sessionInfo.id + "/statements";
  String responseJson = callRestAPI(statementsPath, "POST", executeRequest.toJson());
  return StatementInfo.fromJson(responseJson);
}
/**
 * Fetches a statement of the current session via
 * {@code GET /sessions/<id>/statements/<statementId>}.
 *
 * @param statementId id of the statement to look up
 * @return the statement info parsed from the response body
 * @throws LivyException if the REST call fails
 */
private StatementInfo getStatementInfo(int statementId)
    throws LivyException {
  String statementPath = "/sessions/" + sessionInfo.id + "/statements/" + statementId;
  String responseJson = callRestAPI(statementPath, "GET");
  return StatementInfo.fromJson(responseJson);
}
/**
 * Builds the REST client for talking to Livy.
 *
 * @return a {@link KerberosRestTemplate} when both {@code zeppelin.livy.keytab} and
 *     {@code zeppelin.livy.principal} are non-empty, otherwise a plain {@link RestTemplate}
 */
private RestTemplate getRestTemplate() {
  String keytab = property.getProperty("zeppelin.livy.keytab");
  String principalName = property.getProperty("zeppelin.livy.principal");
  boolean kerberosConfigured =
      StringUtils.isNotEmpty(keytab) && StringUtils.isNotEmpty(principalName);
  if (!kerberosConfigured) {
    return new RestTemplate();
  }
  return new KerberosRestTemplate(keytab, principalName);
}
/**
 * Convenience overload of {@code callRestAPI(targetURL, method, jsonData)} that passes an
 * empty string as the JSON payload.
 *
 * @param targetURL path that is handed to the three-argument overload
 * @param method HTTP method name
 * @return the response body
 * @throws LivyException if the underlying call fails
 */
private String callRestAPI(String targetURL, String method) throws LivyException {
  final String emptyPayload = "";
  return callRestAPI(targetURL, method, emptyPayload);
}
/**
 * Performs one HTTP call against the Livy server and returns the response body.
 *
 * <p>{@code targetURL} is appended to {@code livyURL}. Supported methods are {@code POST}
 * (sends {@code jsonData}), {@code GET} and {@code DELETE}. An {@link HttpClientErrorException}
 * is turned into a regular response so that its status and body go through the same checks.
 * Status 200, 201 and 404 return the body, unless the body is a "Session '<n>' not found."
 * message. Other statuses fail, except for bodies mentioning
 * {@code CreateInteractiveRequest["master"]}, which are returned as-is.
 *
 * @param targetURL path relative to {@code livyURL}
 * @param method one of {@code POST}, {@code GET}, {@code DELETE}
 * @param jsonData request body, only used for {@code POST}
 * @return the response body; may be {@code null} when the server sent no body
 * @throws SessionNotFoundException if the body reports that the session does not exist
 * @throws LivyException if no response was obtained (e.g. unsupported method) or the status
 *     code is not accepted
 */
private String callRestAPI(String targetURL, String method, String jsonData)
    throws LivyException {
  targetURL = livyURL + targetURL;
  LOGGER.debug("Call rest api in {}, method: {}, jsonData: {}", targetURL, method, jsonData);
  RestTemplate restTemplate = getRestTemplate();
  HttpHeaders headers = new HttpHeaders();
  headers.add("Content-Type", "application/json");
  headers.add("X-Requested-By", "zeppelin");
  ResponseEntity<String> response = null;
  try {
    if (method.equals("POST")) {
      HttpEntity<String> entity = new HttpEntity<>(jsonData, headers);
      response = restTemplate.exchange(targetURL, HttpMethod.POST, entity, String.class);
    } else if (method.equals("GET")) {
      HttpEntity<String> entity = new HttpEntity<>(headers);
      response = restTemplate.exchange(targetURL, HttpMethod.GET, entity, String.class);
    } else if (method.equals("DELETE")) {
      HttpEntity<String> entity = new HttpEntity<>(headers);
      response = restTemplate.exchange(targetURL, HttpMethod.DELETE, entity, String.class);
    }
  } catch (HttpClientErrorException e) {
    response = new ResponseEntity<String>(e.getResponseBodyAsString(), e.getStatusCode());
    LOGGER.error(String.format("Error with %s StatusCode: %s",
        response.getStatusCode().value(), e.getResponseBodyAsString()));
  }
  if (response == null) {
    throw new LivyException("No http response returned");
  }
  LOGGER.debug("Get response, StatusCode: {}, responseBody: {}", response.getStatusCode(),
      response.getBody());

  int statusCode = response.getStatusCode().value();
  // The body can be null when the server answers without content, so every inspection of it
  // below is guarded instead of failing with a NullPointerException.
  String responseBody = response.getBody();

  if (statusCode == 200 || statusCode == 201 || statusCode == 404) {
    if (responseBody != null && responseBody.matches("\"Session '\\d+' not found.\"")) {
      throw new SessionNotFoundException(responseBody);
    }
    return responseBody;
  }

  if (responseBody != null
      && responseBody.contains("CreateInteractiveRequest[\\\"master\\\"]")) {
    return responseBody;
  }
  String errorMessage = String.format("Error with %s StatusCode: %s", statusCode, responseBody);
  LOGGER.error(errorMessage);
  throw new LivyException(errorMessage);
}
/**
 * Deletes a session on the Livy server via {@code DELETE /sessions/<sessionId>}.
 * Best effort: any failure is logged and not propagated.
 *
 * @param sessionId id of the session to delete
 */
private void closeSession(int sessionId) {
  String sessionPath = "/sessions/" + sessionId;
  try {
    callRestAPI(sessionPath, "DELETE");
  } catch (Exception e) {
    String logMessage =
        String.format("Error closing session for user with session ID: %s", sessionId);
    LOGGER.error(logMessage, e);
  }
}
/*
* These POJOs are defined here to accommodate Livy 0.3, which is not released yet. The Livy REST
* API changes from version to version, so we keep our own POJOs on the Zeppelin side to absorb
* incompatibilities between versions. Later, when Livy becomes more stable, we can simply depend
* on the Livy client jar.
*/
/**
 * Payload of the create-session request. It is turned into JSON through {@code gson}, so the
 * field names (and the {@code proxyUser} override on {@code user}) define the wire format.
 */
private static class CreateSessionRequest {
// session kind to create
public final String kind;
// serialized under the JSON name "proxyUser" instead of "user"
@SerializedName("proxyUser")
public final String user;
// configuration entries sent with the request
public final Map<String, String> conf;
// Stores the three values as given; no copying or validation is done.
public CreateSessionRequest(String kind, String user, Map<String, String> conf) {
this.kind = kind;
this.user = user;
this.conf = conf;
}
// Returns the JSON representation of this request.
public String toJson() {
return gson.toJson(this);
}
}
/**
 * Description of a Livy session as parsed from JSON by {@link #fromJson(String)}.
 * {@code appId} and {@code webUIAddress} are the only non-final fields.
 */
public static class SessionInfo {
public final int id;
public String appId;
// not assigned by the constructor below
public String webUIAddress;
public final String owner;
public final String proxyUser;
// session state string; compared against "idle", "error", "dead" and "success" below
public final String state;
public final String kind;
public final Map<String, String> appInfo;
public final List<String> log;
// Stores all arguments as given; webUIAddress is left unset.
public SessionInfo(int id, String appId, String owner, String proxyUser, String state,
String kind, Map<String, String> appInfo, List<String> log) {
this.id = id;
this.appId = appId;
this.owner = owner;
this.proxyUser = proxyUser;
this.state = state;
this.kind = kind;
this.appInfo = appInfo;
this.log = log;
}
// True when the state is "idle". Throws NullPointerException if state is null.
public boolean isReady() {
return state.equals("idle");
}
// True when the state is "error", "dead" or "success".
// Throws NullPointerException if state is null.
public boolean isFinished() {
return state.equals("error") || state.equals("dead") || state.equals("success");
}
// Parses a SessionInfo from its JSON representation using gson.
public static SessionInfo fromJson(String json) {
return gson.fromJson(json, SessionInfo.class);
}
}
/**
 * Payload of a statement submission: a single {@code code} field, serialized through
 * {@code gson}.
 */
private static class ExecuteRequest {
// source code to run in the session
public final String code;
public ExecuteRequest(String code) {
this.code = code;
}
// Returns the JSON representation of this request.
public String toJson() {
return gson.toJson(this);
}
}
/**
 * Description of a statement as parsed from JSON by {@link #fromJson(String)}, together with
 * the nested types describing its output.
 */
private static class StatementInfo {
public Integer id;
// statement state string; compared against "available" below
public String state;
public StatementOutput output;
public StatementInfo() {
}
// Parses a StatementInfo from its JSON representation using gson.
public static StatementInfo fromJson(String json) {
return gson.fromJson(json, StatementInfo.class);
}
// True when the state is "available". Throws NullPointerException if state is null.
public boolean isAvailable() {
return state.equals("available");
}
// Output section of a statement.
private static class StatementOutput {
// compared against "error" in isError()
public String status;
public String execution_count;
public Data data;
public String ename;
public String evalue;
public Object traceback;
public TableMagic tableMagic;
// True when the status is "error". Throws NullPointerException if status is null.
public boolean isError() {
return status.equals("error");
}
// Returns the JSON representation of this output.
public String toJson() {
return gson.toJson(this);
}
// Output payloads; each field is bound to the JSON key named in its @SerializedName.
private static class Data {
@SerializedName("text/plain")
public String plain_text;
@SerializedName("image/png")
public String image_png;
@SerializedName("application/json")
public String application_json;
@SerializedName("application/vnd.livy.table.v1+json")
public TableMagic application_livy_table_json;
}
// Table payload: "headers" holds one map per column, "data" holds the rows.
private static class TableMagic {
@SerializedName("headers")
List<Map> headers;
// the JSON key is "data" although the field is called records
@SerializedName("data")
List<List> records;
}
}
}
}

View file

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
/**
 * Checked exception for failures related to the Livy API. It adds nothing of its own and only
 * mirrors the constructors of {@link Exception}.
 */
public class LivyException extends Exception {

  /** Creates an exception without message or cause. */
  public LivyException() {
    super();
  }

  /** Creates an exception that wraps {@code cause}. */
  public LivyException(Throwable cause) {
    super(cause);
  }

  /** Creates an exception with the given detail message. */
  public LivyException(String message) {
    super(message);
  }

  /** Creates an exception with the given detail message and cause. */
  public LivyException(String message, Throwable cause) {
    super(message, cause);
  }

  /** Creates an exception with explicit control over suppression and stack trace writing. */
  public LivyException(String message, Throwable cause, boolean enableSuppression,
                       boolean writableStackTrace) {
    super(message, cause, enableSuppression, writableStackTrace);
  }
}

View file

@ -1,406 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.security.kerberos.client.KerberosRestTemplate;
import org.springframework.web.client.HttpClientErrorException;
import org.springframework.web.client.RestTemplate;
import java.nio.charset.Charset;
import java.util.*;
import java.util.Map.Entry;
/***
* Livy helper class
*/
public class LivyHelper {
// Logger of this helper (an instance field, not static).
Logger LOGGER = LoggerFactory.getLogger(LivyHelper.class);
// Gson instance used for all JSON handling in this class; pretty printing is enabled.
Gson gson = new GsonBuilder().setPrettyPrinting().create();
// Keyed by paragraph id. executeHTTP stores the response of each POST/GET here, cancelHTTP
// stores null, and interpret() stops polling when the entry for its paragraph is null.
HashMap<String, Object> paragraphHttpMap = new HashMap<>();
// Interpreter properties, e.g. zeppelin.livy.url and the livy.spark.* entries.
Properties property;
// Creates a helper that reads its settings from the given properties.
LivyHelper(Properties property) {
this.property = property;
}
/**
 * Creates a Livy session of the given kind for the user of {@code context} and waits until it
 * is idle.
 *
 * <p>Non-empty {@code livy.spark.*} properties are forwarded as session conf with the leading
 * {@code livy.} removed. If the session is not idle right away, it is polled once per second
 * for up to {@code zeppelin.livy.create.session.retries} + 1 attempts (default 60 retries).
 *
 * @param context interpreter context providing the user and the paragraph id
 * @param kind session kind to create
 * @return the id of the created session
 * @throws Exception if an HTTP call fails, the session enters state {@code error} or
 *     {@code dead}, or the session does not become idle within the configured retries
 */
public Integer createSession(InterpreterContext context, String kind) throws Exception {
  try {
    Map<String, String> conf = new HashMap<>();
    for (Entry<Object, Object> pair : property.entrySet()) {
      String key = pair.getKey().toString();
      if (key.startsWith("livy.spark.") && !pair.getValue().toString().isEmpty()) {
        // substring(5) strips the "livy." prefix
        conf.put(key.substring(5), pair.getValue().toString());
      }
    }

    String confData = gson.toJson(conf);
    String user = context.getAuthenticationInfo().getUser();
    String sessionsUrl = property.getProperty("zeppelin.livy.url") + "/sessions";
    java.lang.reflect.Type mapType = new TypeToken<Map<Object, Object>>() {
    }.getType();

    // kind and user are escaped so that quotes or backslashes in them cannot break the
    // hand-built JSON document.
    String json = executeHTTP(sessionsUrl, "POST",
        "{" +
            "\"kind\": \"" + StringEscapeUtils.escapeJson(kind) + "\", " +
            "\"conf\": " + confData + ", " +
            "\"proxyUser\": " + (StringUtils.isEmpty(user)
                ? null : "\"" + StringEscapeUtils.escapeJson(user) + "\"") +
            "}",
        context.getParagraphId()
    );

    Map jsonMap = (Map<Object, Object>) gson.fromJson(json, mapType);
    Integer sessionId = ((Double) jsonMap.get("id")).intValue();
    if (!jsonMap.get("state").equals("idle")) {
      Integer retryCount = 60;

      try {
        retryCount = Integer.valueOf(
            property.getProperty("zeppelin.livy.create.session.retries"));
      } catch (Exception e) {
        LOGGER.info("zeppelin.livy.create.session.retries property is not configured." +
            " Using default retry count.");
      }

      // Tracked separately from retryCount: the session may become idle on the very last
      // attempt, which must not be reported as a failure.
      boolean sessionIdle = false;
      while (retryCount >= 0) {
        LOGGER.info(String.format("sessionId:%s state is %s",
            jsonMap.get("id"), jsonMap.get("state")));
        Thread.sleep(1000);
        json = executeHTTP(sessionsUrl + "/" + sessionId, "GET", null,
            context.getParagraphId());
        jsonMap = (Map<Object, Object>) gson.fromJson(json, mapType);
        if (jsonMap.get("state").equals("idle")) {
          sessionIdle = true;
          break;
        } else if (jsonMap.get("state").equals("error") || jsonMap.get("state").equals("dead")) {
          // fetch the session log so the failure reason ends up in the exception message
          json = executeHTTP(sessionsUrl + "/" + sessionId + "/log",
              "GET", null,
              context.getParagraphId());
          jsonMap = (Map<Object, Object>) gson.fromJson(json, mapType);
          String logs = StringUtils.join((ArrayList<String>) jsonMap.get("log"), '\n');
          LOGGER.error(String.format("Cannot start %s.\n%s", kind, logs));
          throw new Exception(String.format("Cannot start %s.\n%s", kind, logs));
        }
        retryCount--;
      }
      if (!sessionIdle) {
        LOGGER.error("Error getting session for user within the configured number of retries.");
        throw new Exception(String.format("Cannot start %s.", kind));
      }
    }
    return sessionId;
  } catch (Exception e) {
    LOGGER.error("Error getting session for user", e);
    throw e;
  }
}
/**
 * Runs a multi-line input against a Livy session, grouping lines into chunks before sending
 * them, and writes the collected output to {@code out}.
 *
 * A line is held back and sent together with the following one when the next line is empty,
 * a comment, starts with "}" or "object", or starts with a single "." (method chaining).
 * A chunk reported as INCOMPLETE is also carried over into the next chunk.
 *
 * @param stringLines the input, split on "\n"
 * @param context interpreter context; its output is attached to {@code out} and cleared
 * @param sessionId id of the Livy session to run against
 * @param out stream that receives the accumulated output; detached again before returning
 *            from the error, incomplete and success paths inside the loop/epilogue
 * @param appId application id shown when displayAppInfo is true
 * @param webUI web UI address shown when displayAppInfo is true
 * @param displayAppInfo whether to wrap the output in an %angular block with application info
 * @return the first ERROR result, an INCOMPLETE result if the last chunk was incomplete,
 *         otherwise an empty SUCCESS result (the text itself goes to {@code out})
 */
public InterpreterResult interpretInput(String stringLines,
final InterpreterContext context,
int sessionId,
LivyOutputStream out,
String appId,
String webUI,
boolean displayAppInfo) {
try {
out.setInterpreterOutput(context.out);
context.out.clear();
// lines held back so far that still have to be sent with a later line
String incomplete = "";
boolean inComment = false;
String[] lines = stringLines.split("\n");
// one extra slot: a trailing print("") statement is appended after the user's lines
// NOTE(review): presumably so the last user line always has a successor to look at - confirm
String[] linesToRun = new String[lines.length + 1];
for (int i = 0; i < lines.length; i++) {
linesToRun[i] = lines[i];
}
linesToRun[lines.length] = "print(\"\")";
// result code of the most recently executed chunk
Code r = null;
StringBuilder outputBuilder = new StringBuilder();
for (int l = 0; l < linesToRun.length; l++) {
String s = linesToRun[l];
// check if next line starts with "." (but not ".." or "./") it is treated as an invocation
//for spark
if (l + 1 < linesToRun.length) {
String nextLine = linesToRun[l + 1].trim();
boolean continuation = false;
if (nextLine.isEmpty()
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
continuation = true;
} else if (!inComment && nextLine.startsWith("/*")) {
inComment = true;
continuation = true;
} else if (inComment && nextLine.lastIndexOf("*/") >= 0) {
inComment = false;
continuation = true;
} else if (nextLine.length() > 1
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
continuation = true;
} else if (inComment) {
continuation = true;
}
if (continuation) {
// do not execute yet; the current line is sent together with what follows
incomplete += s + "\n";
continue;
}
}
InterpreterResult res;
try {
res = interpret(incomplete + s, context, sessionId);
} catch (Exception e) {
LOGGER.error("Interpreter exception", e);
return new InterpreterResult(Code.ERROR, InterpreterUtils.getMostRelevantMessage(e));
}
r = res.code();
if (r == Code.ERROR) {
out.setInterpreterOutput(null);
return res;
} else if (r == Code.INCOMPLETE) {
// keep the chunk and retry it together with the next line
incomplete += s + "\n";
} else {
outputBuilder.append(res.message() + "\n");
incomplete = "";
}
}
if (r == Code.INCOMPLETE) {
out.setInterpreterOutput(null);
return new InterpreterResult(r, "Incomplete expression");
} else {
if (displayAppInfo) {
out.write("%angular ");
out.write("<pre><code>");
out.write(outputBuilder.toString());
out.write("</code></pre>");
out.write("<hr/>");
out.write("Spark Application Id:" + appId + "<br/>");
out.write("Spark WebUI: <a href=" + webUI + ">" + webUI + "</a>");
} else {
out.write(outputBuilder.toString());
}
out.setInterpreterOutput(null);
return new InterpreterResult(Code.SUCCESS);
}
} catch (Exception e) {
LOGGER.error("error in interpretInput", e);
return new InterpreterResult(Code.ERROR, e.getMessage());
}
}
/**
 * Executes one chunk of code in a Livy session and polls once per second until a result is
 * available.
 *
 * @param stringLines code to execute; blank input yields an empty SUCCESS result right away
 * @param context interpreter context providing the paragraph id
 * @param sessionId id of the Livy session
 * @return the statement result, or an empty INCOMPLETE result when the entry for the paragraph
 *     in {@code paragraphHttpMap} is null while polling
 * @throws Exception if an HTTP call fails or the thread is interrupted while sleeping
 */
public InterpreterResult interpret(String stringLines,
                                   final InterpreterContext context,
                                   int sessionId)
    throws Exception {
  if (stringLines.trim().isEmpty()) {
    return new InterpreterResult(Code.SUCCESS, "");
  }

  Map statement = executeCommand(stringLines, context, sessionId);
  Integer statementId = ((Double) statement.get("id")).intValue();

  InterpreterResult outcome = getResultFromMap(statement);
  while (outcome == null) {
    Thread.sleep(1000);
    if (paragraphHttpMap.get(context.getParagraphId()) == null) {
      return new InterpreterResult(Code.INCOMPLETE, "");
    }
    Map latest = getStatusById(context, sessionId, statementId);
    outcome = getResultFromMap(latest);
  }
  return outcome;
}
/**
 * Turns a parsed statement response into an {@link InterpreterResult}.
 *
 * @param jsonMap the statement response as a map
 * @return {@code null} unless the state is "available" and the output status is "error" or
 *     "ok"; INCOMPLETE when the error value is "incomplete statement" or contains "EOF";
 *     ERROR with the error value (plus traceback, if not empty) for other errors; SUCCESS with
 *     the trimmed "text/plain" data (prefixed with "%html " when it looks like HTML) for "ok"
 */
private InterpreterResult getResultFromMap(Map jsonMap) {
  if (!jsonMap.get("state").equals("available")) {
    return null;
  }

  Map output = (Map) jsonMap.get("output");
  Object status = output.get("status");

  if (status.equals("error")) {
    String evalue = (String) output.get("evalue");
    if (evalue.equals("incomplete statement") || evalue.contains("EOF")) {
      return new InterpreterResult(Code.INCOMPLETE, "");
    }
    String traceback = gson.toJson(output.get("traceback"));
    String message = traceback.equals("[]")
        ? evalue
        : evalue + "\n" + "traceback: \n" + traceback;
    return new InterpreterResult(Code.ERROR, message);
  }

  if (status.equals("ok")) {
    String text = (String) ((Map) output.get("data")).get("text/plain");
    if (text != null) {
      text = text.trim();
      boolean looksLikeHtml = text.startsWith("<link")
          || text.startsWith("<script")
          || text.startsWith("<style")
          || text.startsWith("<div");
      if (looksLikeHtml) {
        text = "%html " + text;
      }
    }
    return new InterpreterResult(Code.SUCCESS, text);
  }

  return null;
}
/**
 * Submits code as a new statement to a Livy session and returns the parsed response.
 *
 * @param lines code to execute; JSON-escaped before being embedded in the request body
 * @param context interpreter context providing the paragraph id
 * @param sessionId id of the Livy session
 * @return the statement response as a map
 * @throws Exception if the server answers that the session was not found, the HTTP call
 *     fails, or the response cannot be parsed
 */
private Map executeCommand(String lines, InterpreterContext context, int sessionId)
    throws Exception {
  String json = executeHTTP(property.getProperty("zeppelin.livy.url") + "/sessions/"
          + sessionId + "/statements",
      "POST",
      "{\"code\": \"" + StringEscapeUtils.escapeJson(lines) + "\"}",
      context.getParagraphId());
  // [0-9]+ (not [0-9]): session ids are not limited to a single digit, and a message such as
  // "Session '12' not found." must be recognised as well.
  if (json.matches("^(\")?Session (\'[0-9]+\' )?not found(.?\"?)$")) {
    throw new Exception("Exception: Session not found, Livy server would have restarted, " +
        "or lost session.");
  }
  try {
    Map jsonMap = gson.fromJson(json,
        new TypeToken<Map>() {
        }.getType());
    return jsonMap;
  } catch (Exception e) {
    LOGGER.error("Error executeCommand", e);
    throw e;
  }
}
/**
 * Fetches a statement of a Livy session and returns the parsed response.
 *
 * @param context interpreter context providing the paragraph id
 * @param sessionId id of the Livy session
 * @param id id of the statement
 * @return the statement response as a map
 * @throws Exception if the HTTP call fails or the response cannot be parsed
 */
private Map getStatusById(InterpreterContext context,
                          int sessionId, Integer id) throws Exception {
  String statementUrl = property.getProperty("zeppelin.livy.url") + "/sessions/"
      + sessionId
      + "/statements/" + id;
  String json = executeHTTP(statementUrl, "GET", null, context.getParagraphId());
  LOGGER.debug("statement {} response: {}", id, json);
  try {
    Map parsed = gson.fromJson(json,
        new TypeToken<Map>() {
        }.getType());
    return parsed;
  } catch (Exception parseFailure) {
    LOGGER.error("Error getStatusById", parseFailure);
    throw parseFailure;
  }
}
/**
 * Builds the REST client for talking to Livy.
 *
 * @return a {@link KerberosRestTemplate} when both {@code zeppelin.livy.keytab} and
 *     {@code zeppelin.livy.principal} are non-empty, otherwise a plain {@link RestTemplate}
 */
private RestTemplate getRestTemplate() {
  String keytab = property.getProperty("zeppelin.livy.keytab");
  String principalName = property.getProperty("zeppelin.livy.principal");
  boolean useKerberos =
      StringUtils.isNotEmpty(keytab) && StringUtils.isNotEmpty(principalName);
  if (!useKerberos) {
    return new RestTemplate();
  }
  return new KerberosRestTemplate(keytab, principalName);
}
/**
 * Performs one HTTP call and returns the response body.
 *
 * <p>Supported methods are {@code POST} (sends {@code jsonData}), {@code GET} and
 * {@code DELETE}. For POST and GET the response is also stored in {@code paragraphHttpMap}
 * under {@code paragraphId}. An {@link HttpClientErrorException} is turned into a regular
 * response so that its status and body go through the same checks.
 *
 * @param targetURL full URL to call
 * @param method one of {@code POST}, {@code GET}, {@code DELETE}
 * @param jsonData request body, only used for {@code POST}
 * @param paragraphId key under which the response is remembered
 * @return the response body for status 200, 201 and 404, or for bodies mentioning
 *     {@code CreateInteractiveRequest["master"]}; {@code null} when no request was made
 *     (unsupported method)
 * @throws Exception for any other status code
 */
protected String executeHTTP(String targetURL, String method, String jsonData, String paragraphId)
    throws Exception {
  LOGGER.debug("Call rest api in {}, method: {}, jsonData: {}", targetURL, method, jsonData);
  RestTemplate restTemplate = getRestTemplate();
  HttpHeaders headers = new HttpHeaders();
  headers.add("Content-Type", "application/json");
  headers.add("X-Requested-By", "zeppelin");
  ResponseEntity<String> response = null;
  try {
    if (method.equals("POST")) {
      HttpEntity<String> entity = new HttpEntity<>(jsonData, headers);
      response = restTemplate.exchange(targetURL, HttpMethod.POST, entity, String.class);
      paragraphHttpMap.put(paragraphId, response);
    } else if (method.equals("GET")) {
      HttpEntity<String> entity = new HttpEntity<>(headers);
      response = restTemplate.exchange(targetURL, HttpMethod.GET, entity, String.class);
      paragraphHttpMap.put(paragraphId, response);
    } else if (method.equals("DELETE")) {
      HttpEntity<String> entity = new HttpEntity<>(headers);
      response = restTemplate.exchange(targetURL, HttpMethod.DELETE, entity, String.class);
    }
  } catch (HttpClientErrorException e) {
    response = new ResponseEntity<String>(e.getResponseBodyAsString(), e.getStatusCode());
    LOGGER.error(String.format("Error with %s StatusCode: %s",
        response.getStatusCode().value(), e.getResponseBodyAsString()));
  }
  if (response == null) {
    return null;
  }

  int statusCode = response.getStatusCode().value();
  if (statusCode == 200 || statusCode == 201 || statusCode == 404) {
    return response.getBody();
  }

  String responseString = response.getBody();
  // The body can be null when the server answers without content; guard the inspection so the
  // caller gets the descriptive exception below rather than a NullPointerException.
  if (responseString != null
      && responseString.contains("CreateInteractiveRequest[\\\"master\\\"]")) {
    return responseString;
  }
  String errorMessage =
      String.format("Error with %s StatusCode: %s", statusCode, responseString);
  LOGGER.error(errorMessage);
  throw new Exception(errorMessage);
}
/**
 * Marks a paragraph as cancelled by storing {@code null} for it in {@code paragraphHttpMap}.
 *
 * @param paragraphId id of the paragraph to cancel
 */
public void cancelHTTP(String paragraphId) {
  // TODO(zjffdu), use cancel rest api of livy
  final Object cancelledMarker = null;
  paragraphHttpMap.put(paragraphId, cancelledMarker);
}
/**
 * Deletes a session on the Livy server. Best effort: any failure is logged and not propagated.
 *
 * @param sessionId id of the session to delete
 */
public void closeSession(int sessionId) {
  try {
    String sessionUrl = property.getProperty("zeppelin.livy.url") + "/sessions/" + sessionId;
    executeHTTP(sessionUrl, "DELETE", null, null);
  } catch (Exception e) {
    String logMessage =
        String.format("Error closing session for user with session ID: %s", sessionId);
    LOGGER.error(logMessage, e);
  }
}
}

View file

@ -1,84 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Output stream that forwards to an {@link InterpreterOutput} which can be attached and
 * detached at any time. While no output is attached, every operation is a no-op.
 */
public class LivyOutputStream extends OutputStream {
  private static Logger LOGGER = LoggerFactory.getLogger(LivyOutputStream.class);

  /** Current target; {@code null} means detached. */
  InterpreterOutput interpreterOutput;

  public LivyOutputStream() {
  }

  /** Returns the currently attached output, or {@code null} when detached. */
  public InterpreterOutput getInterpreterOutput() {
    return interpreterOutput;
  }

  /** Attaches the given output; pass {@code null} to detach. */
  public void setInterpreterOutput(InterpreterOutput interpreterOutput) {
    this.interpreterOutput = interpreterOutput;
  }

  @Override
  public void write(int b) throws IOException {
    if (interpreterOutput == null) {
      return;
    }
    interpreterOutput.write(b);
  }

  @Override
  public void write(byte[] b) throws IOException {
    if (interpreterOutput == null) {
      return;
    }
    interpreterOutput.write(b);
  }

  /** Logs the text at debug level and writes its UTF-8 bytes. */
  public void write(String text) throws IOException {
    LOGGER.debug("livy output:" + text);
    byte[] utf8Bytes = text.getBytes("UTF-8");
    write(utf8Bytes);
  }

  @Override
  public void write(byte[] b, int offset, int len) throws IOException {
    if (interpreterOutput == null) {
      return;
    }
    interpreterOutput.write(b, offset, len);
  }

  @Override
  public void close() throws IOException {
    if (interpreterOutput == null) {
      return;
    }
    interpreterOutput.close();
  }

  @Override
  public void flush() throws IOException {
    if (interpreterOutput == null) {
      return;
    }
    interpreterOutput.flush();
  }
}

View file

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
 * Livy PySpark3 interpreter for Zeppelin. All behaviour comes from the base class; this class
 * only selects the {@code pyspark3} session kind.
 */
public class LivyPySpark3Interpreter extends BaseLivyInterprereter {

  /** Session kind reported by this interpreter. */
  private static final String SESSION_KIND = "pyspark3";

  public LivyPySpark3Interpreter(Properties property) {
    super(property);
  }

  @Override
  public String getSessionKind() {
    return SESSION_KIND;
  }
}

View file

@ -19,16 +19,10 @@ package org.apache.zeppelin.livy;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
/**
@ -38,10 +32,12 @@ public class LivySparkSQLInterpreter extends BaseLivyInterprereter {
private LivySparkInterpreter sparkInterpreter;
private boolean sqlContextCreated = false;
private boolean isSpark2 = false;
private int maxResult = 1000;
public LivySparkSQLInterpreter(Properties property) {
super(property);
this.maxResult = Integer.parseInt(property.getProperty("zeppelin.livy.spark.sql.maxResult"));
}
@Override
@ -51,10 +47,56 @@ public class LivySparkSQLInterpreter extends BaseLivyInterprereter {
// Opens the interpreter, looks up the LivySparkInterpreter of the same session and probes it
// to find out which SQL entry point is usable: "spark" (Spark 2.x) or "sqlContext"
// (Spark 1.x). Wraps any LivyException from the probing in a RuntimeException.
@Override
public void open() {
super.open();
// NOTE(review): this assignment is overwritten by the getSparkInterpreter() call right below.
this.sparkInterpreter =
(LivySparkInterpreter) getInterpreterInTheSameSessionByClassName(
LivySparkInterpreter.class.getName());
this.sparkInterpreter = getSparkInterpreter();
// As we don't know whether livyserver use spark2 or spark1, so we will detect SparkSession
// to judge whether it is using spark2.
try {
// evaluate the name "spark" and look for the SparkSession class name in the output
InterpreterResult result = sparkInterpreter.interpret("spark", false, false);
if (result.code() == InterpreterResult.Code.SUCCESS &&
result.message().get(0).getData().contains("org.apache.spark.sql.SparkSession")) {
LOGGER.info("SparkSession is detected so we are using spark 2.x for session {}",
sparkInterpreter.getSessionInfo().id);
isSpark2 = true;
} else {
// spark 1.x
result = sparkInterpreter.interpret("sqlContext", false, false);
if (result.code() == InterpreterResult.Code.SUCCESS) {
LOGGER.info("sqlContext is detected.");
} else if (result.code() == InterpreterResult.Code.ERROR) {
// create SqlContext if it is not available, as in livy 0.2 sqlContext
// is not available.
LOGGER.info("sqlContext is not detected, try to create SQLContext by ourselves");
result = sparkInterpreter.interpret(
"val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n"
+ "import sqlContext.implicits._", false, false);
if (result.code() == InterpreterResult.Code.ERROR) {
throw new LivyException("Fail to create SQLContext," +
result.message().get(0).getData());
}
}
}
} catch (LivyException e) {
throw new RuntimeException("Fail to Detect SparkVersion", e);
}
}
/**
 * Finds the {@link LivySparkInterpreter} of the same session by unwrapping every
 * {@link WrappedInterpreter} layer around it.
 *
 * <p>If a {@link LazyOpenInterpreter} is found among the wrappers, {@code open()} is called on
 * the last one encountered before returning.
 *
 * @return the unwrapped spark interpreter
 */
private LivySparkInterpreter getSparkInterpreter() {
  Interpreter current =
      getInterpreterInTheSameSessionByClassName(LivySparkInterpreter.class.getName());
  LazyOpenInterpreter lazyWrapper = null;

  for (; current instanceof WrappedInterpreter;
       current = ((WrappedInterpreter) current).getInnerInterpreter()) {
    if (current instanceof LazyOpenInterpreter) {
      lazyWrapper = (LazyOpenInterpreter) current;
    }
  }

  LivySparkInterpreter unwrapped = (LivySparkInterpreter) current;
  if (lazyWrapper != null) {
    lazyWrapper.open();
  }
  return unwrapped;
}
@Override
@ -64,64 +106,52 @@ public class LivySparkSQLInterpreter extends BaseLivyInterprereter {
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "");
}
// create sqlContext implicitly if it is not available, as in livy 0.2 sqlContext
// is not available.
synchronized (this) {
if (!sqlContextCreated) {
InterpreterResult result = sparkInterpreter.interpret("sqlContext", context);
if (result.code() == InterpreterResult.Code.ERROR) {
result = sparkInterpreter.interpret(
"val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n"
+ "import sqlContext.implicits._", context);
if (result.code() == InterpreterResult.Code.ERROR) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Fail to create sqlContext," + result.message());
}
}
sqlContextCreated = true;
}
// use triple quote so that we don't need to do string escape.
String sqlQuery = null;
if (isSpark2) {
sqlQuery = "spark.sql(\"\"\"" + line + "\"\"\").show(" + maxResult + ")";
} else {
sqlQuery = "sqlContext.sql(\"\"\"" + line + "\"\"\").show(" + maxResult + ")";
}
InterpreterResult result = sparkInterpreter.interpret(sqlQuery, this.displayAppInfo, true);
// delegate the work to LivySparkInterpreter in the same session.
// TODO(zjffdu), we may create multiple session for the same user here. This can be fixed
// after we move session creation to open()
InterpreterResult res = sparkInterpreter.interpret("sqlContext.sql(\"" +
line.replaceAll("\"", "\\\\\"")
.replaceAll("\\n", " ")
+ "\").show(" +
property.get("zeppelin.livy.spark.sql.maxResult") + ")", context);
if (res.code() == InterpreterResult.Code.SUCCESS) {
StringBuilder resMsg = new StringBuilder();
resMsg.append("%table ");
String[] rows = new String(context.out.toByteArray()).split("\n");
String[] headers = rows[1].split("\\|");
for (int head = 1; head < headers.length; head++) {
resMsg.append(headers[head].trim()).append("\t");
}
resMsg.append("\n");
if (rows[3].indexOf("+") == 0) {
} else {
for (int cols = 3; cols < rows.length - 1; cols++) {
String[] col = rows[cols].split("\\|");
for (int data = 1; data < col.length; data++) {
resMsg.append(col[data].trim()).append("\t");
if (result.code() == InterpreterResult.Code.SUCCESS) {
InterpreterResult result2 = new InterpreterResult(InterpreterResult.Code.SUCCESS);
for (InterpreterResultMessage message : result.message()) {
// convert Text type to Table type. We assume the text type must be the sql output. This
// assumption is correct for now. Ideally livy should return table type. We may do it in
// the future release of livy.
if (message.getType() == InterpreterResult.Type.TEXT) {
StringBuilder resMsg = new StringBuilder();
String[] rows = message.getData().split("\n");
String[] headers = rows[1].split("\\|");
for (int head = 1; head < headers.length; head++) {
resMsg.append(headers[head].trim()).append("\t");
}
resMsg.append("\n");
if (rows[3].indexOf("+") == 0) {
} else {
for (int cols = 3; cols < rows.length - 1; cols++) {
String[] col = rows[cols].split("\\|");
for (int data = 1; data < col.length; data++) {
resMsg.append(col[data].trim()).append("\t");
}
resMsg.append("\n");
}
}
if (rows[rows.length - 1].indexOf("only") == 0) {
resMsg.append("<font color=red>" + rows[rows.length - 1] + ".</font>");
}
result2.add(InterpreterResult.Type.TABLE, resMsg.toString());
} else {
result2.add(message.getType(), message.getData());
}
}
if (rows[rows.length - 1].indexOf("only") == 0) {
resMsg.append("<font color=red>" + rows[rows.length - 1] + ".</font>");
}
return new InterpreterResult(InterpreterResult.Code.SUCCESS,
resMsg.toString()
);
return result2;
} else {
return res;
return result;
}
} catch (Exception e) {
LOGGER.error("Exception in LivySparkSQLInterpreter while interpret ", e);
return new InterpreterResult(InterpreterResult.Code.ERROR,

View file

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
/**
 * A LivyException subtype that carries only a detail message.
 * NOTE(review): presumably raised when a Livy session can no longer be found on the
 * server -- confirm against the call sites, which are not visible here.
 */
public class SessionNotFoundException extends LivyException {
/** Creates the exception with the given detail message, forwarded to LivyException. */
public SessionNotFoundException(String message) {
super(message);
}
}

View file

@ -13,14 +13,9 @@
},
"zeppelin.livy.create.session.retries": {
"envName": "ZEPPELIN_LIVY_CREATE_SESSION_RETRIES",
"propertyName": "zeppelin.livy.create.session.retries",
"propertyName": "zeppelin.livy.create.session.timeout",
"defaultValue": "120",
"description": "Livy Server create session retry count."
},
"livy.spark.master": {
"propertyName": "livy.spark.master",
"defaultValue": "local[*]",
"description": "Spark master uri. ex) spark://masterhost:7077"
"description": "Livy Server create session timeout (seconds)."
},
"livy.spark.driver.cores": {
"propertyName": "livy.spark.driver.cores",
@ -158,6 +153,26 @@
"editOnDblClick": false
}
},
{
"group": "livy",
"name": "pyspark3",
"className": "org.apache.zeppelin.livy.LivyPySpark3Interpreter",
"properties": {
},
"option": {
"remote": true,
"port": -1,
"perNote": "shared",
"perUser": "scoped",
"isExistingProcess": false,
"setPermission": false,
"users": []
},
"editor": {
"language": "python",
"editOnDblClick": false
}
},
{
"group": "livy",
"name": "sparkr",

View file

@ -1,111 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
import com.google.gson.GsonBuilder;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.hamcrest.CoreMatchers;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ErrorCollector;
import org.junit.runner.RunWith;
import org.mockito.Answers;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Properties;
import static org.mockito.Mockito.doReturn;
/**
 * Unit tests for LivyHelper. The helper is a Mockito mock that calls its real methods,
 * with executeHTTP stubbed to return canned JSON for the two requests exercised below.
 */
@RunWith(MockitoJUnitRunner.class)
public class LivyHelperTest {

  /** Collects assertion failures and unexpected exceptions instead of aborting the test. */
  @Rule
  public ErrorCollector collector = new ErrorCollector();

  @Mock(answer = Answers.RETURNS_DEEP_STUBS)
  private static LivyPySparkInterpreter interpreter;

  @Mock(answer = Answers.RETURNS_DEEP_STUBS)
  private InterpreterContext interpreterContext;

  @Mock(answer = Answers.CALLS_REAL_METHODS)
  private LivyHelper livyHelper;

  /**
   * Fills in the fields the real LivyHelper methods read and stubs executeHTTP for the
   * "create session" and "submit statement" POST requests.
   */
  @Before
  public void prepareContext() throws Exception {
    Properties properties = new Properties();
    properties.setProperty("zeppelin.livy.url", "http://localhost:8998");
    livyHelper.property = properties;
    livyHelper.paragraphHttpMap = new HashMap<>();
    livyHelper.gson = new GsonBuilder().setPrettyPrinting().create();
    livyHelper.LOGGER = LoggerFactory.getLogger(LivyHelper.class);

    // Canned response for session creation: session id 1 in state "idle".
    doReturn("{\"id\":1,\"state\":\"idle\",\"kind\":\"spark\",\"proxyUser\":\"null\",\"log\":[]}")
        .when(livyHelper)
        .executeHTTP(
            livyHelper.property.getProperty("zeppelin.livy.url") + "/sessions",
            "POST",
            "{\"kind\": \"spark\", \"conf\": {}, \"proxyUser\": null}",
            null
        );

    // Canned response for running print(1) in session 1: statement finished with status "ok".
    doReturn("{\"id\":1,\"state\":\"available\",\"output\":{\"status\":\"ok\"," +
        "\"execution_count\":1,\"data\":{\"text/plain\":\"1\"}}}")
        .when(livyHelper)
        .executeHTTP(
            livyHelper.property.getProperty("zeppelin.livy.url") + "/sessions/1/statements",
            "POST",
            "{\"code\": \"print(1)\"}",
            null
        );
  }

  /** createSession should return the session id found in the stubbed response. */
  @Test
  public void checkCreateSession() {
    try {
      Integer sessionId = livyHelper.createSession(interpreterContext, "spark");
      // Actual value first, expected value inside the matcher, so a failure report
      // labels "Expected" and "was" the right way round.
      collector.checkThat("check sessionId", sessionId, CoreMatchers.equalTo(1));
    } catch (Exception e) {
      collector.addError(e);
    }
  }

  /** interpret should report SUCCESS for the stubbed statement response. */
  @Test
  public void checkInterpret() {
    try {
      InterpreterResult result = livyHelper.interpret("print(1)", interpreterContext, 1);
      // The reason string previously said "check sessionId" (copy-paste); this check is
      // about the result code.
      collector.checkThat("check result code", result.code(),
          CoreMatchers.equalTo(InterpreterResult.Code.SUCCESS));
    } catch (Exception e) {
      collector.addError(e);
    }
  }
}

View file

@ -20,6 +20,8 @@ package org.apache.zeppelin.livy;
import com.cloudera.livy.test.framework.Cluster;
import com.cloudera.livy.test.framework.Cluster$;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.*;
@ -49,13 +51,14 @@ public class LivyInterpreterIT {
LOGGER.info("Starting livy at {}", cluster.livyEndpoint());
properties = new Properties();
properties.setProperty("zeppelin.livy.url", cluster.livyEndpoint());
properties.setProperty("zeppelin.livy.create.session.retries", "120");
properties.setProperty("zeppelin.livy.create.session.timeout", "120");
properties.setProperty("zeppelin.livy.spark.sql.maxResult", "100");
}
@AfterClass
public static void tearDown() {
if (cluster != null) {
LOGGER.info("Shutting down livy at {}", cluster.livyEndpoint());
cluster.cleanUp();
}
}
@ -92,63 +95,63 @@ public class LivyInterpreterIT {
try {
InterpreterResult result = sparkInterpreter.interpret("sc.version", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("1.5.2"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("1.5.2"));
// test RDD api
outputListener.reset();
result = sparkInterpreter.interpret("sc.parallelize(1 to 10).sum()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("Double = 55.0"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("Double = 55.0"));
// single line comment
outputListener.reset();
String singleLineComment = "// my comment";
String singleLineComment = "println(1)// my comment";
result = sparkInterpreter.interpret(singleLineComment, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertEquals(1, result.message().size());
// multiple line comment
outputListener.reset();
String multipleLineComment = "/* multiple \n" + "line \n" + "comment */";
String multipleLineComment = "println(1)/* multiple \n" + "line \n" + "comment */";
result = sparkInterpreter.interpret(multipleLineComment, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertEquals(1, result.message().size());
// multi-line string
outputListener.reset();
String multiLineString = "val str = \"\"\"multiple\n" +
"line\"\"\"\n" +
"println(str)";
result = sparkInterpreter.interpret(multiLineString, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("multiple\nline"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("multiple\nline"));
// case class
outputListener.reset();
String caseClassCode = "case class Person(id:Int, \n" +
"name:String)\n" +
"val p=Person(1, \"name_a\")";
result = sparkInterpreter.interpret(caseClassCode, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("defined class Person"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("p: Person = Person(1,name_a)"));
// object class
outputListener.reset();
String objectClassCode = "object Person {}";
result = sparkInterpreter.interpret(objectClassCode, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("defined module Person"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("defined module Person"));
// error
result = sparkInterpreter.interpret("println(a)", context);
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType());
assertTrue(result.message().get(0).getData().contains("error: not found: value a"));
// incomplete code
result = sparkInterpreter.interpret("if(true){", context);
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType());
assertTrue(result.message().get(0).getData().contains("incomplete statement"));
} finally {
sparkInterpreter.close();
}
@ -178,24 +181,46 @@ public class LivyInterpreterIT {
try {
// test DataFrame api
outputListener.reset();
sparkInterpreter.interpret("val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n"
+ "import sqlContext.implicits._", context);
InterpreterResult result = sparkInterpreter.interpret("val df=sqlContext.createDataFrame(Seq((\"hello\",20)))\n"
InterpreterResult result = sparkInterpreter.interpret(
"val df=sqlContext.createDataFrame(Seq((\"hello\",20))).toDF(\"col_1\", \"col_2\")\n"
+ "df.collect()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended()
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData()
.contains("Array[org.apache.spark.sql.Row] = Array([hello,20])"));
sparkInterpreter.interpret("df.registerTempTable(\"df\")", context);
// test LivySparkSQLInterpreter which share the same SparkContext with LivySparkInterpreter
outputListener.reset();
result = sqlInterpreter.interpret("select * from df", context);
result = sqlInterpreter.interpret("select * from df where col_1='hello'", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
// TODO(zjffdu), \t at the end of each line is not necessary, it is a bug of LivySparkSQLInterpreter
assertEquals("_1\t_2\t\nhello\t20\t\n", result.message().get(0).getData());
// TODO(zjffdu), \t at the end of each line is not necessary,
// it is a bug of LivySparkSQLInterpreter
assertEquals("col_1\tcol_2\t\nhello\t20\t\n", result.message().get(0).getData());
// double quotes
result = sqlInterpreter.interpret("select * from df where col_1=\"hello\"", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
assertEquals("col_1\tcol_2\t\nhello\t20\t\n", result.message().get(0).getData());
// double quotes inside attribute value
// TODO(zjffdu). This test case would fail on spark-1.5, would uncomment it when upgrading to
// livy-0.3 and spark-1.6
// result = sqlInterpreter.interpret("select * from df where col_1=\"he\\\"llo\" ", context);
// assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
// single quotes inside attribute value
result = sqlInterpreter.interpret("select * from df where col_1=\"he'llo\"", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
// test sql with syntax error
result = sqlInterpreter.interpret("select * from df2", context);
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType());
assertTrue(result.message().get(0).getData().contains("Table Not Found"));
} finally {
sparkInterpreter.close();
sqlInterpreter.close();
@ -209,10 +234,12 @@ public class LivyInterpreterIT {
}
InterpreterGroup interpreterGroup = new InterpreterGroup("group_1");
interpreterGroup.put("session_1", new ArrayList<Interpreter>());
LivySparkInterpreter sparkInterpreter = new LivySparkInterpreter(properties);
LazyOpenInterpreter sparkInterpreter = new LazyOpenInterpreter(
new LivySparkInterpreter(properties));
sparkInterpreter.setInterpreterGroup(interpreterGroup);
interpreterGroup.get("session_1").add(sparkInterpreter);
LivySparkSQLInterpreter sqlInterpreter = new LivySparkSQLInterpreter(properties);
LazyOpenInterpreter sqlInterpreter = new LazyOpenInterpreter(
new LivySparkSQLInterpreter(properties));
interpreterGroup.get("session_1").add(sqlInterpreter);
sqlInterpreter.setInterpreterGroup(interpreterGroup);
sqlInterpreter.open();
@ -249,26 +276,33 @@ public class LivyInterpreterIT {
try {
InterpreterResult result = pysparkInterpreter.interpret("sc.version", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("1.5.2"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("1.5.2"));
// test RDD api
outputListener.reset();
result = pysparkInterpreter.interpret("sc.range(1, 10).sum()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("45"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("45"));
// test DataFrame api
outputListener.reset();
pysparkInterpreter.interpret("from pyspark.sql import SQLContext\n"
+ "sqlContext = SQLContext(sc)", context);
result = pysparkInterpreter.interpret("df=sqlContext.createDataFrame([(\"hello\",20)])\n"
+ "df.collect()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(0, result.message().size());
assertTrue(outputListener.getOutputAppended().contains("[Row(_1=u'hello', _2=20)]"));
assertEquals(1, result.message().size());
assertTrue(result.message().get(0).getData().contains("[Row(_1=u'hello', _2=20)]"));
// test magic api
pysparkInterpreter.interpret("t = [{\"name\":\"userA\", \"role\":\"roleA\"},"
+ "{\"name\":\"userB\", \"role\":\"roleB\"}]", context);
result = pysparkInterpreter.interpret("%table t", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(1, result.message().size());
assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
assertTrue(result.message().get(0).getData().contains("userA"));
// error
result = pysparkInterpreter.interpret("print(a)", context);
assertEquals(InterpreterResult.Code.ERROR, result.code());
@ -287,37 +321,52 @@ public class LivyInterpreterIT {
// TODO(zjffdu), Livy's SparkRIntepreter has some issue, do it after livy-0.3 release.
}
public static class MyInterpreterOutputListener implements InterpreterOutputListener {
private StringBuilder outputAppended = new StringBuilder();
private StringBuilder outputUpdated = new StringBuilder();
/**
 * Runs the two tutorial paragraphs shipped as test resources: the Scala paragraph through
 * the Spark interpreter, then the SQL paragraph through the SQL interpreter registered in
 * the same session. Both must succeed and the SQL result must be of type TABLE.
 */
@Test
public void testLivyTutorialNote() throws IOException {
  if (!checkPreCondition()) {
    return;
  }

  InterpreterGroup group = new InterpreterGroup("group_1");
  group.put("session_1", new ArrayList<Interpreter>());

  LazyOpenInterpreter spark = new LazyOpenInterpreter(new LivySparkInterpreter(properties));
  spark.setInterpreterGroup(group);
  group.get("session_1").add(spark);

  LazyOpenInterpreter sql = new LazyOpenInterpreter(new LivySparkSQLInterpreter(properties));
  group.get("session_1").add(sql);
  sql.setInterpreterGroup(group);
  sql.open();

  try {
    AuthenticationInfo user = new AuthenticationInfo("user1");
    InterpreterOutput sink = new InterpreterOutput(new MyInterpreterOutputListener());
    InterpreterContext ctx = new InterpreterContext("noteId", "paragraphId", "livy.sql",
        "title", "text", user, null, null, null, null, null, sink);

    // Paragraph 1: Scala code that loads the data and registers the table.
    String scalaCode =
        IOUtils.toString(getClass().getResourceAsStream("/livy_tutorial_1.scala"));
    InterpreterResult scalaResult = spark.interpret(scalaCode, ctx);
    assertEquals(InterpreterResult.Code.SUCCESS, scalaResult.code());

    // Paragraph 2: SQL query against the table registered by paragraph 1.
    String sqlCode = IOUtils.toString(getClass().getResourceAsStream("/livy_tutorial_2.sql"));
    InterpreterResult sqlResult = sql.interpret(sqlCode, ctx);
    assertEquals(InterpreterResult.Code.SUCCESS, sqlResult.code());
    assertEquals(InterpreterResult.Type.TABLE, sqlResult.message().get(0).getType());
  } finally {
    spark.close();
    sql.close();
  }
}
public static class MyInterpreterOutputListener implements InterpreterOutputListener {
@Override
public void onAppend(int index, InterpreterResultMessageOutput out, byte[] line) {
LOGGER.info("onAppend:" + new String(line));
outputAppended.append(new String(line));
}
@Override
public void onUpdate(int index, InterpreterResultMessageOutput out) {
try {
LOGGER.info("onUpdate:" + new String(out.toByteArray()));
outputUpdated.append(new String(out.toByteArray()));
} catch (IOException e) {
e.printStackTrace();
}
}
public String getOutputAppended() {
return outputAppended.toString();
}
public String getOutputUpdated() {
return outputUpdated.toString();
}
public void reset() {
outputAppended = new StringBuilder();
outputUpdated = new StringBuilder();
}
@Override

View file

@ -0,0 +1,24 @@
import org.apache.commons.io.IOUtils
import java.net.URL
import java.nio.charset.Charset
// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext),
// so you don't need to create them manually.
// Load the bank data: fetch the CSV from the URL below, split it into lines,
// and hand the lines to sc.parallelize.
val bankText = sc.parallelize(
IOUtils.toString(
new URL("https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv"),
Charset.forName("utf8")).split("\n"))
// Record type for the five fields kept from each CSV line.
case class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)
// Split each line on ';', drop the header row (its first field is the quoted word "age"),
// strip the double quotes from fields 1, 2, 3 and 5, and convert the result to a DataFrame.
// Field 4 of each line is not used.
val bank = bankText.map(s => s.split(";")).filter(s => s(0) != "\"age\"").map(
s => Bank(s(0).toInt,
s(1).replaceAll("\"", ""),
s(2).replaceAll("\"", ""),
s(3).replaceAll("\"", ""),
s(5).replaceAll("\"", "").toInt
)
).toDF()
// Register the DataFrame under the name "bank" so it can be queried by that name.
bank.registerTempTable("bank")

View file

@ -0,0 +1,5 @@
select age, count(1) value
from bank
where age < 30
group by age
order by age

View file

@ -2,162 +2,165 @@
"paragraphs": [
{
"text": "%md\n## Welcome to Zeppelin.\n##### This is a live tutorial, you can run the code yourself. (Shift-Enter to Run)",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:32:15 PM",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
"editorHide": true,
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
}
}
],
"enabled": true,
"editorSetting": {
"language": "markdown",
"editOnDblClick": true
},
"editorHide": true
"editorMode": "ace/mode/markdown",
"tableHide": false
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1423836981412_-1007008116",
"id": "20150213-231621_168813393",
"result": {
"results": {
"code": "SUCCESS",
"type": "HTML",
"msg": "\u003ch2\u003eWelcome to Zeppelin.\u003c/h2\u003e\n\u003ch5\u003eThis is a live tutorial, you can run the code yourself. (Shift-Enter to Run)\u003c/h5\u003e\n"
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch2\u003eWelcome to Zeppelin.\u003c/h2\u003e\n\u003ch5\u003eThis is a live tutorial, you can run the code yourself. (Shift-Enter to Run)\u003c/h5\u003e\n\u003c/div\u003e"
}
]
},
"dateCreated": "Feb 13, 2015 11:16:21 PM",
"dateStarted": "Apr 1, 2015 9:11:09 PM",
"dateFinished": "Apr 1, 2015 9:11:10 PM",
"dateStarted": "Dec 17, 2016 3:32:15 PM",
"dateFinished": "Dec 17, 2016 3:32:18 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"title": "Load data into table",
"text": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\n\n// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)\n// So you don\u0027t need create them manually\n\n// load bank data\nval bankText \u003d sc.parallelize(\n IOUtils.toString(\n new URL(\"https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\"),\n Charset.forName(\"utf8\")).split(\"\\n\"))\n\ncase class Bank(age: Integer, job: String, marital: String, education: String, balance: Integer)\n\nval bank \u003d bankText.map(s \u003d\u003e s.split(\";\")).filter(s \u003d\u003e s(0) !\u003d \"\\\"age\\\"\").map(\n s \u003d\u003e Bank(s(0).toInt, \n s(1).replaceAll(\"\\\"\", \"\"),\n s(2).replaceAll(\"\\\"\", \"\"),\n s(3).replaceAll(\"\\\"\", \"\"),\n s(5).replaceAll(\"\\\"\", \"\").toInt\n )\n).toDF()\nbank.registerTempTable(\"bank\")",
"dateUpdated": "Jan 14, 2016 7:58:56 PM",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:09 PM",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
},
"title": true,
"enabled": true,
"editorMode": "ace/mode/scala"
"editorMode": "ace/mode/scala",
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
],
"editorSetting": {
"language": "scala",
"editOnDblClick": false
}
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1423500779206_-1502780787",
"id": "20150210-015259_1403135953",
"result": {
"results": {
"code": "SUCCESS",
"type": "TEXT",
"msg": "import org.apache.commons.io.IOUtils\nimport java.net.URL\nimport java.nio.charset.Charset\nbankText: org.apache.spark.rdd.RDD[String] \u003d ParallelCollectionRDD[32] at parallelize at \u003cconsole\u003e:65\ndefined class Bank\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string, marital: string, education: string, balance: int]\n"
"msg": [
{
"type": "TEXT",
"data": "\nimport org.apache.commons.io.IOUtils\n\nimport java.net.URL\n\nimport java.nio.charset.Charset\n\nbankText: org.apache.spark.rdd.RDD[String] \u003d ParallelCollectionRDD[0] at parallelize at \u003cconsole\u003e:32\n\ndefined class Bank\n\nbank: org.apache.spark.sql.DataFrame \u003d [age: int, job: string ... 3 more fields]\n\nwarning: there were 1 deprecation warning(s); re-run with -deprecation for details\n"
}
]
},
"dateCreated": "Feb 10, 2015 1:52:59 AM",
"dateStarted": "Jul 3, 2015 1:43:40 PM",
"dateFinished": "Jul 3, 2015 1:43:45 PM",
"dateStarted": "Dec 17, 2016 3:30:09 PM",
"dateFinished": "Dec 17, 2016 3:30:58 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%sql \nselect age, count(1) value\nfrom bank \nwhere age \u003c 30 \ngroup by age \norder by age",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:13 PM",
"config": {
"colWidth": 4.0,
"graph": {
"mode": "multiBarChart",
"height": 300.0,
"optionOpen": false,
"keys": [
{
"name": "age",
"index": 0.0,
"aggr": "sum"
}
],
"values": [
{
"name": "value",
"index": 1.0,
"aggr": "sum"
}
],
"groups": [],
"scatter": {
"xAxis": {
"name": "age",
"index": 0.0,
"aggr": "sum"
},
"yAxis": {
"name": "value",
"index": 1.0,
"aggr": "sum"
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
}
],
"enabled": true,
"editorSetting": {
"language": "sql",
"editOnDblClick": false
},
"editorMode": "ace/mode/sql"
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1423500782552_-1439281894",
"id": "20150210-015302_1492795503",
"result": {
"results": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n"
"msg": [
{
"type": "TABLE",
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n"
}
]
},
"dateCreated": "Feb 10, 2015 1:53:02 AM",
"dateStarted": "Jul 3, 2015 1:43:17 PM",
"dateFinished": "Jul 3, 2015 1:43:23 PM",
"dateStarted": "Dec 17, 2016 3:30:13 PM",
"dateFinished": "Dec 17, 2016 3:31:04 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere age \u003c ${maxAge\u003d30} \ngroup by age \norder by age",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:16 PM",
"config": {
"colWidth": 4.0,
"graph": {
"mode": "multiBarChart",
"height": 300.0,
"optionOpen": false,
"keys": [
{
"name": "age",
"index": 0.0,
"aggr": "sum"
}
],
"values": [
{
"name": "value",
"index": 1.0,
"aggr": "sum"
}
],
"groups": [],
"scatter": {
"xAxis": {
"name": "age",
"index": 0.0,
"aggr": "sum"
},
"yAxis": {
"name": "value",
"index": 1.0,
"aggr": "sum"
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
}
],
"enabled": true,
"editorSetting": {
"language": "sql",
"editOnDblClick": false
},
"editorMode": "ace/mode/sql"
},
"settings": {
"params": {
@ -171,55 +174,45 @@
}
}
},
"apps": [],
"jobName": "paragraph_1423720444030_-1424110477",
"id": "20150212-145404_867439529",
"result": {
"results": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n"
"msg": [
{
"type": "TABLE",
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n"
}
]
},
"dateCreated": "Feb 12, 2015 2:54:04 PM",
"dateStarted": "Jul 3, 2015 1:43:28 PM",
"dateFinished": "Jul 3, 2015 1:43:29 PM",
"dateStarted": "Dec 17, 2016 3:30:58 PM",
"dateFinished": "Dec 17, 2016 3:31:07 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%sql \nselect age, count(1) value \nfrom bank \nwhere marital\u003d\"${marital\u003dsingle,single|divorced|married}\" \ngroup by age \norder by age",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:18 PM",
"config": {
"colWidth": 4.0,
"graph": {
"mode": "multiBarChart",
"height": 300.0,
"optionOpen": false,
"keys": [
{
"name": "age",
"index": 0.0,
"aggr": "sum"
}
],
"values": [
{
"name": "value",
"index": 1.0,
"aggr": "sum"
}
],
"groups": [],
"scatter": {
"xAxis": {
"name": "age",
"index": 0.0,
"aggr": "sum"
},
"yAxis": {
"name": "value",
"index": 1.0,
"aggr": "sum"
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
}
],
"enabled": true,
"editorSetting": {
"language": "sql",
"editOnDblClick": false
},
"editorMode": "ace/mode/sql"
},
"settings": {
"params": {
@ -244,80 +237,113 @@
}
}
},
"apps": [],
"jobName": "paragraph_1423836262027_-210588283",
"id": "20150213-230422_1600658137",
"result": {
"results": {
"code": "SUCCESS",
"type": "TABLE",
"msg": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t17\n24\t13\n25\t33\n26\t56\n27\t64\n28\t78\n29\t56\n30\t92\n31\t86\n32\t105\n33\t61\n34\t75\n35\t46\n36\t50\n37\t43\n38\t44\n39\t30\n40\t25\n41\t19\n42\t23\n43\t21\n44\t20\n45\t15\n46\t14\n47\t12\n48\t12\n49\t11\n50\t8\n51\t6\n52\t9\n53\t4\n55\t3\n56\t3\n57\t2\n58\t7\n59\t2\n60\t5\n66\t2\n69\t1\n"
"msg": [
{
"type": "TABLE",
"data": "age\tvalue\n19\t4\n20\t3\n21\t7\n22\t9\n23\t17\n24\t13\n25\t33\n26\t56\n27\t64\n28\t78\n29\t56\n30\t92\n31\t86\n32\t105\n33\t61\n34\t75\n35\t46\n36\t50\n37\t43\n38\t44\n39\t30\n40\t25\n41\t19\n42\t23\n43\t21\n44\t20\n45\t15\n46\t14\n47\t12\n48\t12\n49\t11\n50\t8\n51\t6\n52\t9\n53\t4\n55\t3\n56\t3\n57\t2\n58\t7\n59\t2\n60\t5\n66\t2\n69\t1\n"
}
]
},
"dateCreated": "Feb 13, 2015 11:04:22 PM",
"dateStarted": "Jul 3, 2015 1:43:33 PM",
"dateFinished": "Jul 3, 2015 1:43:34 PM",
"dateStarted": "Dec 17, 2016 3:31:05 PM",
"dateFinished": "Dec 17, 2016 3:31:09 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%md\n## Congratulations, it\u0027s done.\n##### You can create your own notebook in \u0027Notebook\u0027 menu. Good luck!",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:24 PM",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
"editorHide": true,
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
],
"enabled": true,
"editorSetting": {
"language": "markdown",
"editOnDblClick": true
},
"editorHide": true
"editorMode": "ace/mode/markdown",
"tableHide": false
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1423836268492_216498320",
"id": "20150213-230428_1231780373",
"result": {
"results": {
"code": "SUCCESS",
"type": "HTML",
"msg": "\u003ch2\u003eCongratulations, it\u0027s done.\u003c/h2\u003e\n\u003ch5\u003eYou can create your own notebook in \u0027Notebook\u0027 menu. Good luck!\u003c/h5\u003e\n"
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch2\u003eCongratulations, it\u0026rsquo;s done.\u003c/h2\u003e\n\u003ch5\u003eYou can create your own notebook in \u0026lsquo;Notebook\u0026rsquo; menu. Good luck!\u003c/h5\u003e\n\u003c/div\u003e"
}
]
},
"dateCreated": "Feb 13, 2015 11:04:28 PM",
"dateStarted": "Apr 1, 2015 9:12:18 PM",
"dateFinished": "Apr 1, 2015 9:12:18 PM",
"dateStarted": "Dec 17, 2016 3:30:24 PM",
"dateFinished": "Dec 17, 2016 3:30:29 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%md\n\nAbout bank data\n\n```\nCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n```",
"user": "anonymous",
"dateUpdated": "Dec 17, 2016 3:30:34 PM",
"config": {
"colWidth": 12.0,
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false,
"keys": [],
"values": [],
"groups": [],
"scatter": {}
"editorHide": true,
"results": [
{
"graph": {
"mode": "table",
"height": 300.0,
"optionOpen": false
}
}
],
"enabled": true,
"editorSetting": {
"language": "markdown",
"editOnDblClick": true
},
"editorHide": true
"editorMode": "ace/mode/markdown",
"tableHide": false
},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1427420818407_872443482",
"id": "20150326-214658_12335843",
"result": {
"results": {
"code": "SUCCESS",
"type": "HTML",
"msg": "\u003cp\u003eAbout bank data\u003c/p\u003e\n\u003cpre\u003e\u003ccode\u003eCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u00272011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n\u003c/code\u003e\u003c/pre\u003e\n"
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eAbout bank data\u003c/p\u003e\n\u003cpre\u003e\u003ccode\u003eCitation Request:\n This dataset is public available for research. The details are described in [Moro et al., 2011]. \n Please include this citation if you plan to use this database:\n\n [Moro et al., 2011] S. Moro, R. Laureano and P. Cortez. Using Data Mining for Bank Direct Marketing: An Application of the CRISP-DM Methodology. \n In P. Novais et al. (Eds.), Proceedings of the European Simulation and Modelling Conference - ESM\u0026#39;2011, pp. 117-121, Guimarães, Portugal, October, 2011. EUROSIS.\n\n Available at: [pdf] http://hdl.handle.net/1822/14838\n [bib] http://www3.dsi.uminho.pt/pcortez/bib/2011-esm-1.txt\n\u003c/code\u003e\u003c/pre\u003e\n\u003c/div\u003e"
}
]
},
"dateCreated": "Mar 26, 2015 9:46:58 PM",
"dateStarted": "Jul 3, 2015 1:44:56 PM",
"dateFinished": "Jul 3, 2015 1:44:56 PM",
"dateStarted": "Dec 17, 2016 3:30:34 PM",
"dateFinished": "Dec 17, 2016 3:30:34 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
@ -327,6 +353,7 @@
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1435955447812_-158639899",
"id": "20150703-133047_853701097",
"dateCreated": "Jul 3, 2015 1:30:47 PM",
@ -337,11 +364,28 @@
"name": "Zeppelin Tutorial/Basic Features (Spark)",
"id": "2A94M5J1Z",
"angularObjects": {
"2B6FF8NNU": [],
"2B67PH63Z": []
"2C6WUGPNH:shared_process": [],
"2C4A8RJNB:shared_process": [],
"2C4DTK2ZT:shared_process": [],
"2C6XKJWBR:shared_process": [],
"2C6AHZPMK:shared_process": [],
"2C5SU66WQ:shared_process": [],
"2C6AMJ98Q:shared_process": [],
"2C4AJZK72:shared_process": [],
"2C3STPSD7:shared_process": [],
"2C4FJN9CK:shared_process": [],
"2C3CW6JBY:shared_process": [],
"2C5UPQX6Q:shared_process": [],
"2C5873KN4:shared_process": [],
"2C5719XN4:shared_process": [],
"2C52DE5G3:shared_process": [],
"2C4G28E63:shared_process": [],
"2C6CU96BC:shared_process": [],
"2C49A6WY3:shared_process": [],
"2C3NE73HG:shared_process": []
},
"config": {
"looknfeel": "default"
},
"info": {}
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,316 @@
{
"paragraphs": [
{
"text": "%md\n\n\n### [Apache Pig](http://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large data sets.\n\nPig\u0027s language layer currently consists of a textual language called Pig Latin, which has the following key properties:\n\n* Ease of programming. It is trivial to achieve parallel execution of simple, \"embarrassingly parallel\" data analysis tasks. Complex tasks comprised of multiple interrelated data transformations are explicitly encoded as data flow sequences, making them easy to write, understand, and maintain.\n* Optimization opportunities. The way in which tasks are encoded permits the system to optimize their execution automatically, allowing the user to focus on semantics rather than efficiency.\n* Extensibility. Users can create their own functions to do special-purpose processing.\n",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:55:03 PM",
"config": {
"colWidth": 12.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "markdown",
"editOnDblClick": true
},
"editorMode": "ace/mode/markdown",
"editorHide": true,
"tableHide": false
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003e\u003ca href\u003d\"http://pig.apache.org/\"\u003eApache Pig\u003c/a\u003e is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large data sets.\u003c/h3\u003e\n\u003cp\u003ePig\u0026rsquo;s language layer currently consists of a textual language called Pig Latin, which has the following key properties:\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eEase of programming. It is trivial to achieve parallel execution of simple, \u0026ldquo;embarrassingly parallel\u0026rdquo; data analysis tasks. Complex tasks comprised of multiple interrelated data transformations are explicitly encoded as data flow sequences, making them easy to write, understand, and maintain.\u003c/li\u003e\n \u003cli\u003eOptimization opportunities. The way in which tasks are encoded permits the system to optimize their execution automatically, allowing the user to focus on semantics rather than efficiency.\u003c/li\u003e\n \u003cli\u003eExtensibility. Users can create their own functions to do special-purpose processing.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/div\u003e"
}
]
},
"apps": [],
"jobName": "paragraph_1483277502513_1156234051",
"id": "20170101-213142_1565013608",
"dateCreated": "Jan 1, 2017 9:31:42 PM",
"dateStarted": "Jan 6, 2017 3:55:03 PM",
"dateFinished": "Jan 6, 2017 3:55:04 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%md\n\nThis pig tutorial uses pig to do the same thing as the spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. For mapreduce mode, you need to have hadoop installed and export `HADOOP_CONF_DIR` in `zeppelin-env.sh`\n\nThe tutorial consists of 3 steps.\n\n* Use shell interpreter to download bank.csv and upload it to hdfs\n* use `%pig` to process the data\n* use `%pig.query` to query the data",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:55:18 PM",
"config": {
"colWidth": 12.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "markdown",
"editOnDblClick": true
},
"editorMode": "ace/mode/markdown",
"editorHide": true,
"tableHide": false
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "HTML",
"data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis pig tutorial uses pig to do the same thing as the spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. For mapreduce mode, you need to have hadoop installed and export \u003ccode\u003eHADOOP_CONF_DIR\u003c/code\u003e in \u003ccode\u003ezeppelin-env.sh\u003c/code\u003e\u003c/p\u003e\n\u003cp\u003eThe tutorial consists of 3 steps.\u003c/p\u003e\n\u003cul\u003e\n \u003cli\u003eUse shell interpreter to download bank.csv and upload it to hdfs\u003c/li\u003e\n \u003cli\u003euse \u003ccode\u003e%pig\u003c/code\u003e to process the data\u003c/li\u003e\n \u003cli\u003euse \u003ccode\u003e%pig.query\u003c/code\u003e to query the data\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/div\u003e"
}
]
},
"apps": [],
"jobName": "paragraph_1483689316217_-629483391",
"id": "20170106-155516_1050601059",
"dateCreated": "Jan 6, 2017 3:55:16 PM",
"dateStarted": "Jan 6, 2017 3:55:18 PM",
"dateFinished": "Jan 6, 2017 3:55:18 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%pig\n\nbankText \u003d load \u0027bank.csv\u0027 using PigStorage(\u0027;\u0027);\nbank \u003d foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance; \nbank \u003d filter bank by age !\u003d \u0027\"age\"\u0027;\nbank \u003d foreach bank generate (int)age, REPLACE(job,\u0027\"\u0027,\u0027\u0027) as job, REPLACE(marital, \u0027\"\u0027, \u0027\u0027) as marital, (int)(REPLACE(balance, \u0027\"\u0027, \u0027\u0027)) as balance;\n\n-- The following statement is optional, it depends on your needs.\n-- store bank into \u0027clean_bank.csv\u0027 using PigStorage(\u0027;\u0027);\n\n\n",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:57:11 PM",
"config": {
"colWidth": 12.0,
"editorMode": "ace/mode/pig",
"results": {},
"enabled": true,
"editorSetting": {
"language": "pig",
"editOnDblClick": false
}
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": []
},
"apps": [],
"jobName": "paragraph_1483277250237_-466604517",
"id": "20161228-140640_1560978333",
"dateCreated": "Jan 1, 2017 9:27:30 PM",
"dateStarted": "Jan 6, 2017 3:57:11 PM",
"dateFinished": "Jan 6, 2017 3:57:13 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c 30;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1);\n\n",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:57:15 PM",
"config": {
"colWidth": 4.0,
"editorMode": "ace/mode/pig",
"results": {
"0": {
"graph": {
"mode": "multiBarChart",
"height": 300.0,
"optionOpen": false
},
"helium": {}
}
},
"enabled": true,
"editorSetting": {
"language": "pig",
"editOnDblClick": false
}
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TABLE",
"data": "group\tnull\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n"
}
]
},
"apps": [],
"jobName": "paragraph_1483277250238_-465450270",
"id": "20161228-140730_1903342877",
"dateCreated": "Jan 1, 2017 9:27:30 PM",
"dateStarted": "Jan 6, 2017 3:57:15 PM",
"dateFinished": "Jan 6, 2017 3:57:16 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c ${maxAge\u003d40};\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1);",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:57:18 PM",
"config": {
"colWidth": 4.0,
"editorMode": "ace/mode/pig",
"results": {
"0": {
"graph": {
"mode": "pieChart",
"height": 300.0,
"optionOpen": false
},
"helium": {}
}
},
"enabled": true,
"editorSetting": {
"language": "pig",
"editOnDblClick": false
}
},
"settings": {
"params": {
"maxAge": "36"
},
"forms": {
"maxAge": {
"name": "maxAge",
"defaultValue": "40",
"hidden": false
}
}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TABLE",
"data": "group\tnull\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n35\t180\n"
}
]
},
"apps": [],
"jobName": "paragraph_1483277250239_-465835019",
"id": "20161228-154918_1551591203",
"dateCreated": "Jan 1, 2017 9:27:30 PM",
"dateStarted": "Jan 6, 2017 3:57:18 PM",
"dateFinished": "Jan 6, 2017 3:57:19 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%pig.query\n\nbank_data \u003d filter bank by marital\u003d\u003d\u0027${marital\u003dsingle,single|divorced|married}\u0027;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1) as c;\n\n\n",
"user": "user1",
"dateUpdated": "Jan 6, 2017 3:57:24 PM",
"config": {
"colWidth": 4.0,
"editorMode": "ace/mode/pig",
"results": {
"0": {
"graph": {
"mode": "scatterChart",
"height": 300.0,
"optionOpen": false
},
"helium": {}
}
},
"enabled": true,
"editorSetting": {
"language": "pig",
"editOnDblClick": false
}
},
"settings": {
"params": {
"marital": "married"
},
"forms": {
"marital": {
"name": "marital",
"defaultValue": "single",
"options": [
{
"value": "single"
},
{
"value": "divorced"
},
{
"value": "married"
}
],
"hidden": false
}
}
},
"results": {
"code": "SUCCESS",
"msg": [
{
"type": "TABLE",
"data": "group\tc\n23\t3\n24\t11\n25\t11\n26\t18\n27\t26\n28\t23\n29\t37\n30\t56\n31\t104\n32\t105\n33\t103\n34\t142\n35\t109\n36\t117\n37\t100\n38\t99\n39\t88\n40\t105\n41\t97\n42\t91\n43\t79\n44\t68\n45\t76\n46\t82\n47\t78\n48\t91\n49\t87\n50\t74\n51\t63\n52\t66\n53\t75\n54\t56\n55\t68\n56\t50\n57\t78\n58\t67\n59\t56\n60\t36\n61\t15\n62\t5\n63\t7\n64\t6\n65\t4\n66\t7\n67\t5\n68\t1\n69\t5\n70\t5\n71\t5\n72\t4\n73\t6\n74\t2\n75\t3\n76\t1\n77\t5\n78\t2\n79\t3\n80\t6\n81\t1\n83\t2\n86\t1\n87\t1\n"
}
]
},
"apps": [],
"jobName": "paragraph_1483277250240_-480070728",
"id": "20161228-142259_575675591",
"dateCreated": "Jan 1, 2017 9:27:30 PM",
"dateStarted": "Jan 6, 2017 3:57:20 PM",
"dateFinished": "Jan 6, 2017 3:57:20 PM",
"status": "FINISHED",
"progressUpdateIntervalMs": 500
},
{
"text": "%pig\n",
"dateUpdated": "Jan 1, 2017 9:27:30 PM",
"config": {},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1483277250240_-480070728",
"id": "20161228-155036_1854903164",
"dateCreated": "Jan 1, 2017 9:27:30 PM",
"status": "READY",
"errorMessage": "",
"progressUpdateIntervalMs": 500
}
],
"name": "Zeppelin Tutorial/Using Pig for querying data",
"id": "2C57UKYWR",
"angularObjects": {
"2C3DR183X:shared_process": [],
"2C5VH924X:shared_process": [],
"2C686X8ZH:shared_process": [],
"2C66Z9XPQ:shared_process": [],
"2C3JKFMJU:shared_process": [],
"2C69WE69N:shared_process": [],
"2C3RWCVAG:shared_process": [],
"2C4HKDCQW:shared_process": [],
"2C4BJDRRZ:shared_process": [],
"2C6V3D44K:shared_process": [],
"2C3VECEG2:shared_process": [],
"2C5SRRXHM:shared_process": [],
"2C5DCRVGM:shared_process": [],
"2C66GE1VB:shared_process": [],
"2C3PTPMUH:shared_process": [],
"2C48Y7FSJ:shared_process": [],
"2C4ZD49PF:shared_process": [],
"2C63XW4XE:shared_process": [],
"2C4UB1UZA:shared_process": [],
"2C5S1R21W:shared_process": [],
"2C3SQSB7V:shared_process": []
},
"config": {},
"info": {}
}

View file

@ -177,6 +177,13 @@
</execution>
</executions>
</plugin>
<!-- forkMode "always" tells surefire to fork a new JVM for each test class.
NOTE(review): presumably needed so the local and tez_local Pig tests do not
share JVM-wide state; confirm against the test suite. -->
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkMode>always</forkMode>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -17,6 +17,7 @@
package org.apache.zeppelin.pig;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.PigServer;
import org.apache.pig.backend.BackendException;
@ -97,4 +98,27 @@ public abstract class BasePigInterpreter extends Interpreter {
}
public abstract PigServer getPigServer();
/**
 * Chooses the job name for a pig script: the paragraph title when one is set,
 * otherwise the last non-blank line of the script.
 *
 * @param cmd     the pig script text; it is split on "\n" to find the last non-blank line
 * @param context interpreter context that supplies the paragraph title
 * @return the paragraph title if it is not blank, else the last non-blank script line,
 *         else the literal "empty_job" when every line is blank
 */
protected String createJobName(String cmd, InterpreterContext context) {
  String title = context.getParagraphTitle();
  if (StringUtils.isNotBlank(title)) {
    return title;
  }

  // No usable title: scan the script from the bottom and take the first non-blank line.
  String[] scriptLines = cmd.split("\n");
  int idx = scriptLines.length - 1;
  while (idx >= 0) {
    String candidate = scriptLines[idx];
    if (StringUtils.isNotBlank(candidate)) {
      return candidate;
    }
    idx--;
  }

  // Fallback for a script made up solely of blank lines.
  return "empty_job";
}
}

View file

@ -18,6 +18,7 @@
package org.apache.zeppelin.pig;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigServer;
import org.apache.pig.impl.logicalLayer.FrontendException;
@ -58,6 +59,12 @@ public class PigInterpreter extends BasePigInterpreter {
}
try {
pigServer = new PigServer(execType);
for (Map.Entry entry : getProperty().entrySet()) {
if (!entry.getKey().toString().startsWith("zeppelin.")) {
pigServer.getPigContext().getProperties().setProperty(entry.getKey().toString(),
entry.getValue().toString());
}
}
} catch (IOException e) {
LOGGER.error("Fail to initialize PigServer", e);
throw new RuntimeException("Fail to initialize PigServer", e);
@ -78,6 +85,7 @@ public class PigInterpreter extends BasePigInterpreter {
ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
File tmpFile = null;
try {
pigServer.setJobName(createJobName(cmd, contextInterpreter));
tmpFile = PigUtils.createTempPigScript(cmd);
System.setOut(new PrintStream(bytesOutput));
// each thread should its own ScriptState & PigStats

View file

@ -78,6 +78,7 @@ public class PigQueryInterpreter extends BasePigInterpreter {
StringBuilder resultBuilder = new StringBuilder("%table ");
try {
pigServer.setJobName(createJobName(st, context));
File tmpScriptFile = PigUtils.createTempPigScript(queries);
// each thread should its own ScriptState & PigStats
ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
@ -93,7 +94,7 @@ public class PigQueryInterpreter extends BasePigInterpreter {
if (schemaKnown) {
for (int i = 0; i < schema.size(); ++i) {
Schema.FieldSchema field = schema.getField(i);
resultBuilder.append(field.alias);
resultBuilder.append(field.alias != null ? field.alias : "col_" + i);
if (i != schema.size() - 1) {
resultBuilder.append("\t");
}

View file

@ -171,6 +171,15 @@ public class PigUtils {
private static String extractFromTezPigStats(TezPigScriptStats stats) {
try {
if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
LOGGER.warn("unknown return code, can't display the results");
return null;
}
if (stats.getPigContext() == null) {
LOGGER.warn("unknown exec type, don't display the results");
return null;
}
Field userIdField = PigStats.class.getDeclaredField("userId");
userIdField.setAccessible(true);
String userId = (String) (userIdField.get(stats));

View file

@ -41,10 +41,10 @@ public class PigInterpreterTest {
private PigInterpreter pigInterpreter;
private InterpreterContext context;
@Before
public void setUp() {
private void setUpLocal(boolean includeJobStats) {
Properties properties = new Properties();
properties.put("zeppelin.pig.execType", "local");
properties.put("zeppelin.pig.includeJobStats", includeJobStats + "");
pigInterpreter = new PigInterpreter(properties);
pigInterpreter.open();
context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null, null,
@ -58,6 +58,8 @@ public class PigInterpreterTest {
@Test
public void testBasics() throws IOException {
setUpLocal(false);
String content = "1\tandy\n"
+ "2\tpeter\n";
File tmpFile = File.createTempFile("zeppelin", "test");
@ -101,11 +103,7 @@ public class PigInterpreterTest {
@Test
public void testIncludeJobStats() throws IOException {
Properties properties = new Properties();
properties.put("zeppelin.pig.execType", "local");
properties.put("zeppelin.pig.includeJobStats", "true");
pigInterpreter = new PigInterpreter(properties);
pigInterpreter.open();
setUpLocal(true);
String content = "1\tandy\n"
+ "2\tpeter\n";
@ -152,4 +150,5 @@ public class PigInterpreterTest {
assertTrue(result.message().get(0).getData().contains("Counters:"));
assertTrue(result.message().get(0).getData().contains("Input path does not exist"));
}
}

View file

@ -0,0 +1,157 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link PigInterpreter} running with the "tez_local" exec type.
 *
 * Each test opens its own interpreter through {@link #setUpTez(boolean)} so that the
 * "zeppelin.pig.includeJobStats" setting can differ per test.
 */
public class PigInterpreterTezTest {

  private PigInterpreter pigInterpreter;
  private InterpreterContext context;

  /**
   * Creates and opens a PigInterpreter configured for tez_local, and builds the
   * InterpreterContext used by the tests.
   *
   * @param includeJobStats value to set for "zeppelin.pig.includeJobStats"
   */
  public void setUpTez(boolean includeJobStats) {
    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "tez_local");
    properties.put("zeppelin.pig.includeJobStats", includeJobStats + "");
    properties.put("tez.queue.name", "test");
    pigInterpreter = new PigInterpreter(properties);
    pigInterpreter.open();
    context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null, null,
        null, null);
  }

  /**
   * Closes the interpreter after each test.
   */
  @After
  public void tearDown() {
    // Guard against a failed setUpTez(): an NPE here would hide the real failure.
    if (pigInterpreter != null) {
      pigInterpreter.close();
    }
  }

  /**
   * Writes the two-row, tab-separated fixture to a temp file.
   *
   * @return the temp file; it is registered for deletion when the JVM exits
   * @throws IOException if the file cannot be created or written
   */
  private static File createInputFile() throws IOException {
    String content = "1\tandy\n"
        + "2\tpeter\n";
    File tmpFile = File.createTempFile("zeppelin", "test");
    // Do not leave fixture files behind in the temp directory.
    tmpFile.deleteOnExit();
    FileWriter writer = new FileWriter(tmpFile);
    try {
      IOUtils.write(content, writer);
    } finally {
      // Release the file handle even when the write fails.
      writer.close();
    }
    return tmpFile;
  }

  /**
   * Checks property forwarding, dump, describe and the two error paths with job stats disabled.
   */
  @Test
  public void testBasics() throws IOException {
    setUpTez(false);

    // the property set in setUpTez must be visible in the PigContext properties
    assertEquals("test",
        pigInterpreter.getPigServer().getPigContext().getProperties()
            .getProperty("tez.queue.name"));

    File tmpFile = createInputFile();

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().get(0).getData().contains("Syntax error, unexpected symbol at or near 'a'"));

    // syntax error
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "foreach a generate $0;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().get(0).getData().contains("expecting one of"));
  }

  /**
   * Checks that "Vertex Stats" appears in the output only when a job actually ran successfully.
   */
  @Test
  public void testIncludeJobStats() throws IOException {
    setUpTez(true);

    File tmpFile = createInputFile();

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().get(0).getData().contains("Vertex Stats"));
    assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.SUCCESS, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().get(0).getData().contains("Vertex Stats"));
    assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.ERROR, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().get(0).getData().contains("Vertex Stats"));
    assertTrue(result.message().get(0).getData().contains("Syntax error, unexpected symbol at or near 'a'"));

    // execution error
    pigscript = "a = load 'invalid_path';"
        + "dump a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.message().get(0).getType());
    assertEquals(Code.ERROR, result.code());
    assertTrue(!result.message().get(0).getData().contains("Vertex Stats"));
    assertTrue(result.message().get(0).getData().contains("Input path does not exist"));
  }
}

View file

@ -108,6 +108,13 @@ public class PigQueryInterpreterTest {
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("gender\tcount\nmale\t2\nfemale\t1\n", result.message().get(0).getData());
// generate alias with unknown schema
query = "b = group a by gender;\nforeach b generate group, COUNT($1);";
result = pigQueryInterpreter.interpret(query, context);
assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("group\tcol_1\nmale\t2\nfemale\t1\n", result.message().get(0).getData());
// syntax error in PigQueryInterpereter
query = "b = group a by invalid_column;\nforeach b generate group as gender, COUNT($1) as count;";
result = pigQueryInterpreter.interpret(query, context);

View file

@ -0,0 +1,3 @@
<configuration>
</configuration>

10
pom.xml
View file

@ -726,6 +726,13 @@
</modules>
</profile>
<profile>
<id>helium-dev</id>
<modules>
<module>helium-dev</module>
</modules>
</profile>
<profile>
<id>build-distr</id>
<activation>
@ -933,7 +940,8 @@
<exclude>docs/_site/**</exclude>
<exclude>docs/Gemfile.lock</exclude>
<exclude>**/horizontalbar_mockdata.txt</exclude>
<!-- package.json -->
<exclude>**/package.json</exclude>
<!-- compiled R packages (binaries) -->
<exclude>R/lib/**</exclude>

View file

@ -16,14 +16,16 @@
*/
package org.apache.zeppelin.python;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.scheduler.Scheduler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.HashMap;
import java.util.Properties;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -36,11 +38,17 @@ public class PythonCondaInterpreter extends Interpreter {
public static final String CONDA_PYTHON_PATH = "/bin/python";
public static final String DEFAULT_ZEPPELIN_PYTHON = "python";
Pattern condaEnvListPattern = Pattern.compile("([^\\s]*)[\\s*]*\\s(.*)");
Pattern listPattern = Pattern.compile("env\\s*list\\s?");
Pattern activatePattern = Pattern.compile("activate\\s*(.*)");
Pattern deactivatePattern = Pattern.compile("deactivate");
Pattern helpPattern = Pattern.compile("help");
public static final Pattern PATTERN_OUTPUT_ENV_LIST = Pattern.compile("([^\\s]*)[\\s*]*\\s(.*)");
public static final Pattern PATTERN_COMMAND_ENV_LIST = Pattern.compile("env\\s*list\\s?");
public static final Pattern PATTERN_COMMAND_ENV = Pattern.compile("env\\s*(.*)");
public static final Pattern PATTERN_COMMAND_LIST = Pattern.compile("list");
public static final Pattern PATTERN_COMMAND_CREATE = Pattern.compile("create\\s*(.*)");
public static final Pattern PATTERN_COMMAND_ACTIVATE = Pattern.compile("activate\\s*(.*)");
public static final Pattern PATTERN_COMMAND_DEACTIVATE = Pattern.compile("deactivate");
public static final Pattern PATTERN_COMMAND_INSTALL = Pattern.compile("install\\s*(.*)");
public static final Pattern PATTERN_COMMAND_UNINSTALL = Pattern.compile("uninstall\\s*(.*)");
public static final Pattern PATTERN_COMMAND_HELP = Pattern.compile("help");
public static final Pattern PATTERN_COMMAND_INFO = Pattern.compile("info");
public PythonCondaInterpreter(Properties property) {
super(property);
@ -59,33 +67,53 @@ public class PythonCondaInterpreter extends Interpreter {
@Override
public InterpreterResult interpret(String st, InterpreterContext context) {
InterpreterOutput out = context.out;
Matcher activateMatcher = PATTERN_COMMAND_ACTIVATE.matcher(st);
Matcher createMatcher = PATTERN_COMMAND_CREATE.matcher(st);
Matcher installMatcher = PATTERN_COMMAND_INSTALL.matcher(st);
Matcher uninstallMatcher = PATTERN_COMMAND_UNINSTALL.matcher(st);
Matcher envMatcher = PATTERN_COMMAND_ENV.matcher(st);
Matcher listMatcher = listPattern.matcher(st);
Matcher activateMatcher = activatePattern.matcher(st);
Matcher deactivateMatcher = deactivatePattern.matcher(st);
Matcher helpMatcher = helpPattern.matcher(st);
if (st == null || st.isEmpty() || listMatcher.matches()) {
listEnv(out, getCondaEnvs());
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
} else if (activateMatcher.matches()) {
String envName = activateMatcher.group(1);
changePythonEnvironment(envName);
restartPythonProcess();
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "\"" + envName + "\" activated");
} else if (deactivateMatcher.matches()) {
changePythonEnvironment(null);
restartPythonProcess();
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "Deactivated");
} else if (helpMatcher.matches()) {
printUsage(out);
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
} else {
return new InterpreterResult(InterpreterResult.Code.ERROR, "Not supported command: " + st);
try {
if (PATTERN_COMMAND_ENV_LIST.matcher(st).matches()) {
String result = runCondaEnvList();
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (envMatcher.matches()) {
// `envMatcher` should be used after `listEnvMatcher`
String result = runCondaEnv(getRestArgsFromMatcher(envMatcher));
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (PATTERN_COMMAND_LIST.matcher(st).matches()) {
String result = runCondaList();
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (createMatcher.matches()) {
String result = runCondaCreate(getRestArgsFromMatcher(createMatcher));
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (activateMatcher.matches()) {
String envName = activateMatcher.group(1).trim();
return runCondaActivate(envName);
} else if (PATTERN_COMMAND_DEACTIVATE.matcher(st).matches()) {
return runCondaDeactivate();
} else if (installMatcher.matches()) {
String result = runCondaInstall(getRestArgsFromMatcher(installMatcher));
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (uninstallMatcher.matches()) {
String result = runCondaUninstall(getRestArgsFromMatcher(uninstallMatcher));
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else if (st == null || PATTERN_COMMAND_HELP.matcher(st).matches()) {
runCondaHelp(out);
return new InterpreterResult(Code.SUCCESS);
} else if (PATTERN_COMMAND_INFO.matcher(st).matches()) {
String result = runCondaInfo();
return new InterpreterResult(Code.SUCCESS, Type.HTML, result);
} else {
return new InterpreterResult(Code.ERROR, "Not supported command: " + st);
}
} catch (RuntimeException | IOException | InterruptedException e) {
throw new InterpreterException(e);
}
}
private void changePythonEnvironment(String envName) {
private void changePythonEnvironment(String envName)
throws IOException, InterruptedException {
PythonInterpreter python = getPythonInterpreter();
String binPath = null;
if (envName == null) {
@ -94,7 +122,7 @@ public class PythonCondaInterpreter extends Interpreter {
binPath = DEFAULT_ZEPPELIN_PYTHON;
}
} else {
HashMap<String, String> envList = getCondaEnvs();
Map<String, String> envList = getCondaEnvs();
for (String name : envList.keySet()) {
if (envName.equals(name)) {
binPath = envList.get(name) + CONDA_PYTHON_PATH;
@ -114,7 +142,8 @@ public class PythonCondaInterpreter extends Interpreter {
protected PythonInterpreter getPythonInterpreter() {
LazyOpenInterpreter lazy = null;
PythonInterpreter python = null;
Interpreter p = getInterpreterInTheSameSessionByClassName(PythonInterpreter.class.getName());
Interpreter p =
getInterpreterInTheSameSessionByClassName(PythonInterpreter.class.getName());
while (p instanceof WrappedInterpreter) {
if (p instanceof LazyOpenInterpreter) {
@ -130,59 +159,75 @@ public class PythonCondaInterpreter extends Interpreter {
return python;
}
private HashMap getCondaEnvs() {
HashMap envList = null;
public static String runCondaCommandForTextOutput(String title, List<String> commands)
throws IOException, InterruptedException {
StringBuilder sb = createStringBuilder();
try {
int exit = runCommand(sb, "conda", "env", "list");
if (exit == 0) {
envList = new HashMap();
String[] lines = sb.toString().split("\n");
for (String s : lines) {
if (s == null || s.isEmpty() || s.startsWith("#")) {
continue;
}
Matcher match = condaEnvListPattern.matcher(s);
String result = runCommand(commands);
return wrapCondaBasicOutputStyle(title, result);
}
if (!match.matches()) {
continue;
}
envList.put(match.group(1), match.group(2));
}
}
} catch (IOException | InterruptedException e) {
throw new InterpreterException(e);
}
/**
 * Runs the given command and renders its stdout as a two-column HTML table.
 *
 * The raw output is parsed with {@link #parseCondaCommonStdout(String)} and the
 * resulting key/value pairs are formatted by
 * {@link #wrapCondaTableOutputStyle(String, Map)}.
 *
 * @param title heading for the rendered table; passed through to the table formatter
 * @param commands full command line to execute, program name first
 * @return HTML fragment containing the table
 * @throws IOException propagated from running the command
 * @throws InterruptedException propagated from running the command
 */
private String runCondaCommandForTableOutput(String title, List<String> commands)
    throws IOException, InterruptedException {
  String result = runCommand(commands);
  // use table output for pretty output
  Map<String, String> envPerName = parseCondaCommonStdout(result);
  return wrapCondaTableOutputStyle(title, envPerName);
}
/**
 * Lists conda environments by running <code>conda env list</code>.
 *
 * @return parsed output of the command, as produced by
 *         {@link #parseCondaCommonStdout(String)}
 * @throws IOException propagated from running the command
 * @throws InterruptedException propagated from running the command
 */
protected Map<String, String> getCondaEnvs()
    throws IOException, InterruptedException {
  String stdout = runCommand("conda", "env", "list");
  return parseCondaCommonStdout(stdout);
}
private void listEnv(InterpreterOutput out, HashMap<String, String> envList) {
try {
out.setType(InterpreterResult.Type.HTML);
out.write("<h4>Conda environments</h4>\n");
// start table
out.write("<div style=\"display:table\">\n");
for (String name : envList.keySet()) {
String path = envList.get(name);
out.write(String.format("<div style=\"display:table-row\">" +
"<div style=\"display:table-cell;width:150px\">%s</div>" +
"<div style=\"display:table-cell;\">%s</div>" +
"</div>\n",
name, path));
}
// end table
out.write("</div><br />\n");
out.write("<small><code>%python.conda help</code> for the usage</small>\n");
} catch (IOException e) {
throw new InterpreterException(e);
}
/**
 * Handles the <code>env list</code> command: fetches the environments via
 * {@link #getCondaEnvs()} and renders them as an HTML table titled
 * "Environment List".
 *
 * @return HTML fragment with one table row per environment
 */
private String runCondaEnvList() throws IOException, InterruptedException {
return wrapCondaTableOutputStyle("Environment List", getCondaEnvs());
}
private String runCondaEnv(List<String> restArgs)
throws IOException, InterruptedException {
private void printUsage(InterpreterOutput out) {
restArgs.add(0, "conda");
restArgs.add(1, "env");
restArgs.add(3, "--yes"); // --yes should be inserted after command
return runCondaCommandForTextOutput(null, restArgs);
}
/**
 * Handles the <code>activate</code> command.
 *
 * Switches the Python interpreter to the named conda environment and restarts
 * the Python process. A missing or empty name is reported as an ERROR result
 * rather than an exception.
 *
 * @param envName name of the conda environment to activate
 * @return SUCCESS with a confirmation message, or ERROR when no name was given
 */
private InterpreterResult runCondaActivate(String envName)
    throws IOException, InterruptedException {
  boolean nameGiven = envName != null && !envName.isEmpty();
  if (!nameGiven) {
    return new InterpreterResult(Code.ERROR, "Env name should be specified");
  }

  changePythonEnvironment(envName);
  restartPythonProcess();

  String message = "'" + envName + "' is activated";
  return new InterpreterResult(Code.SUCCESS, message);
}
/**
 * Handles the <code>deactivate</code> command.
 *
 * Calls changePythonEnvironment with a null environment name and then restarts
 * the Python process so the change takes effect.
 * NOTE(review): null presumably selects the default (non-conda) python command;
 * confirm against changePythonEnvironment.
 *
 * @return SUCCESS result with the message "Deactivated"
 */
private InterpreterResult runCondaDeactivate()
throws IOException, InterruptedException {
changePythonEnvironment(null);
restartPythonProcess();
return new InterpreterResult(Code.SUCCESS, "Deactivated");
}
/**
 * Handles the <code>list</code> command by running <code>conda list</code>
 * and rendering the parsed output as an HTML table.
 *
 * @return HTML fragment titled "Installed Package List"
 */
private String runCondaList() throws IOException, InterruptedException {
  List<String> cmd = new ArrayList<String>(Arrays.asList("conda", "list"));
  return runCondaCommandForTableOutput("Installed Package List", cmd);
}
private void runCondaHelp(InterpreterOutput out) {
try {
out.setType(InterpreterResult.Type.HTML);
out.writeResource("output_templates/conda_usage.html");
@ -191,6 +236,98 @@ public class PythonCondaInterpreter extends Interpreter {
}
}
/**
 * Handles the <code>info</code> command by running <code>conda info</code>
 * and wrapping its raw text output in the basic HTML style.
 *
 * @return HTML fragment titled "Conda Information"
 */
private String runCondaInfo() throws IOException, InterruptedException {
  List<String> cmd = new ArrayList<String>(Arrays.asList("conda", "info"));
  return runCondaCommandForTextOutput("Conda Information", cmd);
}
/**
 * Handles the <code>create</code> command.
 *
 * Prepends <code>conda create --yes</code> to the user-supplied arguments and
 * runs the resulting command. The list passed in is modified in place, so it
 * must be mutable.
 *
 * @param restArgs arguments that followed "create" in the paragraph text
 * @return HTML fragment titled "Environment Creation"
 */
private String runCondaCreate(List<String> restArgs)
    throws IOException, InterruptedException {
  // --yes comes right after the sub-command so conda never waits for a prompt
  restArgs.addAll(0, Arrays.asList("conda", "create", "--yes"));
  return runCondaCommandForTextOutput("Environment Creation", restArgs);
}
/**
 * Handles the <code>install</code> command.
 *
 * Prepends <code>conda install --yes</code> to the user-supplied arguments and
 * runs the resulting command. The list passed in is modified in place, so it
 * must be mutable.
 *
 * @param restArgs arguments that followed "install" in the paragraph text
 * @return HTML fragment titled "Package Installation"
 */
private String runCondaInstall(List<String> restArgs)
    throws IOException, InterruptedException {
  // --yes comes right after the sub-command so conda never waits for a prompt
  restArgs.addAll(0, Arrays.asList("conda", "install", "--yes"));
  return runCondaCommandForTextOutput("Package Installation", restArgs);
}
/**
 * Handles the <code>uninstall</code> command.
 *
 * Prepends <code>conda uninstall --yes</code> to the user-supplied arguments
 * and runs the resulting command. The list passed in is modified in place, so
 * it must be mutable.
 *
 * @param restArgs arguments that followed "uninstall" in the paragraph text
 * @return HTML fragment titled "Package Uninstallation"
 */
private String runCondaUninstall(List<String> restArgs)
    throws IOException, InterruptedException {
  // --yes comes right after the sub-command so conda never waits for a prompt
  restArgs.addAll(0, Arrays.asList("conda", "uninstall", "--yes"));
  return runCondaCommandForTextOutput("Package Uninstallation", restArgs);
}
/**
 * Wraps raw command output in an HTML fragment that preserves whitespace.
 *
 * When a title is given it is emitted as an <code>&lt;h4&gt;</code> heading
 * followed by a line break. The content is inserted verbatim (it is not
 * HTML-escaped) inside a <code>white-space:pre-wrap</code> div.
 *
 * @param title optional heading; skipped when null or empty
 * @param content text to display
 * @return HTML fragment
 */
public static String wrapCondaBasicOutputStyle(String title, String content) {
  StringBuilder sb = new StringBuilder();
  if (null != title && !title.isEmpty()) {
    // No closing </div> here: nothing has been opened yet, and a stray end tag
    // can close an enclosing container in the page that embeds this fragment.
    sb.append("<h4>").append(title).append("</h4>\n")
      .append("<br />\n");
  }
  sb.append("<div style=\"white-space:pre-wrap;\">\n")
    .append(content)
    .append("</div>");
  return sb.toString();
}
/**
 * Renders key/value pairs as a two-column HTML table built from CSS
 * <code>display:table</code> divs.
 *
 * Rows appear in the map's iteration order; keys go in a fixed-width first
 * cell and values in the second. Values are inserted verbatim.
 *
 * @param title optional heading; skipped when null or empty
 * @param kv entries to render, one row each
 * @return HTML fragment
 */
public static String wrapCondaTableOutputStyle(String title, Map<String, String> kv) {
  final String rowTemplate = "<div style=\"display:table-row\">" +
      "<div style=\"display:table-cell;width:150px\">%s</div>" +
      "<div style=\"display:table-cell;\">%s</div>" +
      "</div>\n";

  StringBuilder html = new StringBuilder();
  boolean hasTitle = title != null && !title.isEmpty();
  if (hasTitle) {
    html.append("<h4>").append(title).append("</h4>\n");
  }

  html.append("<div style=\"display:table;white-space:pre-wrap;\">\n");
  for (Map.Entry<String, String> row : kv.entrySet()) {
    html.append(String.format(rowTemplate, row.getKey(), row.getValue()));
  }
  html.append("</div>\n");

  return html.toString();
}
/**
 * Parses line-oriented conda output into an insertion-ordered map.
 *
 * Empty lines and lines starting with '#' are skipped. Every other line is
 * matched against PATTERN_OUTPUT_ENV_LIST; on a match, group 1 becomes the key
 * and group 2 the value. Lines that do not match are ignored, and a repeated
 * key overwrites the earlier value.
 *
 * @param out raw stdout of a conda command, lines separated by "\n"
 * @return map from first column to the remainder of each line
 */
public static Map<String, String> parseCondaCommonStdout(String out)
    throws IOException, InterruptedException {
  Map<String, String> parsed = new LinkedHashMap<String, String>();

  for (String line : out.split("\n")) {
    boolean skippable = line == null || line.isEmpty() || line.startsWith("#");
    if (skippable) {
      continue;
    }

    Matcher columns = PATTERN_OUTPUT_ENV_LIST.matcher(line);
    if (columns.matches()) {
      parsed.put(columns.group(1), columns.group(2));
    }
  }

  return parsed;
}
@Override
public void cancel(InterpreterContext context) {
@ -206,7 +343,6 @@ public class PythonCondaInterpreter extends Interpreter {
return 0;
}
/**
* Use python interpreter's scheduler.
* To make sure %python.conda paragraph and %python paragraph runs sequentially
@ -221,9 +357,12 @@ public class PythonCondaInterpreter extends Interpreter {
}
}
protected int runCommand(StringBuilder sb, String ... command)
public static String runCommand(List<String> commands)
throws IOException, InterruptedException {
ProcessBuilder builder = new ProcessBuilder(command);
StringBuilder sb = new StringBuilder();
ProcessBuilder builder = new ProcessBuilder(commands);
builder.redirectErrorStream(true);
Process process = builder.start();
InputStream stdout = process.getInputStream();
@ -234,10 +373,28 @@ public class PythonCondaInterpreter extends Interpreter {
sb.append("\n");
}
int r = process.waitFor(); // Let the process finish.
return r;
if (r != 0) {
throw new RuntimeException("Failed to execute `" +
StringUtils.join(commands, " ") + "` exited with " + r);
}
return sb.toString();
}
protected StringBuilder createStringBuilder() {
return new StringBuilder();
/**
 * Varargs convenience overload: copies the arguments into a new list and
 * delegates to {@link #runCommand(List)}.
 *
 * @param command program name followed by its arguments
 * @return whatever the list-based overload returns
 */
public static String runCommand(String ... command)
    throws IOException, InterruptedException {
  List<String> asList = new ArrayList<>(Arrays.asList(command));
  return runCommand(asList);
}
/**
 * Splits the text captured by group 1 of an already-matched matcher into
 * individual command-line arguments.
 *
 * The captured text is trimmed and split on runs of whitespace, so repeated
 * spaces or tabs between arguments do not produce empty-string arguments.
 * When nothing was captured the result is a single empty string, which keeps
 * the list non-empty for callers that insert flags at fixed positions.
 *
 * @param m matcher on which {@code matches()} has already succeeded
 * @return a new, mutable list of arguments
 */
public static List<String> getRestArgsFromMatcher(Matcher m) {
  // Arrays.asList returns a fixed-size view, so copy it into an ArrayList:
  // callers insert "conda", the sub-command and "--yes" into the result.
  return new ArrayList<>(Arrays.asList(m.group(1).trim().split("\\s+")));
}
}

View file

@ -72,9 +72,9 @@ public class PythonInterpreter extends Interpreter {
// Add matplotlib display hook
InterpreterGroup intpGroup = getInterpreterGroup();
if (intpGroup != null && intpGroup.getInterpreterHookRegistry() != null) {
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
registerHook(HookType.POST_EXEC_DEV, "\nz._displayhook()");
}
// Add zeppelin-bundled libs to PYTHONPATH
setPythonPath("../interpreter/lib/python:$PYTHONPATH");
LOG.info("Starting Python interpreter ---->");

View file

@ -110,10 +110,13 @@ public class PythonProcess {
writer.println("\"" + STATEMENT_END + "\"");
StringBuilder output = new StringBuilder();
String line = null;
while (!(line = reader.readLine()).contains(STATEMENT_END)) {
while ((line = reader.readLine()) != null &&
!line.contains(STATEMENT_END)) {
logger.debug("Read line from python shell : " + line);
output.append(line + "\n");
}
return output.toString();
}

View file

@ -12,6 +12,18 @@ See the License for the specific language governing permissions and
limitations under the License.
-->
<h4>Usage</h4>
<div>
Get the Conda Information
<pre>%python.conda info</pre>
</div>
<div>
List the Conda environments
<pre>%python.conda env list</pre>
</div>
<div>
Create a conda environment
<pre>%python.conda create --name [ENV NAME]</pre>
</div>
<div>
Activate an environment (python interpreter will be restarted)
<pre>%python.conda activate [ENV NAME]</pre>
@ -21,7 +33,14 @@ limitations under the License.
<pre>%python.conda deactivate</pre>
</div>
<div>
List the Conda environments
<pre>%python.conda</pre>
Get installed package list inside the current environment
<pre>%python.conda list</pre>
</div>
<div>
Install Package
<pre>%python.conda install [PACKAGE NAME]</pre>
</div>
<div>
Uninstall Package
<pre>%python.conda uninstall [PACKAGE NAME]</pre>
</div>

View file

@ -23,13 +23,11 @@ import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Properties;
import java.util.*;
import java.util.regex.Matcher;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.*;
import static org.mockito.Mockito.*;
public class PythonCondaInterpreterTest {
@ -49,35 +47,32 @@ public class PythonCondaInterpreterTest {
doReturn(python).when(conda).getPythonInterpreter();
}
private void setCondaEnvs() throws IOException, InterruptedException {
StringBuilder sb = new StringBuilder();
sb.append("#comment\n\nenv1 * /path1\nenv2\t/path2\n");
doReturn(sb).when(conda).createStringBuilder();
doReturn(0).when(conda)
.runCommand(any(StringBuilder.class), anyString(), anyString(), anyString());
private void setMockCondaEnvList() throws IOException, InterruptedException {
Map<String, String> envList = new LinkedHashMap<String, String>();
envList.put("env1", "/path1");
envList.put("env2", "/path2");
doReturn(envList).when(conda).getCondaEnvs();
}
@Test
public void testListEnv() throws IOException, InterruptedException {
setCondaEnvs();
setMockCondaEnvList();
// list available env
InterpreterContext context = getInterpreterContext();
InterpreterResult result = conda.interpret("", context);
InterpreterResult result = conda.interpret("env list", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context.out.flush();
String out = new String(context.out.toByteArray());
assertTrue(out.contains(">env1<"));
assertTrue(out.contains(">/path1<"));
assertTrue(out.contains(">env2<"));
assertTrue(out.contains(">/path2<"));
assertTrue(result.toString().contains(">env1<"));
assertTrue(result.toString().contains("/path1<"));
assertTrue(result.toString().contains(">env2<"));
assertTrue(result.toString().contains("/path2<"));
}
@Test
public void testActivateEnv() throws IOException, InterruptedException {
setCondaEnvs();
setMockCondaEnvList();
InterpreterContext context = getInterpreterContext();
conda.interpret("activate env1", context);
verify(python, times(1)).open();
@ -94,6 +89,34 @@ public class PythonCondaInterpreterTest {
verify(python).setPythonCommand("python");
}
/**
 * Comment lines must be ignored and each remaining line must map its first
 * column to the rest of the line.
 */
@Test
public void testParseCondaCommonStdout()
    throws IOException, InterruptedException {
  String stdout = "# comment1\n"
      + "# comment2\n"
      + "env1 /location1\n"
      + "env2 /location2\n";

  Map<String, String> parsed = PythonCondaInterpreter.parseCondaCommonStdout(stdout);

  assertEquals("/location1", parsed.get("env1"));
  assertEquals("/location2", parsed.get("env2"));
}
/**
 * Everything after the "env" keyword must come back as separate arguments.
 */
@Test
public void testGetRestArgsFromMatcher() {
  Matcher m =
      PythonCondaInterpreter.PATTERN_COMMAND_ENV.matcher("env remove --name test --yes");
  // Assert the match instead of discarding the result: if the pattern ever
  // stops matching, the test fails here rather than inside group(1).
  assertTrue(m.matches());

  List<String> restArgs = PythonCondaInterpreter.getRestArgsFromMatcher(m);
  List<String> expected = Arrays.asList(new String[]{"remove", "--name", "test", "--yes"});
  assertEquals(expected, restArgs);
}
private InterpreterContext getInterpreterContext() {
return new InterpreterContext(
"noteId",

View file

@ -45,6 +45,7 @@ import org.apache.zeppelin.interpreter.ClassloaderInterpreter;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
@ -62,6 +63,7 @@ import org.slf4j.LoggerFactory;
public class PythonInterpreterTest {
private static final Logger LOG = LoggerFactory.getLogger(PythonProcess.class);
PythonInterpreter zeppelinPythonInterpreter = null;
PythonInterpreter pythonInterpreter = null;
PythonProcess mockPythonProcess;
String cmdHistory;
@ -88,6 +90,7 @@ public class PythonInterpreterTest {
// python interpreter
pythonInterpreter = spy(new PythonInterpreter(getPythonTestProperties()));
zeppelinPythonInterpreter = new PythonInterpreter(getPythonTestProperties());
// create interpreter group
InterpreterGroup group = new InterpreterGroup();
@ -99,6 +102,12 @@ public class PythonInterpreterTest {
when(mockPythonProcess.sendAndGetResult(eq("\n\nimport py4j\n"))).thenReturn("ImportError");
}
@After
public void afterTest() throws IOException {
pythonInterpreter.close();
zeppelinPythonInterpreter.close();
}
@Test
public void testOpenInterpreter() {
pythonInterpreter.open();
@ -172,6 +181,18 @@ public class PythonInterpreterTest {
assertEquals("%text print a", result.message().get(0).toString());
}
@Test
public void testInterpretInvalidSyntax() {
zeppelinPythonInterpreter.open();
InterpreterResult result = zeppelinPythonInterpreter.interpret("for x in range(0,3): print (\"hi\")\n\nz._displayhook()", null);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertTrue(result.message().get(0).toString().contains("hi\nhi\nhi"));
result = zeppelinPythonInterpreter.interpret("for x in range(0,3): print (\"hi\")\nz._displayhook()", null);
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertTrue(result.message().get(0).toString().contains("SyntaxError: invalid syntax"));
}
/**
* Checks if given port is open on 'localhost'
* @param port

View file

@ -57,18 +57,6 @@ public class ScaldingInterpreter extends Interpreter {
public static final List NO_COMPLETION =
Collections.unmodifiableList(new ArrayList<>());
static {
Interpreter.register(
"scalding",
"scalding",
ScaldingInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(ARGS_STRING, ARGS_STRING_DEFAULT, "Arguments for scalding REPL")
.add(MAX_OPEN_INSTANCES, MAX_OPEN_INSTANCES_DEFAULT,
"Maximum number of open interpreter instances")
.build());
}
static int numOpenInstances = 0;
private ScaldingILoop interpreter;
private ByteArrayOutputStream out;

View file

@ -0,0 +1,19 @@
[
{
"group": "scalding",
"name": "scalding",
"className": "org.apache.zeppelin.scalding.ScaldingInterpreter",
"properties": {
"args.string": {
"envName": null,
"defaultValue": "--local --repl",
"description": "Arguments for scalding REPL"
},
"max.open.instances": {
"envName": null,
"defaultValue": "50",
"description": "Maximum number of open interpreter instances"
}
}
}
]

View file

@ -35,6 +35,7 @@ private[scio] object DisplayHelpers {
private[scio] val tab = "\t"
private[scio] val newline = "\n"
private[scio] val table = "%table"
private[scio] val endTable = "%text"
private[scio] val rowLimitReachedMsg =
s"$newline<font color=red>Results are limited to " + maxResults + s" rows.</font>$newline"
private[scio] val bQSchemaIncomplete =
@ -52,6 +53,7 @@ private[scio] object DisplayHelpers {
println(sCollectionEmptyMsg)
} else {
println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}")
println(endTable)
notifyIfTruncated(it)
}
}
@ -64,6 +66,7 @@ private[scio] object DisplayHelpers {
println(sCollectionEmptyMsg)
} else {
println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}")
println(endTable)
notifyIfTruncated(it)
}
}
@ -77,6 +80,7 @@ private[scio] object DisplayHelpers {
} else {
val content = it.take(maxResults).map{ case (k, v) => s"$k$tab$v" }.mkString(newline)
println(s"$table key${tab}value$newline$content")
println(endTable)
notifyIfTruncated(it)
}
}
@ -97,6 +101,7 @@ private[scio] object DisplayHelpers {
val firstStr = first.productIterator.mkString(tab)
val content = it.take(maxResults - 1).map(_.productIterator.mkString(tab)).mkString(newline)
println(s"$table $header$newline$firstStr$newline$content")
println(endTable)
notifyIfTruncated(it)
}
}
@ -125,6 +130,7 @@ private[scio] object DisplayHelpers {
.map(r => fieldNames.map(r.get).mkString(tab))
.mkString(newline)
println(s"$table $header$newline$firstStr$newline$content")
println(endTable)
notifyIfTruncated(it)
}
}
@ -151,6 +157,7 @@ private[scio] object DisplayHelpers {
.mkString(newline)
println(s"$table $header$newline$content")
println(endTable)
notifyIfTruncated(it)
}
}

View file

@ -48,6 +48,7 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
// -----------------------------------------------------------------------------------------------
private val anyValHeader = s"$table value"
private val endTable = DisplayHelpers.endTable
"DisplayHelpers" should "support Integer SCollection via AnyVal" in {
import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection
@ -59,8 +60,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1",
"2",
"3")
"3",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Long SCollection via AnyVal" in {
@ -73,8 +76,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1",
"2",
"3")
"3",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Double SCollection via AnyVal" in {
@ -87,8 +92,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1.0",
"2.0",
"3.0")
"3.0",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Float SCollection via AnyVal" in {
@ -101,8 +108,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1.0",
"2.0",
"3.0")
"3.0",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Short SCollection via AnyVal" in {
@ -115,8 +124,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1",
"2",
"3")
"3",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Byte SCollection via AnyVal" in {
@ -129,8 +140,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"1",
"2",
"3")
"3",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Boolean SCollection via AnyVal" in {
@ -143,8 +156,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"true",
"false",
"true")
"true",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support Char SCollection via AnyVal" in {
@ -157,8 +172,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(anyValHeader,
"a",
"b",
"c")
"c",
endTable)
o.head should be(anyValHeader)
o.last should be(endTable)
}
it should "support SCollection of AnyVal over row limit" in {
@ -199,8 +216,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
o should contain theSameElementsAs Seq(stringHeader,
"a",
"b",
"c")
"c",
endTable)
o.head should be (stringHeader)
o.last should be (endTable)
}
it should "support empty SCollection of String" in {
@ -240,8 +259,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
}
o should contain theSameElementsAs Seq(kvHeader,
s"3${tab}4",
s"1${tab}2")
s"1${tab}2",
endTable)
o.head should be (kvHeader)
o.last should be (endTable)
}
it should "support KV (str keys) SCollection" in {
@ -253,8 +274,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
}
o should contain theSameElementsAs Seq(kvHeader,
s"foo${tab}2",
s"bar${tab}4")
s"bar${tab}4",
endTable)
o.head should be (kvHeader)
o.last should be (endTable)
}
it should "support KV (str values) SCollection" in {
@ -266,8 +289,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
}
o should contain theSameElementsAs Seq(kvHeader,
s"2${tab}foo",
s"4${tab}bar")
s"4${tab}bar",
endTable)
o.head should be (kvHeader)
o.last should be (endTable)
}
it should "support empty KV SCollection" in {
@ -305,8 +330,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(tupleHeader) ++ Seq.fill(3)(s"1${tab}2${tab}3"))
o should contain theSameElementsAs
(Seq(tupleHeader, endTable) ++ Seq.fill(3)(s"1${tab}2${tab}3"))
o.head should be(tupleHeader)
o.last should be (endTable)
}
it should "support SCollection of Tuple of 22" in {
@ -318,9 +345,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(tupleHeader) ++
o should contain theSameElementsAs (Seq(tupleHeader, endTable) ++
Seq.fill(3)((1 to 21).map(i => s"$i$tab").mkString + "22"))
o.head should be(tupleHeader)
o.last should be (endTable)
}
it should "support SCollection of Case Class of 22" in {
@ -332,9 +360,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(tupleHeader) ++
o should contain theSameElementsAs (Seq(tupleHeader, endTable) ++
Seq.fill(3)((1 to 21).map(i => s"$i$tab").mkString + "22"))
o.head should be(tupleHeader)
o.last should be (endTable)
}
it should "support SCollection of Case Class" in {
@ -344,9 +373,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(testCaseClassHeader) ++
o should contain theSameElementsAs (Seq(testCaseClassHeader, endTable) ++
Seq.fill(3)(s"1${tab}foo${tab}2.0"))
o.head should be(testCaseClassHeader)
o.last should be (endTable)
}
it should "support empty SCollection of Product" in {
@ -423,9 +453,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(avroGenericRecordHeader) ++
o should contain theSameElementsAs (Seq(avroGenericRecordHeader, endTable) ++
Seq.fill(3)(s"1${tab}1.0${tab}user1${tab}checking"))
o.head should be(avroGenericRecordHeader)
o.last should be (endTable)
}
it should "support SCollection of SpecificRecord Avro" in {
@ -436,9 +467,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay()
}
}
o should contain theSameElementsAs (Seq(avroAccountHeader) ++
o should contain theSameElementsAs (Seq(avroAccountHeader, endTable) ++
Seq.fill(3)(s"2${tab}checking${tab}user2${tab}2.0"))
o.head should be(avroAccountHeader)
o.last should be (endTable)
}
it should "support empty SCollection of SpecificRecord Avro" in {
@ -509,9 +541,10 @@ class DisplayHelpersTest extends FlatSpec with Matchers {
in.closeAndDisplay(bQSchema)
}
}
o should contain theSameElementsAs (Seq(bQHeader) ++
o should contain theSameElementsAs (Seq(bQHeader, endTable) ++
Seq.fill(3)(s"3${tab}3.0${tab}checking${tab}user3"))
o.head should be(bQHeader)
o.last should be (endTable)
}
it should "print error on empty BQ schema" in {

View file

@ -153,7 +153,6 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
}
urls = urlList.toArray(urls);
ClassLoader oldCl = Thread.currentThread().getContextClassLoader();
try {
URLClassLoader newCl = new URLClassLoader(urls, oldCl);
@ -169,11 +168,25 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
private Map setupPySparkEnv() throws IOException{
Map env = EnvironmentUtils.getProcEnvironment();
if (!env.containsKey("PYTHONPATH")) {
SparkConf conf = getSparkConf();
env.put("PYTHONPATH", conf.get("spark.submit.pyFiles").replaceAll(",", ":") +
env.put("PYTHONPATH", conf.get("spark.submit.pyFiles").replaceAll(",", ":") +
":../interpreter/lib/python");
}
// get additional class paths when using SPARK_SUBMIT and not using YARN-CLIENT
// also, add all packages to PYTHONPATH since there might be transitive dependencies
if (SparkInterpreter.useSparkSubmit() &&
!getSparkInterpreter().isYarnMode()) {
String sparkSubmitJars = getSparkConf().get("spark.jars").replace(",", ":");
if (!"".equals(sparkSubmitJars)) {
env.put("PYTHONPATH", env.get("PYTHONPATH") + sparkSubmitJars);
}
}
return env;
}

View file

@ -205,7 +205,6 @@ public class SparkInterpreter extends Interpreter {
private boolean hiveClassesArePresent() {
try {
this.getClass().forName("org.apache.spark.sql.hive.HiveSessionState");
this.getClass().forName("org.apache.spark.sql.hive.HiveSharedState");
this.getClass().forName("org.apache.hadoop.hive.conf.HiveConf");
return true;
} catch (ClassNotFoundException | NoClassDefFoundError e) {
@ -296,7 +295,7 @@ public class SparkInterpreter extends Interpreter {
return (DepInterpreter) p;
}
private boolean isYarnMode() {
public boolean isYarnMode() {
return getProperty("master").startsWith("yarn");
}
@ -355,7 +354,7 @@ public class SparkInterpreter extends Interpreter {
new Class[]{ String.class, String.class},
new Object[]{ "spark.sql.catalogImplementation", "in-memory"});
sparkSession = Utils.invokeMethod(builder, "getOrCreate");
logger.info("Created Spark session with Hive support");
logger.info("Created Spark session with Hive support use in-memory catalogImplementation");
}
} else {
sparkSession = Utils.invokeMethod(builder, "getOrCreate");
@ -556,7 +555,7 @@ public class SparkInterpreter extends Interpreter {
return (o instanceof String) ? (String) o : "";
}
private boolean useSparkSubmit() {
public static boolean useSparkSubmit() {
return null != System.getenv("SPARK_SUBMIT");
}
@ -727,7 +726,6 @@ public class SparkInterpreter extends Interpreter {
pathSettings.v_$eq(classpath);
settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);
// set classloader for scala compiler
settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread()
.getContextClassLoader()));
@ -746,8 +744,12 @@ public class SparkInterpreter extends Interpreter {
*
* In Spark 2.x, the REPL-generated wrapper class name should be compatible with the pattern
* ^(\$line(?:\d+)\.\$read)(?:\$\$iw)+$
*
* Because hashCode() can return a negative integer and the minus character '-' is invalid
* in a package name, we replace it with the digit '0', which still conforms to the regexp.
*
*/
System.setProperty("scala.repl.name.line", "$line" + this.hashCode());
System.setProperty("scala.repl.name.line", ("$line" + this.hashCode()).replace('-', '0'));
// To prevent 'File name too long' error on some file system.
MutableSettings.IntSetting numClassFileSetting = settings.maxClassfileName();
@ -976,7 +978,7 @@ public class SparkInterpreter extends Interpreter {
}
}
private List<File> currentClassPath() {
public List<File> currentClassPath() {
List<File> paths = classPath(Thread.currentThread().getContextClassLoader());
String[] cps = System.getProperty("java.class.path").split(File.pathSeparator);
if (cps != null) {

View file

@ -42,6 +42,7 @@ public class SparkRInterpreter extends Interpreter {
private static final Logger logger = LoggerFactory.getLogger(SparkRInterpreter.class);
private static String renderOptions;
private SparkInterpreter sparkInterpreter;
private ZeppelinR zeppelinR;
private SparkContext sc;
@ -70,7 +71,7 @@ public class SparkRInterpreter extends Interpreter {
int port = SparkRBackend.port();
SparkInterpreter sparkInterpreter = getSparkInterpreter();
this.sparkInterpreter = getSparkInterpreter();
this.sc = sparkInterpreter.getSparkContext();
SparkVersion sparkVersion = new SparkVersion(sc.version());
ZeppelinRContext.setSparkContext(sc);
@ -185,7 +186,11 @@ public class SparkRInterpreter extends Interpreter {
@Override
public int getProgress(InterpreterContext context) {
return 0;
if (sparkInterpreter != null) {
return sparkInterpreter.getProgress(context);
} else {
return 0;
}
}
@Override
@ -225,5 +230,4 @@ public class SparkRInterpreter extends Interpreter {
return false;
}
}
}

View file

@ -34,10 +34,10 @@ public class SparkVersion {
public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0");
public static final SparkVersion SPARK_2_0_0 = SparkVersion.fromVersionString("2.0.0");
public static final SparkVersion SPARK_2_1_0 = SparkVersion.fromVersionString("2.1.0");
public static final SparkVersion SPARK_2_2_0 = SparkVersion.fromVersionString("2.2.0");
public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_0_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_2_1_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_2_2_0;
private int version;
private String versionString;

View file

@ -57,7 +57,6 @@ public class ZeppelinR implements ExecuteResultHandler {
boolean rScriptInitialized = false;
Integer rScriptInitializeNotifier = new Integer(0);
/**
* Request to R repl
*/
@ -103,8 +102,6 @@ public class ZeppelinR implements ExecuteResultHandler {
boolean rResponseError = false;
Integer rResponseNotifier = new Integer(0);
/**
* Create ZeppelinR instance
* @param rCmdPath R repl commandline path
@ -216,7 +213,6 @@ public class ZeppelinR implements ExecuteResultHandler {
}
}
/**
* Send request to r repl and return response
* @return responseValue
@ -257,7 +253,6 @@ public class ZeppelinR implements ExecuteResultHandler {
}
}
/**
* Wait until src/main/resources/R/zeppelin_sparkr.R is initialized
* and call onScriptInitialized()
@ -286,14 +281,11 @@ public class ZeppelinR implements ExecuteResultHandler {
e.printStackTrace();
}
if (rScriptInitialized == false) {
throw new InterpreterException("sparkr is not responding " + errorMessage);
}
}
/**
* invoked by src/main/resources/R/zeppelin_sparkr.R
* @return
@ -337,7 +329,6 @@ public class ZeppelinR implements ExecuteResultHandler {
}
}
/**
* Create R script in tmp dir
*/
@ -381,7 +372,6 @@ public class ZeppelinR implements ExecuteResultHandler {
return zeppelinR.get(hashcode);
}
/**
* Pass InterpreterOutput to capture the repl output
* @param out
@ -390,8 +380,6 @@ public class ZeppelinR implements ExecuteResultHandler {
outputStream.setInterpreterOutput(out);
}
@Override
public void onProcessComplete(int i) {
logger.info("process complete {}", i);
@ -403,6 +391,4 @@ public class ZeppelinR implements ExecuteResultHandler {
logger.error(e.getMessage(), e);
rScriptRunning = false;
}
}

View file

@ -31,7 +31,7 @@ print(paste("LibPath ", libPath))
library(SparkR)
SparkR:::connectBackend("localhost", port)
SparkR:::connectBackend("localhost", port, 6000)
# scStartTime is needed by R/pkg/R/sparkR.R
assign(".scStartTime", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)

View file

@ -18,15 +18,15 @@
package org.apache.zeppelin.spark
import org.apache.zeppelin.interpreter.InterpreterResult.Code
import org.apache.zeppelin.interpreter.InterpreterResult.Code.{SUCCESS, ERROR}
import org.apache.zeppelin.interpreter.InterpreterResult.Code.{SUCCESS}
import org.apache.zeppelin.interpreter.InterpreterResult.Type
import org.apache.zeppelin.interpreter.InterpreterResult.Type.{TEXT, HTML, TABLE, IMG}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.nodes.Document
import org.jsoup.nodes.Document.OutputSettings
import org.jsoup.safety.Whitelist
import scala.collection.JavaConversions._
import scala.util.matching.Regex
case class RDisplay(content: String, `type`: Type, code: Code)
@ -64,11 +64,13 @@ object ZeppelinRDisplay {
}
return htmlDisplay(body, imageWidth)
}
private def textDisplay(body: Element): RDisplay = {
RDisplay(body.getElementsByTag("p").first().html(), TEXT, SUCCESS)
// remove HTML tags while preserving whitespace and newlines
val text = Jsoup.clean(body.html(), "",
Whitelist.none(), new OutputSettings().prettyPrint(false))
RDisplay(text, TEXT, SUCCESS)
}
private def tableDisplay(body: Element): RDisplay = {
@ -86,7 +88,6 @@ object ZeppelinRDisplay {
}
private def htmlDisplay(body: Element, imageWidth: String): RDisplay = {
var div = new String()
for (element <- body.children) {
@ -99,7 +100,6 @@ object ZeppelinRDisplay {
val r = (pattern findFirstIn eHtml).getOrElse("")
div = div + eOuterHtml.replace(r, "")
}
val content = div
@ -113,7 +113,5 @@ object ZeppelinRDisplay {
}
RDisplay(body.html, HTML, SUCCESS)
}
}

View file

@ -16,20 +16,18 @@
*/
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.*;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
@ -40,10 +38,13 @@ import static org.junit.Assert.*;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class PySparkInterpreterMatplotlibTest {
@Rule
public TemporaryFolder tmpDir = new TemporaryFolder();
public static SparkInterpreter sparkInterpreter;
public static PySparkInterpreter pyspark;
public static InterpreterGroup intpGroup;
private File tmpDir;
public static Logger LOGGER = LoggerFactory.getLogger(PySparkInterpreterTest.class);
private InterpreterContext context;
@ -79,7 +80,7 @@ public class PySparkInterpreterMatplotlibTest {
}
}
public static Properties getPySparkTestProperties() {
private Properties getPySparkTestProperties() throws IOException {
Properties p = new Properties();
p.setProperty("master", "local[*]");
p.setProperty("spark.app.name", "Zeppelin Test");
@ -87,6 +88,7 @@ public class PySparkInterpreterMatplotlibTest {
p.setProperty("zeppelin.spark.maxResult", "1000");
p.setProperty("zeppelin.spark.importImplicit", "true");
p.setProperty("zeppelin.pyspark.python", "python");
p.setProperty("zeppelin.dep.localrepo", tmpDir.newFolder().getAbsolutePath());
return p;
}
@ -106,10 +108,6 @@ public class PySparkInterpreterMatplotlibTest {
@Before
public void setUp() throws Exception {
tmpDir = new File(System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + System.currentTimeMillis());
System.setProperty("zeppelin.dep.localrepo", tmpDir.getAbsolutePath() + "/local-repo");
tmpDir.mkdirs();
intpGroup = new InterpreterGroup();
intpGroup.put("note", new LinkedList<Interpreter>());
@ -137,24 +135,6 @@ public class PySparkInterpreterMatplotlibTest {
new InterpreterOutput(null));
}
@After
public void tearDown() throws Exception {
delete(tmpDir);
}
private void delete(File file) {
if (file.isFile()) file.delete();
else if (file.isDirectory()) {
File[] files = file.listFiles();
if (files != null && files.length > 0) {
for (File f : files) {
delete(f);
}
}
file.delete();
}
}
@Test
public void dependenciesAreInstalled() {
// matplotlib

View file

@ -16,20 +16,22 @@
*/
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.FixMethodOrder;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -39,14 +41,17 @@ import static org.junit.Assert.*;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class PySparkInterpreterTest {
@Rule
public TemporaryFolder tmpDir = new TemporaryFolder();
public static SparkInterpreter sparkInterpreter;
public static PySparkInterpreter pySparkInterpreter;
public static InterpreterGroup intpGroup;
private File tmpDir;
public static Logger LOGGER = LoggerFactory.getLogger(PySparkInterpreterTest.class);
private InterpreterContext context;
public static Properties getPySparkTestProperties() {
private Properties getPySparkTestProperties() throws IOException {
Properties p = new Properties();
p.setProperty("master", "local[*]");
p.setProperty("spark.app.name", "Zeppelin Test");
@ -54,6 +59,7 @@ public class PySparkInterpreterTest {
p.setProperty("zeppelin.spark.maxResult", "1000");
p.setProperty("zeppelin.spark.importImplicit", "true");
p.setProperty("zeppelin.pyspark.python", "python");
p.setProperty("zeppelin.dep.localrepo", tmpDir.newFolder().getAbsolutePath());
return p;
}
@ -73,10 +79,6 @@ public class PySparkInterpreterTest {
@Before
public void setUp() throws Exception {
tmpDir = new File(System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + System.currentTimeMillis());
System.setProperty("zeppelin.dep.localrepo", tmpDir.getAbsolutePath() + "/local-repo");
tmpDir.mkdirs();
intpGroup = new InterpreterGroup();
intpGroup.put("note", new LinkedList<Interpreter>());
@ -104,24 +106,6 @@ public class PySparkInterpreterTest {
new InterpreterOutput(null));
}
@After
public void tearDown() throws Exception {
delete(tmpDir);
}
private void delete(File file) {
if (file.isFile()) file.delete();
else if (file.isDirectory()) {
File[] files = file.listFiles();
if (files != null && files.length > 0) {
for (File f : files) {
delete(f);
}
}
file.delete();
}
}
@Test
public void testBasicIntp() {
if (getSparkVersionNumber() > 11) {

View file

@ -19,7 +19,7 @@ package org.apache.zeppelin.spark;
import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -35,20 +35,24 @@ import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.junit.After;
import org.junit.Before;
import org.junit.FixMethodOrder;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class SparkInterpreterTest {
@Rule
public TemporaryFolder tmpDir = new TemporaryFolder();
public static SparkInterpreter repl;
public static InterpreterGroup intpGroup;
private InterpreterContext context;
private File tmpDir;
public static Logger LOGGER = LoggerFactory.getLogger(SparkInterpreterTest.class);
/**
@ -65,28 +69,24 @@ public class SparkInterpreterTest {
return version;
}
public static Properties getSparkTestProperties() {
public static Properties getSparkTestProperties(TemporaryFolder tmpDir) throws IOException {
Properties p = new Properties();
p.setProperty("master", "local[*]");
p.setProperty("spark.app.name", "Zeppelin Test");
p.setProperty("zeppelin.spark.useHiveContext", "true");
p.setProperty("zeppelin.spark.maxResult", "1000");
p.setProperty("zeppelin.spark.importImplicit", "true");
p.setProperty("zeppelin.dep.localrepo", tmpDir.newFolder().getAbsolutePath());
return p;
}
@Before
public void setUp() throws Exception {
tmpDir = new File(System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + System.currentTimeMillis());
System.setProperty("zeppelin.dep.localrepo", tmpDir.getAbsolutePath() + "/local-repo");
tmpDir.mkdirs();
if (repl == null) {
intpGroup = new InterpreterGroup();
intpGroup.put("note", new LinkedList<Interpreter>());
repl = new SparkInterpreter(getSparkTestProperties());
repl = new SparkInterpreter(getSparkTestProperties(tmpDir));
repl.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl);
repl.open();
@ -102,24 +102,6 @@ public class SparkInterpreterTest {
new InterpreterOutput(null));
}
@After
public void tearDown() throws Exception {
delete(tmpDir);
}
private void delete(File file) {
if (file.isFile()) file.delete();
else if (file.isDirectory()) {
File[] files = file.listFiles();
if (files != null && files.length > 0) {
for (File f : files) {
delete(f);
}
}
file.delete();
}
}
@Test
public void testBasicIntp() {
assertEquals(InterpreterResult.Code.SUCCESS,
@ -194,7 +176,7 @@ public class SparkInterpreterTest {
}
@Test
public void testSparkSql(){
public void testSparkSql() throws IOException {
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
assertEquals(Code.SUCCESS, repl.interpret("people.take(3)", context).code());
@ -202,7 +184,7 @@ public class SparkInterpreterTest {
if (getSparkVersionNumber() <= 11) { // spark 1.2 or later does not allow create multiple SparkContext in the same jvm by default.
// create new interpreter
SparkInterpreter repl2 = new SparkInterpreter(getSparkTestProperties());
SparkInterpreter repl2 = new SparkInterpreter(getSparkTestProperties(tmpDir));
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
repl2.open();
@ -236,9 +218,9 @@ public class SparkInterpreterTest {
}
@Test
public void shareSingleSparkContext() throws InterruptedException {
public void shareSingleSparkContext() throws InterruptedException, IOException {
// create another SparkInterpreter
SparkInterpreter repl2 = new SparkInterpreter(getSparkTestProperties());
SparkInterpreter repl2 = new SparkInterpreter(getSparkTestProperties(tmpDir));
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
repl2.open();
@ -252,10 +234,10 @@ public class SparkInterpreterTest {
}
@Test
public void testEnableImplicitImport() {
public void testEnableImplicitImport() throws IOException {
if (getSparkVersionNumber() >= 13) {
// Set option of importing implicits to "true", and initialize new Spark repl
Properties p = getSparkTestProperties();
Properties p = getSparkTestProperties(tmpDir);
p.setProperty("zeppelin.spark.importImplicit", "true");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);
@ -269,11 +251,11 @@ public class SparkInterpreterTest {
}
@Test
public void testDisableImplicitImport() {
public void testDisableImplicitImport() throws IOException {
if (getSparkVersionNumber() >= 13) {
// Set option of importing implicits to "false", and initialize new Spark repl
// this test should return error status when creating DataFrame from sequence
Properties p = getSparkTestProperties();
Properties p = getSparkTestProperties(tmpDir);
p.setProperty("zeppelin.spark.importImplicit", "false");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);

Some files were not shown because too many files have changed in this diff Show more