diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index 526b414782..25b263abea 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -14,7 +14,9 @@ First time? Check out the contributing guide - https://zeppelin.apache.org/contr * Put link here, and add [ZEPPELIN-*Jira number*] in PR title, eg. [ZEPPELIN-533] ### How should this be tested? -Outline the steps to test the PR here. +* First time? Setup Travis CI as described on https://zeppelin.apache.org/contribution/contributions.html#continuous-integration +* Strongly recommended: add automated unit tests for any new or changed behavior +* Outline any manual steps to test the PR here. ### Screenshots (if appropriate) diff --git a/.travis.yml b/.travis.yml index 099fb385d6..4495aa4c24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ addons: env: global: # Interpreters does not required by zeppelin-server integration tests - - INTERPRETERS='!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!python,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy' + - INTERPRETERS='!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy' matrix: include: @@ -53,7 +53,7 @@ matrix: sudo: false dist: trusty jdk: "oraclejdk8" - env: WEB_E2E="true" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pscala-2.11" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_MODULES="-pl zeppelin-web" TEST_PROJECTS="-Pweb-e2e" + env: PYTHON="2" WEB_E2E="true" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pscala-2.11" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_MODULES="-pl zeppelin-web" TEST_PROJECTS="-Pweb-e2e" addons: apt: sources: @@ -66,56 +66,57 @@ matrix: # Several tests were excluded from this configuration due to the following issues: # 
HeliumApplicationFactoryTest - https://issues.apache.org/jira/browse/ZEPPELIN-2470 # After issues are fixed these tests need to be included back by removing them from the "-Dtests.to.exclude" property - - jdk: "oraclejdk8" + - sudo: required + jdk: "oraclejdk8" dist: precise - env: SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org.apache.zeppelin.spark.*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false" + env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org.apache.zeppelin.spark.*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false" # Test selenium with spark module for 1.6.3 - jdk: "oraclejdk7" dist: precise - env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Phelium-dev -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false" + env: PYTHON="2" TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Phelium-dev -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python 
-Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false" # Test interpreter modules - jdk: "oraclejdk7" dist: precise - env: SCALA_VER="2.10" PROFILE="-Pscalding" BUILD_FLAG="package -DskipTests -DskipRat -Pr" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS="" + env: PYTHON="3" SCALA_VER="2.10" PROFILE="-Pscalding" BUILD_FLAG="install -DskipTests -DskipRat -Pr" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS="" # Test spark module for 2.2.0 with scala 2.11, livy - jdk: "oraclejdk8" dist: precise - env: SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" # Test spark module for 2.1.0 with scala 2.11, livy - jdk: "oraclejdk7" dist: precise - env: SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,livy" 
TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" # Test spark module for 2.0.2 with scala 2.11 - jdk: "oraclejdk7" dist: precise - env: SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test spark module for 1.6.3 with scala 2.10 - jdk: "oraclejdk7" dist: precise - env: SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* 
-DfailIfNoTests=false" + env: PYTHON="3" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test spark module for 1.6.3 with scala 2.11 - jdk: "oraclejdk7" dist: precise - env: SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test python/pyspark with python 2, livy 0.2 - sudo: required dist: precise jdk: "oraclejdk7" - env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.2.0" PROFILE="-Pspark-1.6 -Phadoop-2.6 -Plivy-0.2" BUILD_FLAG="package -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.10" 
SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-1.6 -Phadoop-2.6 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" # Test python/pyspark with python 3, livy 0.3 - sudo: required dist: precise jdk: "oraclejdk7" - env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.3.0" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Pscala-2.11 -Plivy-0.3" BUILD_FLAG="package -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" + env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-2.0 -Phadoop-2.6 -Pscala-2.11" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" before_install: # check files included in commit range, clear bower_components if a bower.json file has changed. 
@@ -141,8 +142,9 @@ install: before_script: - if [[ -n $SPARK_VER ]]; then travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER; fi - if [[ -n $LIVY_VER ]]; then ./testing/downloadLivy.sh $LIVY_VER; fi - - if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-server-$LIVY_VER; fi + - if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-$LIVY_VER-bin; fi - if [[ -n $LIVY_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi + - export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER - echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh - echo "export ZEPPELIN_HELIUM_REGISTRY=helium" >> conf/zeppelin-env.sh - tail conf/zeppelin-env.sh @@ -162,15 +164,13 @@ after_success: after_failure: - echo "Travis exited with ${TRAVIS_TEST_RESULT}" - find . -name rat.txt | xargs cat + - cat logs/* - cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.log - cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out - cat zeppelin-web/npm-debug.log - cat spark-*/logs/* - cat livy/target/tmp/*/output.log - - ls -R livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/* - - cat livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stdout - - cat livy/target/tmp/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stderr - cat livy/target/tmp/livy-int-test/*/output.log - - ls -R livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/* - - cat livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stdout - - cat livy/target/tmp/livy-int-test/MiniYarnMain/target/com.cloudera.livy.test.framework.MiniYarnMain/*/*/*/stderr + - ls -R livy/target/tmp/livy-int-test/MiniYarnMain/target/org.apache.livy.test.framework.MiniYarnMain/* + - cat 
livy/target/tmp/livy-int-test/MiniYarnMain/target/org.apache.livy.test.framework.MiniYarnMain/*/*/*/stdout + - cat livy/target/tmp/livy-int-test/MiniYarnMain/target/org.apache.livy.test.framework.MiniYarnMain/*/*/*/stderr diff --git a/README.md b/README.md index e12d2aedb5..126520a3a5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Apache Zeppelin -**Documentation:** [User Guide](http://zeppelin.apache.org/docs/latest/index.html)
-**Mailing Lists:** [User and Dev mailing list](http://zeppelin.apache.org/community.html)
+**Documentation:** [User Guide](https://zeppelin.apache.org/docs/latest/index.html)
+**Mailing Lists:** [User and Dev mailing list](https://zeppelin.apache.org/community.html)
**Continuous Integration:** [![Build Status](https://travis-ci.org/apache/zeppelin.svg?branch=master)](https://travis-ci.org/apache/zeppelin)
**Contributing:** [Contribution Guide](https://zeppelin.apache.org/contribution/contributions.html)
**Issue Tracker:** [Jira](https://issues.apache.org/jira/browse/ZEPPELIN)
@@ -15,15 +15,15 @@ Core feature: * Built-in Apache Spark support -To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](http://zeppelin.apache.org) +To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](https://zeppelin.apache.org) ## Getting Started ### Install binary package -Please go to [install](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from binary package. +Please go to [install](https://zeppelin.apache.org/docs/latest/install/install.html) to install Apache Zeppelin from binary package. ### Build from source -Please check [Build from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin from source. +Please check [Build from source](https://zeppelin.apache.org/docs/latest/install/build.html) to build Zeppelin from source. diff --git a/alluxio/pom.xml b/alluxio/pom.xml index 38135b8179..f36494e94c 100644 --- a/alluxio/pom.xml +++ b/alluxio/pom.xml @@ -20,10 +20,10 @@ 4.0.0 - zeppelin + interpreter-parent org.apache.zeppelin 0.8.0-SNAPSHOT - .. + ../interpreter-parent org.apache.zeppelin @@ -34,6 +34,7 @@ 1.0.0 + alluxio @@ -47,6 +48,7 @@ com.google.guava guava + 15.0 @@ -128,54 +130,12 @@ maven-enforcer-plugin - - - enforce - none - - - maven-dependency-plugin - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/../../interpreter/alluxio - false - false - true - runtime - - - - copy-artifact - package - - copy - - - ${project.build.directory}/../../interpreter/alluxio - false - false - true - runtime - - - ${project.groupId} - ${project.artifactId} - ${project.version} - ${project.packaging} - - - - - + + + maven-resources-plugin diff --git a/angular/pom.xml b/angular/pom.xml index be43e496a4..9ff2acb80e 100644 --- a/angular/pom.xml +++ b/angular/pom.xml @@ -20,10 +20,10 @@ 4.0.0 - zeppelin + interpreter-parent org.apache.zeppelin 0.8.0-SNAPSHOT - .. 
+ ../interpreter-parent org.apache.zeppelin @@ -32,6 +32,10 @@ 0.8.0-SNAPSHOT Zeppelin: Angular interpreter + + angular + + ${project.groupId} @@ -61,54 +65,12 @@ maven-enforcer-plugin - - - enforce - none - - - maven-dependency-plugin - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/../../interpreter/angular - false - false - true - runtime - - - - copy-artifact - package - - copy - - - ${project.build.directory}/../../interpreter/angular - false - false - true - runtime - - - ${project.groupId} - ${project.artifactId} - ${project.version} - ${project.packaging} - - - - - + + + maven-resources-plugin diff --git a/beam/README.md b/beam/README.md index 57150a0208..948c95cfc0 100644 --- a/beam/README.md +++ b/beam/README.md @@ -8,7 +8,7 @@ Current interpreter implementation supports the static repl. It compiles the cod You have to first build the Beam interpreter by enable the **beam** profile as follows: ``` -mvn clean package -Pbeam -DskipTests +mvn clean package -Pbeam -DskipTests -Pscala-2.10 ``` ### Notice diff --git a/beam/pom.xml b/beam/pom.xml index c02695c460..a1d4761535 100644 --- a/beam/pom.xml +++ b/beam/pom.xml @@ -20,10 +20,10 @@ 4.0.0 - zeppelin + interpreter-parent org.apache.zeppelin 0.8.0-SNAPSHOT - .. 
+ ../interpreter-parent org.apache.zeppelin @@ -35,12 +35,13 @@ 2.3.0 1.6.2 - 0.2.0-incubating + 2.0.0 4.1.1.Final 3.1.0 1.3 + beam @@ -211,6 +212,14 @@ ${beam.beam.version} jar + + + org.apache.beam + beam-runners-flink_${scala.binary.version} + ${beam.beam.version} + + + ${project.groupId} @@ -232,69 +241,18 @@ + - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - maven-enforcer-plugin - - - enforce - none - - - maven-dependency-plugin - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/../../interpreter/beam - false - false - true - runtime - - - - copy-artifact - package - - copy - - - ${project.build.directory}/../../interpreter/beam - false - false - true - runtime - - - ${project.groupId} - ${project.artifactId} - ${project.version} - ${project.packaging} - - - - - - + + maven-resources-plugin + diff --git a/bigquery/pom.xml b/bigquery/pom.xml index f974b98815..db88edafaf 100644 --- a/bigquery/pom.xml +++ b/bigquery/pom.xml @@ -21,9 +21,10 @@ 4.0.0 - zeppelin + interpreter-parent org.apache.zeppelin 0.8.0-SNAPSHOT + ../interpreter-parent org.apache.zeppelin @@ -41,6 +42,7 @@ v2-rev265-1.21.0 2.6 + bigquery @@ -99,12 +101,12 @@ maven-enforcer-plugin - - - enforce - none - - + + + maven-dependency-plugin + + + maven-resources-plugin @@ -116,63 +118,22 @@ - - maven-dependency-plugin - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/../../interpreter/bqsql - false - false - true - runtime - - - - copy-artifact - package - - copy - - - ${project.build.directory}/../../interpreter/bqsql - false - false - true - runtime - - - ${project.groupId} - ${project.artifactId} - ${project.version} - ${project.packaging} - - - - - + maven-assembly-plugin + + + + + org.apache.zeppelin.bigquery.BigQueryInterpreter + + + + + jar-with-dependencies + + - - maven-assembly-plugin - - - - - org.apache.zeppelin.bigquery.BigQueryInterpreter - - - - - jar-with-dependencies - - - + diff --git 
a/bin/common.sh b/bin/common.sh index c7100c7d02..6447ec8daf 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -122,7 +122,11 @@ JAVA_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties" export JAVA_OPTS JAVA_INTP_OPTS="${ZEPPELIN_INTP_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING}" -JAVA_INTP_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties" +if [[ -z "${ZEPPELIN_SPARK_YARN_CLUSTER}" ]]; then + JAVA_INTP_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties" +else + JAVA_INTP_OPTS+=" -Dlog4j.configuration=log4j_yarn_cluster.properties" +fi export JAVA_INTP_OPTS diff --git a/bin/interpreter.cmd b/bin/interpreter.cmd index eb59799952..8877c45409 100644 --- a/bin/interpreter.cmd +++ b/bin/interpreter.cmd @@ -27,6 +27,7 @@ if /I "%~1"=="-d" ( set INTERPRETER_ID=%~n2 ) if /I "%~1"=="-p" set PORT=%~2 +if /I "%~1"=="-c" set CALLBACK_HOST=%~2 if /I "%~1"=="-l" set LOCAL_INTERPRETER_REPO=%~2 shift goto loop @@ -127,11 +128,11 @@ if not defined ZEPPELIN_CLASSPATH_OVERRIDES ( if defined SPARK_SUBMIT ( set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file='%ZEPPELIN_LOGFILE%' - "%SPARK_SUBMIT%" --class %ZEPPELIN_SERVER% --jars %CLASSPATH% --driver-java-options "!JAVA_INTP_OPTS!" %SPARK_SUBMIT_OPTIONS% "%SPARK_APP_JAR%" %PORT% + "%SPARK_SUBMIT%" --class %ZEPPELIN_SERVER% --jars %CLASSPATH% --driver-java-options "!JAVA_INTP_OPTS!" %SPARK_SUBMIT_OPTIONS% "%SPARK_APP_JAR%" "%CALLBACK_HOST%" %PORT% ) else ( set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file="%ZEPPELIN_LOGFILE%" - "%ZEPPELIN_RUNNER%" !JAVA_INTP_OPTS! %ZEPPELIN_INTP_MEM% -cp %ZEPPELIN_CLASSPATH_OVERRIDES%;%CLASSPATH% %ZEPPELIN_SERVER% %PORT% + "%ZEPPELIN_RUNNER%" !JAVA_INTP_OPTS! 
%ZEPPELIN_INTP_MEM% -cp %ZEPPELIN_CLASSPATH_OVERRIDES%;%CLASSPATH% %ZEPPELIN_SERVER% "%CALLBACK_HOST%" %PORT% ) exit /b diff --git a/bin/interpreter.sh b/bin/interpreter.sh index 1344e319fc..d27b0767bf 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -23,7 +23,7 @@ function usage() { echo "usage) $0 -p -d -l -g " } -while getopts "hp:d:l:v:u:g:" o; do +while getopts "hc:p:d:l:v:u:g:" o; do case ${o} in h) usage @@ -32,8 +32,11 @@ while getopts "hp:d:l:v:u:g:" o; do d) INTERPRETER_DIR=${OPTARG} ;; + c) + CALLBACK_HOST=${OPTARG} # This will be used callback host + ;; p) - PORT=${OPTARG} + PORT=${OPTARG} # This will be used callback port ;; l) LOCAL_INTERPRETER_REPO=${OPTARG} @@ -140,7 +143,13 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}" fi unset PYSPARKPATH + export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" + fi + if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then + ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" + export HADOOP_CONF_DIR=${HADOOP_CONF_DIR} + else # autodetect HADOOP_CONF_HOME by heuristic if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then @@ -149,13 +158,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then export HADOOP_CONF_DIR="/etc/hadoop/conf" fi fi - - if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" - fi - - export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" fi + elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then if [[ -n "${HBASE_CONF_DIR}" ]]; then ZEPPELIN_INTP_CLASSPATH+=":${HBASE_CONF_DIR}" @@ -202,12 +206,12 @@ fi if [[ -n "${SPARK_SUBMIT}" ]]; then if [[ -n "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ "$ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER" != "false" ]]; then - INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" 
--driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${ZEPPELIN_SPARK_CONF} --proxy-user ${ZEPPELIN_IMPERSONATE_USER} ${SPARK_APP_JAR} ${PORT}` + INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${ZEPPELIN_SPARK_CONF} --proxy-user ${ZEPPELIN_IMPERSONATE_USER} ${SPARK_APP_JAR} ${CALLBACK_HOST} ${PORT}` else - INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${ZEPPELIN_SPARK_CONF} ${SPARK_APP_JAR} ${PORT}` + INTERPRETER_RUN_COMMAND+=' '` echo ${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path \"${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH}\" --driver-java-options \"${JAVA_INTP_OPTS}\" ${SPARK_SUBMIT_OPTIONS} ${ZEPPELIN_SPARK_CONF} ${SPARK_APP_JAR} ${CALLBACK_HOST} ${PORT}` fi else - INTERPRETER_RUN_COMMAND+=' '` echo ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} ` + INTERPRETER_RUN_COMMAND+=' '` echo ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${ZEPPELIN_INTP_CLASSPATH} ${ZEPPELIN_SERVER} ${CALLBACK_HOST} ${PORT} ` fi if [[ ! 
-z "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ -n "${suid}" || -z "${SPARK_SUBMIT}" ]]; then diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index e88c26fc43..5982aee2e0 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -67,6 +67,10 @@ if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes" fi +if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then + ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}" +fi + # Add jdbc connector jar # ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/jdbc/jars/jdbc-connector-jar" diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index 44fc2cfe89..a13f9db977 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -73,6 +73,10 @@ addJarInDir "${ZEPPELIN_HOME}/zeppelin-web/target/lib" ZEPPELIN_CLASSPATH="$CLASSPATH:$ZEPPELIN_CLASSPATH" +if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then + ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}" +fi + if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then echo "Log dir doesn't exist, create ${ZEPPELIN_LOG_DIR}" $(mkdir -p "${ZEPPELIN_LOG_DIR}") diff --git a/cassandra/pom.xml b/cassandra/pom.xml index 05108e9fd4..4b139e9c60 100644 --- a/cassandra/pom.xml +++ b/cassandra/pom.xml @@ -20,10 +20,10 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - zeppelin + interpreter-parent org.apache.zeppelin 0.8.0-SNAPSHOT - .. 
+ ../interpreter-parent org.apache.zeppelin @@ -49,6 +49,7 @@ 2.15.2 1.0 1.7.1 + cassandra @@ -241,55 +242,14 @@ maven-enforcer-plugin - - - enforce - none - - - maven-dependency-plugin - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/../../interpreter/cassandra - false - false - true - runtime - - - - copy-artifact - package - - copy - - - ${project.build.directory}/../../interpreter/cassandra - false - false - true - runtime - - - ${project.groupId} - ${project.artifactId} - ${project.version} - ${project.packaging} - - - - - + + + maven-resources-plugin + diff --git a/conf/log4j_yarn_cluster.properties b/conf/log4j_yarn_cluster.properties new file mode 100644 index 0000000000..532fc5ef5f --- /dev/null +++ b/conf/log4j_yarn_cluster.properties @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n + diff --git a/conf/shiro.ini.template b/conf/shiro.ini.template index 06ad9712a5..756ba79b7a 100644 --- a/conf/shiro.ini.template +++ b/conf/shiro.ini.template @@ -62,6 +62,14 @@ sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager #securityManager.cacheManager = $cacheManager +### Enables 'HttpOnly' flag in Zeppelin cookies +cookie = org.apache.shiro.web.servlet.SimpleCookie +cookie.name = JSESSIONID +cookie.httpOnly = true +### Uncomment the below line only when Zeppelin is running over HTTPS +#cookie.secure = true +sessionManager.sessionIdCookie = $cookie + securityManager.sessionManager = $sessionManager # 86,400,000 milliseconds = 24 hour securityManager.sessionManager.globalSessionTimeout = 86400000 diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index cbae4e5900..4c31669b6d 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -138,6 +138,16 @@ --> + + + + + + + + + diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index d1b33e4a52..bccb5b4691 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -95,6 +95,7 @@
  • Shiro Authentication
  • Notebook Authorization
  • Data Source Authorization
  • +
  • HTTP Security Headers
  • Notebook Storage
  • Git Storage
  • @@ -137,6 +138,7 @@
  • Lens
  • Livy
  • Markdown
  • +
  • Neo4j
  • Pig
  • Postgresql, HAWQ
  • R
  • diff --git a/docs/assets/themes/zeppelin/img/docs-img/neo4j-config.png b/docs/assets/themes/zeppelin/img/docs-img/neo4j-config.png new file mode 100644 index 0000000000..2de3699e8a Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/neo4j-config.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/neo4j-dynamic-forms.png b/docs/assets/themes/zeppelin/img/docs-img/neo4j-dynamic-forms.png new file mode 100644 index 0000000000..177e0a5e76 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/neo4j-dynamic-forms.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/neo4j-graph.png b/docs/assets/themes/zeppelin/img/docs-img/neo4j-graph.png new file mode 100644 index 0000000000..396b960db6 Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/neo4j-graph.png differ diff --git a/docs/assets/themes/zeppelin/img/docs-img/neo4j-interpreter-video.gif b/docs/assets/themes/zeppelin/img/docs-img/neo4j-interpreter-video.gif new file mode 100644 index 0000000000..28c191516f Binary files /dev/null and b/docs/assets/themes/zeppelin/img/docs-img/neo4j-interpreter-video.gif differ diff --git a/docs/assets/themes/zeppelin/img/ui-img/about_menu.png b/docs/assets/themes/zeppelin/img/ui-img/about_menu.png old mode 100644 new mode 100755 index 18ed125c9b..1668678f31 Binary files a/docs/assets/themes/zeppelin/img/ui-img/about_menu.png and b/docs/assets/themes/zeppelin/img/ui-img/about_menu.png differ diff --git a/docs/assets/themes/zeppelin/img/ui-img/settings_menu.png b/docs/assets/themes/zeppelin/img/ui-img/settings_menu.png old mode 100644 new mode 100755 index f4e8154637..9f19f5c497 Binary files a/docs/assets/themes/zeppelin/img/ui-img/settings_menu.png and b/docs/assets/themes/zeppelin/img/ui-img/settings_menu.png differ diff --git a/docs/development/writing_zeppelin_interpreter.md b/docs/development/writing_zeppelin_interpreter.md index 6ba24bc44a..f4c0bc939d 100644 --- 
a/docs/development/writing_zeppelin_interpreter.md +++ b/docs/development/writing_zeppelin_interpreter.md @@ -40,7 +40,49 @@ In 'Separate Interpreter(scoped / isolated) for each note' mode which you can se ## Make your own Interpreter Creating a new interpreter is quite simple. Just extend [org.apache.zeppelin.interpreter](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/Interpreter.java) abstract class and implement some methods. -You can include `org.apache.zeppelin:zeppelin-interpreter:[VERSION]` artifact in your build system. And you should put your jars under your interpreter directory with a specific directory name. Zeppelin server reads interpreter directories recursively and initializes interpreters including your own interpreter. +For your interpreter project, you need to make `interpreter-parent` as your parent project and use plugin `maven-enforcer-plugin`, `maven-dependency-plugin` and `maven-resources-plugin`. Here's one sample pom.xml + +``` + + 4.0.0 + + + interpreter-parent + org.apache.zeppelin + 0.8.0-SNAPSHOT + ../interpreter-parent + + + ... + + + + org.apache.zeppelin + zeppelin-interpreter + ${project.version} + provided + + + + + + + maven-enforcer-plugin + + + maven-dependency-plugin + + + maven-resources-plugin + + + + + +``` + +You should include `org.apache.zeppelin:zeppelin-interpreter:[VERSION]` as your interpreter's dependency in `pom.xml`. Bes +And you should put your jars under your interpreter directory with a specific directory name. Zeppelin server reads interpreter directories recursively and initializes interpreters including your own interpreter. There are three locations where you can store your interpreter group, name and other information. Zeppelin server tries to find the location below. Next, Zeppelin tries to find `interpreter-setting.json` in your interpreter jar. 
diff --git a/docs/index.md b/docs/index.md index f5fc524da8..8f3b551c6a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -98,6 +98,7 @@ limitations under the License. * [Shiro Authentication](./setup/security/shiro_authentication.html) * [Notebook Authorization](./setup/security/notebook_authorization.html) * [Data Source Authorization](./setup/security/datasource_authorization.html) + * [HTTP Security Headers](./setup/security/http_security_headers.html) * Notebook Storage: a guide about saving notebooks to external storage * [Git Storage](./setup/storage/storage.html#notebook-storage-in-local-git-repository) * [S3 Storage](./setup/storage/storage.html#notebook-storage-in-s3) @@ -123,11 +124,6 @@ limitations under the License. * [Useful Developer Tools](./development/contribution/useful_developer_tools.html) * [How to Contribute (code)](./development/contribution/how_to_contribute_code.html) * [How to Contribute (website)](./development/contribution/how_to_contribute_website.html) - -#### External Resources - * [Mailing List](https://zeppelin.apache.org/community.html) - * [Apache Zeppelin Wiki](https://cwiki.apache.org/confluence/display/ZEPPELIN/Zeppelin+Home) - * [Stackoverflow Questions about Zeppelin (tag: `apache-zeppelin`)](http://stackoverflow.com/questions/tagged/apache-zeppelin) #### Available Interpreters * [Alluxio](./interpreter/alluxio.html) @@ -147,6 +143,7 @@ limitations under the License. * [Lens](./interpreter/lens.html) * [Livy](./interpreter/livy.html) * [markdown](./interpreter/markdown.html) + * [Neo4j](./interpreter/neo4j.html) * [Pig](./interpreter/pig.html) * [Postgresql, HAWQ](./interpreter/postgresql.html) * [Python](./interpreter/python.html) @@ -156,3 +153,7 @@ limitations under the License. 
* [Shell](./interpreter/Shell.html) * [Spark](./interpreter/spark.html) +#### External Resources + * [Mailing List](https://zeppelin.apache.org/community.html) + * [Apache Zeppelin Wiki](https://cwiki.apache.org/confluence/display/ZEPPELIN/Zeppelin+Home) + * [Stackoverflow Questions about Zeppelin (tag: `apache-zeppelin`)](http://stackoverflow.com/questions/tagged/apache-zeppelin) diff --git a/docs/interpreter/beam.md b/docs/interpreter/beam.md index cbcd5e37d5..d992b8ee5b 100644 --- a/docs/interpreter/beam.md +++ b/docs/interpreter/beam.md @@ -44,18 +44,10 @@ import java.io.Serializable; import java.util.Arrays; import java.util.List; import java.util.ArrayList; -import org.apache.spark.api.java.*; -import org.apache.spark.api.java.function.Function; -import org.apache.spark.SparkConf; -import org.apache.spark.streaming.*; -import org.apache.spark.SparkContext; import org.apache.beam.runners.direct.*; import org.apache.beam.sdk.runners.*; import org.apache.beam.sdk.options.*; -import org.apache.beam.runners.spark.*; -import org.apache.beam.runners.spark.io.ConsoleIO; import org.apache.beam.runners.flink.*; -import org.apache.beam.runners.flink.examples.WordCount.Options; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.options.PipelineOptionsFactory; @@ -89,12 +81,12 @@ public class MinimalWordCount { }; static final List SENTENCES = Arrays.asList(SENTENCES_ARRAY); public static void main(String[] args) { - Options options = PipelineOptionsFactory.create().as(Options.class); + PipelineOptions options = PipelineOptionsFactory.create().as(PipelineOptions.class); options.setRunner(FlinkRunner.class); Pipeline p = Pipeline.create(options); p.apply(Create.of(SENTENCES).withCoder(StringUtf8Coder.of())) .apply("ExtractWords", ParDo.of(new DoFn() { - @Override + @ProcessElement public void processElement(ProcessContext c) { for (String word : c.element().split("[^a-zA-Z']+")) { if (!word.isEmpty()) { @@ -105,7 +97,7 @@ 
public class MinimalWordCount { })) .apply(Count. perElement()) .apply("FormatResults", ParDo.of(new DoFn, String>() { - @Override + @ProcessElement public void processElement(DoFn, String>.ProcessContext arg0) throws Exception { s.add("\n" + arg0.element().getKey() + "\t" + arg0.element().getValue()); diff --git a/docs/interpreter/cassandra.md b/docs/interpreter/cassandra.md index 36edcfd343..e91d995093 100644 --- a/docs/interpreter/cassandra.md +++ b/docs/interpreter/cassandra.md @@ -788,6 +788,29 @@ Below are the configuration parameters and their default value. DEFAULT + + cassandra.ssl.enabled + + Enable support for connecting to the Cassandra configured with SSL. + To connect to Cassandra configured with SSL use true + and provide a truststore file and password with following options. + + false + + + cassandra.ssl.truststore.path + + Filepath for the truststore file to use for connection to Cassandra with SSL. + + + + + cassandra.ssl.truststore.password + + Password for the truststore file to use for connection to Cassandra with SSL. + + + ## Change Log diff --git a/docs/interpreter/livy.md b/docs/interpreter/livy.md index 1741a80c8b..09bf6e1c27 100644 --- a/docs/interpreter/livy.md +++ b/docs/interpreter/livy.md @@ -144,7 +144,12 @@ Example: `spark.driver.memory` to `livy.spark.driver.memory` zeppelin.livy.ssl.trustStorePassword password for trustStore file. Used when livy ssl is enabled - + + + zeppelin.livy.http.headers + key_1: value_1; key_2: value_2 + custom http headers when calling livy rest api. Each http header is separated by `;`, and each header is one key value pair where key value is separated by `:` + **We remove livy.spark.master in zeppelin-0.7. Because we sugguest user to use livy 0.3 in zeppelin-0.7. 
And livy 0.3 don't allow to specify livy.spark.master, it enfornce yarn-cluster mode.** diff --git a/docs/interpreter/neo4j.md b/docs/interpreter/neo4j.md new file mode 100644 index 0000000000..37f1f8c935 --- /dev/null +++ b/docs/interpreter/neo4j.md @@ -0,0 +1,117 @@ +--- +layout: page +title: "Neo4j Interpreter for Apache Zeppelin" +description: "Neo4j is a native graph database, designed to store and process graphs from bottom to top." +group: interpreter +--- + +{% include JB/setup %} + +# Neo4j Interpreter for Apache Zeppelin + +
    + +## Overview +[Neo4j](https://neo4j.com/product/) is a native graph database, designed to store and process graphs from bottom to top. + + +![Neo4j - Interpreter - Video]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/neo4j-interpreter-video.gif) + +## Configuration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PropertyDefaultDescription
    neo4j.urlbolt://localhost:7687The Neo4j's BOLT url.
    neo4j.auth.typeBASICThe Neo4j's authentication type (NONE, BASIC).
    neo4j.auth.userneo4jThe Neo4j user name.
    neo4j.auth.passwordneo4jThe Neo4j user password.
    neo4j.max.concurrency50Max concurrency call from Zeppelin to Neo4j server.
    + +
    + ![Interpreter configuration]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/neo4j-config.png) +
    + + +## Enabling the Neo4j Interpreter +In a notebook, to enable the **Neo4j** interpreter, click the **Gear** icon and select **Neo4j**. + +## Using the Neo4j Interpreter +In a paragraph, use `%neo4j` to select the Neo4j interpreter and then input the Cypher commands. +For a list of Cypher commands, please refer to the official [Cypher Refcard](http://neo4j.com/docs/cypher-refcard/current/). + +```bash +%neo4j +//Sample the TrumpWorld dataset +WITH +'https://docs.google.com/spreadsheets/u/1/d/1Z5Vo5pbvxKJ5XpfALZXvCzW26Cl4we3OaN73K9Ae5Ss/export?format=csv&gid=1996904412' AS url +LOAD CSV WITH HEADERS FROM url AS row +RETURN row.`Entity A`, row.`Entity A Type`, row.`Entity B`, row.`Entity B Type`, row.Connection, row.`Source(s)` +LIMIT 10 +``` + +The Neo4j interpreter leverages the [Network display system](../usage/display_system/basic.html#network), allowing you to visualize graph results directly from the paragraph. + + +### Write your Cypher queries and navigate your graph + +This query: + +```bash +%neo4j +MATCH (vp:Person {name:"VLADIMIR PUTIN"}), (dt:Person {name:"DONALD J. TRUMP"}) +MATCH path = allShortestPaths( (vp)-[*]-(dt) ) +RETURN path +``` +produces the following result: +![Neo4j - Graph - Result]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/neo4j-graph.png) + +### Apply Zeppelin Dynamic Forms +You can leverage [Zeppelin Dynamic Form](../usage/dynamic_form/intro.html) inside your queries. 
This query: + +```bash +%neo4j +MATCH (o:Organization)-[r]-() +RETURN o.name, count(*), collect(distinct type(r)) AS types +ORDER BY count(*) DESC +LIMIT ${Show top=10} +``` + +produces the following result: +![Neo4j - Zeppelin - Dynamic Forms]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/neo4j-dynamic-forms.png) + diff --git a/docs/interpreter/python.md b/docs/interpreter/python.md index b4b5ca8651..1965fc9569 100644 --- a/docs/interpreter/python.md +++ b/docs/interpreter/python.md @@ -232,6 +232,70 @@ SELECT * FROM rates WHERE age < 40 Otherwise it can be referred to as `%python.sql` +## IPython Support + +IPython is more powerful than the default python interpreter with extra functionality. You can use IPython with Python 2 or Python 3, depending on which Python you set in `zeppelin.python`. + + **Prerequisites** + + - Jupyter `pip install jupyter` + - grpcio `pip install grpcio` + +If you have already installed Anaconda, then you just need to install `grpcio`, as Jupyter is already included in Anaconda. + +In addition to all basic functions of the python interpreter, you can use all the IPython advanced features as you would in Jupyter Notebook. + +e.g. + +Use IPython magic + +``` +%python.ipython + +#python help +range? + +#timeit +%timeit range(100) +``` + +Use matplotlib + +``` +%python.ipython + + +%matplotlib inline +import matplotlib.pyplot as plt + +print("hello world") +data=[1,2,3,4] +plt.figure() +plt.plot(data) +``` + +We also make `ZeppelinContext` available in IPython Interpreter. You can use `ZeppelinContext` to create dynamic forms and display pandas DataFrame. + +e.g. + +Create dynamic form + +``` +z.input(name='my_name', defaultValue='hello') +``` + +Show pandas dataframe + +``` +import pandas as pd +df = pd.DataFrame({'id':[1,2,3], 'name':['a','b','c']}) +z.show(df) + +``` + +By default, we would use IPython in `%python.python` if IPython is available. Otherwise it would fall back to the original Python implementation. 
+If you don't want to use IPython, then you can set `zeppelin.python.useIPython` to `false` in the interpreter setting. + ## Technical description For in-depth technical details on current implementation please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md). diff --git a/docs/interpreter/shell.md b/docs/interpreter/shell.md index 9d4bfe7787..d285cf4c04 100644 --- a/docs/interpreter/shell.md +++ b/docs/interpreter/shell.md @@ -43,6 +43,11 @@ At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property v 60000 Shell command time out in millisecs + + shell.working.directory.user.home + false + If this is set to true, the shell's working directory will be set to the user's home directory + zeppelin.shell.auth.type diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index 122c8db3b8..bbd9065faa 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -181,6 +181,7 @@ For example, * **local[*]** in local mode * **spark://master:7077** in standalone cluster * **yarn-client** in Yarn client mode + * **yarn-cluster** in Yarn cluster mode * **mesos://host:5050** in Mesos cluster That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way. @@ -188,6 +189,11 @@ For the further information about Spark & Zeppelin version compatibility, please > Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile. +### 3. Yarn mode +Zeppelin supports both yarn client and yarn cluster modes (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify `SPARK_HOME` & `HADOOP_CONF_DIR`. +You can specify them either in `zeppelin-env.sh` or in the interpreter setting page. Specifying them in `zeppelin-env.sh` means you can use only one version of `spark` & `hadoop`. 
Specifying them +in the interpreter setting page means you can use multiple versions of `spark` & `hadoop` in one zeppelin instance. + ## SparkContext, SQLContext, SparkSession, ZeppelinContext SparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext` and `z`, respectively, in Scala, Python and R environments. Staring from 0.6.1 SparkSession is available as variable `spark` when you are using Spark 2.x. @@ -196,6 +202,13 @@ Staring from 0.6.1 SparkSession is available as variable `spark` when you are us +### How to pass property to SparkConf + +There are 2 kinds of properties that are passed to SparkConf: + + * Standard spark property (prefix with `spark.`). e.g. `spark.executor.memory` will be passed to `SparkConf` + * Non-standard spark property (prefix with `zeppelin.spark.`). e.g. for `zeppelin.spark.property_1`, `property_1` will be passed to `SparkConf` + ## Dependency Management There are two ways to load external libraries in Spark interpreter. First is using interpreter setting menu and second is loading Spark properties. @@ -414,6 +427,12 @@ You can choose one of `shared`, `scoped` and `isolated` options wheh you configu Spark interpreter creates separated Scala compiler per each notebook but share a single SparkContext in `scoped` mode (experimental). It creates separated SparkContext per each notebook in `isolated` mode. +## IPython support + +By default, zeppelin would use IPython in `pyspark` when IPython is available. Otherwise it would fall back to the original PySpark implementation. +If you don't want to use IPython, then you can set `zeppelin.pyspark.useIPython` to `false` in the interpreter setting. 
For the IPython features, you can refer to the doc +[Python Interpreter](python.html) + ## Setting up Zeppelin with Kerberos Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN: diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md index 9886ca55c2..f5eb96945d 100644 --- a/docs/setup/basics/how_to_build.md +++ b/docs/setup/basics/how_to_build.md @@ -2,7 +2,7 @@ layout: page title: "How to Build Zeppelin from source" description: "How to build Zeppelin from source" -group: setup/basics +group: setup/basics --- +{% include JB/setup %} + +# Setting up HTTP Response Headers for Zeppelin + +
    + +Apache Zeppelin can be configured to include HTTP Headers which aid in preventing Cross Site Scripting (XSS) and Cross-Frame Scripting (XFS), and also enforce HTTP Strict Transport Security. Apache Zeppelin also has configuration available to set the Application Server Version to a desired value. + +## Setting up HTTP Strict Transport Security (HSTS) Response Header + +Enabling HSTS Response Header prevents Man-in-the-middle attacks by automatically redirecting HTTP requests to HTTPS when Zeppelin Server is running on SSL. Read on how to configure SSL for Zeppelin [here](../operation/configuration.html). Even if a web page contains any resource which gets served over HTTP or any HTTP links, it will automatically be redirected to HTTPS for the target domain. +It also prevents MITM attacks by not allowing the User to override the invalid certificate message when an Attacker presents an invalid SSL certificate to the User. + +The following property needs to be updated in the zeppelin-site.xml in order to enable HSTS. You can choose an appropriate value for "max-age". + +``` + + zeppelin.server.strict.transport + max-age=631138519 + The HTTP Strict-Transport-Security response header is a security feature that lets a web site tell browsers that it should only be communicated with using HTTPS, instead of using HTTP. Enable this when Zeppelin is running on HTTPS. Value is in Seconds, the default value is equivalent to 20 years. + +``` + + +Possible values are: + +* max-age=\<expire-time\> +* max-age=\<expire-time\>; includeSubDomains +* max-age=\<expire-time\>; preload + +Read more about HSTS [here](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security). + +## Setting up X-XSS-PROTECTION Header + +The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari Web browsers that initiates a configured action when they detect reflected cross-site scripting (XSS) attacks. 
+ +The following property needs to be updated in the zeppelin-site.xml in order to set X-XSS-PROTECTION header. + +``` + + zeppelin.server.xxss.protection + 1; mode=block + The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari that stops pages from loading when they detect reflected cross-site scripting (XSS) attacks. When value is set to 1 and a cross-site scripting attack is detected, the browser will sanitize the page (remove the unsafe parts). + +``` + + +You can choose appropriate value from below. + +* 0 (Disables XSS filtering) +* 1 (Enables XSS filtering. If a cross-site scripting attack is detected, the browser will sanitize the page.) +* 1; mode=block (Enables XSS filtering. The browser will prevent rendering of the page if an attack is detected.) + +Read more about HTTP X-XSS-Protection response header [here](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-XSS-Protection). + +## Setting up X-Frame-Options Header + +The X-Frame-Options HTTP response header can indicate browser to avoid clickjacking attacks, by ensuring that their content is not embedded into other sites in a ``,`