This commit is contained in:
Javier Fuentes 2020-04-13 14:56:09 -04:00
commit b720757080
431 changed files with 24937 additions and 6651 deletions

View file

@ -15,9 +15,6 @@
language: java
sudo: false
before_cache:
- sudo chown -R travis:travis $HOME/.m2
@ -51,9 +48,9 @@ services:
env:
global:
# Interpreters not required by zeppelin-server integration tests
- INTERPRETERS='!beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql'
- INTERPRETERS='!beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb'
matrix:
jobs:
include:
# Test License compliance using RAT tool
- jdk: "openjdk8"
@ -61,16 +58,14 @@ matrix:
env: SCALA_VER="2.11" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
# Default build command, no tests
- sudo: required
jdk: "openjdk8"
- jdk: "openjdk8"
dist: xenial
env: BUILD_FLAG="clean package -T C2 -DskipTests" TEST_FLAG="test -DskipTests"
env: BUILD_FLAG="clean package -T C2 -DskipTests -Pweb-angular" TEST_FLAG="test -DskipTests -Pweb-angular"
# Run e2e tests (in zeppelin-web)
# chrome dropped the support for precise (ubuntu 12.04), so need to use trusty
# also, can't use JDK 7 in trusty: https://github.com/travis-ci/travis-ci/issues/7884
- os: linux
sudo: false
dist: xenial
jdk: "openjdk8"
env: CI="true" WEB_E2E="true" PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Phadoop2 -Pscala-2.11" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_MODULES="-pl zeppelin-web" TEST_PROJECTS="-Pweb-e2e"
@ -81,23 +76,21 @@ matrix:
# Run tests (in zeppelin-web-angular)
- os: linux
sudo: false
dist: xenial
jdk: "openjdk8"
env: CI="true" BUILD_FLAG="clean -DskipTests -DskipRat" TEST_FLAG="package -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_MODULES="-pl zeppelin-web-angular"
env: CI="true" BUILD_FLAG="clean -DskipTests -DskipRat" TEST_FLAG="package -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_MODULES="-pl zeppelin-web-angular -Pweb-angular"
addons:
apt:
packages:
- google-chrome-stable
# Test core modules
# Test core modules (zeppelin-interpreter,zeppelin-zengine,zeppelin-server)
# Several tests were excluded from this configuration due to the following issues:
# HeliumApplicationFactoryTest - https://issues.apache.org/jira/browse/ZEPPELIN-2470
# After issues are fixed these tests need to be included back by removing them from the "-Dtests.to.exclude" property
- sudo: required
jdk: "openjdk8"
- jdk: "openjdk8"
dist: xenial
env: PYTHON="3" R="true" PROFILE="-Pspark-2.2 -Phelium-dev -Pexamples -Pspark-scala-2.11" BUILD_FLAG="install -Pbuild-distr -DskipRat -DskipTests" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/JdbcIntegrationTest.java,**/SparkIntegrationTest.java,**/ZeppelinSparkClusterTest.java,**/org/apache/zeppelin/spark/*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false"
env: BUILD_PLUGINS="true" PYTHON="3" R="true" PROFILE="-Phelium-dev -Pexamples" BUILD_FLAG="install -Pbuild-distr -DskipRat -DskipTests" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl zeppelin-server,zeppelin-web,spark/spark-dependencies,markdown,angular,shell -am" TEST_PROJECTS="-Dtests.to.exclude=**/org/apache/zeppelin/spark/*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false"
# Test selenium with spark module for spark 2.3
- jdk: "openjdk8"
@ -109,63 +102,64 @@ matrix:
# Test interpreter modules
- jdk: "openjdk8"
dist: xenial
env: PYTHON="3" R="true" SCALA_VER="2.10" TENSORFLOW="1.13.1" PROFILE="-Pscala-2.10" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,zeppelin-interpreter-api,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS=""
env: PYTHON="3" R="true" SCALA_VER="2.10" TENSORFLOW="1.13.1" PROFILE="-Pscala-2.10" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,zeppelin-interpreter-shaded,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS=""
# Run Spark integration test and unit test separately for each spark version
# Run Spark integration test and unit test
# ZeppelinSparkClusterTest24, SparkIntegrationTest24, JdbcIntegrationTest, Unit test of Spark 2.4 (Scala-2.11)
- sudo: required
jdk: "openjdk8"
# Run Spark integration tests in one Zeppelin instance (2.4, 2.3, 2.2)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.11" PROFILE="-Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest24,SparkIntegrationTest24,JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.11" PROFILE="-Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown -am" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest24,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22 -DfailIfNoTests=false"
# ZeppelinSparkClusterTest24, SparkIntegrationTest24, JdbcIntegrationTest, Unit test of Spark 2.4 (Scala-2.12)
- sudo: required
jdk: "openjdk8"
# Run Spark integration tests in one Zeppelin instance (2.1, 2.0, 1.6)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.12" PROFILE="-Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest24,SparkIntegrationTest24,JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="true" PYTHON="2" SCALA_VER="2.11" PROFILE="-Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown -am" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest21,SparkIntegrationTest21,ZeppelinSparkClusterTest20,SparkIntegrationTest20,ZeppelinSparkClusterTest16,SparkIntegrationTest16 -DfailIfNoTests=false"
# ZeppelinSparkClusterTest23, SparkIntegrationTest23, Unit test of Spark 2.3 (Scala-2.11) and Unit test PythonInterpreter under python2
- sudo: required
jdk: "openjdk8"
# JdbcIntegrationTest, Unit test of Spark 2.4 (Scala-2.11)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.11" PROFILE="-Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest23,SparkIntegrationTest23,org.apache.zeppelin.spark.*,apache.zeppelin.python.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.11" PROFILE="-Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark/spark-dependencies,markdown -am" TEST_PROJECTS="-Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false"
# ZeppelinSparkClusterTest22, SparkIntegrationTest22, Unit test of Spark 2.2 (Scala-2.10) and Unit test PythonInterpreter under python3
- sudo: required
jdk: "openjdk8"
# Unit test of Spark 2.4 (Scala-2.12)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest22,SparkIntegrationTest22,org.apache.zeppelin.spark.*,apache.zeppelin.python.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="3" SCALA_VER="2.12" PROFILE="-Pspark-2.4 -Pspark-scala-2.12 -Phadoop2" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false"
# ZeppelinSparkClusterTest21, SparkIntegrationTest21, Unit test of Spark 2.1 (Scala-2.10)
- sudo: required
jdk: "openjdk8"
# Unit test of Spark 2.3 (Scala-2.11) and Unit test python, jupyter and r interpreter under python2
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-2.1 -Phadoop2 -Pspark-scala-2.10 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest21,SparkIntegrationTest21,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="3" SCALA_VER="2.11" PROFILE="-Pspark-2.3 -Pspark-scala-2.11 -Phadoop2" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false"
# ZeppelinSparkClusterTest20, SparkIntegrationTest20, Unit test of Spark 2.0 (Scala-2.10), Use python 3.5 because spark 2.0 doesn't support python 3.6 +
- sudo: required
jdk: "openjdk8"
# Unit test of Spark 2.2 (Scala-2.10) and Unit test python, jupyter and r interpreter under python3
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="2" SCALA_VER="2.10" PROFILE="-Pspark-2.0 -Phadoop2 -Pspark-scala-2.10 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest20,SparkIntegrationTest20,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-2.2 -Pspark-scala-2.10 -Phadoop2" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false"
# ZeppelinSparkClusterTest16, SparkIntegrationTest16, Unit test of Spark 1.6 (Scala-2.10)
- sudo: required
jdk: "openjdk8"
# Unit test of Spark 2.1 (Scala-2.10)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="true" PYTHON="2" SCALA_VER="2.10" PROFILE="-Pspark-1.6 -Phadoop2 -Pspark-scala-2.10 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-interpreter-integration,zeppelin-web,spark/spark-dependencies,markdown" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest16,SparkIntegrationTest16,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-2.1 -Phadoop2 -Pspark-scala-2.10 -Pintegration" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Unit test of Spark 2.0 (Scala-2.10), Use python 3.5 because spark 2.0 doesn't support python 3.6 +
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.10" PROFILE="-Pspark-2.0 -Phadoop2 -Pspark-scala-2.10" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Unit test of Spark 1.6 (Scala-2.10)
- jdk: "openjdk8"
dist: xenial
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.10" PROFILE="-Pspark-1.6 -Phadoop2 -Pspark-scala-2.10" R="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl spark/spark-dependencies -am" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test python/pyspark with python 2, livy 0.5
- sudo: required
dist: xenial
- dist: xenial
jdk: "openjdk8"
env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" LIVY_VER="0.5.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl livy" TEST_PROJECTS=""
env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" LIVY_VER="0.5.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Pscala-2.10" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl livy -am" TEST_PROJECTS=""
# Test livy 0.5 with spark 2.2.0 under python3
- sudo: required
dist: xenial
- dist: xenial
jdk: "openjdk8"
env: PYTHON="3" SPARK_VER="2.2.0" HADOOP_VER="2.6" LIVY_VER="0.5.0-incubating" PROFILE="" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl livy" TEST_PROJECTS=""
env: PYTHON="3" SPARK_VER="2.2.0" HADOOP_VER="2.6" LIVY_VER="0.5.0-incubating" PROFILE="" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl livy -am" TEST_PROJECTS=""
before_install:
# check files included in commit range, clear bower_components if a bower.json file has changed.
@ -177,7 +171,7 @@ before_install:
- clearcache=$(echo $gitlog | grep -c -E "clear bower|bower clear" || true)
- if [ "$hasbowerchanged" -gt 0 ] || [ "$clearcache" -gt 0 ]; then echo "Clearing bower_components cache"; rm -r zeppelin-web/bower_components; npm cache verify; else echo "Using cached bower_components."; fi
- echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.defaultLogLevel=warn'" >> ~/.mavenrc
- ./testing/install_external_dependencies.sh
- bash -x ./testing/install_external_dependencies.sh
- ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
- ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
- "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
@ -227,3 +221,4 @@ after_failure:
- cat livy/target/tmp/livy-int-test/MiniYarnMain/target/org.apache.livy.test.framework.MiniYarnMain/*/*/*/stdout
- cat livy/target/tmp/livy-int-test/MiniYarnMain/target/org.apache.livy.test.framework.MiniYarnMain/*/*/*/stderr
- cat zeppelin-zengine/target/org.apache.zeppelin.interpreter.MiniHadoopCluster/*/*/*/stdout
- cat flink/*.log

View file

@ -266,6 +266,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(Apache 2.0) concurrentunit (https://github.com/jhalterman/concurrentunit)
(Apache 2.0) Embedded MongoDB (https://github.com/flapdoodle-oss/de.flapdoodle.embed.mongo)
(Apache 2.0) Kotlin (https://github.com/JetBrains/kotlin)
(Apache 2.0) s3proxy (https://github.com/gaul/s3proxy)
========================================================================
BSD 3-Clause licenses

View file

@ -231,7 +231,7 @@
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter-api</artifactId>
<artifactId>zeppelin-interpreter-shaded</artifactId>
<version>${project.version}</version>
</dependency>

View file

@ -69,7 +69,7 @@ fi
. "${bin}/common.sh"
ZEPPELIN_INTERPRETER_API_JAR=$(find "${ZEPPELIN_HOME}/interpreter" -name 'zeppelin-interpreter-api-*.jar')
ZEPPELIN_INTERPRETER_API_JAR=$(find "${ZEPPELIN_HOME}/interpreter" -name 'zeppelin-interpreter-shaded-*.jar')
ZEPPELIN_INTP_CLASSPATH="${CLASSPATH}:${ZEPPELIN_INTERPRETER_API_JAR}"
# construct classpath
@ -83,7 +83,7 @@ if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" ]]; then
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes"
fi
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter-api/target"
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter-shaded/target"
addJarInDirForIntp "${INTERPRETER_DIR}"
HOSTNAME=$(hostname)
@ -203,11 +203,14 @@ elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "flink" ]]; then
addJarInDirForIntp "${FLINK_HOME}/lib"
addJarInDirForIntp "${FLINK_HOME}/opt"
addEachJarInDirRecursiveForIntp "${FLINK_HOME}/lib"
FLINK_PYTHON_JAR=$(find "${FLINK_HOME}/opt" -name 'flink-python_*.jar')
ZEPPELIN_INTP_CLASSPATH+=":${FLINK_PYTHON_JAR}"
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=`hadoop classpath`
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH+=":`hadoop classpath`"
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR}
else
# autodetect HADOOP_CONF_HOME by heuristic

View file

@ -16,24 +16,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Run Zeppelin
# Run Zeppelin
#
USAGE="Usage: bin/zeppelin.sh [--config <conf-dir>]"
# Check whether there is a passwd entry for the container UID
myuid=$(id -u)
mygid=$(id -g)
# turn off -e for getent because it will return error code in anonymous uid case
set +e
uidentry=$(getent passwd $myuid)
set -e
if [[ "$1" == "--config" ]]; then
shift
conf_dir="$1"
if [[ ! -d "${conf_dir}" ]]; then
echo "ERROR : ${conf_dir} is not a directory"
echo ${USAGE}
exit 1
else
export ZEPPELIN_CONF_DIR="${conf_dir}"
fi
shift
# If there is no passwd entry for the container UID, attempt to create one
if [ -z "$uidentry" ] ; then
if [ -w /etc/passwd ] ; then
echo "zeppelin:x:$myuid:$mygid:anonymous uid:$Z_HOME:/bin/false" >> /etc/passwd
else
echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
fi
fi
USAGE="Usage: bin/zeppelin.sh [--config <conf-dir>] [--run <noteId>]"

# Parse command-line options.
#   --config <conf-dir>  exported as ZEPPELIN_CONF_DIR
#   --run <noteId>       exported as ZEPPELIN_NOTEBOOK_RUN_ID
# Every other argument is collected in POSITIONAL, in order, and put back as
# the positional parameters once parsing is finished.
POSITIONAL=()
while [[ $# -gt 0 ]]
do
  key="$1"

  case $key in
    --config)
      # The option needs a value; report it instead of failing on `shift`.
      if [[ $# -lt 2 ]]; then
        echo "ERROR : --config requires a value"
        echo "${USAGE}"
        exit 1
      fi
      export ZEPPELIN_CONF_DIR="$2"
      shift # past argument
      shift # past value
      ;;
    --run)
      # The option needs a value; report it instead of failing on `shift`.
      if [[ $# -lt 2 ]]; then
        echo "ERROR : --run requires a value"
        echo "${USAGE}"
        exit 1
      fi
      export ZEPPELIN_NOTEBOOK_RUN_ID="$2"
      shift # past argument
      shift # past value
      ;;
    *)
      # Unrecognized argument: keep it and consume it. Without this branch
      # the argument is never shifted and the loop never terminates.
      POSITIONAL+=("$1")
      shift # past argument
      ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters
bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "${bin}">/dev/null; pwd)
@ -46,7 +69,7 @@ fi
HOSTNAME=$(hostname)
ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log"
LOG="${ZEPPELIN_LOG_DIR}/zeppelin-cli-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.out"
ZEPPELIN_SERVER=org.apache.zeppelin.server.ZeppelinServer
JAVA_OPTS+=" -Dzeppelin.log.file=${ZEPPELIN_LOGFILE}"

View file

@ -36,7 +36,6 @@
<cassandra.driver.version>3.7.2</cassandra.driver.version>
<snappy.version>1.1.2.6</snappy.version>
<lz4.version>1.4.1</lz4.version>
<commons-lang.version>3.3.2</commons-lang.version>
<scalate.version>1.7.1</scalate.version>
<cassandra.guava.version>19.0</cassandra.guava.version>
@ -44,10 +43,6 @@
<achilles.version>3.2.4-Zeppelin</achilles.version>
<jna.version>4.2.0</jna.version>
<!-- plugin versions -->
<plugin.scala.version>2.15.2</plugin.scala.version>
<plugin.scalatest.version>1.0</plugin.scalatest.version>
<plugin.scalate.version>1.7.1</plugin.scalate.version>
<interpreter.name>cassandra</interpreter.name>
</properties>
@ -117,7 +112,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang.version}</version>
</dependency>
<dependency>
@ -201,7 +195,6 @@
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>${plugin.scala.version}</version>
<executions>
<execution>
<id>compile</id>
@ -229,7 +222,6 @@
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>${plugin.scalatest.version}</version>
<executions>
<execution>
<id>test</id>
@ -243,7 +235,6 @@
<plugin>
<groupId>org.scalatra.scalate</groupId>
<artifactId>maven-scalate-plugin_${scala.binary.version}</artifactId>
<version>${plugin.scalate.version}</version>
<executions>
<execution>
<id>compile</id>

View file

@ -193,6 +193,42 @@
</property>
-->
<!-- Aliyun OSS notebook storage -->
<!-- Creates the following directory structure: oss://{bucket}/{notebook_dir}/note_path -->
<!--
<property>
<name>zeppelin.notebook.oss.bucket</name>
<value>zeppelin</value>
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.oss.endpoint</name>
<value>http://oss-cn-hangzhou.aliyuncs.com</value>
<description>endpoint for oss bucket</description>
</property>
<property>
<name>zeppelin.notebook.oss.accesskeyid</name>
<value></value>
<description>Access key id for your OSS account</description>
</property>
<property>
<name>zeppelin.notebook.oss.accesskeysecret</name>
<value></value>
<description>Access key secret for your OSS account</description>
</property>
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.OSSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
-->
<!-- If using Azure for storage use the following settings -->
<!--
<property>
@ -446,8 +482,8 @@
<property>
<name>zeppelin.websocket.max.text.message.size</name>
<value>1024000</value>
<description>Size in characters of the maximum text message to be received by websocket. Defaults to 1024000</description>
<value>10240000</value>
<description>Size in characters of the maximum text message to be received by websocket. Defaults to 10240000</description>
</property>
<property>
@ -456,6 +492,12 @@
<description>Enable directory listings on server.</description>
</property>
<property>
<name>zeppelin.interpreter.yarn.monitor.interval_secs</name>
<value>10</value>
<description>Check interval in secs for yarn apps monitors</description>
</property>
<!--
<property>
<name>zeppelin.interpreter.lifecyclemanager.class</name>
@ -477,13 +519,11 @@
</property>
-->
<!--
<property>
<name>zeppelin.server.jetty.name</name>
<value>Jetty(7.6.0.v20120127)</value>
<value> </value>
<description>Hardcoding Application Server name to Prevent Fingerprinting</description>
</property>
-->
<!--
<property>
@ -531,13 +571,11 @@
</property>
-->
<!--
<property>
<name>zeppelin.server.xframe.options</name>
<value>SAMEORIGIN</value>
<description>The X-Frame-Options HTTP response header can be used to indicate whether or not a browser should be allowed to render a page in a frame/iframe/object.</description>
</property>
-->
<!--
<property>
@ -546,22 +584,18 @@
<description>The HTTP Strict-Transport-Security response header is a security feature that lets a web site tell browsers that it should only be communicated with using HTTPS, instead of using HTTP. Enable this when Zeppelin is running on HTTPS. Value is in Seconds, the default value is equivalent to 20 years.</description>
</property>
-->
<!--
<property>
<name>zeppelin.server.xxss.protection</name>
<value>1</value>
<value>1; mode=block</value>
<description>The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari that stops pages from loading when they detect reflected cross-site scripting (XSS) attacks. When value is set to 1 and a cross-site scripting attack is detected, the browser will sanitize the page (remove the unsafe parts).</description>
</property>
-->
<!--
<property>
<name>zeppelin.server.xcontent.type.options</name>
<value>nosniff</value>
<description>The HTTP X-Content-Type-Options response header helps to prevent MIME type sniffing attacks. It directs the browser to honor the type specified in the Content-Type header, rather than trying to determine the type from the content itself. The default value "nosniff" is really the only meaningful value. This header is supported on all browsers except Safari and Safari on iOS.</description>
</property>
-->
<!--
<property>
@ -653,10 +687,41 @@
<description>Kubernetes yaml spec files</description>
</property>
<property>
<name>zeppelin.docker.container.image</name>
<value>apache/zeppelin:0.8.0</value>
<description>Docker image for interpreters</description>
</property>
<property>
<name>zeppelin.docker.container.image</name>
<value>apache/zeppelin:0.8.0</value>
<description>Docker image for interpreters</description>
</property>
<property>
<name>zeppelin.search.index.rebuild</name>
<value>false</value>
<description>Whether rebuild index when zeppelin start. If true, it would read all notes and rebuild the index, this would consume lots of memory if you have large amounts of notes, so by default it is false</description>
</property>
<property>
<name>zeppelin.search.use.disk</name>
<value>true</value>
<description>Whether using disk for storing search index, if false, memory will be used instead.</description>
</property>
<property>
<name>zeppelin.search.index.path</name>
<value>/tmp/zeppelin-index</value>
<description>path for storing search index on disk.</description>
</property>
<property>
<name>zeppelin.jobmanager.enable</name>
<value>false</value>
<description>The Job tab on the Zeppelin page is not very useful, yet it consumes a lot of memory and affects performance.
Disabling it can save a lot of memory.</description>
</property>
<property>
<name>zeppelin.spark.only_yarn_cluster</name>
<value>false</value>
<description>Whether only allow yarn cluster mode</description>
</property>
</configuration>

View file

@ -46,7 +46,7 @@ usage() {
function git_clone() {
echo "Clone the source"
# clone source
git clone https://git-wip-us.apache.org/repos/asf/zeppelin.git "${WORKING_DIR}/zeppelin"
git clone https://gitbox.apache.org/repos/asf/zeppelin.git "${WORKING_DIR}/zeppelin"
if [[ $? -ne 0 ]]; then
echo "Can not clone source repository"

View file

@ -65,7 +65,6 @@ function make_binary_release() {
cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
./dev/change_scala_version.sh "${SCALA_VERSION}"
echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
if [[ $? -ne 0 ]]; then
@ -98,8 +97,8 @@ function make_binary_release() {
git_clone
make_source_package
make_binary_release all "-Phadoop-2.6 -Pscala-${SCALA_VERSION}"
make_binary_release netinst "-Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"
make_binary_release netinst "-Pweb-angular -Phadoop-2.6 -pl !beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!kylin,!lens,!cassandra,!elasticsearch,!bigquery,!alluxio,!scio,!livy,!groovy,!sap,!java,!geode,!neo4j,!hazelcastjet,!submarine,!sparql,!mongodb,!ksql,!scalding -am"
make_binary_release all "-Pweb-angular -Phadoop-2.6"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"

View file

@ -46,8 +46,8 @@ if [[ $RELEASE_VERSION == *"SNAPSHOT"* ]]; then
DO_SNAPSHOT="yes"
fi
PUBLISH_PROFILES="-Ppublish-distr -Phadoop-2.6 -Pr"
PROJECT_OPTIONS="-pl !zeppelin-distribution"
PUBLISH_PROFILES="-Ppublish-distr -Phadoop-2.6 -Pweb-angular"
PROJECT_OPTIONS="-pl !zeppelin-distribution -Dmaven.javadoc.skip=true"
NEXUS_STAGING="https://repository.apache.org/service/local/staging"
NEXUS_PROFILE="153446d1ac37c4"
@ -127,27 +127,13 @@ function publish_to_maven() {
rm -rf $HOME/.m2/repository/org/apache/zeppelin
# build with scala-2.10
echo "mvn clean install -DskipTests \
-Pscala-2.10 -Pbeam \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
mvn clean install -DskipTests -Pscala-2.10 -Pbeam \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
if [[ $? -ne 0 ]]; then
echo "Build with scala 2.10 failed."
exit 1
fi
# build with scala-2.11
"${BASEDIR}/change_scala_version.sh" 2.11
echo "mvn clean install -DskipTests \
-Pscala-2.11 \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
mvn clean install -DskipTests -Pscala-2.11 \
mvn clean install -DskipTests \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
if [[ $? -ne 0 ]]; then
echo "Build with scala 2.11 failed."
echo "Build failed."
exit 1
fi

View file

@ -28,7 +28,7 @@ done
set -e
git clone https://git-wip-us.apache.org/repos/asf/zeppelin.git "${WORKING_DIR}"
git clone https://gitbox.apache.org/repos/asf/zeppelin.git "${WORKING_DIR}"
pushd "${WORKING_DIR}"
git checkout "${GIT_BRANCH}"

View file

@ -107,6 +107,7 @@
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-local-git-repository">Git Storage</a></li>
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-s3">S3 Storage</a></li>
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-azure">Azure Storage</a></li>
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-oss">OSS Storage</a></li>
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-zeppelinhub">ZeppelinHub Storage</a></li>
<li><a href="{{BASE_PATH}}/setup/storage/storage.html#notebook-storage-in-mongodb">MongoDB Storage</a></li>
<li role="separator" class="divider"></li>
@ -127,6 +128,7 @@
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
<li><a href="{{BASE_PATH}}/interpreter/jdbc.html">JDBC</a></li>
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/beam.html">Beam</a></li>
@ -148,10 +150,10 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/mongodb.html">MongoDB</a></li>
<li><a href="{{BASE_PATH}}/interpreter/neo4j.html">Neo4j</a></li>
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
<li><a href="{{BASE_PATH}}/interpreter/scalding.html">Scalding</a></li>
<li><a href="{{BASE_PATH}}/interpreter/scio.html">Scio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/shell.html">Shell</a></li>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 303 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 170 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 305 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 235 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 237 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 235 KiB

View file

@ -37,3 +37,7 @@ Currently, Helium supports 4 types of package.
- [Helium Interpreter](../writing_zeppelin_interpreter.html): Adding a new custom interpreter
## Configuration
Zeppelin ships with several built-in helium plugins, which are located in $ZEPPELIN_HOME/heliums. If you want to try more types of helium plugins,
you can configure `zeppelin.helium.registry` to be `helium,https://s3.amazonaws.com/helium-package/helium.json` in zeppelin-site.xml. `https://s3.amazonaws.com/helium-package/helium.json` will be updated regularly.

View file

@ -26,6 +26,7 @@ limitations under the License.
## Overview
[Apache Flink](https://flink.apache.org) is an open source platform for distributed stream and batch data processing. Flink's core is a streaming dataflow engine that provides data distribution, communication, and fault tolerance for distributed computations over data streams. Flink also builds batch processing on top of the streaming engine, overlaying native iteration support, managed memory, and program optimization.
In Zeppelin 0.9, we refactor the Flink interpreter in Zeppelin to support the latest version of Flink. **Only Flink 1.10+ is supported, old version of flink may not work.**
Apache Flink is supported in Zeppelin with Flink interpreter group which consists of below five interpreters.
<table class="table-configuration">
@ -61,9 +62,14 @@ Apache Flink is supported in Zeppelin with Flink interpreter group which consist
</tr>
</table>
## Prerequisites
* Download Flink 1.10 for scala 2.11 (Only scala-2.11 is supported, scala-2.12 is not supported yet in Zeppelin)
* Download [flink-hadoop-shaded](https://repo1.maven.org/maven2/org/apache/flink/flink-shaded-hadoop-2/2.8.3-10.0/flink-shaded-hadoop-2-2.8.3-10.0.jar) and put it under lib folder of flink (flink interpreter needs that to support yarn mode)
## Configuration
The Flink interpreter can be configured with properties provided by Zeppelin.
You can also set other flink properties which are not listed in the table. For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-release-1.9/ops/config.html).
The Flink interpreter can be configured with properties provided by Zeppelin (as following table).
You can also set other flink properties which are not listed in the table. For a list of additional properties, refer to [Flink Available Properties](https://ci.apache.org/projects/flink/flink-docs-master/ops/config.html).
<table class="table-configuration">
<tr>
<th>Property</th>
@ -71,9 +77,19 @@ You can also set other flink properties which are not listed in the table. For a
<th>Description</th>
</tr>
<tr>
<td>FLINK_HOME</td>
<td>`FLINK_HOME`</td>
<td></td>
<td>Location of flink installation. It is must be specified, otherwise you can not use flink in zeppelin</td>
<td>Location of flink installation. It must be specified, otherwise you cannot use flink in Zeppelin</td>
</tr>
<tr>
<td>`HADOOP_CONF_DIR`</td>
<td></td>
<td>Location of hadoop conf, this must be set if running in yarn mode</td>
</tr>
<tr>
<td>`HIVE_CONF_DIR`</td>
<td></td>
<td>Location of hive conf, this must be set if you want to connect to hive metastore</td>
</tr>
<tr>
<td>flink.execution.mode</td>
@ -83,12 +99,12 @@ You can also set other flink properties which are not listed in the table. For a
<tr>
<td>flink.execution.remote.host</td>
<td></td>
<td>jobmanager hostname if it is remote mode</td>
<td>Host name of running JobManager. Only used for remote mode</td>
</tr>
<tr>
<td>flink.execution.remote.port</td>
<td></td>
<td>jobmanager port if it is remote mode</td>
<td>Port of running JobManager. Only used for remote mode</td>
</tr>
<tr>
<td>flink.jm.memory</td>
@ -100,16 +116,16 @@ You can also set other flink properties which are not listed in the table. For a
<td>1024</td>
<td>Total number of memory(mb) of TaskManager</td>
</tr>
<tr>
<td>flink.tm.num</td>
<td>2</td>
<td>Number of TaskManager</td>
</tr>
<tr>
<td>flink.tm.slot</td>
<td>1</td>
<td>Number of slot per TaskManager</td>
</tr>
<tr>
<td>local.number-taskmanager</td>
<td>4</td>
<td>Total number of TaskManagers in local mode</td>
</tr>
<tr>
<td>flink.yarn.appName</td>
<td>Zeppelin Flink Session</td>
@ -117,28 +133,68 @@ You can also set other flink properties which are not listed in the table. For a
</tr>
<tr>
<td>flink.yarn.queue</td>
<td></td>
<td>default</td>
<td>queue name of yarn app</td>
</tr>
<tr>
<td>flink.yarn.jars</td>
<td>flink.webui.yarn.useProxy</td>
<td>false</td>
<td>whether use yarn proxy url as flink weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004</td>
</tr>
<tr>
<td>flink.udf.jars</td>
<td></td>
<td>additional user jars (comma separated)</td>
<td>Flink udf jars (comma separated), zeppelin will register udf in this jar automatically for user. The udf name is the class name.</td>
</tr>
<tr>
<td>flink.execution.jars</td>
<td></td>
<td>Additional user jars (comma separated)</td>
</tr>
<tr>
<td>flink.execution.packages</td>
<td></td>
<td>Additional user packages (comma separated), e.g. org.apache.flink:flink-connector-kafka_2.11:1.10,org.apache.flink:flink-connector-kafka-base_2.11:1.10.0,org.apache.flink:flink-json:1.10.0</td>
</tr>
<tr>
<td>zeppelin.flink.concurrentBatchSql.max</td>
<td>10</td>
<td>Max concurrent sql of Batch Sql (`%flink.bsql`)</td>
</tr>
<tr>
<td>zeppelin.flink.concurrentStreamSql.max</td>
<td>10</td>
<td>Max concurrent sql of Stream Sql (`%flink.ssql`)</td>
</tr>
<tr>
<td>zeppelin.pyflink.python</td>
<td>python</td>
<td>Python binary executable for PyFlink</td>
</tr>
<tr>
<td>table.exec.resource.default-parallelism</td>
<td>1</td>
<td>Default parallelism for flink sql job</td>
</tr>
<tr>
<td>zeppelin.flink.scala.color</td>
<td>true</td>
<td>whether display scala shell output in colorful format</td>
<td>Whether display scala shell output in colorful format</td>
</tr>
<tr>
<td>zeppelin.flink.enableHive</td>
<td>false</td>
<td>whether enable hive</td>
<td>Whether enable hive</td>
</tr>
<tr>
<td>zeppelin.flink.printREPLOutput</td>
<td>true</td>
<td>Print REPL output</td>
<td>zeppelin.flink.enableHive</td>
<td>false</td>
<td>Whether enable hive</td>
</tr>
<tr>
<td>zeppelin.flink.hive.version</td>
<td>2.3.4</td>
<td>Hive version that you would like to connect</td>
</tr>
<tr>
<td>zeppelin.flink.maxResult</td>
@ -146,64 +202,155 @@ You can also set other flink properties which are not listed in the table. For a
<td>max number of row returned by sql interpreter</td>
</tr>
<tr>
<td>zeppelin.flink.planner</td>
<td>blink</td>
<td>planner of flink table api, <em>blink</em> or <em>flink</em></td>
<td>`flink.interpreter.close.shutdown_cluster`</td>
<td>true</td>
<td>Whether shutdown application when closing interpreter</td>
</tr>
<tr>
<td>zeppelin.pyflink.python</td>
<td>python</td>
<td>python executable for pyflink</td>
</tr>
<tr>
<td>HADOOP_CONF_DIR</td>
<td></td>
<td>location of hadoop conf, this is must be set if running in yarn mode</td>
<td>`zeppelin.interpreter.close.cancel_job`</td>
<td>true</td>
<td>Whether cancel flink job when closing interpreter</td>
</tr>
</table>
## StreamExecutionEnvironment, ExecutionEnvironment, StreamTableEnvironment, BatchTableEnvironment
Zeppelin will create 4 variables to represent flink's entrypoint:
Zeppelin will create 6 variables as flink scala (`%flink`) entry point:
* `senv` (StreamExecutionEnvironment),
* `env` (ExecutionEnvironment)
* `stenv` (StreamTableEnvironment)
* `btenv` (BatchTableEnvironment)
* `benv` (ExecutionEnvironment)
* `stenv` (StreamTableEnvironment for blink planner)
* `btenv` (BatchTableEnvironment for blink planner)
* `stenv_2` (StreamTableEnvironment for flink planner)
* `btenv_2` (BatchTableEnvironment for flink planner)
## Flink Planner
And will create 6 variables as pyflink (`%flink.pyflink` or `%flink.ipyflink`) entry point:
* `s_env` (StreamExecutionEnvironment),
* `b_env` (ExecutionEnvironment)
* `st_env` (StreamTableEnvironment for blink planner)
* `bt_env` (BatchTableEnvironment for blink planner)
* `st_env_2` (StreamTableEnvironment for flink planner)
* `bt_env_2` (BatchTableEnvironment for flink planner)
## Execution mode (Local/Remote/Yarn)
Flink in Zeppelin supports 3 execution modes (`flink.execution.mode`):
* Local
* Remote
* Yarn
### Run Flink in Local Mode
Running Flink in Local mode will start a MiniCluster in local JVM. By default, the local MiniCluster will use port 8081, so make sure this port is available in your machine,
otherwise you can configure `rest.port` to specify another port. You can also specify `local.number-taskmanager` and `flink.tm.slot` to customize the number of TM and number of slots per TM,
because the default is only 4 TMs with 1 slot each, which may not be enough for some cases.
### Run Flink in Remote Mode
Running Flink in remote mode will connect to an existing flink cluster, which could be a standalone cluster or a yarn session cluster. Besides setting `flink.execution.mode` to `remote`, you also need to specify
`flink.execution.remote.host` and `flink.execution.remote.port` to point to flink job manager.
### Run Flink in Yarn Mode
In order to run flink in Yarn mode, you need to make the following settings:
* Set `flink.execution.mode` to `yarn`
* Set `HADOOP_CONF_DIR` in flink's interpreter setting.
* Make sure the `hadoop` command is on your PATH, because internally flink will call the command `hadoop classpath` and load all the hadoop related jars in the flink interpreter process
## Blink/Flink Planner
There're 2 planners supported by Flink's table api: `flink` & `blink`.
* If you want to use DataSet api, and convert it to flink table then please use flink planner (`btenv_2` and `stenv_2`).
* In other cases, we would always recommend you to use `blink` planner. This is also what flink batch/streaming sql interpreter use (`%flink.bsql` & `%flink.ssql`)
Starting from Flink 1.9, there're 2 planners supported by Flink's table api: `flink` & `blink`.
* If you want to use DataSet api, then please use flink planner (specify `zeppelin.flink.planner` to `flink`).
* In other cases, we would always recommend you to use `blink` planner which is also the default value of `zeppelin.flink.planner`.
## How to use Hive
In order to use Hive in Flink, you have to do several setting.
* Set `zeppelin.flink.enableHive` to `true`
* Copy necessary dependencies to flink's lib folder, check this [link](https://ci.apache.org/projects/flink/flink-docs-release-1.9/dev/table/hive/#depedencies) for more details
* flink-connector-hive_{scala_version}-{flink.version}.jar
* flink-hadoop-compatibility_{scala_version}-{flink.version}.jar
* flink-shaded-hadoop-2-uber-{hadoop.version}-{flink-shaded.version}.jar
* hive-exec-2.x.jar (for Hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303-0.9.2.jar and libthrift-0.9.2.jar)
* Specify `HIVE_CONF_DIR` either in flink interpreter setting or `zeppelin-env.sh`
* Specify `zeppelin.flink.hive.version`, by default it is 2.3.4. If you are using Hive 1.2.x, then you need to set it as `1.2.2`
In order to use Hive in Flink, you have to make the following setting.
* Set `zeppelin.flink.enableHive` to be true
* Set `zeppelin.flink.hive.version` to be the hive version you are using.
* Set `HIVE_CONF_DIR` to be the location where `hive-site.xml` is located. Make sure hive metastore is started and you have configured `hive.metastore.uris` in `hive-site.xml`
* Copy the following dependencies to the lib folder of flink installation. 
* flink-connector-hive_2.11-1.10.0.jar
* flink-hadoop-compatibility_2.11-1.10.0.jar
* hive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303-0.9.2.jar and libthrift-0.9.2.jar)
After these settings, you will be able to query hive table via either table api `%flink` or batch sql `%flink.bsql`
## Flink Batch SQL
`%flink.bsql` is used for flink's batch sql. You just type `help` to get all the available commands.
* Use `insert into` statement for batch ETL
* Use `select` statement for exploratory data analytics
## Flink Streaming SQL
`%flink.ssql` is used for flink's streaming sql. You just type `help` to get all the available commands. Mainly there are 2 cases:
* Use `insert into` statement for streaming processing
* Use `select` statement for streaming data analytics
## Flink UDF
You can use Flink scala UDF or Python UDF in sql. UDF for batch and streaming sql is the same. Here are 2 examples.
* Scala UDF
```scala
%flink
class ScalaUpper extends ScalarFunction {
def eval(str: String) = str.toUpperCase
}
btenv.registerFunction("scala_upper", new ScalaUpper())
```
* Python UDF
```python
%flink.pyflink
class PythonUpper(ScalarFunction):
def eval(self, s):
return s.upper()
bt_env.register_function("python_upper", udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))
```
Besides defining udf in Zeppelin, you can also load udfs in jars via `flink.udf.jars`. For example, you can create
udfs in intellij and then build these udfs in one jar. After that you can specify `flink.udf.jars` to this jar, and flink
interpreter will detect all the udfs in this jar and register all the udfs to TableEnvironment, the udf name is the class name.
## ZeppelinContext
Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. `ZeppelinContext` provides some additional functions and utilities.
See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details.
## IPython support
## IPython Support
By default, zeppelin would use IPython in `%flink.pyflink` when IPython is available, otherwise it would fall back to the original python implementation.
If you don't want to use IPython, then you can set `zeppelin.pyflink.useIPython` as `false` in interpreter setting. For the IPython features, you can refer doc
[Python Interpreter](python.html)
For the IPython features, you can refer doc[Python Interpreter](python.html)
## Tutorial Notes
Zeppelin is shipped with several Flink tutorial notes which may be helpful for you.
Zeppelin is shipped with several Flink tutorial notes which may be helpful for you. Except the first one, the below 4 notes cover the 4 main scenarios of flink.
* Flink Basic
* Batch ETL
* Exploratory Data Analytics
* Streaming ETL
* Streaming Data Analytics

102
docs/interpreter/mongodb.md Normal file
View file

@ -0,0 +1,102 @@
---
layout: page
title: "MongoDB Interpreter for Apache Zeppelin"
description: "MongoDB is a general purpose, document-based, distributed database built for modern application developers and for the cloud era."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# MongoDB interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[MongoDB](https://www.mongodb.com/) is a general purpose, document-based, distributed database built for modern application developers and for the cloud era.
This interpreter uses the mongo shell to execute [scripts](https://docs.mongodb.com/manual/tutorial/write-scripts-for-the-mongo-shell/).
Use mongo-shell `JavaScript` to analyze data as you need.
## Installation and Configuration
First, you need to install the mongo shell on the same machine as Zeppelin.
If you use a Mac with brew, follow these instructions.
```
brew tap mongodb/brew
brew install mongodb/brew/mongodb-community-shell
```
Or you can follow this [mongo shell](https://docs.mongodb.com/manual/mongo/)
Second, create mongodb interpreter in Zeppelin.
![MongoDB interpreter install]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/mongo-interpreter-install.png)
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>mongo.shell.path</td>
<td>mongo</td>
<td>MongoDB shell local path. <br/> Use `which mongo` to get local path in linux or mac.</td>
</tr>
<tr>
<td>mongo.shell.command.table.limit</td>
<td>1000</td>
<td>Limit of documents displayed in a table. <br/> Use table function when getting data from mongodb</td>
</tr>
<tr>
<td>mongo.shell.command.timeout</td>
<td>60000</td>
<td>MongoDB shell command timeout in milliseconds</td>
</tr>
<tr>
<td>mongo.server.host</td>
<td>localhost</td>
<td>MongoDB server host to connect to</td>
</tr>
<tr>
<td>mongo.server.port</td>
<td>27017</td>
<td>MongoDB server port to connect to</td>
</tr>
<tr>
<td>mongo.server.database</td>
<td>test</td>
<td>MongoDB database name</td>
</tr>
<tr>
<td>mongo.server.authentdatabase</td>
<td></td>
<td>MongoDB database name for authentication</td>
</tr>
<tr>
<td>mongo.server.username</td>
<td></td>
<td>Username for authentication</td>
</tr>
<tr>
<td>mongo.server.password</td>
<td></td>
<td>Password for authentication</td>
</tr>
<tr>
<td>mongo.interpreter.concurrency.max</td>
<td>10</td>
<td>Max count of scheduler concurrency</td>
</tr>
</table>
## Examples
The following example demonstrates the basic usage of MongoDB in a Zeppelin notebook.
![MongoDB interpreter examples]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/mongo-examples.png)
Or you can monitor stats of mongodb collections.
![MongoDB interpreter examples]({{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/mongo-interpreter-monitor.png)

View file

@ -74,79 +74,198 @@ We recommend you to also install the following optional R libraries for happy da
+ sqldf
+ wordcloud
## Supported Interpreters
Zeppelin supports R language in 3 interpreters
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Class</th>
<th>Description</th>
</tr>
<tr>
<td>%r.r</td>
<td>RInterpreter</td>
<td>Vanilla r interpreter, with least dependencies, only R environment installed is required.
It is always recommended to use the fully qualified interpreter name <code>%r.r</code>, because <code>%r</code> is ambiguous,
it could mean both <code>%spark.r</code> and <code>%r.r</code></td>
</tr>
<tr>
<td>%r.ir</td>
<td>IRInterpreter</td>
<td>Provide more fancy R runtime via [IRKernel](https://github.com/IRkernel/IRkernel), almost the same experience like using R in Jupyter. It requires more things, but is the recommended interpreter for using R in Zeppelin.</td>
</tr>
<tr>
<td>%r.shiny</td>
<td>ShinyInterpreter</td>
<td>Run Shiny app in Zeppelin</td>
</tr>
</table>
If you want to use R with Spark, it is almost the same via `%spark.r`, `%spark.ir` & `%spark.shiny`. You can refer to the Spark Interpreter docs for more details.
## Configuration
To run Zeppelin with the R Interpreter, the `SPARK_HOME` environment variable must be set. The best way to do this is by editing `conf/zeppelin-env.sh`.
If it is not set, the R Interpreter will not be able to interface with Spark.
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.R.cmd</td>
<td>R</td>
<td>Path of the installed R binary. You should set this property explicitly if R is not in your <code>$PATH</code>(example: /usr/bin/R).
</td>
</tr>
<tr>
<td>zeppelin.R.knitr</td>
<td>true</td>
<td>Whether to use knitr or not. It is recommended to install [knitr](https://yihui.org/knitr/)</td>
</tr>
<tr>
<td>zeppelin.R.image.width</td>
<td>100%</td>
<td>Image width of R plotting</td>
</tr>
<tr>
<td>zeppelin.R.shiny.iframe_width</td>
<td>100%</td>
<td>IFrame width of Shiny App</td>
</tr>
<tr>
<td>zeppelin.R.shiny.iframe_height</td>
<td>500px</td>
<td>IFrame height of Shiny App</td>
</tr>
</table>
You should also copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.
## Using the R Interpreter(`%r.r` & `%r.ir`)
## Using the R Interpreter
By default, the R Interpreter appears as two Zeppelin Interpreters, `%r.r` and `%r.ir`.
By default, the R Interpreter appears as two Zeppelin Interpreters, `%r` and `%knitr`.
`%r.r` behaves like an ordinary REPL and use SparkR to communicate between R process and JVM process.
`%r.ir` use IRKernel underneath, it behaves like using IRKernel in Jupyter notebook.
`%r` will behave like an ordinary REPL. You can execute commands as in the CLI.
R basic expression
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/repl2plus2.png" width="700px"/>
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_basic.png" width="800px"/>
R base plotting is fully supported
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/replhist.png" width="550px"/>
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_plotting.png" width="800px"/>
If you return a data.frame, Zeppelin will attempt to display it using Zeppelin's built-in visualizations.
Besides R base plotting, you can use other visualization library, e.g. `ggplot` and `googlevis`
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/replhead.png" width="550px"/>
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_ggplot.png" width="800px"/>
`%knitr` interfaces directly against `knitr`, with chunk options on the first line:
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_googlevis.png" width="800px"/>
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/knitgeo.png" width="550px"/>
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/knitstock.png" width="550px"/>
## Make Shiny App in Zeppelin
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/knitmotion.png" width="550px"/>
[Shiny](https://shiny.rstudio.com/tutorial/) is an R package that makes it easy to build interactive web applications (apps) straight from R.
For developing one Shiny App in Zeppelin, you need at least 3 paragraphs (server paragraph, ui paragraph and run type paragraph).
The two interpreters share the same environment. If you define a variable from `%r`, it will be within-scope if you then make a call using `knitr`.
* Server type R shiny paragraph
## Using SparkR & Moving Between Languages
```r
If `SPARK_HOME` is set, the `SparkR` package will be loaded automatically:
%r.shiny(type=server)
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/sparkrfaithful.png" width="550px"/>
# Define server logic to summarize and view selected dataset ----
server <- function(input, output) {
The Spark Context and SQL Context are created and injected into the local environment automatically as `sc` and `sql`.
# Return the requested dataset ----
datasetInput <- reactive({
switch(input$dataset,
"rock" = rock,
"pressure" = pressure,
"cars" = cars)
})
The same context are shared with the `%spark`, `%sql` and `%pyspark` interpreters:
# Generate a summary of the dataset ----
output$summary <- renderPrint({
dataset <- datasetInput()
summary(dataset)
})
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/backtoscala.png" width="700px"/>
# Show the first "n" observations ----
output$view <- renderTable({
head(datasetInput(), n = input$obs)
})
}
```
You can also make an ordinary R variable accessible in scala and Python:
* UI type R shiny paragraph
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/varr1.png" width="550px"/>
```r
%r.shiny(type=ui)
And vice versa:
# Define UI for dataset viewer app ----
ui <- fluidPage(
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/varscala.png" width="550px"/>
# App title ----
titlePanel("Shiny Text"),
# Sidebar layout with a input and output definitions ----
sidebarLayout(
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/varr2.png" width="550px"/>
# Sidebar panel for inputs ----
sidebarPanel(
# Input: Selector for choosing dataset ----
selectInput(inputId = "dataset",
label = "Choose a dataset:",
choices = c("rock", "pressure", "cars")),
# Input: Numeric entry for number of obs to view ----
numericInput(inputId = "obs",
label = "Number of observations to view:",
value = 10)
),
## Caveats & Troubleshooting
# Main panel for displaying outputs ----
mainPanel(
# Output: Verbatim text for data summary ----
verbatimTextOutput("summary"),
# Output: HTML table with requested number of observations ----
tableOutput("view")
)
)
)
```
* Almost all issues with the R interpreter turned out to be caused by an incorrectly set `SPARK_HOME`. The R interpreter must load a version of the `SparkR` package that matches the running version of Spark, and it does this by searching `SPARK_HOME`. If Zeppelin isn't configured to interface with Spark in `SPARK_HOME`, the R interpreter will not be able to connect to Spark.
* Run type R shiny paragraph
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
```r
* (Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
%r.shiny(type=run)
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.
```
* If you return a data.frame (for instance, from calling `head()`) from the `%spark.r` interpreter, it will be parsed by Zeppelin's built-in data visualization system.
After executing the run type R shiny paragraph, the shiny app will be launched and embedded as Iframe in paragraph.
* Why `knitr` Instead of `rmarkdown`? Why no `htmlwidgets`? In order to support `htmlwidgets`, which has indirect dependencies, `rmarkdown` uses `pandoc`, which requires writing to and reading from disc. This makes it many times slower than `knitr`, which can operate entirely in RAM.
<img class="img-responsive" src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/r_shiny.png" width="800px"/>
* Why no `ggvis` or `shiny`? Supporting `shiny` would require integrating a reverse-proxy into Zeppelin, which is a task.
### Run multiple shiny apps
* Max OS X & case-insensitive filesystem. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, maven can unintentionally delete the install directory because `r` and `R` become the same subdirectory.
If you want to run multiple shiny apps, you can specify `app` in the paragraph local properties to differentiate the shiny apps.
* Error `unable to start device X11` with the repl interpreter. Check your shell login scripts to see if they are adjusting the `DISPLAY` environment variable. This is common on some operating systems as a workaround for ssh issues, but can interfere with R plotting.
e.g.
* akka Library Version or `TTransport` errors. This can happen if you try to run Zeppelin with a SPARK_HOME that has a version of Spark other than the one specified with `-Pspark-1.x` when Zeppelin was compiled.
```r
%r.shiny(type=ui, app=app_1)
```
```r
%r.shiny(type=server, app=app_1)
```
```r
%r.shiny(type=run, app=app_1)
```

View file

@ -74,6 +74,11 @@ At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property v
<td>false</td>
<td>Enable ZeppelinContext variable interpolation into paragraph text</td>
</tr>
<tr>
<td>zeppelin.terminal.ip.mapping</td>
<td></td>
<td>Internal and external IP mapping of zeppelin server</td>
</tr>
</table>
## Example
@ -129,3 +134,14 @@ input any char
```
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/shell-terminal.gif" />
### zeppelin.terminal.ip.mapping
When running the terminal interpreter in the notebook, the front end of the notebook needs to obtain the IP address of the server where the terminal interpreter is located to communicate.
In a public cloud environment, the cloud host has an internal IP and an external access IP, and the interpreter runs in the cloud host. This will cause the notebook front end to be unable to connect to the terminal interpreter properly, resulting in the terminal interpreter being unusable.
Solution: Set the mapping between internal IP and external IP in the terminal interpreter, and connect the front end of the notebook through the external IP of the terminal interpreter.
Example:
{"internal-ip1":"external-ip1", "internal-ip2":"external-ip2"}

View file

@ -186,6 +186,10 @@ You can also set other Spark properties which are not listed in the table. For a
<td></td>
<td>Overrides Spark UI default URL. Value should be a full URL (ex: http://{hostName}/{uniquePath})</td>
</tr>
<tr>
<td>spark.webui.yarn.useProxy</td>
<td>false</td>
<td>whether use yarn proxy url as spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004</td>
</tr>
</table>
Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps.
@ -255,6 +259,8 @@ For the further information about Spark & Zeppelin version compatibility, please
> Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile.
> Yarn client mode and local mode will run driver in the same machine with zeppelin server, which would be dangerous for production because it may run out of memory when there are many spark interpreters running at the same time. So we suggest you only allow yarn-cluster mode via setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`.
## SparkContext, SQLContext, SparkSession, ZeppelinContext
SparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext`, `spark` and `z`, respectively, in Scala, Kotlin, Python and R environments.

View file

@ -40,7 +40,7 @@ Apache Zeppelin officially supports and is tested on the following environments:
</tr>
<tr>
<td>OS</td>
<td>Mac OSX <br /> Ubuntu 14.X <br /> CentOS 6.X <br /> Windows 7 Pro SP1</td>
<td>Mac OSX <br /> Ubuntu 16.X</td>
</tr>
</table>
@ -53,7 +53,7 @@ Two binary packages are available on the [download page](http://zeppelin.apache.
### Building Zeppelin from source
follow the instructions [How to Build](../setup/basics/how_to_build.html), If you want to build from source instead of using binary package.
Follow the instructions in [How to Build](../setup/basics/how_to_build.html) if you want to build from source instead of using the binary package.
## Starting Apache Zeppelin
@ -65,12 +65,6 @@ On all unix like platforms:
bin/zeppelin-daemon.sh start
```
If you are on Windows:
```
bin\zeppelin.cmd
```
After Zeppelin has started successfully, go to [http://localhost:8080](http://localhost:8080) with your web browser.
#### Stopping Zeppelin

View file

@ -31,6 +31,7 @@ The following guides explain how to use Apache Zeppelin that enables you to writ
- can query using [PandasSQL](../interpreter/python.html#sql-over-pandas-dataframes)
- also, provides [PySpark](../interpreter/spark.html)
- with [matplotlib integration](../interpreter/python.html#matplotlib-integration)
- support [ipython](../interpreter/python.html#ipython-interpreter-pythonipython-recommended)
- can create results including **UI widgets** using [Dynamic Form](../interpreter/python.html#using-zeppelin-dynamic-forms)
<br/>

View file

@ -29,8 +29,7 @@ For a brief overview of Apache Spark fundamentals with Apache Zeppelin, see the
- **built-in** Apache Spark integration.
- with [SparkSQL](http://spark.apache.org/sql/), [PySpark](https://spark.apache.org/docs/latest/api/python/pyspark.html), [SparkR](https://spark.apache.org/docs/latest/sparkr.html)
- inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html) and [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
- dependencies loading (jars) at runtime using [dependency loader](../interpreter/spark.html#dependencyloading)
- inject [SparkContext](https://spark.apache.org/docs/latest/api/java/org/apache/spark/SparkContext.html), [SQLContext](https://spark.apache.org/docs/latest/sql-programming-guide.html) and [SparkSession](https://spark.apache.org/docs/latest/sql-programming-guide.html) automatically
- canceling job and displaying its progress
- supporting [Spark Cluster Mode](../setup/deployment/spark_cluster_mode.html#apache-zeppelin-on-spark-cluster-mode) for external spark clusters
- supports [different context per user / note](../usage/interpreter/interpreter_binding_mode.html)

View file

@ -131,10 +131,6 @@ Available profiles are
-Pscala-2.11
```
##### `-Pr` (optional)
enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
Note that, this enables R interpreter which is different from sparkR included in Spark interpreter by default.
##### `-Pvendor-repo` (optional)

View file

@ -37,11 +37,11 @@ So, copying `notebook` and `conf` directory should be enough.
### Upgrading from Zeppelin 0.8 to 0.9
- From 0.9, we change the notes file name structure ([ZEPPELIN-2619](https://issues.apache.org/jira/browse/ZEPPELIN-2619)) and move permissions info from `notebook-authorization.json` into note file itself [ZEPPELIN-3985](https://issues.apache.org/jira/browse/ZEPPELIN-3985). So when you upgrading zeppelin to 0.9, you need to upgrade note file. Here's steps you need to follow:
- From 0.9, we change the notes file name structure ([ZEPPELIN-2619](https://issues.apache.org/jira/browse/ZEPPELIN-2619)). So when you upgrade Zeppelin to 0.9, you need to upgrade your note files. Here are the steps you need to follow:
1. Backup your notes file in case the upgrade fails
2. Call `bin/upgrade-note.sh -d` to upgrade note, `-d` option means to delete the old note file, missing this option will keep the old file.
- From 0.9, Zeppelin server bind `127.0.0.1` by default instead of `0.0.0.0`. Configure `zeppelin.server.addr` property or `ZEPPELIN_ADDR` env variable to change.
- From 0.9, we have removed `zeppelin.anonymous.allowed` ([ZEPPELIN-4489](https://issues.apache.org/jira/browse/ZEPPELIN-4489)). So, when you upgrade Zeppelin to 0.9 and if `shiro.ini` file does not exists in conf path then all the Zeppelin-Users runs as anonymous.
2. Call `bin/upgrade-note.sh -d` to upgrade notes, `-d` option means to delete the old note file, missing this option will keep the old file.
- From 0.9, Zeppelin server bind `127.0.0.1` by default instead of `0.0.0.0`. Configure `zeppelin.server.addr` property or `ZEPPELIN_ADDR` env variable to change it to `0.0.0.0` if you want to access it remotely.
- From 0.9, we have removed `zeppelin.anonymous.allowed` ([ZEPPELIN-4489](https://issues.apache.org/jira/browse/ZEPPELIN-4489)). So, when you upgrade Zeppelin to 0.9 and the `shiro.ini` file does not exist in the conf path, all the Zeppelin users run as anonymous.
### Upgrading from Zeppelin 0.8.1 (and before) to 0.8.2 (and later)
- From 0.8.2, Zeppelin server bind `127.0.0.1` by default instead of `0.0.0.0`. Configure `zeppelin.server.addr` property or `ZEPPELIN_ADDR` env variable to change.

View file

@ -53,7 +53,7 @@ Read more about HSTS [here](https://developer.mozilla.org/en-US/docs/Web/HTTP/He
The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari Web browsers that initiates configured action when they detect reflected cross-site scripting (XSS) attacks.
The following property needs to be updated in the zeppelin-site.xml in order to set X-XSS-PROTECTION header.
The below property to set X-XSS-Protection header is enabled with default value of "1; mode=block" in the zeppelin-site.xml
```xml
<property>
@ -64,7 +64,7 @@ The following property needs to be updated in the zeppelin-site.xml in order to
```
You can choose appropriate value from below.
You can choose an appropriate value from below to update the configuration if required.
* 0 (Disables XSS filtering)
* 1 (Enables XSS filtering. If a cross-site scripting attack is detected, the browser will sanitize the page.)
@ -76,7 +76,7 @@ Read more about HTTP X-XSS-Protection response header [here](https://developer.m
The X-Frame-Options HTTP response header can indicate browser to avoid clickjacking attacks, by ensuring that their content is not embedded into other sites in a `<frame>`,`<iframe>` or `<object>`.
The following property needs to be updated in the zeppelin-site.xml in order to set X-Frame-Options header.
The below property to set X-Frame-Options header is enabled with default value of "SAMEORIGIN" in the zeppelin-site.xml
```xml
<property>
@ -87,7 +87,7 @@ The following property needs to be updated in the zeppelin-site.xml in order to
```
You can choose appropriate value from below.
You can choose an appropriate value from below to update the configuration if required.
* `DENY`
* `SAMEORIGIN`
@ -97,7 +97,7 @@ You can choose appropriate value from below.
The HTTP X-Content-Type-Options response header helps to prevent MIME type sniffing attacks. It directs the browser to honor the type specified in the Content-Type header, rather than trying to determine the type from the content itself. The default value `nosniff` is really the only meaningful value. This header is supported on all browsers except Safari and Safari on iOS.
Zeppelin server will add this header to HTTP response by default. The following property needs to be updated in the zeppelin-site.xml in order to change X-Content-Type-Options header value.
The below property to set X-Content-Type-Options header is enabled with default value of "nosniff" in the zeppelin-site.xml
```xml
<property>
@ -111,25 +111,24 @@ Zeppelin server will add this header to HTTP response by default. The following
Security-conscious organisations do not want to reveal the Application Server name and version, so that an attacker cannot easily find this information while fingerprinting the application. The exact version number can tell an attacker whether the current Application Server is patched for, or vulnerable to, certain publicly known CVEs associated with it.
The following property needs to be updated in the zeppelin-site.xml in order to set Server header.
The below property to mask Jetty server version is enabled by default and configured with value of " " (one whitespace char) in the zeppelin-site.xml
```xml
<property>
<name>zeppelin.server.jetty.name</name>
<value>Jetty(7.6.0.v20120127)</value>
<value> </value>
<description>Hardcoding Application Server name to Prevent Fingerprinting</description>
</property>
```
The value can be any "String".
The value can be any "String". Removing this property from configuration will cause Zeppelin to send correct Jetty server version.
Also, it can be removed from the response headers and from 300/400/500 HTTP response pages.
```xml
<property>
<name>zeppelin.server.send.jetty.name</name>
<value>false</value>
<description>If set to true, will not show the Jetty version to prevent Fingerprinting</description>
<description>If set to false, will not show the Jetty version to prevent Fingerprinting</description>
</property>
```
```

View file

@ -34,6 +34,7 @@ There are few notebook storage systems available for a use out of the box:
* storage using Amazon S3 service - `S3NotebookRepo`
* storage using Azure service - `AzureNotebookRepo`
* storage using Google Cloud Storage - `GCSNotebookRepo`
* storage using Aliyun OSS - `OSSNotebookRepo`
* storage using MongoDB - `MongoNotebookRepo`
* storage using GitHub - `GitHubNotebookRepo`
@ -371,6 +372,59 @@ file for authentication with GCS, update the following property :
```
</br>
## Notebook Storage in OSS <a name="OSS"></a>
Notebooks may be stored in Aliyun OSS.
</br>
The following folder structure will be created in OSS:
```
oss://bucket_name/{notebook_dir}/note_path
```
And you should configure oss related properties in file **zeppelin-site.xml**.
```xml
<property>
<name>zeppelin.notebook.oss.bucket</name>
<value>zeppelin</value>
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.oss.endpoint</name>
<value>http://oss-cn-hangzhou.aliyuncs.com</value>
<description>endpoint for oss bucket</description>
</property>
<property>
<name>zeppelin.notebook.oss.accesskeyid</name>
<value></value>
<description>Access key id for your OSS account</description>
</property>
<property>
<name>zeppelin.notebook.oss.accesskeysecret</name>
<value></value>
<description>Access key secret for your OSS account</description>
</property>
```
Uncomment the following property to use the OSSNotebookRepo class:
```xml
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.OSSNotebookRepo</value>
<description>notebook persistence layer implementation</description>
</property>
```
</br>
## Notebook Storage in ZeppelinHub <a name="ZeppelinHub"></a>

View file

@ -27,25 +27,30 @@ limitations under the License.
In this section, we will explain the role of interpreters, interpreter groups and interpreter settings in Zeppelin.
The concept of Zeppelin interpreters allows any language or data-processing backend to be plugged into Zeppelin.
Currently, Zeppelin supports many interpreters such as Scala (with Apache Spark), Python (with Apache Spark), Spark SQL, JDBC, Markdown, Shell and so on.
Currently, Zeppelin supports many interpreters such as Scala (with Apache Spark), Python (with Apache Spark), Spark SQL, Hive, JDBC, Markdown, Shell and so on.
## What are Zeppelin interpreters?
## What are Zeppelin Interpreters ?
A Zeppelin interpreter is a plug-in which enables Zeppelin users to use a specific language/data-processing-backend. For example, to use Scala code in Zeppelin, you would use the `%spark` interpreter.
When you click the ```+Create``` button on the interpreter page, the interpreter drop-down list box will show all the available interpreters on your server.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_create.png" width="280px">
## What are the interpreter settings?
You can create multiple interpreters for the same engine with different interpreter setting. e.g. You can create `spark2` for Spark 2.x and create `spark1` for Spark 1.x.
For each paragraph you write in Zeppelin, you need to specify its interpreter first via `%interpreter_group.interpreter_name`. e.g. `%spark.pyspark`, `%spark.r`
## What are the Interpreter Settings?
The interpreter settings are the configuration of a given interpreter on the Zeppelin server. For example, certain properties need to be set for the Apache Hive JDBC interpreter to connect to the Hive server.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_setting.png" width="500px">
Properties are exported as environment variables on the system if the property name consists of upper-case characters, numbers or underscores ([A-Z_0-9]). Otherwise, the property is set as a JVM property.
Properties are exported as environment variables on the system if the property name consists of upper-case characters, numbers or underscores ([A-Z_0-9]). Otherwise, the property is set as a common interpreter property.
e.g. You can define `SPARK_HOME` and `HADOOP_CONF_DIR` in Spark's interpreter setting; they are passed to the Spark interpreter process as environment variables, which are used by Spark.
You may use parameters from the context of the interpreter by adding #{contextParameterName} in the value. The parameter can be of the following types: string, number, boolean.
You may use parameters from the context of the interpreter by adding #{contextParameterName} in the interpreter property value. The parameter can be of the following types: string, number, boolean.
###### Context parameters
### Context Parameters
<table class="table-configuration">
<tr>
<th>Name</th>
@ -69,20 +74,14 @@ You may use parameters from the context of the interpreter by adding #{contextPa
</tr>
</table>
If the context parameter is null, then it is replaced by an empty string.
If the context parameter is null, then it is replaced by an empty string. The following screenshot is one example where we make the user name as the property value of `default.user`.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_setting_with_context_parameters.png" width="800px">
<br>
Each notebook can be bound to multiple Interpreter Settings using the setting icon in the upper right corner of the notebook.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_binding.png" width="800px">
## What are interpreter groups?
Every interpreter belongs to an **Interpreter Group**. Interpreter Groups are units of interpreters that can be started/stopped together.
By default, every interpreter belongs to a separate group, but the group might contain more interpreters. For example, the Spark interpreter group includes Spark support, pySpark, Spark SQL and the dependency loader.
## What are Interpreter Groups ?
Every interpreter belongs to an **Interpreter Group**. Interpreter Groups are units of interpreters that run in one single JVM process and can be started/stopped together.
By default, every interpreter belongs to a separate group, but the group might contain more interpreters. For example, the Spark interpreter group includes Scala Spark, PySpark, IPySpark, SparkR and Spark SQL.
Technically, Zeppelin interpreters from the same group run within the same JVM. For more information about this, please consult [the documentation on writing interpreters](../development/writing_zeppelin_interpreter.html).
@ -91,30 +90,38 @@ Each interpreter belongs to a single group and is registered together. All relev
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_setting_spark.png" width="500px">
## Interpreter binding mode
## Interpreter Binding Mode
In the Interpreter Settings, one can choose one of the 'shared', 'scoped', or 'isolated' interpreter binding modes.
In 'shared' mode, every notebook bound to the Interpreter Setting will share a single Interpreter instance. In 'scoped' mode, each notebook will create a new interpreter instance in the same interpreter process. In 'isolated' mode, each notebook will create new a interpreter process.
In the Interpreter Settings, one can choose one of the `shared`, `scoped`, or `isolated` interpreter binding modes.
In `shared` mode, every note/user using this interpreter will share a single interpreter instance.
`scoped` and `isolated` mode can be used under 2 dimensions: `per user` or `per note`.
e.g. In `scoped per note` mode, each note will create a new interpreter instance in the same interpreter process. In `isolated per note` mode, each note will create a new interpreter process.
For more information, please consult [Interpreter Binding Mode](./interpreter_binding_mode.html).
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_persession.png" width="400px">
## Interpreter Lifecycle Management
Before 0.8.0, Zeppelin didn't have lifecycle management for interpreters. Users had to shut down interpreters explicitly via the UI. Starting from 0.8.0, Zeppelin provides a new interface
`LifecycleManager` to control the lifecycle of interpreters. For now, there are two implementations: `NullLifecycleManager` and `TimeoutLifecycleManager` which is the default.
`NullLifecycleManager` will do nothing, i.e., the user needs to control the lifecycle of interpreter by themselves as before. `TimeoutLifecycleManager` will shut down interpreters after an interpreter remains idle for a while. By default, the idle threshold is 1 hour.
Users can change this threshold via the `zeppelin.interpreter.lifecyclemanager.timeout.threshold` setting. `TimeoutLifecycleManager` is the default lifecycle manager, and users can change it via `zeppelin.interpreter.lifecyclemanager.class`.
## Connecting to the existing remote interpreter
## Inline Generic Configuration
Zeppelin users can start interpreter threads embedded in their service. This provides users with the flexibility of starting interpreters on a remote host. To start an interpreter along with your service you have to create an instance of ``RemoteInterpreterServer`` and start it as follows:
Zeppelin's interpreter setting is shared by all users and notes. If you want to have different settings, you have to create a new interpreter, e.g. you can create `spark_jar1` for running Spark with dependency `jar1` and `spark_jar2` for running Spark with dependency `jar2`.
This approach works, but is not convenient. Inline generic configuration can provide more fine-grained control on interpreter settings and more flexibility.
```java
RemoteInterpreterServer interpreter=new RemoteInterpreterServer(3678);
// Here, 3678 is the port on which interpreter will listen.
interpreter.start();
`ConfInterpreter` is a generic interpreter that can be used by any interpreter. You can use it just like defining a java property file.
It can be used to make custom settings for any interpreter. However, `ConfInterpreter` needs to run before that interpreter process is launched. When that interpreter process is launched is determined by the interpreter binding mode setting.
So users need to understand the [interpreter binding mode setting](../usage/interpreter/interpreter_binding_mode.html) of Zeppelin and be aware of when the interpreter process is launched. E.g., if we set the Spark interpreter setting as isolated per note, then under this setting, each note will launch one interpreter process.
In this scenario, users need to put `ConfInterpreter` as the first paragraph as in the below example. Otherwise, the customized setting cannot be applied (actually it would report `ERROR`).
```
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/conf_interpreter.png" width="600px">
The above code will start an interpreter thread inside your process. Once the interpreter has started, you can configure Zeppelin to connect to RemoteInterpreter by checking the **Connect to existing process** checkbox and then provide the **Host** and **Port** on which interpreter process is listening, as shown in the image below:
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">
## Precode
@ -122,35 +129,6 @@ Snippet of code (language of interpreter) that executes after initialization of
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/interpreter_precode.png" width="800px">
## Interpreter Lifecycle Management
Before 0.8.0, Zeppelin didn't have lifecycle management for interpreters. Users had to shut down interpreters explicitly via the UI. Starting from 0.8.0, Zeppelin provides a new interface
`LifecycleManager` to control the lifecycle of interpreters. For now, there are two implementations: `NullLifecycleManager` and `TimeoutLifecycleManager`, which is the default.
`NullLifecycleManager` will do nothing, i.e., the user needs to control the lifecycle of interpreter by themselves as before. `TimeoutLifecycleManager` will shut down interpreters after an interpreter remains idle for a while. By default, the idle threshold is 1 hour.
Users can change this threshold via the `zeppelin.interpreter.lifecyclemanager.timeout.threshold` setting. `TimeoutLifecycleManager` is the default lifecycle manager, and users can change it via `zeppelin.interpreter.lifecyclemanager.class`.
## Inline Generic ConfInterpreter
Zeppelin's interpreter setting is shared by all users and notes, if you want to have different settings, you have to create a new interpreter, e.g. you can create `spark_jar1` for running Spark with dependency jar1 and `spark_jar2` for running Spark with dependency jar2.
This approach works, but is not particularly convenient. `ConfInterpreter` can provide more fine-grained control on interpreter settings and more flexibility.
`ConfInterpreter` is a generic interpreter that can be used by any interpreter. The input format should be the property file format.
It can be used to make custom settings for any interpreter. However, `ConfInterpreter` needs to be run before that interpreter process is launched. When that interpreter process is launched is determined by the interpreter mode setting.
So users need to understand the [interpreter mode setting](../usage/interpreter/interpreter_bindings_mode.html) of Zeppelin and be aware of when the interpreter process is launched. E.g., if we set the Spark interpreter setting as isolated per note, then, under this setting, each note will launch one interpreter process.
In this scenario, users need to put `ConfInterpreter` as the first paragraph as in the below example. Otherwise, the customized setting cannot be applied (actually it would report ERROR).
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/conf_interpreter.png" width="700px">
## Interpreter Process Recovery
Before 0.8.0, shutting down Zeppelin also meant to shutdown all the running interpreter processes. Usually, an administrator will shutdown the Zeppelin server for maintenance or upgrades, but would not want to shut down the running interpreter processes.
In such cases, interpreter process recovery is necessary. Starting from 0.8.0, users can enable interpreter process recovery via the setting `zeppelin.recovery.storage.class` as
`org.apache.zeppelin.interpreter.recovery.FileSystemRecoveryStorage` or other implementations if available in the future. By default it is `org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage`,
which means recovery is not enabled. Enabling recovery means shutting down Zeppelin would not terminate interpreter processes, and when Zeppelin is restarted, it would try to reconnect to the existing running interpreter processes. If you want to kill all the interpreter processes after terminating Zeppelin even when recovery is enabled, you can run `bin/stop-interpreter.sh`
## Credential Injection
Credentials from the credential manager can be injected into Notebooks. Credential injection works by replacing the following patterns in Notebooks with matching credentials for the Credential Manager: `{user.CREDENTIAL_ENTITY}` and `{password.CREDENTIAL_ENTITY}`. However, credential injection must be enabled per Interpreter, by adding a boolean `injectCredentials` setting in the Interpreters configuration. Injected passwords are removed from Notebook output to prevent accidentally leaking passwords.
@ -162,7 +140,16 @@ Credentials from the credential manager can be injected into Notebooks. Credenti
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/screenshots/credential_entry.png" width="500px">
**Credential Injection Example**
```
```scala
val password = "{password.SOME_CREDENTIAL_ENTITY}"
val username = "{user.SOME_CREDENTIAL_ENTITY}"
```
## Interpreter Process Recovery (Experimental)
Before 0.8.0, shutting down Zeppelin also meant to shutdown all the running interpreter processes. Usually, an administrator will shutdown the Zeppelin server for maintenance or upgrades, but would not want to shut down the running interpreter processes.
In such cases, interpreter process recovery is necessary. Starting from 0.8.0, users can enable interpreter process recovery via the setting `zeppelin.recovery.storage.class` as
`org.apache.zeppelin.interpreter.recovery.FileSystemRecoveryStorage` or other implementations if available in the future. By default it is `org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage`,
which means recovery is not enabled. Enabling recovery means shutting down Zeppelin would not terminate interpreter processes, and when Zeppelin is restarted, it would try to reconnect to the existing running interpreter processes. If you want to kill all the interpreter processes after terminating Zeppelin even when recovery is enabled, you can run `bin/stop-interpreter.sh`

View file

@ -57,4 +57,4 @@ Set property **zeppelin.notebook.cron.enable** to **true** in `$ZEPPELIN_HOME/co
### Run cron selectively on folders
In `$ZEPPELIN_HOME/conf/zeppelin-site.xml` make sure the property **zeppelin.notebook.cron.enable** is set to **true**, and then set property **zeppelin.notebook.cron.folders** to the desired folder as comma-separated values, e.g. `*yst*, Sys?em, System`. This property accepts wildcard and joker.
In `$ZEPPELIN_HOME/conf/zeppelin-site.xml` make sure the property **zeppelin.notebook.cron.enable** is set to **true**, and then set property **zeppelin.notebook.cron.folders** to the desired folder as comma-separated values, e.g. `/cron,/test/cron`.

View file

@ -38,7 +38,7 @@ environments is described below.
In many programming-language interpreters (e.g. Apache Spark, Python, R) the zeppelin-context is available
as a predefined variable `z` that can be used by directly invoking its methods.
The methods available on the `z` object are described below.
Other interpreters based on programming languages like spark.dep, Apache Beam, etc. also provide the
Other interpreters based on programming languages like Apache Beam, etc. also provide the
predefined variable `z`.
### Exploring Spark DataFrames
@ -51,7 +51,7 @@ z.show(df)
```
This display functionality using the `show` method is planned to be extended uniformly to
other interpreters that can access the `z` object.
other interpreters that can access the `z` object (Flink already supports showing tables too).
### Object Exchange
`ZeppelinContext` extends map and it's shared between the Apache Spark and Python environments.
@ -61,36 +61,103 @@ So you can put some objects using Scala (in an Apache Spark cell) and read it fr
<div data-lang="scala" markdown="1">
{% highlight scala %}
// Put object from scala
// Put/Get object from scala
%spark
val myObject = ...
val myObject = "hello"
z.put("objName", myObject)
z.get("objName")
// Exchanging data frames
myScalaDataFrame = ...
z.put("myScalaDataFrame", myScalaDataFrame)
val myPythonDataFrame = z.get("myPythonDataFrame").asInstanceOf[DataFrame]
{% endhighlight %}
</div>
<div data-lang="python" markdown="1">
{% highlight python %}
# Get object from python
# Put/Get object from python
%spark.pyspark
myObject = "hello"
z.put("objName", myObject)
myObject = z.get("objName")
# Exchanging data frames
myPythonDataFrame = ...
z.put("myPythonDataFrame", postsDf._jdf)
myScalaDataFrame = DataFrame(z.get("myScalaDataFrame"), sqlContext)
# df is Python pandas DataFrame
# "table_name" must be table type. Currently only sql interpreter (%spark.sql or %jdbc) result is supported.
df = z.getAsDataFrame("table_name")
{% endhighlight %}
</div>
<div data-lang="R" markdown="1">
{% highlight r %}
# Get/Put object from R
%spark.r
z.put("objName", myObject)
myObject <- z.get("objName")
# df is R DataFrame
# "table_name" must be table type. Currently only sql interpreter (%spark.sql or %jdbc) result is supported.
df <- z.getAsDataFrame("table_name")
{% endhighlight %}
</div>
</div>
Currently, two types of data can be shared across interpreters:
* String Data
* Table Data
#### Share String Object
Here's an example in which we share a String object `maxAge` between the Spark interpreter and the JDBC interpreter.
```scala
%spark
z.put("maxAge", 83)
```
```sql
%jdbc(interpolate=true)
select * from bank where age = {maxAge}
```
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/zeppelin_context_share_string.png" height="100%" width="100%">
#### Share Table Object
Here's an example in which we share a Table object between the JDBC interpreter and the Python interpreter.
```sql
%jdbc(saveAs=bank)
select * from bank
```
```python
%python.ipython
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
from plotnine import ggplot, geom_histogram, aes, facet_wrap
bank = z.getAsDataFrame('bank')
(ggplot(bank, aes(x='age')) + geom_histogram())
```
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/zeppelin_context_share_table.png" height="100%" width="100%">
### Form Creation
`ZeppelinContext` provides functions for creating forms.
@ -100,18 +167,19 @@ In Scala and Python environments, you can create forms programmatically.
{% highlight scala %}
%spark
/* Create text input form */
z.input("formName")
z.input("input_1")
/* Create text input form with default value */
z.input("formName", "defaultValue")
z.input("input_2", "defaultValue")
/* Create select form */
z.select("formName", Seq(("option1", "option1DisplayName"),
z.select("select_1", Seq(("option1", "option1DisplayName"),
("option2", "option2DisplayName")))
/* Create select form with default value*/
z.select("formName", "option1", Seq(("option1", "option1DisplayName"),
z.select("select_2", "option1", Seq(("option1", "option1DisplayName"),
("option2", "option2DisplayName")))
{% endhighlight %}
@ -120,38 +188,44 @@ z.select("formName", "option1", Seq(("option1", "option1DisplayName"),
{% highlight python %}
%spark.pyspark
# Create text input form
z.input("formName")
z.input("input_1")
# Create text input form with default value
z.input("formName", "defaultValue")
z.input("input_2", "defaultValue")
# Create select form
z.select("formName", [("option1", "option1DisplayName"),
z.select("select_1", [("option1", "option1DisplayName"),
("option2", "option2DisplayName")])
# Create select form with default value
z.select("formName", [("option1", "option1DisplayName"),
z.select("select_2", [("option1", "option1DisplayName"),
("option2", "option2DisplayName")], "option1")
{% endhighlight %}
</div>
</div>
In sql environment, you can create form in simple template.
Patterns of the form ${ ... } are used to dynamically create additional HTML elements
for requesting user input (that replaces the corresponding pattern in the paragraph text).
Currently only [text](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/text),
[select](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/select) with
[options](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/option), and
[checkbox](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/checkbox) are supported.
Dynamic forms are described in detail here: [Dynamic Form](../usage/dynamic_form/intro.html).
In a SQL environment, you can create a dynamic form with a simple template.
```sql
%spark.sql
select * from ${table=defaultTableName} where text like '%${search}%'
```
To learn more about dynamic forms, check out [Dynamic Form](../usage/dynamic_form/intro.html).
### Interpreter-Specific Functions
Some interpreters use a subclass of `BaseZeppelinContext` augmented with interpreter-specific functions.
For example functions of the dependency loader (%spark.dep) can be invoked as `z.addRepo()`, `z.load()`, etc.
Such interpreter-specific functions are described within each interpreter's documentation.
## Usage with Embedded Commands
@ -168,17 +242,6 @@ but object interpolation is only available in a small, but growing, list of inte
(marked with an asterisk in the table above).
Both these zeppelin-context features are described below.
### Dynamic Forms
Patterns of the form ${ ... } are used to dynamically create additional HTML elements
for requesting user input (that replaces the corresponding pattern in the paragraph text).
Currently only [text](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/text),
[select](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/select) with
[options](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/option), and
[checkbox](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input/checkbox) are supported.
Dynamic forms are described in detail here: [Dynamic Form](../usage/dynamic_form/intro.html).
### Object Interpolation
Some interpreters can interpolate object values from `z` into the paragraph text by using the
`{variable-name}` syntax. The value of any object previously `put` into `z` can be
@ -188,18 +251,22 @@ The following example shows one use of this facility:
#### In Scala cell:
```scala
%spark
z.put("minAge", 35)
```
#### In later SQL cell:
```sql
%sql select * from members where age >= {minAge}
%spark.sql
select * from members where age >= {minAge}
```
The interpolation of a `{var-name}` pattern is performed only when `z` contains an object with the specified name.
But the pattern is left unchanged if the named object does not exist in `z`.
Further, all `{var-name}` patterns within the paragraph text must must be translatable for any interpolation to occur --
Further, all `{var-name}` patterns within the paragraph text must be translatable for any interpolation to occur --
translation of only some of the patterns in a paragraph text is never done.
In some situations, it is necessary to use { and } characters in a paragraph text without invoking the
@ -208,15 +275,18 @@ doubled braces {{ and }} should be used. The following example shows the use of
regular expression containing just { and } into the paragraph text.
```sql
%sql select * from members where name rlike '[aeiou]{{3}}'
%spark.sql
{% raw %}
select * from members where name rlike '[aeiou]{{3}}'
{% endraw %}
```
To summarize, patterns of the form `{var-name}` within the paragraph text will be interpolated only if a predefined
object of the specified name exists. Additionally, all such patterns within the paragraph text should also
be translatable for any interpolation to occur. Patterns of the form `{{any-text}}` are translated into `{any-text}`.
These translations are performed only when all occurrences of `{`, `}`, `{{`, and `}}` in the paragraph text conform
be translatable for any interpolation to occur. Patterns of the form {% raw %} `{{any-text}}` {% endraw %} are translated into `{any-text}`.
These translations are performed only when all occurrences of `{`, `}`, {% raw %} `{{`, and `}}`{% endraw %} in the paragraph text conform
to one of the two forms described above. Paragraph text containing `{` and/or `}` characters used in any other way
(than `{var-name}` and `{{any-text}}`) is used as-is without any changes.
(than `{var-name}` and {% raw %} `{{any-text}}` {% endraw %} ) is used as-is without any changes.
No error is flagged in any case. This behavior is identical to the implementation of a similar feature in
Jupyter's shell invocation using the `!` magic command.
@ -224,12 +294,16 @@ This feature is disabled by default, and must be explicitly turned on for each i
by setting the value of an interpreter-specific property to `true`.
Consult the _Configuration_ section of each interpreter's documentation
to find out if object interpolation is implemented, and the name of the parameter that must be set to `true` to
enable the feature. The name of the parameter used to enable this feature it is different for each interpreter.
enable the feature. The name of the parameter used to enable this feature is different for each interpreter.
For example, the SparkSQL and Shell interpreters use the parameter names `zeppelin.spark.sql.interpolation` and
`zeppelin.shell.interpolation` respectively.
At present only the SparkSQL, JDBC, and Shell interpreters support object interpolation.
### Interpreter-Specific Functions
Some interpreters use a subclass of `BaseZeppelinContext` augmented with interpreter-specific functions.
Such interpreter-specific functions are described within each interpreter's documentation.

View file

@ -61,11 +61,11 @@ Notebooks REST API supports the following operations: List, Create, Get, Delete,
"message": "",
"body": [
{
"name":"Homepage",
"path":"Homepage",
"id":"2AV4WUEMK"
},
{
"name":"Zeppelin Tutorial",
"path":"Zeppelin Tutorial",
"id":"2A94M5J1Z"
}
]

View file

@ -54,8 +54,8 @@
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>

View file

@ -21,7 +21,7 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.commons.lang3.RandomUtils;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
@ -98,12 +98,12 @@ public class ElasticsearchInterpreterTest {
.startObject()
.field("date", new Date())
.startObject("request")
.field("method", METHODS[RandomUtils.nextInt(METHODS.length)])
.field("method", METHODS[RandomUtils.nextInt(0, METHODS.length)])
.field("url", "/zeppelin/" + UUID.randomUUID().toString())
.field("headers", Arrays.asList("Accept: *.*", "Host: apache.org"))
.endObject()
.field("status", STATUS[RandomUtils.nextInt(STATUS.length)])
.field("content_length", RandomUtils.nextInt(2000))
.field("status", STATUS[RandomUtils.nextInt(0, STATUS.length)])
.field("content_length", RandomUtils.nextInt(0, 2000))
)
.get();
}
@ -115,12 +115,12 @@ public class ElasticsearchInterpreterTest {
.startObject()
.field("date", new Date())
.startObject("request")
.field("method", METHODS[RandomUtils.nextInt(METHODS.length)])
.field("method", METHODS[RandomUtils.nextInt(0, METHODS.length)])
.field("url", "/zeppelin/" + UUID.randomUUID().toString())
.field("headers", Arrays.asList("Accept: *.*", "Host: apache.org"))
.endObject()
.field("status", STATUS[RandomUtils.nextInt(STATUS.length)])
.field("content_length", RandomUtils.nextInt(2000))
.field("status", STATUS[RandomUtils.nextInt(0, STATUS.length)])
.field("content_length", RandomUtils.nextInt(0, 2000))
)
.get();
}

View file

@ -51,8 +51,8 @@
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>

View file

@ -21,7 +21,7 @@ package org.apache.zeppelin.file;
import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringUtils;
import java.text.SimpleDateFormat;
import java.util.ArrayList;

View file

@ -37,21 +37,16 @@
<properties>
<!--library versions-->
<interpreter.name>flink</interpreter.name>
<flink.version>1.9.0</flink.version>
<flink.version>1.10.0</flink.version>
<hadoop.version>2.6.5</hadoop.version>
<hive.version>2.3.4</hive.version>
<hiverunner.version>4.0.0</hiverunner.version>
<grpc.version>1.15.0</grpc.version>
<scala.macros.version>2.0.1</scala.macros.version>
<scala.binary.version>2.11</scala.binary.version>
<scala.version>2.11.12</scala.version>
<!--plugin versions-->
<plugin.scalamaven.version>3.2.2</plugin.scalamaven.version>
<plugin.eclipse.version>2.8</plugin.eclipse.version>
<plugin.buildhelper.version>1.7</plugin.buildhelper.version>
<plugin.scalastyle.version>0.5.0</plugin.scalastyle.version>
<flink.bin.download.url>https://archive.apache.org/dist/flink/flink-${flink.version}/flink-${flink.version}-bin-scala_${scala.binary.version}.tgz</flink.bin.download.url>
</properties>
@ -63,8 +58,8 @@
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<groupId>io.atomix</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
@ -80,8 +75,16 @@
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<groupId>io.atomix</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>io.grpc</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -92,6 +95,32 @@
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>io.atomix</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>io.grpc</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.jline</groupId>
<artifactId>jline-terminal</artifactId>
<version>3.9.0</version>
</dependency>
<dependency>
<groupId>org.jline</groupId>
<artifactId>jline-reader</artifactId>
<version>3.9.0</version>
</dependency>
<dependency>
@ -291,13 +320,6 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.11</artifactId>
@ -311,6 +333,13 @@
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
@ -329,43 +358,6 @@
<scope>test</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>com.klarna</groupId>-->
<!--<artifactId>hiverunner</artifactId>-->
<!--<version>${hiverunner.version}</version>-->
<!--<scope>test</scope>-->
<!--<exclusions>-->
<!--<exclusion>-->
<!--<groupId>org.apache.hive</groupId>-->
<!--<artifactId>hive-serde</artifactId>-->
<!--</exclusion>-->
<!--<exclusion>-->
<!--<groupId>org.apache.hive</groupId>-->
<!--<artifactId>hive-jdbc</artifactId>-->
<!--</exclusion>-->
<!--<exclusion>-->
<!--<groupId>org.apache.hive.hcatalog</groupId>-->
<!--<artifactId>hive-webhcat-java-client</artifactId>-->
<!--</exclusion>-->
<!--<exclusion>-->
<!--<groupId>org.apache.hive</groupId>-->
<!--<artifactId>hive-service</artifactId>-->
<!--</exclusion>-->
<!--<exclusion>-->
<!--<groupId>org.apache.hive</groupId>-->
<!--<artifactId>hive-contrib</artifactId>-->
<!--</exclusion>-->
<!--&lt;!&ndash;<exclusion>&ndash;&gt;-->
<!--&lt;!&ndash;<groupId>com.google.guava</groupId>&ndash;&gt;-->
<!--&lt;!&ndash;<artifactId>guava</artifactId>&ndash;&gt;-->
<!--&lt;!&ndash;</exclusion>&ndash;&gt;-->
<!--<exclusion>-->
<!--<groupId>io.netty</groupId>-->
<!--<artifactId>netty</artifactId>-->
<!--</exclusion>-->
<!--</exclusions>-->
<!--</dependency>-->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
@ -494,6 +486,10 @@
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.jms</groupId>
<artifactId>jms</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -537,6 +533,19 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.klarna</groupId>
<artifactId>hiverunner</artifactId>
<version>4.0.0</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
@ -544,7 +553,6 @@
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<id>eclipse-add-source</id>
@ -593,12 +601,13 @@
</configuration>
</plugin>
<!-- include flink by default -->
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
<executions>
<execution>
<id>download-pyflink-files</id>
<id>download-flink-files</id>
<phase>validate</phase>
<goals>
<goal>wget</goal>
@ -606,8 +615,8 @@
<configuration>
<readTimeOut>60000</readTimeOut>
<retries>5</retries>
<unpack>true</unpack>
<url>${flink.bin.download.url}</url>
<unpack>true</unpack>
<outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</execution>
@ -618,20 +627,26 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skip>false</skip>
<forkMode>always</forkMode>
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<!-- set sun.zip.disableMemoryMapping=true because of
https://blogs.oracle.com/poonam/crashes-in-zipgetentry
https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8191484 -->
<argLine>-Xmx4096m -XX:MaxMetaspaceSize=512m -Dsun.zip.disableMemoryMapping=true</argLine>
<environmentVariables>
<PYTHONPATH>${project.build.directory}/flink-${flink.version}/opt/python/py4j-0.10.8.1-src.zip:${project.build.directory}/flink-${flink.version}/opt/python/pyflink.zip</PYTHONPATH>
<FLINK_HOME>${project.build.directory}/flink-${flink.version}</FLINK_HOME>
<FLINK_CONF_DIR>${project.build.directory}/test-classes</FLINK_CONF_DIR>
</environmentVariables>
</configuration>
</plugin>
<!-- Eclipse Integration -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
<version>${plugin.eclipse.version}</version>
<configuration>
<downloadSources>true</downloadSources>
<projectnatures>
@ -645,10 +660,6 @@
<classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer>
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
</classpathContainers>
<!-- excludes>
<exclude>org.scala-lang:scala-library</exclude>
<exclude>org.scala-lang:scala-compiler</exclude>
</excludes -->
<sourceIncludes>
<sourceInclude>**/*.scala</sourceInclude>
<sourceInclude>**/*.java</sourceInclude>
@ -660,7 +671,6 @@
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>${plugin.buildhelper.version}</version>
<executions>
<!-- Add src/main/scala to eclipse build path -->
<execution>
@ -691,16 +701,6 @@
</executions>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkMode>always</forkMode>
<environmentVariables>
<FLINK_CONF_DIR>${project.build.directory}/test-classes</FLINK_CONF_DIR>
</environmentVariables>
</configuration>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>
@ -713,7 +713,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${plugin.shade.version}</version>
<configuration>
<filters>
<filter>
@ -768,7 +767,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<configuration>
<skip>false</skip>
<skip>true</skip>
</configuration>
</plugin>

View file

@ -17,17 +17,14 @@
package org.apache.zeppelin.flink;
import com.google.common.collect.Lists;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class FlinkBatchSqlInterpreter extends FlinkSqlInterrpeter {
@ -38,11 +35,16 @@ public class FlinkBatchSqlInterpreter extends FlinkSqlInterrpeter {
super(properties);
}
/**
 * Identifies this SQL interpreter as the batch variant.
 *
 * @return always {@code true}
 */
@Override
protected boolean isBatch() {
return true;
}
@Override
public void open() throws InterpreterException {
super.open();
this.tbenv = flinkInterpreter.getBatchTableEnvironment();
this.tbenv = flinkInterpreter.getJavaBatchTableEnvironment("blink");
this.tbenv_2 = flinkInterpreter.getJavaBatchTableEnvironment("flink");
this.z = flinkInterpreter.getZeppelinContext();
}
@ -52,24 +54,13 @@ public class FlinkBatchSqlInterpreter extends FlinkSqlInterrpeter {
}
@Override
public void callSelect(String sql, InterpreterContext context) throws IOException {
public void callInnerSelect(String sql, InterpreterContext context) throws IOException {
Table table = this.tbenv.sqlQuery(sql);
z.setCurrentSql(sql);
String result = z.showData(table);
context.out.write(result);
}
protected void checkLocalProperties(Map<String, String> localProperties)
throws InterpreterException {
List<String> validLocalProperties = Lists.newArrayList("parallelism");
for (String key : localProperties.keySet()) {
if (!validLocalProperties.contains(key)) {
throw new InterpreterException("Invalid property: " + key + ", Only the following " +
"properties are valid: " + validLocalProperties);
}
}
}
@Override
public void cancel(InterpreterContext context) throws InterpreterException {
flinkInterpreter.getJobManager().cancelJob(context);
@ -82,14 +73,14 @@ public class FlinkBatchSqlInterpreter extends FlinkSqlInterrpeter {
@Override
public int getProgress(InterpreterContext context) throws InterpreterException {
return 0;
return flinkInterpreter.getProgress(context);
}
@Override
public Scheduler getScheduler() {
int maxConcurrency = Integer.parseInt(
getProperty("zeppelin.flink.concurrentBatchSql.max", "10"));
int maxConcurrency = Integer.parseInt(properties.getProperty(
"zeppelin.flink.concurrentBatchSql.max", "10"));
return SchedulerFactory.singleton().createOrGetParallelScheduler(
FlinkBatchSqlInterpreter.class.getName() + this.hashCode(), maxConcurrency);
FlinkBatchSqlInterpreter.class.getName(), maxConcurrency);
}
}

View file

@ -33,6 +33,9 @@ import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Properties;
/**
* Interpreter for Flink Scala. It delegates all functionality to FlinkScalaInterpreter.
*/
public class FlinkInterpreter extends Interpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(FlinkInterpreter.class);
@ -63,7 +66,16 @@ public class FlinkInterpreter extends Interpreter {
this.z.setInterpreterContext(context);
this.z.setGui(context.getGui());
this.z.setNoteGui(context.getNoteGui());
return innerIntp.interpret(st, context);
// set ClassLoader of current Thread to be the ClassLoader of Flink scala-shell,
// otherwise codegen will fail to find classes defined in scala-shell
ClassLoader originClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(getFlinkScalaShellLoader());
return innerIntp.interpret(st, context);
} finally {
Thread.currentThread().setContextClassLoader(originClassLoader);
}
}
@Override
@ -98,11 +110,19 @@ public class FlinkInterpreter extends Interpreter {
}
StreamTableEnvironment getStreamTableEnvironment() {
return this.innerIntp.getStreamTableEnvionment();
return this.innerIntp.getStreamTableEnvironment("blink");
}
/**
 * Returns the Java batch table environment for the given planner, as provided by the
 * inner interpreter.
 *
 * @param planner planner name forwarded unchanged to the inner interpreter
 *                (callers visible in this change pass "blink" or "flink")
 * @return the table environment returned by the inner interpreter
 */
org.apache.flink.table.api.TableEnvironment getJavaBatchTableEnvironment(String planner) {
return this.innerIntp.getJavaBatchTableEnvironment(planner);
}
/**
 * Returns the Java stream table environment for the given planner, as provided by the
 * inner interpreter.
 *
 * @param planner planner name forwarded unchanged to the inner interpreter
 * @return the table environment returned by the inner interpreter
 */
TableEnvironment getJavaStreamTableEnvironment(String planner) {
return this.innerIntp.getJavaStreamTableEnvironment(planner);
}
TableEnvironment getBatchTableEnvironment() {
return this.innerIntp.getBatchTableEnvironment();
return this.innerIntp.getBatchTableEnvironment("blink");
}
JobManager getJobManager() {
@ -113,6 +133,17 @@ public class FlinkInterpreter extends Interpreter {
return this.innerIntp.getDefaultParallelism();
}
/**
 * Returns the default SQL parallelism reported by the inner interpreter.
 *
 * @return the value of the inner interpreter's getDefaultSqlParallelism()
 */
int getDefaultSqlParallelism() {
return this.innerIntp.getDefaultSqlParallelism();
}
/**
 * Workaround for FLINK-16936: asks the inner interpreter to create the planner again.
 * NOTE(review): what exactly is recreated is decided by the inner interpreter and is
 * not visible here.
 */
public void createPlannerAgain() {
this.innerIntp.createPlannerAgain();
}
/**
 * Returns the class loader of the Flink scala-shell, obtained from the inner interpreter.
 * interpret() installs it as the thread context class loader so that code generation
 * can find classes defined in the scala-shell.
 *
 * @return the class loader returned by the inner interpreter
 */
public ClassLoader getFlinkScalaShellLoader() {
return innerIntp.getFlinkScalaShellLoader();
}
@ -128,5 +159,4 @@ public class FlinkInterpreter extends Interpreter {
/**
 * Exposes the inner FlinkScalaInterpreter that this interpreter delegates to.
 *
 * @return the inner interpreter instance
 */
public FlinkScalaInterpreter getInnerIntp() {
return this.innerIntp;
}
}

View file

@ -18,62 +18,140 @@
package org.apache.zeppelin.flink;
import com.google.common.collect.Lists;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.flink.api.common.Plan;
import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.FlinkPlan;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.core.execution.JobClient;
import org.apache.flink.core.execution.JobListener;
import org.apache.flink.python.PythonConfig;
import org.apache.flink.python.PythonOptions;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.table.delegation.Executor;
import org.apache.flink.table.delegation.ExecutorFactory;
import org.apache.flink.table.factories.ComponentFactoryService;
import org.apache.flink.table.planner.delegation.ExecutorBase;
import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.flink.table.api.config.OptimizerConfigOptions;
import org.apache.zeppelin.flink.sql.SqlCommandParser;
import org.apache.zeppelin.flink.sql.SqlInfo;
import org.apache.zeppelin.flink.sql.SqlLists;
import org.apache.zeppelin.flink.sql.SqlCommandParser.SqlCommand;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.util.SqlSplitter;
import org.jline.utils.AttributedString;
import org.jline.utils.AttributedStringBuilder;
import org.jline.utils.AttributedStyle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
public abstract class FlinkSqlInterrpeter extends Interpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(FlinkSqlInterrpeter.class);
protected static final Logger LOGGER = LoggerFactory.getLogger(FlinkSqlInterrpeter.class);
public static final AttributedString MESSAGE_HELP = new AttributedStringBuilder()
.append("The following commands are available:\n\n")
.append(formatCommand(SqlCommand.CREATE_TABLE, "Create table under current catalog and database."))
.append(formatCommand(SqlCommand.DROP_TABLE, "Drop table with optional catalog and database. Syntax: 'DROP TABLE [IF EXISTS] <name>;'"))
.append(formatCommand(SqlCommand.CREATE_VIEW, "Creates a virtual table from a SQL query. Syntax: 'CREATE VIEW <name> AS <query>;'"))
.append(formatCommand(SqlCommand.DESCRIBE, "Describes the schema of a table with the given name."))
.append(formatCommand(SqlCommand.DROP_VIEW, "Deletes a previously created virtual table. Syntax: 'DROP VIEW <name>;'"))
.append(formatCommand(SqlCommand.EXPLAIN, "Describes the execution plan of a query or table with the given name."))
.append(formatCommand(SqlCommand.HELP, "Prints the available commands."))
.append(formatCommand(SqlCommand.INSERT_INTO, "Inserts the results of a SQL SELECT query into a declared table sink."))
.append(formatCommand(SqlCommand.INSERT_OVERWRITE, "Inserts the results of a SQL SELECT query into a declared table sink and overwrite existing data."))
.append(formatCommand(SqlCommand.SELECT, "Executes a SQL SELECT query on the Flink cluster."))
.append(formatCommand(SqlCommand.SET, "Sets a session configuration property. Syntax: 'SET <key>=<value>;'. Use 'SET;' for listing all properties."))
.append(formatCommand(SqlCommand.SHOW_FUNCTIONS, "Shows all user-defined and built-in functions."))
.append(formatCommand(SqlCommand.SHOW_TABLES, "Shows all registered tables."))
.append(formatCommand(SqlCommand.SOURCE, "Reads a SQL SELECT query from a file and executes it on the Flink cluster."))
.append(formatCommand(SqlCommand.USE_CATALOG, "Sets the current catalog. The current database is set to the catalog's default one. Experimental! Syntax: 'USE CATALOG <name>;'"))
.append(formatCommand(SqlCommand.USE, "Sets the current default database. Experimental! Syntax: 'USE <name>;'"))
.style(AttributedStyle.DEFAULT.underline())
.append("\nHint")
.style(AttributedStyle.DEFAULT)
.append(": Make sure that a statement ends with ';' for finalizing (multi-line) statements.")
.toAttributedString();
protected FlinkInterpreter flinkInterpreter;
protected TableEnvironment tbenv;
protected TableEnvironment tbenv_2;
private SqlSplitter sqlSplitter;
private int defaultSqlParallelism;
private ReentrantReadWriteLock.WriteLock lock = new ReentrantReadWriteLock().writeLock();
// all the available sql config options. see
// https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/config.html
private Map<String, ConfigOption> tableConfigOptions;
// represent the current paragraph's configOptions
private Map<String, String> currentConfigOptions = new HashMap<>();
/**
 * Creates the SQL interpreter; the properties are passed straight to the
 * {@code Interpreter} base-class constructor.
 *
 * @param properties interpreter properties
 */
public FlinkSqlInterrpeter(Properties properties) {
super(properties);
}
protected abstract boolean isBatch();
/**
 * Binds this SQL interpreter to the FlinkInterpreter of the same session and initializes
 * its state: the SQL splitter, a job listener that releases the write lock on job
 * submission, the default SQL parallelism and the table of known config options.
 *
 * @throws InterpreterException declared by the overridden method
 *         (NOTE(review): presumably raised when the FlinkInterpreter lookup fails — confirm)
 */
@Override
public void open() throws InterpreterException {
flinkInterpreter =
getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
this.sqlSplitter = new SqlSplitter();
// Listener that releases the write lock once a job has been submitted.
JobListener jobListener = new JobListener() {
@Override
public void onJobSubmitted(@Nullable JobClient jobClient, @Nullable Throwable throwable) {
// Only unlock when the thread running this callback actually holds the lock;
// unlocking a lock that is not held would throw.
if (lock.isHeldByCurrentThread()) {
lock.unlock();
LOGGER.info("UnLock JobSubmitLock");
}
}
@Override
public void onJobExecuted(@Nullable JobExecutionResult jobExecutionResult, @Nullable Throwable throwable) {
// No action is taken when a job finishes.
}
};
// Register the same listener on the Java environments behind both the
// ExecutionEnvironment and the StreamExecutionEnvironment.
flinkInterpreter.getExecutionEnvironment().getJavaEnv().registerJobListener(jobListener);
flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv().registerJobListener(jobListener);
// Remembered so the table config can be reset after a paragraph overrides parallelism.
this.defaultSqlParallelism = flinkInterpreter.getDefaultSqlParallelism();
this.tableConfigOptions = extractTableConfigOptions();
}
/**
 * Builds the lookup table of table config options by merging the ConfigOption constants
 * declared on ExecutionConfigOptions, OptimizerConfigOptions and PythonOptions, in that
 * order (a later class overrides an earlier one on a duplicate key).
 *
 * @return map from option key to its ConfigOption
 */
private Map<String, ConfigOption> extractTableConfigOptions() {
  Class[] optionHolders = {
      ExecutionConfigOptions.class,
      OptimizerConfigOptions.class,
      PythonOptions.class
  };
  Map<String, ConfigOption> merged = new HashMap<>();
  for (Class holder : optionHolders) {
    merged.putAll(extractConfigOptions(holder));
  }
  return merged;
}
/**
 * Collects the {@link ConfigOption} constants declared directly on the given class.
 *
 * Fields whose value cannot be read (for example non-static or inaccessible fields) are
 * logged at WARN level and skipped, so one bad field never prevents the others from
 * being collected.
 *
 * @param clazz class whose declared fields are scanned
 * @return map from each option's key to the option itself; empty when none are found
 */
private Map<String, ConfigOption> extractConfigOptions(Class clazz) {
  Map<String, ConfigOption> configOptions = new HashMap<>();
  for (Field field : clazz.getDeclaredFields()) {
    // Match fields declared as ConfigOption or a subtype of it. Asking the question the
    // other way round would also accept supertype-declared fields such as Object.
    if (!ConfigOption.class.isAssignableFrom(field.getType())) {
      continue;
    }
    try {
      // Option constants are static, so no receiver instance is needed.
      ConfigOption configOption = (ConfigOption) field.get(null);
      configOptions.put(configOption.key(), configOption);
    } catch (Throwable e) {
      LOGGER.warn("Fail to get ConfigOption", e);
    }
  }
  return configOptions;
}
@Override
@ -84,8 +162,6 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
flinkInterpreter.getZeppelinContext().setNoteGui(context.getNoteGui());
flinkInterpreter.getZeppelinContext().setGui(context.getGui());
checkLocalProperties(context.getLocalProperties());
// set ClassLoader of current Thread to be the ClassLoader of Flink scala-shell,
// otherwise codegen will fail to find classes defined in scala-shell
ClassLoader originClassLoader = Thread.currentThread().getContextClassLoader();
@ -97,60 +173,70 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
}
}
protected abstract void checkLocalProperties(Map<String, String> localProperties)
throws InterpreterException;
private Optional<SqlCommandParser.SqlCommandCall> parse(String stmt) {
// normalize
stmt = stmt.trim();
// remove ';' at the end
if (stmt.endsWith(";")) {
stmt = stmt.substring(0, stmt.length() - 1).trim();
}
// parse
for (SqlCommandParser.SqlCommand cmd : SqlCommandParser.SqlCommand.values()) {
final Matcher matcher = cmd.pattern.matcher(stmt);
if (matcher.matches()) {
final String[] groups = new String[matcher.groupCount()];
for (int i = 0; i < groups.length; i++) {
groups[i] = matcher.group(i + 1);
}
return cmd.operandConverter.apply(groups)
.map((operands) -> new SqlCommandParser.SqlCommandCall(cmd, operands));
}
}
return Optional.empty();
}
private InterpreterResult runSqlList(String sql, InterpreterContext context) {
List<SqlInfo> sqlLists = SqlLists.getSQLList(sql);
List<SqlCommandParser.SqlCommandCall> sqlCommands = new ArrayList<>();
for (SqlInfo sqlInfo : sqlLists) {
Optional<SqlCommandParser.SqlCommandCall> sqlCommand = parse(sqlInfo.getSqlContent());
private InterpreterResult runSqlList(String st, InterpreterContext context) {
currentConfigOptions.clear();
List<String> sqls = sqlSplitter.splitSql(st);
for (String sql : sqls) {
Optional<SqlCommandParser.SqlCommandCall> sqlCommand = SqlCommandParser.parse(sql);
if (!sqlCommand.isPresent()) {
return new InterpreterResult(InterpreterResult.Code.ERROR, "Invalid Sql statement: "
+ sqlInfo.getSqlContent());
try {
context.out.write("%text Invalid Sql statement: " + sql + "\n");
context.out.write(MESSAGE_HELP.toString());
} catch (IOException e) {
return new InterpreterResult(InterpreterResult.Code.ERROR, e.toString());
}
return new InterpreterResult(InterpreterResult.Code.ERROR);
}
sqlCommands.add(sqlCommand.get());
}
for (SqlCommandParser.SqlCommandCall sqlCommand : sqlCommands) {
try {
callCommand(sqlCommand, context);
callCommand(sqlCommand.get(), context);
context.out.flush();
} catch (Throwable e) {
LOGGER.error("Fail to run sql:" + sqlCommand.operands[0], e);
return new InterpreterResult(InterpreterResult.Code.ERROR, "Fail to run sql command: " +
sqlCommand.operands[0] + "\n" + ExceptionUtils.getStackTrace(e));
} catch (Throwable e) {
LOGGER.error("Fail to run sql:" + sql, e);
try {
context.out.write("%text Fail to run sql command: " +
sql + "\n" + ExceptionUtils.getStackTrace(e) + "\n");
} catch (IOException ex) {
LOGGER.warn("Unexpected exception:", ex);
return new InterpreterResult(InterpreterResult.Code.ERROR,
ExceptionUtils.getStackTrace(e));
}
return new InterpreterResult(InterpreterResult.Code.ERROR);
}
}
boolean runAsOne = Boolean.parseBoolean(context.getStringLocalProperty("runAsOne", "false"));
if (runAsOne) {
try {
lock.lock();
if (context.getLocalProperties().containsKey("parallelism")) {
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
Integer.parseInt(context.getLocalProperties().get("parallelism")));
}
this.tbenv.execute(st);
context.out.write("Insertion successfully.\n");
} catch (Exception e) {
LOGGER.error("Fail to execute sql as one job", e);
return new InterpreterResult(InterpreterResult.Code.ERROR, ExceptionUtils.getStackTrace(e));
} finally {
if (lock.isHeldByCurrentThread()) {
lock.unlock();
}
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
defaultSqlParallelism);
}
}
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
}
private void callCommand(SqlCommandParser.SqlCommandCall cmdCall,
InterpreterContext context) throws Exception {
switch (cmdCall.command) {
case HELP:
callHelp(context);
break;
case SHOW_CATALOGS:
callShowCatalogs(context);
break;
@ -160,10 +246,19 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
case SHOW_TABLES:
callShowTables(context);
break;
case SOURCE:
callSource(cmdCall.operands[0], context);
break;
case SHOW_FUNCTIONS:
callShowFunctions(context);
break;
case USE_DATABASE:
case SHOW_MODULES:
callShowModules(context);
break;
case USE_CATALOG:
callUseCatalog(cmdCall.operands[0], context);
break;
case USE:
callUseDatabase(cmdCall.operands[0], context);
break;
case DESCRIBE:
@ -175,14 +270,142 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
case SELECT:
callSelect(cmdCall.operands[0], context);
break;
case SET:
callSet(cmdCall.operands[0], cmdCall.operands[1], context);
break;
case INSERT_INTO:
case INSERT_OVERWRITE:
callInsertInto(cmdCall.operands[0], context);
break;
case CREATE_TABLE:
callCreateTable(cmdCall.operands[0], context);
break;
case DROP_TABLE:
callDropTable(cmdCall.operands[0], context);
break;
case CREATE_VIEW:
callCreateView(cmdCall.operands[0], cmdCall.operands[1], context);
break;
case DROP_VIEW:
callDropView(cmdCall.operands[0], context);
break;
case CREATE_DATABASE:
callCreateDatabase(cmdCall.operands[0], context);
break;
case DROP_DATABASE:
callDropDatabase(cmdCall.operands[0], context);
break;
case ALTER_DATABASE:
callAlterDatabase(cmdCall.operands[0], context);
break;
case ALTER_TABLE:
callAlterTable(cmdCall.operands[0], context);
break;
default:
throw new Exception("Unsupported command: " + cmdCall.command);
}
}
/**
 * Runs an ALTER TABLE statement through the table environment while holding
 * the interpreter lock, then writes a confirmation to the paragraph output.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callAlterTable(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  final String confirmation = "Table has been modified.\n";
  context.out.write(confirmation);
}
/**
 * Runs an ALTER DATABASE statement through the table environment while
 * holding the interpreter lock, then writes a confirmation to the output.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callAlterDatabase(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  final String confirmation = "Database has been modified.\n";
  context.out.write(confirmation);
}
/**
 * Runs a DROP DATABASE statement through the table environment while holding
 * the interpreter lock, then writes a confirmation to the paragraph output.
 *
 * <p>The lock is now actually acquired before the statement runs; previously
 * the {@code finally} block released a lock that this method never took, so
 * the statement executed unguarded, unlike the other DDL helpers.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callDropDatabase(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    this.tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  context.out.write("Database has been dropped.\n");
}
/**
 * Runs a CREATE DATABASE statement through the table environment while
 * holding the interpreter lock, then writes a confirmation to the output.
 *
 * <p>The lock is now actually acquired before the statement runs; previously
 * the {@code finally} block released a lock that this method never took, so
 * the statement executed unguarded, unlike the other DDL helpers.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callCreateDatabase(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    this.tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  context.out.write("Database has been created.\n");
}
/**
 * Drops a temporary view from the table environment and writes a
 * confirmation to the paragraph output.
 *
 * <p>The drop is performed under the interpreter lock, matching
 * {@code callCreateView} and the other DDL helpers in this class.
 *
 * @param view    name of the temporary view to drop
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callDropView(String view, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    this.tbenv.dropTemporaryView(view);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  context.out.write("View has been dropped.\n");
}
/**
 * Registers the result of a query as a temporary view while holding the
 * interpreter lock, then writes a confirmation to the paragraph output.
 *
 * @param name    name under which the temporary view is registered
 * @param query   SQL query whose result backs the view
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callCreateView(String name, String query, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    final Table viewSource = tbenv.sqlQuery(query);
    tbenv.createTemporaryView(name, viewSource);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  final String confirmation = "View has been created.\n";
  context.out.write(confirmation);
}
/**
 * Runs a CREATE TABLE statement through the table environment while holding
 * the interpreter lock, then writes a confirmation to the paragraph output.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callCreateTable(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  final String confirmation = "Table has been created.\n";
  context.out.write(confirmation);
}
/**
 * Runs a DROP TABLE statement through the table environment while holding
 * the interpreter lock, then writes a confirmation to the paragraph output.
 *
 * @param sql     the statement handed to {@code sqlUpdate}
 * @param context paragraph context whose output receives the confirmation
 * @throws IOException if writing to the paragraph output fails
 */
private void callDropTable(String sql, InterpreterContext context) throws IOException {
  lock.lock();
  try {
    tbenv.sqlUpdate(sql);
  } finally {
    // Only release the lock if this thread still owns it.
    if (lock.isHeldByCurrentThread()) {
      lock.unlock();
    }
  }
  final String confirmation = "Table has been dropped.\n";
  context.out.write(confirmation);
}
/**
 * Switches the table environment to the given catalog.
 *
 * @param catalog name of the catalog to make current
 * @param context paragraph context (not used by this command)
 */
private void callUseCatalog(String catalog, InterpreterContext context) {
  tbenv.useCatalog(catalog);
}
/**
 * Writes the modules listed by the table environment to the paragraph
 * output as a one-column {@code %table} with the header {@code module}.
 *
 * @param context paragraph context whose output receives the table
 * @throws IOException if writing to the paragraph output fails
 */
private void callShowModules(InterpreterContext context) throws IOException {
  final String[] moduleNames = tbenv.listModules();
  final StringBuilder table = new StringBuilder("%table module\n");
  table.append(StringUtils.join(moduleNames, "\n"));
  table.append("\n");
  context.out.write(table.toString());
}
/**
 * Writes the help message held in {@code MESSAGE_HELP} to the paragraph output.
 *
 * @param context paragraph context whose output receives the help text
 * @throws IOException if writing to the paragraph output fails
 */
private void callHelp(InterpreterContext context) throws IOException {
  final String helpText = MESSAGE_HELP.toString();
  context.out.write(helpText);
}
private void callShowCatalogs(InterpreterContext context) throws IOException {
String[] catalogs = this.tbenv.listCatalogs();
context.out.write("%table catalog\n" + StringUtils.join(catalogs, "\n") + "\n");
@ -195,11 +418,18 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
}
private void callShowTables(InterpreterContext context) throws IOException {
String[] tables = this.tbenv.listTables();
List<String> tables =
Lists.newArrayList(this.tbenv.listTables()).stream()
.filter(tbl -> !tbl.startsWith("UnnamedTable")).collect(Collectors.toList());
context.out.write(
"%table table\n" + StringUtils.join(tables, "\n") + "\n");
}
/**
 * Reads a SQL script from a file and runs its contents via {@code runSqlList}.
 *
 * <p>The file stream is opened in a try-with-resources block so that it is
 * closed once the script text has been read, including when reading fails.
 *
 * @param sqlFile path of the SQL script file to read
 * @param context paragraph context passed on to {@code runSqlList}
 * @throws IOException if the file cannot be opened or read
 */
private void callSource(String sqlFile, InterpreterContext context) throws IOException {
  final String sql;
  try (FileInputStream in = new FileInputStream(sqlFile)) {
    sql = IOUtils.toString(in);
  }
  // NOTE(review): any value returned by runSqlList is discarded here, as in
  // the original code - confirm that failures are reported by runSqlList itself.
  runSqlList(sql, context);
}
private void callShowFunctions(InterpreterContext context) throws IOException {
String[] functions = this.tbenv.listUserDefinedFunctions();
context.out.write(
@ -216,85 +446,121 @@ public abstract class FlinkSqlInterrpeter extends Interpreter {
StringBuilder builder = new StringBuilder();
builder.append("Column\tType\n");
for (int i = 0; i < schema.getFieldCount(); ++i) {
builder.append(schema.getFieldName(i) + "\t" + schema.getFieldDataType(i) + "\n");
builder.append(schema.getFieldName(i).get() + "\t" + schema.getFieldDataType(i).get() + "\n");
}
context.out.write(builder.toString());
context.out.write("%table\n" + builder.toString());
}
private void callExplain(String sql, InterpreterContext context) throws IOException {
Table table = this.tbenv.sqlQuery(sql);
context.out.write(this.tbenv.explain(table) + "\n");
try {
lock.lock();
Table table = this.tbenv.sqlQuery(sql);
context.out.write(this.tbenv.explain(table) + "\n");
} finally {
if (lock.isHeldByCurrentThread()) {
lock.unlock();
}
}
}
public abstract void callSelect(String sql, InterpreterContext context) throws IOException;
/**
 * Runs a SELECT statement with paragraph-specific table configuration.
 *
 * <p>Under the interpreter lock this applies the paragraph's
 * {@code parallelism} local property (if present) and every entry of
 * {@code currentConfigOptions} to the table environment configuration, then
 * delegates to {@link #callInnerSelect(String, InterpreterContext)}.
 * Afterwards, whether or not the query succeeded, the parallelism is set back
 * to {@code defaultSqlParallelism}, every known table option that has a
 * default is set back to that default, and the interpreter's Flink
 * configuration is re-applied on top.
 *
 * @param sql     the SELECT statement to run
 * @param context paragraph context supplying local properties and output
 * @throws IOException declared for {@code callInnerSelect}
 */
public void callSelect(String sql, InterpreterContext context) throws IOException {
try {
lock.lock();
// set parallelism from paragraph local property
if (context.getLocalProperties().containsKey("parallelism")) {
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
Integer.parseInt(context.getLocalProperties().get("parallelism")));
}
// set table config from set statement until now.
for (Map.Entry<String, String> entry : currentConfigOptions.entrySet()) {
this.tbenv.getConfig().getConfiguration().setString(entry.getKey(), entry.getValue());
}
callInnerSelect(sql, context);
} finally {
// NOTE(review): the held-by-current-thread guard suggests the lock may already
// have been released elsewhere by the time we get here - confirm before
// replacing it with an unconditional unlock.
if (lock.isHeldByCurrentThread()) {
lock.unlock();
}
// NOTE(review): the resets below run after the lock has been released.
// reset parallelism
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
defaultSqlParallelism);
// reset table config
for (ConfigOption configOption: tableConfigOptions.values()) {
// some options have no default value, e.g. ExecutionConfigOptions#TABLE_EXEC_DISABLED_OPERATORS
if (configOption.defaultValue() != null) {
this.tbenv.getConfig().getConfiguration().set(configOption, configOption.defaultValue());
}
}
// re-apply the interpreter-level Flink configuration on top of the defaults
this.tbenv.getConfig().getConfiguration().addAll(flinkInterpreter.getFlinkConfiguration());
}
}
public abstract void callInnerSelect(String sql, InterpreterContext context) throws IOException;
/**
 * Records a table/sql configuration override requested by a SET statement.
 *
 * <p>The key must be one of the entries in {@code tableConfigOptions};
 * accepted pairs are stored in {@code currentConfigOptions}.
 *
 * @param key     configuration key to set
 * @param value   value to associate with the key
 * @param context paragraph context (not used by this command)
 * @throws IOException if the key is not a known table/sql config option
 */
public void callSet(String key, String value, InterpreterContext context) throws IOException {
  final boolean knownOption = tableConfigOptions.containsKey(key);
  if (knownOption) {
    currentConfigOptions.put(key, value);
    return;
  }
  throw new IOException(key + " is not a valid table/sql config, please check link: " +
      "https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/config.html");
}
private void callInsertInto(String sql,
InterpreterContext context) throws IOException {
if (!isBatch()) {
context.getLocalProperties().put("flink.streaming.insert_into", "true");
}
try {
lock.lock();
if (context.getLocalProperties().containsKey("parallelism")) {
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
Integer.parseInt(context.getLocalProperties().get("parallelism")));
}
this.tbenv.sqlUpdate(sql);
// set table config from set statement until now.
for (Map.Entry<String, String> entry : currentConfigOptions.entrySet()) {
this.tbenv.getConfig().getConfiguration().setString(entry.getKey(), entry.getValue());
}
JobGraph jobGraph = createJobGraph(sql);
jobGraph.addJar(new Path(flinkInterpreter.getInnerIntp().getFlinkILoop()
.writeFilesToDisk().getAbsoluteFile().toURI()));
SqlJobRunner jobRunner =
new SqlJobRunner(flinkInterpreter.getInnerIntp().getCluster(), jobGraph, sql,
flinkInterpreter.getFlinkScalaShellLoader());
jobRunner.run();
context.out.write("Insert Succeeded.\n");
this.tbenv.sqlUpdate(sql);
boolean runAsOne = Boolean.parseBoolean(context.getStringLocalProperty("runAsOne", "false"));
if (!runAsOne) {
this.tbenv.execute(sql);
context.out.write("Insertion successfully.\n");
}
} catch (Exception e) {
throw new IOException(e);
} finally {
if (lock.isHeldByCurrentThread()) {
lock.unlock();
}
// reset parallelism
this.tbenv.getConfig().getConfiguration()
.set(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM,
defaultSqlParallelism);
// reset table config
for (ConfigOption configOption: tableConfigOptions.values()) {
// some may has no default value, e.g. ExecutionConfigOptions#TABLE_EXEC_DISABLED_OPERATORS
if (configOption.defaultValue() != null) {
this.tbenv.getConfig().getConfiguration().set(configOption, configOption.defaultValue());
}
}
this.tbenv.getConfig().getConfiguration().addAll(flinkInterpreter.getFlinkConfiguration());
}
}
private FlinkPlan createPlan(String name, Configuration flinkConfig) {
if (this.tbenv instanceof StreamTableEnvironment) {
if (flinkInterpreter.getInnerIntp().getPlanner() == "blink") {
Executor executor = lookupExecutor(
flinkInterpreter.getInnerIntp().getStEnvSetting().toExecutorProperties(),
flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv());
// special case for Blink planner to apply batch optimizations
// note: it also modifies the ExecutionConfig!
if (executor instanceof ExecutorBase) {
return ((ExecutorBase) executor).generateStreamGraph(name);
}
}
return flinkInterpreter.getStreamExecutionEnvironment().getStreamGraph();
} else {
final int parallelism = flinkInterpreter.getExecutionEnvironment().getParallelism();
final Plan unoptimizedPlan =
flinkInterpreter.getExecutionEnvironment().createProgramPlan(name);
unoptimizedPlan.setJobName(name);
final Optimizer compiler =
new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
}
/**
 * Builds one styled help line: the command name in bold, two tabs, the
 * description in the default style, and a trailing newline.
 *
 * @param cmd         command whose {@code toString()} is shown in bold
 * @param description text shown after the command
 * @return the assembled attributed string
 */
private static AttributedString formatCommand(SqlCommand cmd, String description) {
  final AttributedStringBuilder line = new AttributedStringBuilder();
  line.style(AttributedStyle.DEFAULT.bold());
  line.append(cmd.toString());
  line.append("\t\t");
  line.style(AttributedStyle.DEFAULT);
  line.append(description);
  line.append('\n');
  return line.toAttributedString();
}
/**
 * Creates a job graph for the plan built by {@code createPlan}, using the
 * interpreter's Flink configuration, two empty lists and no savepoint
 * restore settings.
 *
 * @param name name passed to {@code createPlan}
 * @return the job graph produced by {@code ClusterClient.getJobGraph}
 */
public JobGraph createJobGraph(String name) {
  final FlinkPlan jobPlan = createPlan(name, flinkInterpreter.getFlinkConfiguration());
  final Configuration jobConfig = flinkInterpreter.getFlinkConfiguration();
  return ClusterClient.getJobGraph(
      jobConfig,
      jobPlan,
      new ArrayList<>(),
      new ArrayList<>(),
      SavepointRestoreSettings.none());
}
/**
 * Finds an {@code ExecutorFactory} for the given properties and reflectively
 * invokes its {@code create(Map, StreamExecutionEnvironment)} method.
 *
 * @param executorProperties   properties used to locate and create the executor
 * @param executionEnvironment stream environment handed to the factory
 * @return the executor returned by the factory
 * @throws TableException if the lookup or the reflective call fails
 */
private static Executor lookupExecutor(
    Map<String, String> executorProperties,
    StreamExecutionEnvironment executionEnvironment) {
  try {
    final ExecutorFactory factory =
        ComponentFactoryService.find(ExecutorFactory.class, executorProperties);
    // The create method is resolved reflectively on the concrete factory class.
    final Method factoryMethod = factory.getClass()
        .getMethod("create", Map.class, StreamExecutionEnvironment.class);
    final Object created =
        factoryMethod.invoke(factory, executorProperties, executionEnvironment);
    return (Executor) created;
  } catch (Exception e) {
    throw new TableException(
        "Could not instantiate the executor. Make sure a planner module is on the classpath",
        e);
  }
}
}

View file

@ -18,9 +18,11 @@
package org.apache.zeppelin.flink;
import org.apache.zeppelin.flink.sql.RetractStreamSqlJob;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.zeppelin.flink.sql.UpdateStreamSqlJob;
import org.apache.zeppelin.flink.sql.SingleRowStreamSqlJob;
import org.apache.zeppelin.flink.sql.TimeSeriesStreamSqlJob;
import org.apache.zeppelin.flink.sql.AppendStreamSqlJob;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
@ -28,7 +30,6 @@ import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
public class FlinkStreamSqlInterpreter extends FlinkSqlInterrpeter {
@ -37,12 +38,16 @@ public class FlinkStreamSqlInterpreter extends FlinkSqlInterrpeter {
super(properties);
}
/**
 * Reports whether this interpreter runs in batch mode.
 *
 * @return always {@code false} for this stream SQL interpreter
 */
@Override
protected boolean isBatch() {
return false;
}
@Override
public void open() throws InterpreterException {
this.flinkInterpreter =
getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
this.tbenv = flinkInterpreter.getStreamTableEnvironment();
super.open();
this.tbenv = flinkInterpreter.getJavaStreamTableEnvironment("blink");
this.tbenv_2 = flinkInterpreter.getJavaStreamTableEnvironment("flink");
}
@Override
@ -51,13 +56,20 @@ public class FlinkStreamSqlInterpreter extends FlinkSqlInterrpeter {
}
@Override
protected void checkLocalProperties(Map<String, String> localProperties)
throws InterpreterException {
public void callInnerSelect(String sql, InterpreterContext context) throws IOException {
String savepointDir = context.getLocalProperties().get("savepointDir");
if (!StringUtils.isBlank(savepointDir)) {
Object savepointPath = flinkInterpreter.getZeppelinContext()
.angular(context.getParagraphId() + "_savepointpath", context.getNoteId(), null);
if (savepointPath == null) {
LOGGER.info("savepointPath is null because it is the first run");
} else {
LOGGER.info("set savepointPath to: " + savepointPath.toString());
this.flinkInterpreter.getFlinkConfiguration()
.setString("execution.savepoint.path", savepointPath.toString());
}
}
}
@Override
public void callSelect(String sql, InterpreterContext context) throws IOException {
String streamType = context.getLocalProperties().get("type");
if (streamType == null) {
throw new IOException("type must be specified for stream sql");
@ -65,19 +77,25 @@ public class FlinkStreamSqlInterpreter extends FlinkSqlInterrpeter {
if (streamType.equalsIgnoreCase("single")) {
SingleRowStreamSqlJob streamJob = new SingleRowStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
flinkInterpreter.getStreamTableEnvironment(), context,
tbenv,
flinkInterpreter.getJobManager(),
context,
flinkInterpreter.getDefaultParallelism());
streamJob.run(sql);
} else if (streamType.equalsIgnoreCase("ts")) {
TimeSeriesStreamSqlJob streamJob = new TimeSeriesStreamSqlJob(
} else if (streamType.equalsIgnoreCase("append")) {
AppendStreamSqlJob streamJob = new AppendStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
flinkInterpreter.getStreamTableEnvironment(), context,
flinkInterpreter.getStreamTableEnvironment(),
flinkInterpreter.getJobManager(),
context,
flinkInterpreter.getDefaultParallelism());
streamJob.run(sql);
} else if (streamType.equalsIgnoreCase("retract")) {
RetractStreamSqlJob streamJob = new RetractStreamSqlJob(
} else if (streamType.equalsIgnoreCase("update")) {
UpdateStreamSqlJob streamJob = new UpdateStreamSqlJob(
flinkInterpreter.getStreamExecutionEnvironment(),
flinkInterpreter.getStreamTableEnvironment(), context,
flinkInterpreter.getStreamTableEnvironment(),
flinkInterpreter.getJobManager(),
context,
flinkInterpreter.getDefaultParallelism());
streamJob.run(sql);
} else {

View file

@ -17,9 +17,11 @@
package org.apache.zeppelin.flink;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.python.IPythonInterpreter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -36,13 +38,19 @@ public class IPyFlinkInterpreter extends IPythonInterpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(IPyFlinkInterpreter.class);
private FlinkInterpreter flinkInterpreter;
private InterpreterContext curInterpreterContext;
private boolean opened = false;
private ClassLoader originalClassLoader;
public IPyFlinkInterpreter(Properties property) {
super(property);
}
@Override
public void open() throws InterpreterException {
public synchronized void open() throws InterpreterException {
if (opened) {
return;
}
FlinkInterpreter pyFlinkInterpreter =
getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class, false);
setProperty("zeppelin.python",
@ -50,10 +58,11 @@ public class IPyFlinkInterpreter extends IPythonInterpreter {
flinkInterpreter = getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
setAdditionalPythonInitFile("python/zeppelin_ipyflink.py");
super.open();
opened = true;
}
@Override
public BaseZeppelinContext buildZeppelinContext() {
public ZeppelinContext buildZeppelinContext() {
return flinkInterpreter.getZeppelinContext();
}
@ -66,6 +75,32 @@ public class IPyFlinkInterpreter extends IPythonInterpreter {
return envs;
}
/**
 * Interprets a paragraph after preparing the python-side Java thread.
 *
 * <p>The current context is stored first and {@code intp.initJavaThread()}
 * is run in the kernel so that, per the original comment, the Flink job can
 * be associated with the paragraph in the job listener. After the user code
 * has run (or failed), and only if the kernel process is still running,
 * {@code intp.resetClassLoaderInPythonThread()} is run; a failure of that
 * reset is only logged.
 *
 * @param st      the code to interpret
 * @param context the paragraph context
 * @return the result of interpreting {@code st}
 * @throws InterpreterException if the init call does not succeed
 */
@Override
public InterpreterResult internalInterpret(String st,
                                           InterpreterContext context)
    throws InterpreterException {
  try {
    // set InterpreterContext in the python thread first, otherwise flink job could not be
    // associated with paragraph in JobListener
    this.curInterpreterContext = context;
    final InterpreterResult initResult =
        super.internalInterpret("intp.initJavaThread()", context);
    if (initResult.code() == InterpreterResult.Code.SUCCESS) {
      return super.internalInterpret(st, context);
    }
    throw new InterpreterException("Fail to initJavaThread: " +
        initResult.toString());
  } finally {
    if (getKernelProcessLauncher().isRunning()) {
      final InterpreterResult resetResult =
          super.internalInterpret("intp.resetClassLoaderInPythonThread()", context);
      final boolean resetFailed = resetResult.code() != InterpreterResult.Code.SUCCESS;
      if (resetFailed) {
        LOGGER.warn("Fail to resetClassLoaderInPythonThread: " + resetResult.toString());
      }
    }
  }
}
@Override
public void cancel(InterpreterContext context) throws InterpreterException {
super.cancel(context);
@ -81,6 +116,25 @@ public class IPyFlinkInterpreter extends IPythonInterpreter {
}
}
/**
 * Called by python process.
 *
 * <p>Publishes the stored interpreter context on the calling thread, saves
 * that thread's context class loader, replaces it with the Flink scala shell
 * loader, and asks the Flink interpreter to create its planner again.
 */
public void initJavaThread() {
  InterpreterContext.set(curInterpreterContext);
  final Thread callingThread = Thread.currentThread();
  // Remembered so resetClassLoaderInPythonThread() can restore it.
  originalClassLoader = callingThread.getContextClassLoader();
  callingThread.setContextClassLoader(flinkInterpreter.getFlinkScalaShellLoader());
  flinkInterpreter.createPlannerAgain();
}
/**
 * Called by python process.
 *
 * <p>Restores the context class loader saved by {@code initJavaThread()} on
 * the calling thread; does nothing if none has been saved.
 */
public void resetClassLoaderInPythonThread() {
  if (originalClassLoader == null) {
    return;
  }
  Thread.currentThread().setContextClassLoader(originalClassLoader);
}
@Override
public int getProgress(InterpreterContext context) throws InterpreterException {
return flinkInterpreter.getProgress(context);
@ -94,4 +148,12 @@ public class IPyFlinkInterpreter extends IPythonInterpreter {
getJavaStreamExecutionEnvironment() {
return flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv();
}
/**
 * Returns the Flink interpreter's Java batch table environment for a planner.
 *
 * @param planner planner name forwarded to the Flink interpreter
 * @return the table environment supplied by the Flink interpreter
 */
public TableEnvironment getJavaBatchTableEnvironment(String planner) {
  final TableEnvironment batchEnv = flinkInterpreter.getJavaBatchTableEnvironment(planner);
  return batchEnv;
}
/**
 * Returns the Flink interpreter's Java stream table environment for a planner.
 *
 * @param planner planner name forwarded to the Flink interpreter
 * @return the table environment supplied by the Flink interpreter
 */
public TableEnvironment getJavaStreamTableEnvironment(String planner) {
  final TableEnvironment streamEnv = flinkInterpreter.getJavaStreamTableEnvironment(planner);
  return streamEnv;
}
}

View file

@ -19,9 +19,9 @@ package org.apache.zeppelin.flink;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.scala.ExecutionEnvironment;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.core.execution.JobClient;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.json.JSONArray;
@ -38,100 +38,136 @@ public class JobManager {
private static Logger LOGGER = LoggerFactory.getLogger(JobManager.class);
private Map<String, JobID> jobs = new HashMap<>();
private Map<String, String> savePointMap = new HashMap<>();
private Map<String, JobClient> jobs = new HashMap<>();
private ConcurrentHashMap<JobID, FlinkJobProgressPoller> jobProgressPollerMap =
new ConcurrentHashMap<>();
private ExecutionEnvironment env;
private StreamExecutionEnvironment senv;
private FlinkZeppelinContext z;
private String flinkWebUI;
public JobManager(ExecutionEnvironment env,
StreamExecutionEnvironment senv,
FlinkZeppelinContext z,
public JobManager(FlinkZeppelinContext z,
String flinkWebUI) {
this.env = env;
this.senv = senv;
this.z = z;
this.flinkWebUI = flinkWebUI;
}
public void addJob(String paragraphId, JobID jobId) {
JobID previousJobId = this.jobs.put(paragraphId, jobId);
FlinkJobProgressPoller thread = new FlinkJobProgressPoller(flinkWebUI, jobId);
public void addJob(InterpreterContext context, JobClient jobClient) {
String paragraphId = context.getParagraphId();
JobClient previousJobClient = this.jobs.put(paragraphId, jobClient);
FlinkJobProgressPoller thread = new FlinkJobProgressPoller(flinkWebUI, jobClient.getJobID(), context);
thread.setName("JobProgressPoller-Thread-" + paragraphId);
thread.start();
this.jobProgressPollerMap.put(jobId, thread);
if (previousJobId != null) {
this.jobProgressPollerMap.put(jobClient.getJobID(), thread);
if (previousJobClient != null) {
LOGGER.warn("There's another Job {} that is associated with paragraph {}",
jobId, paragraphId);
jobClient.getJobID(), paragraphId);
}
}
public void removeJob(String paragraphId) {
JobID jobID = this.jobs.remove(paragraphId);
if (jobID == null) {
LOGGER.info("Remove job in paragraph: " + paragraphId);
JobClient jobClient = this.jobs.remove(paragraphId);
if (jobClient == null) {
LOGGER.warn("Unable to remove job, because no job is associated with paragraph: "
+ paragraphId);
return;
}
FlinkJobProgressPoller jobProgressPoller = this.jobProgressPollerMap.remove(jobID);
FlinkJobProgressPoller jobProgressPoller =
this.jobProgressPollerMap.remove(jobClient.getJobID());
jobProgressPoller.cancel();
}
/**
 * Sends the Flink web UI link of the paragraph's job to the front end.
 *
 * <p>If a job client is registered for the paragraph, a map holding the job
 * URL, a label, a tooltip and the note/paragraph ids is passed to the
 * interpreter event client; otherwise a warning is logged.
 *
 * @param context paragraph context identifying the paragraph and note
 */
public void sendFlinkJobUrl(InterpreterContext context) {
  final JobClient client = jobs.get(context.getParagraphId());
  if (client == null) {
    LOGGER.warn("No job is associated with paragraph: " + context.getParagraphId());
    return;
  }

  final String jobUrl = flinkWebUI + "#/job/" + client.getJobID();
  final Map<String, String> paraInfos = new HashMap<>();
  paraInfos.put("jobUrl", jobUrl);
  paraInfos.put("label", "FLINK JOB");
  paraInfos.put("tooltip", "View in Flink web UI");
  paraInfos.put("noteId", context.getNoteId());
  paraInfos.put("paraId", context.getParagraphId());
  LOGGER.info("Job is started at: " + jobUrl);
  context.getIntpEventClient().onParaInfosReceived(paraInfos);
}
public int getJobProgress(String paragraphId) {
JobID jobId = this.jobs.get(paragraphId);
if (jobId == null) {
JobClient jobClient = this.jobs.get(paragraphId);
if (jobClient == null) {
LOGGER.warn("Unable to get job progress for paragraph: " + paragraphId +
", because no job is associated with this paragraph");
return 0;
}
FlinkJobProgressPoller jobProgressPoller = this.jobProgressPollerMap.get(jobId);
FlinkJobProgressPoller jobProgressPoller = this.jobProgressPollerMap.get(jobClient.getJobID());
if (jobProgressPoller == null) {
LOGGER.warn("Unable to get job progress for paragraph: " + paragraphId +
", because no job progress is associated with this jobId: " + jobId);
", because no job progress is associated with this jobId: " + jobClient.getJobID());
return 0;
}
return jobProgressPoller.getProgress();
}
public void cancelJob(InterpreterContext context) throws InterpreterException {
JobID jobId = this.jobs.remove(context.getParagraphId());
if (jobId == null) {
LOGGER.warn("Unable to remove Job from paragraph {}", context.getParagraphId());
JobClient jobClient = this.jobs.remove(context.getParagraphId());
if (jobClient == null) {
LOGGER.warn("Unable to remove Job from paragraph {} as no job associated to this paragraph",
context.getParagraphId());
return;
}
try {
//this.env.cancel(jobId);
String savepointDir = context.getLocalProperties().get("savepointDir");
if (StringUtils.isBlank(savepointDir)) {
LOGGER.info("Trying to cancel job of paragraph {}", context.getParagraphId());
jobClient.cancel();
} else {
LOGGER.info("Trying to stop job of paragraph {} with save point dir: {}",
context.getParagraphId(), savepointDir);
String savePointPath = jobClient.stopWithSavepoint(false, savepointDir).get();
z.angularBind(context.getParagraphId() + "_savepointpath", savePointPath);
}
} catch (Exception e) {
String errorMessage = String.format("Fail to cancel job %s that is associated " +
"with paragraph %s", jobId, context.getParagraphId());
"with paragraph %s", jobClient.getJobID(), context.getParagraphId());
LOGGER.warn(errorMessage, e);
throw new InterpreterException(errorMessage, e);
}
FlinkJobProgressPoller jobProgressPoller = jobProgressPollerMap.remove(jobId);
FlinkJobProgressPoller jobProgressPoller = jobProgressPollerMap.remove(jobClient.getJobID());
jobProgressPoller.cancel();
jobProgressPoller.interrupt();
}
/**
 * Cancels every progress poller currently registered in
 * {@code jobProgressPollerMap}.
 */
public void shutdown() {
  jobProgressPollerMap.values().forEach(FlinkJobProgressPoller::cancel);
}
class FlinkJobProgressPoller extends Thread {
private String flinkWebUI;
private JobID jobId;
private InterpreterContext context;
private boolean isStreamingInsertInto;
private int progress;
private AtomicBoolean running = new AtomicBoolean(true);
FlinkJobProgressPoller(String flinkWebUI, JobID jobId) {
FlinkJobProgressPoller(String flinkWebUI, JobID jobId, InterpreterContext context) {
this.flinkWebUI = flinkWebUI;
this.jobId = jobId;
this.context = context;
this.isStreamingInsertInto = context.getLocalProperties().containsKey("flink.streaming.insert_into");
}
@Override
public void run() {
try {
while (!Thread.currentThread().isInterrupted() && running.get()) {
JsonNode rootNode = Unirest.get(flinkWebUI + "/jobs/" + jobId.toString())
while (!Thread.currentThread().isInterrupted() && running.get()) {
JsonNode rootNode = null;
try {
rootNode = Unirest.get(flinkWebUI + "/jobs/" + jobId.toString())
.asJson().getBody();
JSONArray vertices = rootNode.getObject().getJSONArray("vertices");
int totalTasks = 0;
@ -145,6 +181,7 @@ public class JobManager {
LOGGER.debug("Finished tasks:" + finishedTasks);
if (finishedTasks != 0) {
this.progress = finishedTasks * 100 / totalTasks;
LOGGER.debug("Progress: " + this.progress);
}
String jobState = rootNode.getObject().getString("state");
if (jobState.equalsIgnoreCase("finished")) {
@ -153,21 +190,32 @@ public class JobManager {
synchronized (running) {
running.wait(1000);
}
if (isStreamingInsertInto) {
StringBuilder builder = new StringBuilder("%html ");
builder.append("<h1>Duration: " +
Integer.parseInt(rootNode.getObject().getString("duration")) / 1000 +
" seconds");
builder.append("\n%text ");
context.out.clear();
sendFlinkJobUrl(context);
context.out.write(builder.toString());
context.out.flush();
}
} catch (Exception e) {
LOGGER.error("Fail to poll flink job progress via rest api, rest api: " + rootNode, e);
}
} catch (Exception e) {
LOGGER.error("Fail to poll flink job progress via rest api", e);
}
}
public void cancel() {
this.running.set(false);
synchronized (running) {
running.notify();
public void cancel () {
this.running.set(false);
synchronized (running) {
running.notify();
}
}
}
public int getProgress() {
return progress;
public int getProgress () {
return progress;
}
}
}
}

View file

@ -17,9 +17,12 @@
package org.apache.zeppelin.flink;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.flink.python.util.ResourceUtil;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.python.IPythonInterpreter;
import org.apache.zeppelin.python.PythonInterpreter;
import org.slf4j.Logger;
@ -30,6 +33,7 @@ import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.file.Files;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@ -40,6 +44,9 @@ public class PyFlinkInterpreter extends PythonInterpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(PyFlinkInterpreter.class);
private FlinkInterpreter flinkInterpreter;
private InterpreterContext curInterpreterContext;
private boolean isOpened = false;
private ClassLoader originalClassLoader;
public PyFlinkInterpreter(Properties properties) {
super(properties);
@ -49,6 +56,7 @@ public class PyFlinkInterpreter extends PythonInterpreter {
public void open() throws InterpreterException {
this.flinkInterpreter = getInterpreterInTheSameSessionByClassName(FlinkInterpreter.class);
setProperty("zeppelin.python", getProperty("zeppelin.pyflink.python", "python"));
setProperty("zeppelin.python.useIPython", getProperty("zeppelin.pyflink.useIPython", "true"));
URL[] urls = new URL[0];
List<URL> urlList = new LinkedList<>();
@ -91,6 +99,58 @@ public class PyFlinkInterpreter extends PythonInterpreter {
throw new InterpreterException("Fail to bootstrap pyflink", e);
}
}
isOpened = true;
}
/**
 * Interprets a paragraph after preparing the python-side Java thread.
 *
 * <p>Once the interpreter has been opened, the current context is stored and
 * {@code intp.initJavaThread()} is run first so that, per the original
 * comment, the Flink job can be associated with the paragraph in the job
 * listener. The planner is then re-created and the user code is run.
 * Afterwards {@code intp.resetClassLoaderInPythonThread()} is run when
 * IPython is in use or the python process is still running; a failure of
 * that reset is only logged.
 *
 * @param st      the code to interpret
 * @param context the paragraph context
 * @return the result of interpreting {@code st}
 * @throws InterpreterException if the init call does not succeed
 */
@Override
public InterpreterResult interpret(String st, InterpreterContext context) throws InterpreterException {
  try {
    if (isOpened) {
      // set InterpreterContext in the python thread first, otherwise flink job could not be
      // associated with paragraph in JobListener
      this.curInterpreterContext = context;
      final InterpreterResult initResult =
          super.interpret("intp.initJavaThread()", context);
      final boolean initFailed = initResult.code() != InterpreterResult.Code.SUCCESS;
      if (initFailed) {
        throw new InterpreterException("Fail to initJavaThread: " +
            initResult.toString());
      }
    }
    flinkInterpreter.createPlannerAgain();
    return super.interpret(st, context);
  } finally {
    // NOTE(review): the "!useIPython() &&" part looks logically redundant; it is
    // kept as-is so that useIPython() is evaluated exactly as before.
    if (useIPython() || (!useIPython() && getPythonProcessLauncher().isRunning())) {
      final InterpreterResult resetResult =
          super.interpret("intp.resetClassLoaderInPythonThread()", context);
      final boolean resetFailed = resetResult.code() != InterpreterResult.Code.SUCCESS;
      if (resetFailed) {
        LOGGER.warn("Fail to resetClassLoaderInPythonThread: " + resetResult.toString());
      }
    }
  }
}
/**
 * Called by python process.
 *
 * <p>Publishes the stored interpreter context on the calling thread, saves
 * that thread's context class loader, replaces it with the Flink scala shell
 * loader, and asks the Flink interpreter to create its planner again.
 */
public void initJavaThread() {
  InterpreterContext.set(curInterpreterContext);
  final Thread callingThread = Thread.currentThread();
  // Remembered so resetClassLoaderInPythonThread() can restore it.
  originalClassLoader = callingThread.getContextClassLoader();
  callingThread.setContextClassLoader(flinkInterpreter.getFlinkScalaShellLoader());
  flinkInterpreter.createPlannerAgain();
}
/**
 * Called by python process.
 *
 * <p>Restores the context class loader saved by {@code initJavaThread()} on
 * the calling thread; does nothing if none has been saved.
 */
public void resetClassLoaderInPythonThread() {
  if (originalClassLoader == null) {
    return;
  }
  Thread.currentThread().setContextClassLoader(originalClassLoader);
}
/**
 * Cancels the paragraph: first through the parent python interpreter, then
 * through the Flink interpreter.
 *
 * @param context context of the paragraph being cancelled
 * @throws InterpreterException declared for the delegated cancel calls
 */
@Override
public void cancel(InterpreterContext context) throws InterpreterException {
super.cancel(context);
flinkInterpreter.cancel(context);
}
@Override
@ -104,17 +164,18 @@ public class PyFlinkInterpreter extends PythonInterpreter {
public static String getPyFlinkPythonPath(Properties properties) throws IOException {
String flinkHome = System.getenv("FLINK_HOME");
boolean isTest = Boolean.parseBoolean(properties.getProperty("zeppelin.flink.test", "false"));
if (isTest) {
return "";
}
if (flinkHome != null) {
File pythonFolder = new File(flinkHome + "/opt/python");
File tmpDir = Files.createTempDirectory("zeppelin").toFile();
List<File> depFiles = null;
try {
depFiles = ResourceUtil.extractBuiltInDependencies(tmpDir.getAbsolutePath(), "pyflink", true);
} catch (InterruptedException e) {
throw new IOException(e);
}
StringBuilder builder = new StringBuilder();
for (File file : pythonFolder.listFiles()) {
if (file.getName().endsWith(".zip")) {
builder.append(file.getAbsolutePath() + ":");
}
for (File file : depFiles) {
LOGGER.info("Adding extracted file to PYTHONPATH: " + file.getAbsolutePath());
builder.append(file.getAbsolutePath() + ":");
}
return builder.toString();
} else {
@ -136,13 +197,13 @@ public class PyFlinkInterpreter extends PythonInterpreter {
}
@Override
public BaseZeppelinContext getZeppelinContext() {
public ZeppelinContext getZeppelinContext() {
return flinkInterpreter.getZeppelinContext();
}
@Override
public int getProgress(InterpreterContext context) throws InterpreterException {
return 0;
return flinkInterpreter.getProgress(context);
}
public org.apache.flink.api.java.ExecutionEnvironment getJavaExecutionEnvironment() {
@ -154,4 +215,11 @@ public class PyFlinkInterpreter extends PythonInterpreter {
return flinkInterpreter.getStreamExecutionEnvironment().getJavaEnv();
}
/**
 * Returns the java batch table environment for the given planner by delegating to
 * flinkInterpreter.
 *
 * @param planner name of the planner (presumably "flink" or "blink" - confirm against
 *                FlinkInterpreter)
 * @return whatever flinkInterpreter.getJavaBatchTableEnvironment returns for that planner
 */
public TableEnvironment getJavaBatchTableEnvironment(String planner) {
return flinkInterpreter.getJavaBatchTableEnvironment(planner);
}
/**
 * Returns the java stream table environment for the given planner by delegating to
 * flinkInterpreter.
 *
 * @param planner name of the planner (presumably "flink" or "blink" - confirm against
 *                FlinkInterpreter)
 * @return whatever flinkInterpreter.getJavaStreamTableEnvironment returns for that planner
 */
public TableEnvironment getJavaStreamTableEnvironment(String planner) {
return flinkInterpreter.getJavaStreamTableEnvironment(planner);
}
}

View file

@ -0,0 +1,263 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableConfig;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.api.java.internal.StreamTableEnvironmentImpl;
import org.apache.flink.table.catalog.CatalogManager;
import org.apache.flink.table.catalog.FunctionCatalog;
import org.apache.flink.table.delegation.Executor;
import org.apache.flink.table.delegation.ExecutorFactory;
import org.apache.flink.table.delegation.Planner;
import org.apache.flink.table.delegation.PlannerFactory;
import org.apache.flink.table.factories.ComponentFactoryService;
import org.apache.flink.table.module.ModuleManager;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.Map;
/**
 * Factory class for creating the flink table environments needed for different purposes:
 * 1. java / scala api
 * 2. stream table / batch table
 * 3. flink planner / blink planner
 *
 * Every environment produced by one factory instance is built from the same TableConfig,
 * CatalogManager and ModuleManager that were handed to the constructor. The "Flink" methods
 * use flinkFunctionCatalog and the "Blink" methods use blinkFunctionCatalog; the planner and
 * executor implementations themselves are resolved from the EnvironmentSettings passed to
 * each method.
 */
public class TableEnvFactory {

  private final org.apache.flink.api.scala.ExecutionEnvironment benv;
  private final org.apache.flink.streaming.api.scala.StreamExecutionEnvironment senv;
  private final TableConfig tblConfig;
  private final CatalogManager catalogManager;
  private final ModuleManager moduleManager;
  private final FunctionCatalog flinkFunctionCatalog;
  private final FunctionCatalog blinkFunctionCatalog;

  /**
   * @param env                  scala batch execution environment
   * @param senv                 scala stream execution environment
   * @param tblConfig            table config shared by all created table environments
   * @param catalogManager       catalog manager shared by all created table environments
   * @param moduleManager        module manager shared by all created table environments
   * @param flinkFunctionCatalog function catalog used for the flink planner environments
   * @param blinkFunctionCatalog function catalog used for the blink planner environments
   */
  public TableEnvFactory(org.apache.flink.api.scala.ExecutionEnvironment env,
                         org.apache.flink.streaming.api.scala.StreamExecutionEnvironment senv,
                         TableConfig tblConfig,
                         CatalogManager catalogManager,
                         ModuleManager moduleManager,
                         FunctionCatalog flinkFunctionCatalog,
                         FunctionCatalog blinkFunctionCatalog) {
    this.benv = env;
    this.senv = senv;
    this.tblConfig = tblConfig;
    this.catalogManager = catalogManager;
    this.moduleManager = moduleManager;
    this.flinkFunctionCatalog = flinkFunctionCatalog;
    this.blinkFunctionCatalog = blinkFunctionCatalog;
  }

  /**
   * Creates the scala batch table environment. The implementation class is loaded by name
   * and instantiated reflectively.
   *
   * @return a new scala BatchTableEnvironment bound to the scala batch environment
   * @throws TableException if the class cannot be loaded or instantiated
   */
  public org.apache.flink.table.api.scala.BatchTableEnvironment
      createScalaFlinkBatchTableEnvironment() {
    try {
      Class<?> clazz = Class
          .forName("org.apache.flink.table.api.scala.internal.BatchTableEnvironmentImpl");
      Constructor<?> constructor = clazz
          .getConstructor(
              org.apache.flink.api.scala.ExecutionEnvironment.class,
              TableConfig.class,
              CatalogManager.class,
              ModuleManager.class);
      return (org.apache.flink.table.api.scala.BatchTableEnvironment)
          constructor.newInstance(benv, tblConfig, catalogManager, moduleManager);
    } catch (Exception e) {
      throw new TableException("Fail to createScalaFlinkBatchTableEnvironment", e);
    }
  }

  /**
   * Creates the scala stream table environment backed by flinkFunctionCatalog.
   *
   * @param settings environment settings used to resolve the executor and the planner
   * @return a new scala StreamTableEnvironmentImpl
   */
  public org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl
      createScalaFlinkStreamTableEnvironment(EnvironmentSettings settings) {
    Executor executor = newExecutor(settings);
    Planner planner = newPlanner(settings, executor, flinkFunctionCatalog);
    return new org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl(
        catalogManager,
        moduleManager,
        flinkFunctionCatalog,
        tblConfig,
        senv,
        planner,
        executor,
        settings.isStreamingMode());
  }

  /**
   * Creates the java batch table environment. The implementation class is loaded by name
   * and instantiated reflectively.
   *
   * @return a new java BatchTableEnvironment bound to the java view of the batch environment
   * @throws TableException if the class cannot be loaded or instantiated
   */
  public org.apache.flink.table.api.java.BatchTableEnvironment
      createJavaFlinkBatchTableEnvironment() {
    try {
      Class<?> clazz =
          Class.forName("org.apache.flink.table.api.java.internal.BatchTableEnvironmentImpl");
      Constructor<?> con = clazz.getConstructor(
          ExecutionEnvironment.class, TableConfig.class, CatalogManager.class,
          ModuleManager.class);
      return (org.apache.flink.table.api.java.BatchTableEnvironment) con.newInstance(
          benv.getJavaEnv(), tblConfig, catalogManager, moduleManager);
    } catch (Throwable t) {
      throw new TableException("Create BatchTableEnvironment failed.", t);
    }
  }

  /**
   * Creates the java stream table environment backed by flinkFunctionCatalog.
   *
   * @param settings environment settings; must be in streaming mode
   * @return a new java StreamTableEnvironment
   * @throws TableException if the settings are not in streaming mode
   */
  public StreamTableEnvironment createJavaFlinkStreamTableEnvironment(
      EnvironmentSettings settings) {
    requireStreamingMode(settings);
    Executor executor = newExecutor(settings);
    Planner planner = newPlanner(settings, executor, flinkFunctionCatalog);
    return new StreamTableEnvironmentImpl(
        catalogManager,
        moduleManager,
        flinkFunctionCatalog,
        tblConfig,
        senv.getJavaEnv(),
        planner,
        executor,
        settings.isStreamingMode());
  }

  /**
   * Creates the scala stream table environment backed by blinkFunctionCatalog.
   *
   * @param settings environment settings used to resolve the executor and the planner
   * @return a new scala StreamTableEnvironmentImpl
   */
  public org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl
      createScalaBlinkStreamTableEnvironment(EnvironmentSettings settings) {
    Executor executor = newExecutor(settings);
    Planner planner = newPlanner(settings, executor, blinkFunctionCatalog);
    return new org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl(
        catalogManager,
        moduleManager,
        blinkFunctionCatalog,
        tblConfig,
        senv,
        planner,
        executor,
        settings.isStreamingMode());
  }

  /**
   * Resolves an executor and creates a planner on blinkFunctionCatalog for the given
   * settings. The created planner is not kept or returned.
   * NOTE(review): presumably invoked only for the side effects of planner construction -
   * confirm against the caller.
   *
   * @param settings environment settings used to resolve the executor and the planner
   */
  public void createPlanner(EnvironmentSettings settings) {
    Executor executor = newExecutor(settings);
    newPlanner(settings, executor, blinkFunctionCatalog);
  }

  /**
   * Creates the java stream table environment backed by blinkFunctionCatalog.
   *
   * @param settings environment settings; must be in streaming mode
   * @return a new java StreamTableEnvironment
   * @throws TableException if the settings are not in streaming mode
   */
  public StreamTableEnvironment createJavaBlinkStreamTableEnvironment(
      EnvironmentSettings settings) {
    requireStreamingMode(settings);
    Executor executor = newExecutor(settings);
    Planner planner = newPlanner(settings, executor, blinkFunctionCatalog);
    return new StreamTableEnvironmentImpl(
        catalogManager,
        moduleManager,
        blinkFunctionCatalog,
        tblConfig,
        senv.getJavaEnv(),
        planner,
        executor,
        settings.isStreamingMode());
  }

  /**
   * Creates the table environment used for blink batch processing. Despite the name, the
   * returned object is a StreamTableEnvironmentImpl constructed with
   * settings.isStreamingMode(); no streaming-mode check is done here.
   *
   * @param settings environment settings used to resolve the executor and the planner
   * @return a new TableEnvironment backed by blinkFunctionCatalog
   */
  public TableEnvironment createJavaBlinkBatchTableEnvironment(
      EnvironmentSettings settings) {
    // the executor is kept in a local like in every other factory method; it used to be
    // written to an instance field that nothing ever read
    final Executor executor = newExecutor(settings);
    final Planner planner = newPlanner(settings, executor, blinkFunctionCatalog);
    return new StreamTableEnvironmentImpl(
        catalogManager,
        moduleManager,
        blinkFunctionCatalog,
        tblConfig,
        senv.getJavaEnv(),
        planner,
        executor,
        settings.isStreamingMode());
  }

  /** Rejects settings that are not in streaming mode. */
  private static void requireStreamingMode(EnvironmentSettings settings) {
    if (!settings.isStreamingMode()) {
      throw new TableException(
          "StreamTableEnvironment can not run in batch mode for now, please use TableEnvironment.");
    }
  }

  /** Resolves the executor for the given settings on the java stream environment. */
  private Executor newExecutor(EnvironmentSettings settings) {
    Map<String, String> executorProperties = settings.toExecutorProperties();
    return lookupExecutor(executorProperties, senv.getJavaEnv());
  }

  /** Creates the planner for the given settings, executor and function catalog. */
  private Planner newPlanner(EnvironmentSettings settings,
                             Executor executor,
                             FunctionCatalog functionCatalog) {
    Map<String, String> plannerProperties = settings.toPlannerProperties();
    return ComponentFactoryService.find(PlannerFactory.class, plannerProperties)
        .create(plannerProperties, executor, tblConfig, functionCatalog, catalogManager);
  }

  /**
   * Finds the ExecutorFactory matching the given properties and calls its
   * create(Map, StreamExecutionEnvironment) method.
   * NOTE(review): the method is looked up reflectively, presumably because this overload is
   * not declared on the ExecutorFactory interface itself - confirm.
   *
   * @param executorProperties   properties used to find the executor factory
   * @param executionEnvironment java stream environment the executor is created for
   * @return the executor created by the factory
   * @throws TableException if no factory is found or the reflective call fails
   */
  private static Executor lookupExecutor(
      Map<String, String> executorProperties,
      StreamExecutionEnvironment executionEnvironment) {
    try {
      ExecutorFactory executorFactory =
          ComponentFactoryService.find(ExecutorFactory.class, executorProperties);
      Method createMethod = executorFactory.getClass()
          .getMethod("create", Map.class, StreamExecutionEnvironment.class);
      return (Executor) createMethod.invoke(
          executorFactory,
          executorProperties,
          executionEnvironment);
    } catch (Exception e) {
      throw new TableException(
          "Could not instantiate the executor. Make sure a planner module is on the classpath",
          e);
    }
  }
}

View file

@ -18,7 +18,7 @@
package org.apache.zeppelin.flink.sql;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.common.typeutils.TypeSerializer;
@ -28,10 +28,11 @@ import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.streaming.experimental.SocketStreamIterator;
import org.apache.flink.table.api.StreamQueryConfig;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.table.calcite.FlinkTypeFactory;
import org.apache.flink.types.Row;
import org.apache.zeppelin.flink.JobManager;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterUtils;
@ -40,10 +41,9 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetAddress;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
@ -54,20 +54,26 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS;
public abstract class AbstractStreamSqlJob {
private static Logger LOGGER = LoggerFactory.getLogger(AbstractStreamSqlJob.class);
private static AtomicInteger SQL_INDEX = new AtomicInteger(0);
protected StreamExecutionEnvironment senv;
protected StreamTableEnvironment stenv;
protected TableEnvironment stenv;
protected JobManager jobManager;
protected InterpreterContext context;
protected TableSchema schema;
protected SocketStreamIterator<Tuple2<Boolean, Row>> iterator;
protected Object resultLock = new Object();
protected volatile boolean enableToRefresh = true;
protected int defaultParallelism;
protected ScheduledExecutorService refreshScheduler = Executors.newScheduledThreadPool(1);
public AbstractStreamSqlJob(StreamExecutionEnvironment senv,
StreamTableEnvironment stenv,
TableEnvironment stenv,
JobManager jobManager,
InterpreterContext context,
int defaultParallelism) {
this.senv = senv;
this.stenv = stenv;
this.jobManager = jobManager;
this.context = context;
this.defaultParallelism = defaultParallelism;
}
@ -90,13 +96,15 @@ public abstract class AbstractStreamSqlJob {
protected abstract String getType();
public InterpreterResult run(String st) throws IOException {
try {
checkLocalProperties(context.getLocalProperties());
Table table = stenv.sqlQuery(st);
String tableName = "UnnamedTable_" + st + "_" + SQL_INDEX.getAndIncrement();
return run(table, tableName);
}
public InterpreterResult run(Table table, String tableName) throws IOException {
try {
int parallelism = Integer.parseInt(context.getLocalProperties()
.getOrDefault("parallelism", defaultParallelism + ""));
Table table = stenv.sqlQuery(st);
this.schema = removeTimeAttributes(table.getSchema());
checkTableSchema(schema);
@ -128,14 +136,13 @@ public abstract class AbstractStreamSqlJob {
try {
stenv.useCatalog("default_catalog");
stenv.useDatabase("default_database");
stenv.registerTableSink(st, collectTableSink);
table.insertInto(new StreamQueryConfig(), st);
stenv.registerTableSink(tableName, collectTableSink);
table.insertInto(new StreamQueryConfig(), tableName);
} finally {
stenv.useCatalog(originalCatalog);
stenv.useDatabase(originalDatabase);
}
ScheduledExecutorService refreshScheduler = Executors.newScheduledThreadPool(1);
long delay = 1000L;
long period = Long.parseLong(
context.getLocalProperties().getOrDefault("refreshInterval", "3000"));
@ -145,30 +152,26 @@ public abstract class AbstractStreamSqlJob {
retrievalThread.start();
LOGGER.info("Run job without savePointPath, " + ", parallelism: " + parallelism);
JobExecutionResult jobExecutionResult = stenv.execute(st);
stenv.execute(tableName);
LOGGER.info("Flink Job is finished");
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
// wait for retrieve thread consume all data
LOGGER.info("Waiting for retrieve thread to be done");
retrievalThread.join();
refresh(context);
String finalResult = buildResult();
LOGGER.info("Final Result: " + finalResult);
return new InterpreterResult(InterpreterResult.Code.SUCCESS, finalResult);
} catch (Exception e) {
LOGGER.error("Fail to run stream sql job", e);
throw new IOException("Fail to run stream sql job", e);
} finally {
refreshScheduler.shutdownNow();
}
}
protected void checkTableSchema(TableSchema schema) throws Exception {
}
protected void checkLocalProperties(Map<String, String> localProperties) throws Exception {
List<String> validLocalProperties = getValidLocalProperties();
for (String key : localProperties.keySet()) {
if (!validLocalProperties.contains(key)) {
throw new Exception("Invalid property: " + key + ", Only the following properties " +
"are valid for stream type '" + getType() + "': " + validLocalProperties);
}
}
};
protected abstract List<String> getValidLocalProperties();
protected void processRecord(Tuple2<Boolean, Row> change) {
synchronized (resultLock) {
// insert
@ -186,6 +189,8 @@ public abstract class AbstractStreamSqlJob {
protected abstract void processDelete(Row row);
protected abstract String buildResult();
private class ResultRetrievalThread extends Thread {
private ScheduledExecutorService refreshExecutorService;
@ -211,7 +216,8 @@ public abstract class AbstractStreamSqlJob {
// either the job is done or an error occurred
isRunning = false;
LOGGER.info("ResultRetrieval Thread is done");
refreshExecutorService.shutdown();
LOGGER.info("Final Result: " + buildResult());
refreshExecutorService.shutdownNow();
}
public void cancel() {
@ -221,7 +227,6 @@ public abstract class AbstractStreamSqlJob {
protected abstract void refresh(InterpreterContext context) throws Exception;
private class RefreshTask implements Runnable {
private InterpreterContext context;
@ -234,6 +239,9 @@ public abstract class AbstractStreamSqlJob {
public void run() {
try {
synchronized (resultLock) {
if (!enableToRefresh) {
resultLock.wait();
}
refresh(context);
}
} catch (Exception e) {

View file

@ -18,12 +18,14 @@
package org.apache.zeppelin.flink.sql;
import com.google.common.collect.Lists;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;
import org.apache.zeppelin.flink.JobManager;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.tabledata.TableDataUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -32,19 +34,19 @@ import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
public class TimeSeriesStreamSqlJob extends AbstractStreamSqlJob {
public class AppendStreamSqlJob extends AbstractStreamSqlJob {
private static Logger LOGGER = LoggerFactory.getLogger(RetractStreamSqlJob.class);
private static Logger LOGGER = LoggerFactory.getLogger(UpdateStreamSqlJob.class);
private List<Row> materializedTable = new ArrayList<>();
private long tsWindowThreshold;
private boolean firstRefresh = true;
public TimeSeriesStreamSqlJob(StreamExecutionEnvironment senv,
StreamTableEnvironment stEnv,
InterpreterContext context,
int defaultParallelism) {
super(senv, stEnv, context, defaultParallelism);
public AppendStreamSqlJob(StreamExecutionEnvironment senv,
StreamTableEnvironment stEnv,
JobManager jobManager,
InterpreterContext context,
int defaultParallelism) {
super(senv, stEnv, jobManager, context, defaultParallelism);
this.tsWindowThreshold = Long.parseLong(context.getLocalProperties()
.getOrDefault("threshold", 1000 * 60 * 60 + ""));
}
@ -54,12 +56,6 @@ public class TimeSeriesStreamSqlJob extends AbstractStreamSqlJob {
return "ts";
}
@Override
protected List<String> getValidLocalProperties() {
return Lists.newArrayList("type", "parallelism",
"refreshInterval", "enableSavePoint", "runWithSavePoint", "threshold");
}
@Override
protected void checkTableSchema(TableSchema schema) throws Exception {
// if (!(schema.getFieldDataType(0).get() instanceof TimestampType)) {
@ -79,48 +75,57 @@ public class TimeSeriesStreamSqlJob extends AbstractStreamSqlJob {
throw new RuntimeException("Delete operation is not expected");
}
/**
 * Renders the current content of materializedTable as a "%table" payload.
 *
 * The first line holds the schema's field names separated by tabs. The rows are then sorted
 * by the normalized string form of their first column. The first column of the last row is
 * taken as the latest timestamp, and only rows whose first-column timestamp is greater than
 * (latest timestamp - tsWindowThreshold) are kept in materializedTable and written out, one
 * row per line with tab separated columns. The text always ends with "\n%text ".
 *
 * Note that this is not a pure read: materializedTable is sorted and trimmed as a side effect.
 *
 * @return the rendered table text
 */
@Override
protected String buildResult() {
  StringBuilder output = new StringBuilder("%table\n");

  // header line: field names separated by tabs
  int columnCount = schema.getFieldCount();
  for (int col = 0; col < columnCount; col++) {
    if (col > 0) {
      output.append("\t");
    }
    output.append(schema.getFieldNames()[col]);
  }
  output.append("\n");

  // sort it by the first column
  materializedTable.sort((left, right) ->
      TableDataUtils.normalizeColumn(StringUtils.arrayAwareToString(left.getField(0)))
          .compareTo(
              TableDataUtils.normalizeColumn(StringUtils.arrayAwareToString(right.getField(0)))));

  if (!materializedTable.isEmpty()) {
    Row lastRow = materializedTable.get(materializedTable.size() - 1);
    long maxTimestamp = ((java.sql.Timestamp) lastRow.getField(0)).getTime();
    long cutoff = maxTimestamp - tsWindowThreshold;

    // drop everything that fell out of the time window
    materializedTable = materializedTable.stream()
        .filter(row -> ((java.sql.Timestamp) row.getField(0)).getTime() > cutoff)
        .collect(Collectors.toList());

    for (Row row : materializedTable) {
      int arity = row.getArity();
      for (int col = 0; col < arity; col++) {
        if (col > 0) {
          output.append("\t");
        }
        String cell = StringUtils.arrayAwareToString(row.getField(col));
        output.append(TableDataUtils.normalizeColumn(cell));
      }
      output.append("\n");
    }
  }

  output.append("\n%text ");
  return output.toString();
}
@Override
protected void refresh(InterpreterContext context) {
context.out().clear();
try {
context.out.write("%table\n");
for (int i = 0; i < schema.getFieldCount(); ++i) {
String field = schema.getFieldNames()[i];
context.out.write(field);
if (i != (schema.getFieldCount() - 1)) {
context.out.write("\t");
}
}
context.out.write("\n");
// sort it by the first column
materializedTable.sort((r1, r2) -> {
String f1 = r1.getField(0).toString();
String f2 = r2.getField(0).toString();
return f1.compareTo(f2);
});
if (materializedTable.size() != 0) {
long maxTimestamp =
((java.sql.Timestamp) materializedTable.get(materializedTable.size() - 1)
.getField(0)).getTime();
materializedTable = materializedTable.stream()
.filter(row -> ((java.sql.Timestamp) row.getField(0)).getTime() >
maxTimestamp - tsWindowThreshold)
.collect(Collectors.toList());
for (Row row : materializedTable) {
for (int i = 0; i < row.getArity(); ++i) {
Object field = row.getField(i);
context.out.write(field.toString());
if (i != (row.getArity() - 1)) {
context.out.write("\t");
}
}
context.out.write("\n");
}
}
jobManager.sendFlinkJobUrl(context);
context.out.write(buildResult());
context.out.flush();
} catch (IOException e) {
LOGGER.error("Fail to refresh data", e);

View file

@ -28,14 +28,19 @@ import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.experimental.CollectSink;
import org.apache.flink.table.sinks.RetractStreamTableSink;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.util.UUID;
/**
* Table sink for collecting the results locally using sockets.
*/
public class CollectStreamTableSink implements RetractStreamTableSink<Row> {
private static final Logger LOGGER = LoggerFactory.getLogger(CollectStreamTableSink.class);
private final InetAddress targetAddress;
private final int targetPort;
private final TypeSerializer<Tuple2<Boolean, Row>> serializer;
@ -46,6 +51,7 @@ public class CollectStreamTableSink implements RetractStreamTableSink<Row> {
public CollectStreamTableSink(InetAddress targetAddress,
int targetPort,
TypeSerializer<Tuple2<Boolean, Row>> serializer) {
LOGGER.info("Use address: " + targetAddress.getHostAddress() + ":" + targetPort);
this.targetAddress = targetAddress;
this.targetPort = targetPort;
this.serializer = serializer;
@ -85,7 +91,7 @@ public class CollectStreamTableSink implements RetractStreamTableSink<Row> {
// add sink
return stream
.addSink(new CollectSink<>(targetAddress, targetPort, serializer))
.name("Zeppelin Flink Sql Stream Collect Sink")
.name("Zeppelin Flink Sql Stream Collect Sink " + UUID.randomUUID())
.setParallelism(1);
}

View file

@ -1,108 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink.sql;
import com.google.common.collect.Lists;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Stream sql job of type "retract". Inserted rows are collected in an in-memory list, deleted
 * rows are removed from it again, and each refresh writes the whole list to the paragraph
 * output as a "%table".
 */
public class RetractStreamSqlJob extends AbstractStreamSqlJob {

  private static final Logger LOGGER = LoggerFactory.getLogger(RetractStreamSqlJob.class);

  // rows currently present in the result, in arrival order until refresh sorts them
  private List<Row> materializedTable = new ArrayList<>();

  public RetractStreamSqlJob(StreamExecutionEnvironment senv,
                             StreamTableEnvironment stEnv,
                             InterpreterContext context,
                             int defaultParallelism) {
    super(senv, stEnv, context, defaultParallelism);
  }

  @Override
  protected String getType() {
    return "retract";
  }

  /**
   * @return the names of the paragraph local properties accepted by this job type
   */
  @Override
  protected List<String> getValidLocalProperties() {
    return Lists.newArrayList("type", "parallelism",
        "refreshInterval", "enableSavePoint", "runWithSavePoint");
  }

  /**
   * Adds the inserted row to the materialized table.
   *
   * @param row the row to add
   */
  protected void processInsert(Row row) {
    LOGGER.debug("processInsert: " + row.toString());
    materializedTable.add(row);
  }

  /**
   * Removes the first row of the materialized table that equals the given row; other equal
   * rows stay in the table. Does nothing when no row matches.
   *
   * @param row the row to remove
   */
  protected void processDelete(Row row) {
    LOGGER.debug("processDelete: " + row.toString());
    // List.remove(Object) removes only the first matching element
    materializedTable.remove(row);
  }

  /**
   * Clears the paragraph output and writes the materialized table to it as a "%table": one
   * header line with the field names, then the rows sorted by the string form of their first
   * column, columns separated by tabs. Null column values are written as "null".
   * An IOException while writing is logged and not rethrown.
   *
   * @param context interpreter context whose output is rewritten
   */
  @Override
  protected void refresh(InterpreterContext context) {
    context.out().clear();
    try {
      context.out.write("%table\n");
      int fieldCount = schema.getFieldCount();
      for (int i = 0; i < fieldCount; ++i) {
        String field = schema.getFieldName(i).get();
        context.out.write(field);
        if (i != (fieldCount - 1)) {
          context.out.write("\t");
        }
      }
      context.out.write("\n");
      LOGGER.debug("*****************Row size: " + materializedTable.size());
      // sort it by the first column; String.valueOf keeps a null first column from
      // throwing NullPointerException inside the comparator
      materializedTable.sort((r1, r2) -> {
        String f1 = String.valueOf(r1.getField(0));
        String f2 = String.valueOf(r2.getField(0));
        return f1.compareTo(f2);
      });
      for (Row row : materializedTable) {
        int arity = row.getArity();
        for (int i = 0; i < arity; ++i) {
          Object field = row.getField(i);
          // a column may hold null, so do not call toString() on it directly
          context.out.write(String.valueOf(field));
          if (i != (arity - 1)) {
            context.out.write("\t");
          }
        }
        context.out.write("\n");
      }
      context.out.flush();
    } catch (IOException e) {
      LOGGER.error("Fail to refresh data", e);
    }
  }
}

View file

@ -18,15 +18,17 @@
package org.apache.zeppelin.flink.sql;
import com.google.common.collect.Lists;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;
import org.apache.zeppelin.flink.JobManager;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.tabledata.TableDataUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
public class SingleRowStreamSqlJob extends AbstractStreamSqlJob {
@ -36,10 +38,11 @@ public class SingleRowStreamSqlJob extends AbstractStreamSqlJob {
private String template;
public SingleRowStreamSqlJob(StreamExecutionEnvironment senv,
StreamTableEnvironment stenv,
TableEnvironment stenv,
JobManager jobManager,
InterpreterContext context,
int defaultParallelism) {
super(senv, stenv, context, defaultParallelism);
super(senv, stenv, jobManager, context, defaultParallelism);
this.template = context.getLocalProperties().getOrDefault("template", "{0}");
}
@ -48,12 +51,6 @@ public class SingleRowStreamSqlJob extends AbstractStreamSqlJob {
return "single";
}
@Override
protected List<String> getValidLocalProperties() {
return Lists.newArrayList("type", "parallelism",
"refreshInterval", "template", "enableSavePoint", "runWithSavePoint");
}
protected void processInsert(Row row) {
LOGGER.debug("processInsert: " + row.toString());
latestRow = row;
@ -61,7 +58,20 @@ public class SingleRowStreamSqlJob extends AbstractStreamSqlJob {
@Override
protected void processDelete(Row row) {
LOGGER.debug("Ignore delete");
//LOGGER.debug("Ignore delete");
}
/**
 * Renders the latest row as "%html" output: every "{i}" placeholder in the template is
 * replaced by the normalized string form of column i of latestRow.
 *
 * @return "%html\n" followed by the filled-in template
 */
@Override
protected String buildResult() {
  String rendered = template;
  for (int column = 0; column < latestRow.getArity(); column++) {
    String placeholder = "{" + column + "}";
    String value =
        TableDataUtils.normalizeColumn(StringUtils.arrayAwareToString(latestRow.getField(column)));
    rendered = rendered.replace(placeholder, value);
  }
  return "%html\n" + rendered;
}
@Override
@ -71,13 +81,10 @@ public class SingleRowStreamSqlJob extends AbstractStreamSqlJob {
return;
}
context.out().clear();
context.out.write("%html\n");
String outputText = template;
for (int i = 0; i < latestRow.getArity(); ++i) {
outputText = outputText.replace("{" + i + "}", latestRow.getField(i).toString());
}
LOGGER.debug("SingleRow Output: " + outputText);
context.out.write(outputText);
String output = buildResult();
context.out.write(output);
jobManager.sendFlinkJobUrl(context);
LOGGER.debug("Refresh Output: " + output);
context.out.flush();
}
}

View file

@ -50,8 +50,9 @@ public final class SqlCommandParser {
for (int i = 0; i < groups.length; i++) {
groups[i] = matcher.group(i + 1);
}
final String sql = stmt;
return cmd.operandConverter.apply(groups)
.map((operands) -> new SqlCommandCall(cmd, operands));
.map((operands) -> new SqlCommandCall(cmd, operands, sql));
}
}
return Optional.empty();
@ -60,10 +61,10 @@ public final class SqlCommandParser {
// --------------------------------------------------------------------------------------------
private static final Function<String[], Optional<String[]>> NO_OPERANDS =
(operands) -> Optional.of(new String[0]);
(operands) -> Optional.of(new String[0]);
private static final Function<String[], Optional<String[]>> SINGLE_OPERAND =
(operands) -> Optional.of(new String[]{operands[0]});
(operands) -> Optional.of(new String[]{operands[0]});
private static final int DEFAULT_PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
@ -99,12 +100,16 @@ public final class SqlCommandParser {
"SHOW\\s+FUNCTIONS",
NO_OPERANDS),
SHOW_MODULES(
"SHOW\\s+MODULES",
NO_OPERANDS),
USE_CATALOG(
"USE\\s+CATALOG\\s+(.*)",
SINGLE_OPERAND),
USE_DATABASE(
"USE\\s+DATABASE\\s+(.*)",
USE(
"USE\\s+(?!CATALOG)(.*)",
SINGLE_OPERAND),
DESCRIBE(
@ -123,26 +128,53 @@ public final class SqlCommandParser {
"(INSERT\\s+INTO.*)",
SINGLE_OPERAND),
CREATE_VIEW("CREATE\\s+VIEW\\s+(\\S+)\\s+AS\\s+(.*)",
(operands) -> {
if (operands.length < 2) {
return Optional.empty();
}
return Optional.of(new String[]{operands[0], operands[1]});
}),
DROP_VIEW("DROP\\s+VIEW\\s+(.*)",
INSERT_OVERWRITE(
"(INSERT\\s+OVERWRITE.*)",
SINGLE_OPERAND),
SET("SET(\\s+(\\S+)\\s*=(.*))?", // whitespace is only ignored on the left side of '='
(operands) -> {
if (operands.length < 3) {
return Optional.empty();
} else if (operands[0] == null) {
return Optional.of(new String[0]);
}
return Optional.of(new String[]{operands[1], operands[2]});
}),
CREATE_TABLE("(CREATE\\s+TABLE\\s+.*)", SINGLE_OPERAND),
DROP_TABLE("(DROP\\s+TABLE\\s+.*)", SINGLE_OPERAND),
CREATE_VIEW(
"CREATE\\s+VIEW\\s+(\\S+)\\s+AS\\s+(.*)",
(operands) -> {
if (operands.length < 2) {
return Optional.empty();
}
return Optional.of(new String[]{operands[0], operands[1]});
}),
CREATE_DATABASE(
"(CREATE\\s+DATABASE\\s+.*)",
SINGLE_OPERAND),
DROP_DATABASE(
"(DROP\\s+DATABASE\\s+.*)",
SINGLE_OPERAND),
DROP_VIEW(
"DROP\\s+VIEW\\s+(.*)",
SINGLE_OPERAND),
ALTER_DATABASE(
"(ALTER\\s+DATABASE\\s+.*)",
SINGLE_OPERAND),
ALTER_TABLE(
"(ALTER\\s+TABLE\\s+.*)",
SINGLE_OPERAND),
SET(
"SET(\\s+(\\S+)\\s*=(.*))?", // whitespace is only ignored on the left side of '='
(operands) -> {
if (operands.length < 3) {
return Optional.empty();
} else if (operands[0] == null) {
return Optional.of(new String[0]);
}
return Optional.of(new String[]{operands[1], operands[2]});
}),
RESET(
"RESET",
@ -176,14 +208,12 @@ public final class SqlCommandParser {
public static class SqlCommandCall {
public final SqlCommand command;
public final String[] operands;
public final String sql;
public SqlCommandCall(SqlCommand command, String[] operands) {
public SqlCommandCall(SqlCommand command, String[] operands, String sql) {
this.command = command;
this.operands = operands;
}
public SqlCommandCall(SqlCommand command) {
this(command, new String[0]);
this.sql = sql;
}
@Override

View file

@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink.sql;
/**
 * A SQL statement together with location information describing where the
 * statement starts in the text it was taken from.
 *
 * <p>This is a plain mutable holder: every value is supplied by the caller
 * through the setters and is returned unchanged by the getters.
 */
public class SqlInfo {

  // Text of the SQL statement.
  private String sqlContent;

  // Line number at which the statement starts (labelled "Sql start line num"
  // in toString()).
  private int line;

  // Index of the statement within its first line (labelled "First line index"
  // in toString()).
  private int firstLineIndex;

  /**
   * @return the text of the SQL statement, or {@code null} if it was never set
   */
  public String getSqlContent() {
    return sqlContent;
  }

  /**
   * @param sqlContent the text of the SQL statement
   */
  public void setSqlContent(String sqlContent) {
    this.sqlContent = sqlContent;
  }

  /**
   * @return the start line number previously stored with {@link #setLine(int)}
   */
  public int getLine() {
    return line;
  }

  /**
   * @param line the line number at which the statement starts
   */
  public void setLine(int line) {
    this.line = line;
  }

  /**
   * @return the first-line index previously stored with
   *         {@link #setFirstLineIndex(int)}
   */
  public int getFirstLineIndex() {
    return firstLineIndex;
  }

  /**
   * @param firstLineIndex the index of the statement within its first line
   */
  public void setFirstLineIndex(int firstLineIndex) {
    this.firstLineIndex = firstLineIndex;
  }

  /**
   * Renders the statement text, start line and first-line index on three lines.
   */
  @Override
  public String toString() {
    return "Sqlcontent => " + sqlContent + "\nSql start line num => "
        + line + "\n First line index =>" + firstLineIndex;
  }
}

View file

@ -1,204 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink.sql;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities for splitting a block of SQL text into individual statements and
 * for lower-casing SQL while leaving quoted literals untouched.
 */
public class SqlLists {

  // A ';' preceded by any character other than a backslash, i.e. a statement
  // terminator that has not been escaped as "\;".
  private static final Pattern PATTERN_STATEMENT = Pattern.compile("[^\\\\];");

  // A double-quoted or single-quoted literal. The closing quote must be the
  // same character as the opening one; escaped quotes inside a literal are
  // not recognised by this pattern.
  private static final Pattern PATTERN_STRING = Pattern.compile("\"[^\"]*\"|'[^']*'");

  // "--" up to the end of the line.
  private static final Pattern PATTERN_SINGLE_LINE = Pattern.compile("--.*");

  // "/* ... */", possibly spanning several lines.
  private static final Pattern PATTERN_MULTI_LINE =
      Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);

  /**
   * Splits {@code context} into statements at every unescaped ';' that is
   * neither inside a comment nor inside an unbalanced quoted section.
   *
   * <p>The terminating ';' is not part of the returned statement text, an
   * escaped "\;" is turned into a plain ';', and segments that are empty or
   * consist only of comments are dropped. Text after the last terminator is
   * returned as a final statement when it is not blank.
   *
   * @param context the SQL text to split; {@code null} yields an empty list
   * @return the statements in order of appearance, each with its start line
   *         and first-line index
   */
  public static List<SqlInfo> getSQLList(String context) {
    List<SqlInfo> list = new ArrayList<>();
    if (context == null) {
      return list;
    }

    // Offset of every '\n' mapped to its 1-based ordinal; the end of the text
    // is registered as one last entry so that every position has an upper bound.
    Map<Integer, Integer> enterMap = new TreeMap<>();
    int enterCount = 1;
    for (int i = 0; i < context.length(); i++) {
      if (context.charAt(i) == '\n') {
        enterMap.put(i, enterCount++);
      }
    }
    enterMap.put(context.length(), enterCount);

    Matcher match = PATTERN_STATEMENT.matcher(context);
    // Start offset of the statement currently being collected.
    int index = 0;
    while (match.find()) {
      // The match covers the character before ';' plus the ';' itself.
      int terminator = match.start() + 1;
      String candidate = context.substring(index, terminator);
      if (isInComment(context, terminator)
          || !isMatch(candidate, '\'')
          || !isMatch(candidate, '\"')) {
        // This ';' belongs to a comment or a quoted literal: keep collecting.
        continue;
      }
      String str = candidate.replaceAll("\\\\;", ";").replaceAll("^;", "");
      if (!str.isEmpty() && !isCommentClause(str)) {
        list.add(newSqlInfo(str, index - 1, enterMap));
      }
      index = terminator + 1;
    }

    // Whatever follows the last accepted terminator.
    String tail = context.substring(index);
    if (tail.trim().length() != 0) {
      String str = tail.replaceAll("\\\\;", ";")
          .replaceAll("^;", "")
          .replaceAll(";$", "");
      if (!str.isEmpty() && !isCommentClause(str)) {
        // NOTE(review): the trailing statement is located from index while the
        // statements found in the loop use index - 1; kept as-is, confirm
        // which of the two is intended.
        list.add(newSqlInfo(str, index, enterMap));
      }
    }
    return list;
  }

  /**
   * Builds the {@link SqlInfo} for a statement located at {@code position}.
   *
   * <p>The line is one plus the ordinal of the last line break strictly before
   * {@code position}. The first-line index is the distance from that line
   * break (or from offset 0 when there is none) to {@code position}, and 0
   * when {@code position} coincides with a line break or the end of the text.
   */
  private static SqlInfo newSqlInfo(String content, int position,
                                    Map<Integer, Integer> enterMap) {
    int maxEnters = 0;
    int lastEnter = 0;
    int firstLineIndex = 0;
    for (Map.Entry<Integer, Integer> enter : enterMap.entrySet()) {
      int offset = enter.getKey();
      if (position > offset) {
        maxEnters = enter.getValue();
        lastEnter = offset;
      } else {
        firstLineIndex = (position == offset) ? 0 : position - lastEnter;
        break;
      }
    }
    SqlInfo sqlInfo = new SqlInfo();
    sqlInfo.setSqlContent(content);
    sqlInfo.setLine(maxEnters + 1);
    sqlInfo.setFirstLineIndex(firstLineIndex);
    return sqlInfo;
  }

  /**
   * Lower-cases {@code str} except for the quoted literals it contains, which
   * are copied verbatim.
   *
   * @param str the text to convert
   * @return the converted text
   */
  public static String toLowCase(String str) {
    Matcher m = PATTERN_STRING.matcher(str);
    StringBuilder sb = new StringBuilder();
    int index = 0;
    while (m.find()) {
      // Locale.ROOT keeps the result independent of the JVM default locale.
      sb.append(str.substring(index, m.start()).toLowerCase(java.util.Locale.ROOT));
      sb.append(str, m.start(), m.end());
      index = m.end();
    }
    if (index != str.length()) {
      sb.append(str.substring(index).toLowerCase(java.util.Locale.ROOT));
    }
    return sb.toString();
  }

  /**
   * Tells whether {@code str} contains nothing but comments: either the
   * trimmed text starts with "/*" and ends with "*&#47;", or every non-blank
   * line starts with "--".
   */
  private static boolean isCommentClause(String str) {
    String trimStr = str.trim();
    // NOTE(review): text such as "/* a */ select 1 /* b */" also satisfies
    // this check; confirm whether that is acceptable.
    if (trimStr.startsWith("/*") && trimStr.endsWith("*/")) {
      return true;
    }
    for (String line : StringUtils.split(str, "\n")) {
      String val = line.trim();
      if (!StringUtils.isEmpty(val) && !val.startsWith("--")) {
        return false;
      }
    }
    return true;
  }

  /**
   * Tells whether {@code source} contains an even number of {@code pattern}
   * characters. A {@code pattern} character that directly follows a backslash
   * is skipped and therefore not counted.
   */
  private static boolean isMatch(String source, char pattern) {
    int count = 0;
    for (int i = 0; i < source.length(); i++) {
      if (source.charAt(i) == pattern) {
        count++;
      }
      if (source.charAt(i) == '\\' && i < source.length() - 1
          && source.charAt(i + 1) == pattern) {
        // Step over the escaped quote so the next iteration does not count it.
        i++;
      }
    }
    return count % 2 == 0;
  }

  /**
   * Tells whether the character at {@code index} lies inside a "--" comment
   * (after its first character, up to and including its last character) or
   * strictly between the first and last character of a block comment.
   */
  private static boolean isInComment(String context, int index) {
    Matcher singleMatch = PATTERN_SINGLE_LINE.matcher(context);
    while (singleMatch.find()) {
      int start = singleMatch.start();
      int end = singleMatch.end() - 1;
      if (index > start && index <= end) {
        return true;
      }
    }
    Matcher multiMatch = PATTERN_MULTI_LINE.matcher(context);
    while (multiMatch.find()) {
      int start = multiMatch.start();
      int end = multiMatch.end() - 1;
      if (index > start && index < end) {
        return true;
      }
    }
    return false;
  }
}

View file

@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink.sql;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.scala.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.StringUtils;
import org.apache.zeppelin.flink.JobManager;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.tabledata.TableDataUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Stream SQL job of type "retract". It keeps the rows received so far in an
 * in-memory table, adding a row on insert and removing one equal row on
 * delete, and renders that table as Zeppelin {@code %table} output.
 */
public class UpdateStreamSqlJob extends AbstractStreamSqlJob {

  private static final Logger LOGGER = LoggerFactory.getLogger(UpdateStreamSqlJob.class);

  // Rows currently making up the result.
  private List<Row> materializedTable = new ArrayList<>();
  // Copy of materializedTable taken at the end of the last successful refresh.
  private List<Row> lastSnapshot = new ArrayList<>();

  public UpdateStreamSqlJob(StreamExecutionEnvironment senv,
                            StreamTableEnvironment stEnv,
                            JobManager jobManager,
                            InterpreterContext context,
                            int defaultParallelism) {
    super(senv, stEnv, jobManager, context, defaultParallelism);
  }

  @Override
  protected String getType() {
    return "retract";
  }

  /**
   * Appends {@code row} to the materialized table, enables refreshing and
   * wakes up a thread waiting on {@code resultLock}.
   */
  protected void processInsert(Row row) {
    enableToRefresh = true;
    // NOTE(review): notify() requires the calling thread to hold the monitor
    // of resultLock; presumably the caller synchronizes on it - confirm.
    resultLock.notify();
    LOGGER.debug("processInsert: {}", row);
    materializedTable.add(row);
  }

  /**
   * Removes the first row of the materialized table that equals {@code row}
   * and disables refreshing.
   */
  protected void processDelete(Row row) {
    enableToRefresh = false;
    LOGGER.debug("processDelete: {}", row);
    for (int i = 0; i < materializedTable.size(); i++) {
      if (materializedTable.get(i).equals(row)) {
        materializedTable.remove(i);
        break;
      }
    }
  }

  /**
   * Renders the materialized table as a {@code %table} section: a
   * tab-separated header with the schema's field names, followed by one
   * tab-separated line per row, ordered by the rendered text of the first
   * column, and terminated by an empty {@code %text} section.
   */
  @Override
  protected String buildResult() {
    StringBuilder builder = new StringBuilder();
    builder.append("%table\n");
    String[] fieldNames = schema.getFieldNames();
    int fieldCount = schema.getFieldCount();
    for (int i = 0; i < fieldCount; ++i) {
      builder.append(fieldNames[i]);
      if (i != fieldCount - 1) {
        builder.append("\t");
      }
    }
    builder.append("\n");

    // sort it by the first column
    materializedTable.sort(
        (r1, r2) -> renderField(r1.getField(0)).compareTo(renderField(r2.getField(0))));

    for (Row row : materializedTable) {
      int arity = row.getArity();
      for (int i = 0; i < arity; ++i) {
        builder.append(renderField(row.getField(i)));
        if (i != arity - 1) {
          builder.append("\t");
        }
      }
      builder.append("\n");
    }
    builder.append("\n%text\n");
    return builder.toString();
  }

  /** Converts a single field value to the text written into a table cell. */
  private static String renderField(Object field) {
    return TableDataUtils.normalizeColumn(StringUtils.arrayAwareToString(field));
  }

  /**
   * Clears the paragraph output, sends the Flink job url, writes the current
   * result and then records the rows that were written as the last snapshot.
   * An {@link IOException} is logged and otherwise ignored.
   */
  @Override
  protected void refresh(InterpreterContext context) {
    context.out().clear();
    try {
      jobManager.sendFlinkJobUrl(context);
      String result = buildResult();
      LOGGER.debug("Refresh with data: {}", result);
      context.out.write(result);
      context.out.flush();
      this.lastSnapshot.clear();
      this.lastSnapshot.addAll(materializedTable);
    } catch (IOException e) {
      LOGGER.error("Fail to refresh data", e);
    }
  }
}

View file

@ -3,26 +3,48 @@
"group": "flink",
"name": "flink",
"className": "org.apache.zeppelin.flink.FlinkInterpreter",
"defaultInterpreter": true,
"properties": {
"FLINK_HOME": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "Location of flink distribution",
"type": "string"
},
"HADOOP_CONF_DIR": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "Location of hadoop conf (core-site.xml, hdfs-site.xml, etc.)",
"type": "string"
},
"HIVE_CONF_DIR": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "Location of hive conf (hive-site.xml)",
"type": "string"
},
"flink.execution.mode": {
"envName": null,
"propertyName": null,
"defaultValue": "local",
"description": "execution mode, it could be local/remote/yarn/k8s",
"description": "Execution mode, it could be local|remote|yarn",
"type": "string"
},
"flink.execution.remote.host": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "host name of running JobManager. Only used for remote mode",
"description": "Host name of running JobManager. Only used for remote mode",
"type": "string"
},
"flink.execution.remote.port": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "port of running JobManager. Only used for remote mode",
"description": "Port of running JobManager. Only used for remote mode",
"type": "number"
},
"flink.jm.memory": {
@ -39,13 +61,6 @@
"description": "Memory for TaskManager (mb)",
"type": "number"
},
"flink.tm.num": {
"envName": null,
"propertyName": null,
"defaultValue": "2",
"description": "Number of TaskManager",
"type": "number"
},
"flink.tm.slot": {
"envName": null,
"propertyName": null,
@ -53,6 +68,13 @@
"description": "Number of slot per TaskManager",
"type": "number"
},
"local.number-taskmanager": {
"envName": null,
"propertyName": null,
"defaultValue": "4",
"description": "Number of TaskManager in local mode",
"type": "number"
},
"flink.yarn.appName": {
"envName": null,
"propertyName": null,
@ -64,30 +86,58 @@
"envName": null,
"propertyName": null,
"defaultValue": "default",
"description": "yarn queue name",
"description": "Yarn queue name",
"type": "string"
},
"flink.yarn.jars": {
"flink.webui.yarn.useProxy": {
"envName": null,
"propertyName": null,
"defaultValue": false,
"description": "Whether to use the yarn proxy url as the flink web url, e.g. http://localhost:8088/proxy/application_1583396598068_0004",
"type": "checkbox"
},
"flink.udf.jars": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "additional user jars (comma separated)",
"description": "Flink udf jars (comma separated), Zeppelin will register udfs in this jar for user automatically",
"type": "string"
},
"flink.execution.jars": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "Additional user jars (comma separated)",
"type": "string"
},
"flink.execution.packages": {
"envName": null,
"propertyName": null,
"defaultValue": "",
"description": "Additional user packages (comma separated), e.g. flink connector packages",
"type": "string"
},
"zeppelin.flink.scala.color": {
"envName": null,
"propertyName": null,
"defaultValue": "true",
"description": "whether display scala shell output in colorful format",
"defaultValue": true,
"description": "Whether to display scala shell output in color",
"type": "checkbox"
},
"zeppelin.flink.enableHive": {
"envName": null,
"propertyName": null,
"defaultValue": "false",
"description": "whether enable hive",
"defaultValue": false,
"description": "Whether to enable hive",
"type": "checkbox"
},
"zeppelin.flink.hive.version": {
"envName": null,
"propertyName": null,
"defaultValue": "2.3.4",
"description": "Hive version that you would like to connect",
"type": "string"
},
"zeppelin.flink.printREPLOutput": {
"envName": null,
"propertyName": "zeppelin.flink.printREPLOutput",
@ -99,22 +149,29 @@
"envName": "zeppelin.flink.maxResult",
"propertyName": "zeppelin.flink.maxResult",
"defaultValue": "1000",
"description": "max number of row returned by sql interpreter.",
"type": "number"
},
"zeppelin.flink.planner": {
"envName": "zeppelin.flink.planner",
"propertyName": "zeppelin.flink.planner",
"defaultValue": "blink",
"description": "planner or flink table api, blink or flink",
"description": "Max number of rows returned by sql interpreter.",
"type": "number"
},
"zeppelin.pyflink.python": {
"envName": "zeppelin.pyflink.python",
"propertyName": "zeppelin.pyflink.python",
"defaultValue": "python",
"description": "python executable for pyflink",
"description": "Python executable for pyflink",
"type": "string"
},
"flink.interpreter.close.shutdown_cluster": {
"envName": "flink.interpreter.close.shutdown_cluster",
"propertyName": "flink.interpreter.close.shutdown_cluster",
"defaultValue": true,
"description": "Whether to shut down the flink cluster when the interpreter is closed",
"type": "checkbox"
},
"zeppelin.interpreter.close.cancel_job": {
"envName": "zeppelin.interpreter.close.cancel_job",
"propertyName": "zeppelin.interpreter.close.cancel_job",
"defaultValue": true,
"description": "Whether to cancel the flink job when the interpreter is closed",
"type": "checkbox"
}
},
"editor": {
@ -130,7 +187,13 @@
"name": "bsql",
"className": "org.apache.zeppelin.flink.FlinkBatchSqlInterpreter",
"properties": {
"zeppelin.flink.concurrentBatchSql.max": {
"envName": "",
"propertyName": "zeppelin.flink.concurrentBatchSql.max",
"defaultValue": "10",
"description": "Max concurrent sql of Batch Sql",
"type": "number"
}
},
"editor": {
"language": "sql",
@ -143,7 +206,13 @@
"name": "ssql",
"className": "org.apache.zeppelin.flink.FlinkStreamSqlInterpreter",
"properties": {
"zeppelin.flink.concurrentStreamSql.max": {
"envName": "",
"propertyName": "zeppelin.flink.concurrentStreamSql.max",
"defaultValue": "10",
"description": "Max concurrent sql of Stream Sql",
"type": "number"
}
},
"editor": {
"language": "sql",
@ -160,7 +229,9 @@
},
"editor": {
"language": "python",
"editOnDblClick": false
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
},
@ -173,7 +244,9 @@
},
"editor": {
"language": "python",
"editOnDblClick": false
"editOnDblClick": false,
"completionKey": "TAB",
"completionSupport": true
}
}
]

View file

@ -24,6 +24,7 @@ from pyflink.datastream import *
from pyflink.table import *
from pyflink.table.catalog import *
from pyflink.table.descriptors import *
from pyflink.table.udf import *
from pyflink.table.window import *
import pyflink
@ -45,20 +46,25 @@ pyflink.java_gateway.import_flink_view(gateway)
pyflink.java_gateway.install_exception_handler()
b_env = pyflink.dataset.ExecutionEnvironment(intp.getJavaExecutionEnvironment())
bt_env = BatchTableEnvironment.create(b_env)
bt_env = BatchTableEnvironment(intp.getJavaBatchTableEnvironment("blink"), True)
bt_env_2 = BatchTableEnvironment(intp.getJavaBatchTableEnvironment("flink"), False)
s_env = StreamExecutionEnvironment(intp.getJavaStreamExecutionEnvironment())
st_env = StreamTableEnvironment.create(s_env)
st_env = StreamTableEnvironment(intp.getJavaStreamTableEnvironment("blink"), True)
st_env_2 = StreamTableEnvironment(intp.getJavaStreamTableEnvironment("flink"), False)
class IPyFlinkZeppelinContext(PyZeppelinContext):
def __init__(self, z, gateway):
super(IPyFlinkZeppelinContext, self).__init__(z, gateway)
def show(self, obj):
def show(self, obj, **kwargs):
from pyflink.table import Table
if isinstance(obj, Table):
print(self.z.showData(obj._j_table))
if 'stream_type' in kwargs:
self.z.show(obj._j_table, kwargs['stream_type'], kwargs)
else:
print(self.z.showData(obj._j_table))
else:
super(IPyFlinkZeppelinContext, self).show(obj)
super(IPyFlinkZeppelinContext, self).show(obj, **kwargs)
z = __zeppelin__ = IPyFlinkZeppelinContext(intp.getZeppelinContext(), gateway)

View file

@ -22,6 +22,7 @@ from pyflink.table import *
from pyflink.table.catalog import *
from pyflink.table.descriptors import *
from pyflink.table.window import *
from pyflink.table.udf import *
import pyflink
@ -34,9 +35,11 @@ pyflink.java_gateway.import_flink_view(gateway)
pyflink.java_gateway.install_exception_handler()
b_env = pyflink.dataset.ExecutionEnvironment(intp.getJavaExecutionEnvironment())
bt_env = BatchTableEnvironment.create(b_env)
bt_env = BatchTableEnvironment(intp.getJavaBatchTableEnvironment("blink"), True)
bt_env_2 = BatchTableEnvironment(intp.getJavaBatchTableEnvironment("flink"), False)
s_env = StreamExecutionEnvironment(intp.getJavaStreamExecutionEnvironment())
st_env = StreamTableEnvironment.create(s_env)
st_env = StreamTableEnvironment(intp.getJavaStreamTableEnvironment("blink"), True)
st_env_2 = StreamTableEnvironment(intp.getJavaStreamTableEnvironment("flink"), False)
from zeppelin_context import PyZeppelinContext
@ -46,12 +49,15 @@ class PyFlinkZeppelinContext(PyZeppelinContext):
def __init__(self, z, gateway):
super(PyFlinkZeppelinContext, self).__init__(z, gateway)
def show(self, obj):
def show(self, obj, **kwargs):
from pyflink.table import Table
if isinstance(obj, Table):
print(self.z.showData(obj._j_table))
if 'stream_type' in kwargs:
self.z.show(obj._j_table, kwargs['stream_type'], kwargs)
else:
print(self.z.showData(obj._j_table))
else:
super(PyFlinkZeppelinContext, self).show(obj)
super(PyFlinkZeppelinContext, self).show(obj, **kwargs)
z = __zeppelin__ = PyFlinkZeppelinContext(intp.getZeppelinContext(), gateway)
__zeppelin__._setup_matplotlib()

View file

@ -1,41 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.zeppelin.interpreter.{InterpreterContext, InterpreterResult}
/**
 * Runs SQL queries on the batch table environment obtained from the given
 * [[FlinkScalaInterpreter]] and renders the outcome through the
 * [[FlinkZeppelinContext]].
 *
 * @param scalaInterpreter interpreter that supplies the batch table environment
 * @param z                context used to render a query result
 * @param maxRow           not read by this class
 */
class FlinkSQLScalaInterpreter(scalaInterpreter: FlinkScalaInterpreter,
                               z: FlinkZeppelinContext,
                               maxRow: Int) {

  private val btenv: TableEnvironment = scalaInterpreter.getBatchTableEnvironment()

  /**
   * Executes `code` as a SQL query.
   *
   * @param code    the SQL query to run
   * @param context the interpreter context of the paragraph (not read here)
   * @return a SUCCESS result holding the rendered table, or an ERROR result
   *         holding the exception message when the query or rendering fails
   */
  def interpret(code: String, context: InterpreterContext): InterpreterResult =
    try {
      val rendered = z.showData(btenv.sqlQuery(code))
      new InterpreterResult(InterpreterResult.Code.SUCCESS, rendered)
    } catch {
      case failure: Exception =>
        new InterpreterResult(
          InterpreterResult.Code.ERROR,
          "Fail to fetch result: " + failure.getMessage)
    }
}

View file

@ -18,22 +18,38 @@
package org.apache.zeppelin.flink
import java.io.{BufferedReader, File}
import java.net.URLClassLoader
import java.io.{BufferedReader, File, IOException}
import java.net.{URL, URLClassLoader}
import java.nio.file.Files
import java.util.Properties
import java.util.{Map, Properties}
import java.util.concurrent.TimeUnit
import java.util.jar.JarFile
import org.apache.flink.api.java.ScalaShellRemoteEnvironment
import org.apache.commons.lang3.StringUtils
import org.apache.flink.api.common.JobExecutionResult
import org.apache.flink.api.scala.FlinkShell.{ExecutionMode, _}
import org.apache.flink.api.scala.{ExecutionEnvironment, FlinkILoop}
import org.apache.flink.client.program.ClusterClient
import org.apache.flink.configuration._
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironmentFactory
import org.apache.flink.streaming.api.environment.{StreamExecutionEnvironment => JStreamExecutionEnvironment}
import org.apache.flink.core.execution.{JobClient, JobListener}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.{StreamExecutionEnvironmentFactory, StreamExecutionEnvironment => JStreamExecutionEnvironment}
import org.apache.flink.api.java.{ExecutionEnvironmentFactory, ExecutionEnvironment => JExecutionEnvironment}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.{EnvironmentSettings, TableConfig, TableEnvironment}
import org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl
import org.apache.flink.table.api.config.ExecutionConfigOptions
import org.apache.flink.table.api.java.internal.StreamTableEnvironmentImpl
import org.apache.flink.table.api.scala.{BatchTableEnvironment, StreamTableEnvironment}
import org.apache.flink.table.api.{EnvironmentSettings, TableConfig, TableEnvironment}
import org.apache.flink.table.catalog.{CatalogManager, FunctionCatalog, GenericInMemoryCatalog}
import org.apache.flink.table.catalog.hive.HiveCatalog
import org.apache.flink.table.functions.{AggregateFunction, ScalarFunction, TableAggregateFunction, TableFunction}
import org.apache.flink.table.module.ModuleManager
import org.apache.flink.table.module.hive.HiveModule
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.yarn.api.records.ApplicationId
import org.apache.hadoop.yarn.client.api.YarnClient
import org.apache.hadoop.yarn.conf
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.zeppelin.flink.util.DependencyUtils
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream
@ -45,69 +61,96 @@ import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.Completion.ScalaCompleter
import scala.tools.nsc.interpreter.{JPrintWriter, SimpleReader}
/**
* It instantiates the flink scala shell and creates benv, senv, btenv and stenv.
*
* @param properties
*/
class FlinkScalaInterpreter(val properties: Properties) {
lazy val LOGGER: Logger = LoggerFactory.getLogger(getClass)
private lazy val LOGGER: Logger = LoggerFactory.getLogger(getClass)
private var flinkILoop: FlinkILoop = _
private var cluster: types.ClusterType = _
private var cluster: Option[ClusterClient[_]] = _
private var scalaCompleter: ScalaCompleter = _
private val interpreterOutput = new InterpreterOutputStream(LOGGER)
private var configuration: Configuration = _
private var mode: org.apache.flink.api.scala.FlinkShell.ExecutionMode.Value = _
private var mode: ExecutionMode.Value = _
private var tblEnvFactory: TableEnvFactory = _
private var benv: ExecutionEnvironment = _
private var senv: StreamExecutionEnvironment = _
// TableEnvironment of blink planner
private var btenv: TableEnvironment = _
private var stenv: StreamTableEnvironment = _
private var btEnvSetting: EnvironmentSettings = _
private var stEnvSetting: EnvironmentSettings = _
// TableEnvironment of flink planner
private var btenv_2: BatchTableEnvironment = _
private var stenv_2: StreamTableEnvironment = _
// PyFlink depends on java version of TableEnvironment,
// so need to create java version of TableEnvironment
private var java_btenv: TableEnvironment = _
private var java_stenv: TableEnvironment = _
private var java_btenv_2: TableEnvironment = _
private var java_stenv_2: TableEnvironment = _
private var z: FlinkZeppelinContext = _
private var jmWebUrl: String = _
private var jobManager: JobManager = _
private var defaultParallelism = 1;
private var defaultSqlParallelism = 1;
private var userJars: Seq[String] = _
def open(): Unit = {
val flinkHome = properties.getProperty("FLINK_HOME", sys.env.getOrElse("FLINK_HOME", ""))
val flinkConfDir = properties.getProperty("FLINK_CONF_DIR", sys.env.getOrElse("FLINK_CONF_DIR", ""))
val hadoopConfDir = properties.getProperty("HADOOP_CONF_DIR", sys.env.getOrElse("HADOOP_CONF_DIR", ""))
val yarnConfDir = properties.getProperty("YARN_CONF_DIR", sys.env.getOrElse("YARN_CONF_DIR", ""))
val hiveConfDir = properties.getProperty("HIVE_CONF_DIR", sys.env.getOrElse("HIVE_CONF_DIR", ""))
LOGGER.info("FLINK_HOME: " + flinkHome)
LOGGER.info("FLINK_CONF_DIR: " + flinkConfDir)
LOGGER.info("HADOOP_CONF_DIR: " + hadoopConfDir)
LOGGER.info("YARN_CONF_DIR: " + yarnConfDir)
LOGGER.info("HIVE_CONF_DIR: " + hiveConfDir)
this.configuration = GlobalConfiguration.loadConfiguration(flinkConfDir)
mode = ExecutionMode.withName(
properties.getProperty("flink.execution.mode", "LOCAL").toUpperCase)
var config = Config(executionMode = mode)
val jmMemory = properties.getProperty("flink.jm.memory", "1024")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(jobManagerMemory = Some(jmMemory))))
if (mode == ExecutionMode.YARN) {
val jmMemory = properties.getProperty("flink.jm.memory", "1024")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(jobManagerMemory = Some(jmMemory))))
val tmMemory = properties.getProperty("flink.tm.memory", "1024")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(taskManagerMemory = Some(tmMemory))))
val tmMemory = properties.getProperty("flink.tm.memory", "1024")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(taskManagerMemory = Some(tmMemory))))
val appName = properties.getProperty("flink.yarn.appName", "Flink Yarn App Name")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(name = Some(appName))))
val tmNum = Integer.parseInt(properties.getProperty("flink.tm.num", "2"))
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(containers = Some(tmNum))))
val slotNum = Integer.parseInt(properties.getProperty("flink.tm.slot", "1"))
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(slots = Some(slotNum))))
this.configuration.setInteger("taskmanager.numberOfTaskSlots", slotNum)
val appName = properties.getProperty("flink.yarn.appName", "Flink Yarn App Name")
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(name = Some(appName))))
val queue = (properties.getProperty("flink.yarn.queue", "default"))
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(queue = Some(queue))))
val slotNum = Integer.parseInt(properties.getProperty("flink.tm.slot", "1"))
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(slots = Some(slotNum))))
val queue = (properties.getProperty("flink.yarn.queue", "default"))
config = config.copy(yarnConfig =
Some(ensureYarnConfig(config)
.copy(queue = Some(queue))))
}
this.configuration = GlobalConfiguration.loadConfiguration(System.getenv("FLINK_CONF_DIR"))
val userJars = getUserJars
this.userJars = getUserJars
LOGGER.info("UserJars: " + userJars.mkString(","))
config = config.copy(externalJars = Some(userJars.toArray))
LOGGER.info("Config: " + config)
configuration.setString("flink.yarn.jars", userJars.mkString(":"))
@ -115,12 +158,16 @@ class FlinkScalaInterpreter(val properties: Properties) {
// load other configuration from interpreter properties
properties.asScala.foreach(entry => configuration.setString(entry._1, entry._2))
this.defaultParallelism = configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM)
this.defaultSqlParallelism = configuration.getInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM)
LOGGER.info("Default Parallelism: " + this.defaultParallelism)
LOGGER.info("Default SQL Parallelism: " + this.defaultSqlParallelism)
// set scala.color
if (properties.getProperty("zeppelin.flink.scala.color", "true").toBoolean) {
System.setProperty("scala.color", "true")
}
// set host/port when it is remote mode
if (config.executionMode == ExecutionMode.REMOTE) {
val host = properties.getProperty("flink.execution.remote.host")
val port = properties.getProperty("flink.execution.remote.port")
@ -144,44 +191,65 @@ class FlinkScalaInterpreter(val properties: Properties) {
}
val (iLoop, cluster) = try {
val (host, port, cluster) = fetchConnectionInfo(configuration, config)
val conf = cluster match {
case Some(Left(_)) =>
LOGGER.info("Starting MiniCluster in legacy mode")
this.jmWebUrl = "http://localhost:" + port
configuration
case Some(Right(yarnCluster)) =>
yarnCluster.setDetached(false)
// yarn mode
LOGGER.info("Starting FlinkCluster in yarn mode")
this.jmWebUrl = yarnCluster.getWebInterfaceURL
yarnCluster.getFlinkConfiguration
val (effectiveConfig, cluster) = fetchConnectionInfo(config, configuration)
this.configuration = effectiveConfig
cluster match {
case Some(clusterClient) =>
// local mode or yarn
if (mode == ExecutionMode.LOCAL) {
LOGGER.info("Starting FlinkCluster in local mode")
this.jmWebUrl = clusterClient.getWebInterfaceURL
} else if (mode == ExecutionMode.YARN) {
LOGGER.info("Starting FlinkCluster in yarn mode")
if (properties.getProperty("flink.webui.yarn.useProxy", "false").toBoolean) {
val yarnAppId = clusterClient.getClusterId.asInstanceOf[ApplicationId]
val yarnClient = YarnClient.createYarnClient
val yarnConf = new YarnConfiguration()
// disable timeline service as we only query yarn app here.
// Otherwise we may hit this kind of ERROR:
// java.lang.ClassNotFoundException: com.sun.jersey.api.client.config.ClientConfig
yarnConf.set("yarn.timeline-service.enabled", "false")
yarnClient.init(yarnConf)
yarnClient.start()
val appReport = yarnClient.getApplicationReport(yarnAppId)
this.jmWebUrl = appReport.getTrackingUrl
} else {
this.jmWebUrl = clusterClient.getWebInterfaceURL
}
} else {
throw new Exception("Starting FlinkCluster in invalid mode: " + mode)
}
case None =>
// remote mode
LOGGER.info("Starting FlinkCluster in remote mode")
this.jmWebUrl = "http://" + host + ":" + port
configuration
LOGGER.info("Use FlinkCluster in remote mode")
this.jmWebUrl = "http://" + config.host.get + ":" + config.port.get
}
LOGGER.info(s"\nConnecting to Flink cluster (host: $host, port: $port).\n")
LOGGER.info(s"\nConnecting to Flink cluster: " + this.jmWebUrl)
LOGGER.info("externalJars: " +
config.externalJars.getOrElse(Array.empty[String]).mkString(":"))
val repl = new FlinkILoop(host, port, conf, config.externalJars, None, replOut)
(repl, cluster)
val classLoader = Thread.currentThread().getContextClassLoader
try {
// use FlinkClassLoader to initialize FlinkILoop, otherwise TableFactoryService could not find
// the TableFactory properly
Thread.currentThread().setContextClassLoader(getFlinkClassLoader)
val repl = new FlinkILoop(configuration, config.externalJars, None, replOut)
(repl, cluster)
} finally {
Thread.currentThread().setContextClassLoader(classLoader)
}
} catch {
case e: IllegalArgumentException =>
println(s"Error: ${e.getMessage}")
sys.exit()
}
LOGGER.info("JobManager address: " + this.jmWebUrl)
this.flinkILoop = iLoop
this.cluster = cluster
val settings = new Settings()
settings.usejavacp.value = true
settings.Yreplsync.value = true
settings.classpath.value = getUserJars.mkString(File.pathSeparator)
settings.classpath.value = userJars.mkString(File.pathSeparator)
val outputDir = Files.createTempDirectory("flink-repl");
val interpArguments = List(
@ -196,8 +264,6 @@ class FlinkScalaInterpreter(val properties: Properties) {
// set execution environment
flinkILoop.intp.bind("benv", flinkILoop.scalaBenv)
flinkILoop.intp.bind("senv", flinkILoop.scalaSenv)
flinkILoop.intp.bind("btenv", flinkILoop.scalaBTEnv)
flinkILoop.intp.bind("stenv", flinkILoop.scalaSTEnv)
}
val in0 = getField(flinkILoop, "scala$tools$nsc$interpreter$ILoop$$in0")
@ -213,29 +279,72 @@ class FlinkScalaInterpreter(val properties: Properties) {
this.benv = flinkILoop.scalaBenv
this.senv = flinkILoop.scalaSenv
LOGGER.info("Default Parallelism for flink: " +
configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM))
val timeType = properties.getProperty("flink.senv.timecharacteristic", "EventTime")
this.senv.setStreamTimeCharacteristic(TimeCharacteristic.valueOf(timeType))
this.benv.setParallelism(configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM))
this.senv.setParallelism(configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM))
ScalaShellRemoteEnvironment.resetContextEnvironments()
setAsContext()
if (getPlanner == "flink") {
// flink planner
LOGGER.info("Use flink planner")
this.btenv = flinkILoop.scalaBTEnv
this.stenv = flinkILoop.scalaSTEnv
} else {
// blink planner
LOGGER.info("Use blink planner")
this.btEnvSetting = EnvironmentSettings.newInstance().inBatchMode().useBlinkPlanner().build()
this.btenv = TableEnvironment.create(this.btEnvSetting)
flinkILoop.intp.bind("btenv", this.btenv)
this.stEnvSetting =
val originalClassLoader = Thread.currentThread().getContextClassLoader
try {
Thread.currentThread().setContextClassLoader(getFlinkClassLoader)
val tblConfig = new TableConfig
tblConfig.getConfiguration.addAll(configuration)
// Step 1.1 Initialize the CatalogManager if required.
val catalogManager = new CatalogManager("default_catalog",
new GenericInMemoryCatalog("default_catalog", "default_database"));
// Step 1.2 Initialize the ModuleManager if required.
val moduleManager = new ModuleManager();
// Step 1.3 Initialize the FunctionCatalog if required.
val flinkFunctionCatalog = new FunctionCatalog(tblConfig, catalogManager, moduleManager);
val blinkFunctionCatalog = new FunctionCatalog(tblConfig, catalogManager, moduleManager);
this.tblEnvFactory = new TableEnvFactory(this.benv, this.senv, tblConfig,
catalogManager, moduleManager, flinkFunctionCatalog, blinkFunctionCatalog)
// blink planner
var btEnvSetting = EnvironmentSettings.newInstance().inBatchMode().useBlinkPlanner().build()
this.btenv = tblEnvFactory.createJavaBlinkBatchTableEnvironment(btEnvSetting);
flinkILoop.intp.bind("btenv", this.btenv.asInstanceOf[StreamTableEnvironmentImpl])
this.java_btenv = this.btenv
var stEnvSetting =
EnvironmentSettings.newInstance().inStreamingMode().useBlinkPlanner().build()
this.stenv = StreamTableEnvironmentImpl.create(this.senv, this.stEnvSetting, new TableConfig)
this.stenv = tblEnvFactory.createScalaBlinkStreamTableEnvironment(stEnvSetting)
flinkILoop.intp.bind("stenv", this.stenv)
this.java_stenv = tblEnvFactory.createJavaBlinkStreamTableEnvironment(stEnvSetting)
// flink planner
this.btenv_2 = tblEnvFactory.createScalaFlinkBatchTableEnvironment()
flinkILoop.intp.bind("btenv_2", this.btenv_2)
stEnvSetting =
EnvironmentSettings.newInstance().inStreamingMode().useOldPlanner().build()
this.stenv_2 = tblEnvFactory.createScalaFlinkStreamTableEnvironment(stEnvSetting)
flinkILoop.intp.bind("stenv_2", this.stenv_2)
this.java_btenv_2 = tblEnvFactory.createJavaFlinkBatchTableEnvironment()
btEnvSetting = EnvironmentSettings.newInstance.useOldPlanner.inStreamingMode.build
this.java_stenv_2 = tblEnvFactory.createJavaFlinkStreamTableEnvironment(btEnvSetting)
} finally {
Thread.currentThread().setContextClassLoader(originalClassLoader)
}
this.properties.asScala.filter(e => e._1.startsWith("table.exec"))
.foreach(e => {
this.btenv.getConfig.getConfiguration.setString(e._1, e._2)
this.java_btenv.getConfig.getConfiguration.setString(e._1, e._2)
this.stenv.getConfig.getConfiguration.setString(e._1, e._2)
this.java_stenv.getConfig.getConfiguration.setString(e._1, e._2)
})
// set python exec for PyFlink
val pythonExec = properties.getProperty("zeppelin.pyflink.python", "")
if (!StringUtils.isBlank(pythonExec)) {
this.stenv.getConfig.getConfiguration.setString("python.exec", pythonExec)
this.btenv.getConfig.getConfiguration.setString("python.exec", pythonExec)
this.java_btenv.getConfig.getConfiguration.setString("python.exec", pythonExec)
this.java_stenv.getConfig.getConfiguration.setString("python.exec", pythonExec)
}
if (java.lang.Boolean.parseBoolean(
@ -251,23 +360,63 @@ class FlinkScalaInterpreter(val properties: Properties) {
flinkILoop.interpret("import org.apache.flink.table.functions.AggregateFunction")
flinkILoop.interpret("import org.apache.flink.table.functions.TableFunction")
this.z = new FlinkZeppelinContext(this.btenv, new InterpreterHookRegistry(),
this.z = new FlinkZeppelinContext(this, new InterpreterHookRegistry(),
Integer.parseInt(properties.getProperty("zeppelin.flink.maxResult", "1000")))
val modifiers = new java.util.ArrayList[String]()
modifiers.add("@transient");
modifiers.add("@transient")
this.bind("z", z.getClass().getCanonicalName(), z, modifiers);
this.jobManager = new JobManager(this.benv, this.senv, this.z, jmWebUrl)
this.jobManager = new JobManager(this.z, jmWebUrl)
//register hive catalog
// Listener registered on the execution environments. It associates every submitted Flink job
// with the paragraph of the current InterpreterContext and drops that association again once
// the job has been executed.
val jobListener = new JobListener {

  // A non-null `e` is treated as a failed submission; otherwise the job is recorded in the
  // jobManager under the current InterpreterContext (when one is available).
  override def onJobSubmitted(jobClient: JobClient, e: Throwable): Unit = {
    if (e != null) {
      // hand the throwable to the logger so the cause of the failure is not lost
      LOGGER.warn("Fail to submit job", e)
    } else {
      if (InterpreterContext.get() == null) {
        LOGGER.warn("Job {} is submitted but unable to associate this job to paragraph, " +
          "as InterpreterContext is null", jobClient.getJobID)
      } else {
        LOGGER.info("Job {} is submitted for paragraph {}", Array(jobClient.getJobID,
          InterpreterContext.get().getParagraphId): _ *)
        jobManager.addJob(InterpreterContext.get(), jobClient)
        if (jmWebUrl != null) {
          jobManager.sendFlinkJobUrl(InterpreterContext.get());
        } else {
          LOGGER.error("Unable to link JobURL, because JobManager weburl is null")
        }
      }
    }
  }

  // A non-null `e` is treated as a failed execution. `jobExecutionResult` is only dereferenced
  // on the branches where `e` is null.
  override def onJobExecuted(jobExecutionResult: JobExecutionResult, e: Throwable): Unit = {
    if (e != null) {
      // hand the throwable to the logger so the cause of the failure is not lost
      LOGGER.warn("Fail to execute job", e)
    } else {
      LOGGER.info("Job {} is executed with time {} seconds", jobExecutionResult.getJobID,
        jobExecutionResult.getNetRuntime(TimeUnit.SECONDS))
    }
    if (InterpreterContext.get() != null) {
      // the job is removed for the paragraph regardless of success or failure
      jobManager.removeJob(InterpreterContext.get().getParagraphId)
    } else {
      if (e == null) {
        LOGGER.warn("Unable to remove this job {}, as InterpreterContext is null",
          jobExecutionResult.getJobID)
      }
    }
  }
}
this.benv.registerJobListener(jobListener)
this.senv.registerJobListener(jobListener)
//register hive catalog
if (properties.getProperty("zeppelin.flink.enableHive", "false").toBoolean) {
LOGGER.info("Hive is enabled, registering hive catalog.")
var hiveConfDir = System.getenv("HIVE_CONF_DIR")
val hiveConfDir =
properties.getOrDefault("HIVE_CONF_DIR", System.getenv("HIVE_CONF_DIR")).toString
if (hiveConfDir == null) {
hiveConfDir = properties.getProperty("HIVE_CONF_DIR")
}
if ( hiveConfDir == null) {
throw new InterpreterException("c is not specified");
throw new InterpreterException("HIVE_CONF_DIR is not specified");
}
val database = properties.getProperty("zeppelin.flink.hive.database", "default")
if (database == null) {
@ -277,25 +426,80 @@ class FlinkScalaInterpreter(val properties: Properties) {
val hiveVersion = properties.getProperty("zeppelin.flink.hive.version", "2.3.4")
val hiveCatalog = new HiveCatalog("hive", database, hiveConfDir, hiveVersion)
this.btenv.registerCatalog("hive", hiveCatalog)
this.stenv.registerCatalog("hive", hiveCatalog)
this.btenv.useCatalog("hive")
this.stenv.useCatalog("hive")
this.btenv.useDatabase("default")
this.stenv.useDatabase("default")
this.btenv.loadModule("hive", new HiveModule(hiveVersion))
} else {
LOGGER.info("Hive is disabled.")
}
// load udf jar
val udfJars = properties.getProperty("flink.udf.jars", "")
if (!StringUtils.isBlank(udfJars)) {
udfJars.split(",").foreach(jar => {
loadUDFJar(jar)
})
}
}
/**
 * Scans the given jar and registers the user defined functions it contains on `btenv`.
 *
 * Every `.class` entry whose name does not contain `$` is loaded and instantiated through its
 * no-arg constructor. Instances of ScalarFunction, TableFunction, AggregateFunction or
 * TableAggregateFunction are registered under the simple name of their class; anything else
 * is logged and ignored. A class that fails to load or instantiate with an Exception is
 * logged and skipped, so one broken class does not stop the scan.
 *
 * @param jar path of the jar file on the local file system
 */
def loadUDFJar(jar: String): Unit = {
  LOGGER.info("Loading UDF Jar: " + jar)
  val jarFile = new JarFile(jar)
  try {
    val entries = jarFile.entries
    val urls = Array(new URL("jar:file:" + jar + "!/"))
    // The class loader is not closed here: classes loaded from it are handed to the table
    // environment and may still be needed after this method returns.
    val cl = new URLClassLoader(urls)
    while (entries.hasMoreElements) {
      val je = entries.nextElement
      if (!je.isDirectory && je.getName.endsWith(".class") && !je.getName.contains("$")) {
        try {
          // -6 because of .class
          var className = je.getName.substring(0, je.getName.length - 6)
          className = className.replace('/', '.')
          val c = cl.loadClass(className)
          val udf = c.newInstance()
          if (udf.isInstanceOf[ScalarFunction]) {
            val scalarUDF = udf.asInstanceOf[ScalarFunction]
            btenv.registerFunction(c.getSimpleName, scalarUDF)
          } else if (udf.isInstanceOf[TableFunction[_]]) {
            val tableUDF = udf.asInstanceOf[TableFunction[_]]
            (btenv.asInstanceOf[StreamTableEnvironmentImpl]).registerFunction(c.getSimpleName, tableUDF)
          } else if (udf.isInstanceOf[AggregateFunction[_,_]]) {
            val aggregateUDF = udf.asInstanceOf[AggregateFunction[_,_]]
            (btenv.asInstanceOf[StreamTableEnvironmentImpl]).registerFunction(c.getSimpleName, aggregateUDF)
          } else if (udf.isInstanceOf[TableAggregateFunction[_,_]]) {
            val tableAggregateUDF = udf.asInstanceOf[TableAggregateFunction[_,_]]
            (btenv.asInstanceOf[StreamTableEnvironmentImpl]).registerFunction(c.getSimpleName, tableAggregateUDF)
          } else {
            LOGGER.warn("No UDF definition found in class file: " + je.getName)
          }
        } catch {
          case e : Exception =>
            LOGGER.info("Fail to inspect udf class: " + je.getName, e)
        }
      }
    }
  } finally {
    // The JarFile is only used to enumerate the entries; close it so the file handle is
    // released even when the scan fails part-way.
    jarFile.close()
  }
}
// Installs this interpreter's senv and benv as the context environments of Flink's Java
// StreamExecutionEnvironment and ExecutionEnvironment. Both classes are reached through
// reflection: `initializeContextEnvironment` is looked up with getDeclaredMethod and made
// accessible before it is invoked statically (receiver is null).
def setAsContext(): Unit = {
val factory = new StreamExecutionEnvironmentFactory() {
val streamFactory = new StreamExecutionEnvironmentFactory() {
override def createExecutionEnvironment = senv.getJavaEnv
}
// StreamExecutionEnvironment: register the factory that hands out senv's Java environment
val method = classOf[JStreamExecutionEnvironment].getDeclaredMethod("initializeContextEnvironment",
var method = classOf[JStreamExecutionEnvironment].getDeclaredMethod("initializeContextEnvironment",
classOf[StreamExecutionEnvironmentFactory])
method.setAccessible(true)
method.invoke(null, factory);
method.invoke(null, streamFactory);
val batchFactory = new ExecutionEnvironmentFactory() {
override def createExecutionEnvironment = benv.getJavaEnv
}
// ExecutionEnvironment (batch): register the factory that hands out benv's Java environment
method = classOf[JExecutionEnvironment].getDeclaredMethod("initializeContextEnvironment",
classOf[ExecutionEnvironmentFactory])
method.setAccessible(true)
method.invoke(null, batchFactory);
}
// for use in java side
@ -344,9 +548,25 @@ class FlinkScalaInterpreter(val properties: Properties) {
field.get(obj)
}
def interpret(code: String, context: InterpreterContext): InterpreterResult = {
val originalOut = System.out
/**
 * Workaround that lets the table api be used from multiple threads: a planner for the
 * blink streaming settings is created again through `tblEnvFactory`.
 *
 * The call runs with the flink class loader installed as the thread context class loader;
 * the previous context class loader is always restored afterwards.
 */
def createPlannerAgain(): Unit = {
  val callingThread = Thread.currentThread()
  val savedLoader = callingThread.getContextClassLoader
  try {
    callingThread.setContextClassLoader(getFlinkClassLoader)
    val blinkStreamSettings = EnvironmentSettings.newInstance()
      .inStreamingMode()
      .useBlinkPlanner()
      .build()
    this.tblEnvFactory.createPlanner(blinkStreamSettings)
  } finally {
    callingThread.setContextClassLoader(savedLoader)
  }
}
def interpret(code: String, context: InterpreterContext): InterpreterResult = {
createPlannerAgain()
val originalStdOut = System.out
val originalStdErr = System.err;
if (context != null) {
interpreterOutput.setInterpreterOutput(context.out)
context.out.clear()
@ -354,6 +574,7 @@ class FlinkScalaInterpreter(val properties: Properties) {
Console.withOut(if (context != null) context.out else Console.out) {
System.setOut(Console.out)
System.setErr(Console.out)
interpreterOutput.ignoreLeadingNewLinesFromScalaReporter()
// add print("") at the end in case the last line is comment which lead to INCOMPLETE
val lines = code.split("\\n") ++ List("print(\"\")")
@ -387,7 +608,8 @@ class FlinkScalaInterpreter(val properties: Properties) {
Console.flush()
interpreterOutput.setInterpreterOutput(null)
// reset the java stdout
System.setOut(originalOut)
System.setOut(originalStdOut)
System.setErr(originalStdErr)
return new InterpreterResult(lastStatus)
}
}
@ -401,60 +623,120 @@ class FlinkScalaInterpreter(val properties: Properties) {
}
// Releases the resources held by this interpreter: the Flink cluster (conditionally, see
// below), the scala shell (flinkILoop) and the jobManager.
// NOTE(review): this span is taken from a diff view and shows lines of the previous and the
// current implementation next to each other — confirm against the checked-out file.
def close(): Unit = {
if (cluster != null) {
cluster match {
case Some(Left(miniCluster)) =>
LOGGER.info("Shutdown LegacyMiniCluster")
miniCluster.close()
case Some(Right(yarnCluster)) =>
LOGGER.info("Shutdown YarnCluster")
yarnCluster.shutDownCluster()
yarnCluster.shutdown()
case e =>
LOGGER.error("Unrecognized cluster type: " + e.getClass.getSimpleName)
// the cluster is only shut down when flink.interpreter.close.shutdown_cluster is true,
// which is also the value used when the property is not set
if (properties.getProperty("flink.interpreter.close.shutdown_cluster", "true").toBoolean) {
if (cluster != null) {
cluster match {
case Some(clusterClient) =>
LOGGER.info("Shutdown FlinkCluster")
clusterClient.shutDownCluster()
clusterClient.close()
// delete staging dir
if (mode == ExecutionMode.YARN) {
cleanupStagingDirInternal(clusterClient.getClusterId.asInstanceOf[ApplicationId])
}
case None =>
// no cluster client is held in this case, so there is nothing to shut down
LOGGER.info("Don't close the Remote FlinkCluster")
}
}
} else {
LOGGER.info("Keep cluster alive when closing interpreter")
}
// the shell reference is cleared after closing so a second close() skips this step
if (flinkILoop != null) {
flinkILoop.closeInterpreter()
flinkILoop = null
}
if (jobManager != null) {
jobManager.shutdown()
}
}
/**
 * Recursively deletes the `.flink/<appId>` directory located under the home directory of the
 * file system obtained from a fresh Hadoop Configuration.
 *
 * This is best effort: an IOException is logged as a warning and not rethrown.
 *
 * @param appId id of the yarn application whose staging directory is removed
 */
private def cleanupStagingDirInternal(appId: ApplicationId): Unit = {
  try {
    val hadoopFs = FileSystem.get(new org.apache.hadoop.conf.Configuration())
    val stagingDir = new Path(hadoopFs.getHomeDirectory, ".flink/" + appId.toString)
    val removed = hadoopFs.delete(stagingDir, true)
    if (removed) {
      LOGGER.info(s"Deleted staging directory $stagingDir")
    }
  } catch {
    case ioe: IOException =>
      LOGGER.warn("Failed to cleanup staging dir", ioe)
  }
}
// Returns the batch execution environment (benv) held by this interpreter.
def getExecutionEnvironment(): ExecutionEnvironment = this.benv
// Returns the stream execution environment (senv) held by this interpreter.
def getStreamExecutionEnvironment(): StreamExecutionEnvironment = this.senv
def getBatchTableEnvironment(): TableEnvironment = this.btenv
/**
 * Returns the batch table environment for the requested planner.
 *
 * @param planner "blink" selects `btenv`; any other value selects `btenv_2`
 */
def getBatchTableEnvironment(planner: String = "blink"): TableEnvironment = {
  planner match {
    case "blink" => this.btenv
    case _ => this.btenv_2
  }
}
def getStreamTableEnvionment(): StreamTableEnvironment = this.stenv
/**
 * Returns the stream table environment for the requested planner.
 *
 * @param planner "blink" selects `stenv`; any other value selects `stenv_2`
 */
def getStreamTableEnvironment(planner: String = "blink"): StreamTableEnvironment = {
  planner match {
    case "blink" => this.stenv
    case _ => this.stenv_2
  }
}
/**
 * Returns the Java-side batch table environment for the requested planner.
 *
 * @param planner "blink" selects `java_btenv`; any other value selects `java_btenv_2`
 */
def getJavaBatchTableEnvironment(planner: String): TableEnvironment = {
  val useBlink = planner == "blink"
  if (useBlink) this.java_btenv else this.java_btenv_2
}
/**
 * Returns the Java-side stream table environment for the requested planner.
 *
 * @param planner "blink" selects `java_stenv`; any other value selects `java_stenv_2`
 */
def getJavaStreamTableEnvironment(planner: String): TableEnvironment = {
  val useBlink = planner == "blink"
  if (useBlink) this.java_stenv else this.java_stenv_2
}
// Returns the value of the defaultParallelism field.
def getDefaultParallelism = this.defaultParallelism
// Returns the value of the defaultSqlParallelism field.
def getDefaultSqlParallelism = this.defaultSqlParallelism
// Collects the jars configured by the user from three interpreter properties:
// flink.execution.jars, flink.udf.jars and flink.execution.packages (the latter resolved
// through DependencyUtils.resolveMavenDependencies).
// NOTE(review): this span is taken from a diff view and shows lines of the previous and the
// current implementation next to each other — confirm against the checked-out file.
def getUserJars: Seq[String] = {
val flinkJars =
if (properties.containsKey("flink.execution.jars")) {
properties.getProperty("flink.execution.jars").split(":").toSeq
// a blank value is handled like a missing one; entries are separated by ","
if (!StringUtils.isBlank(properties.getProperty("flink.execution.jars", ""))) {
properties.getProperty("flink.execution.jars").split(",").toSeq
} else {
Seq.empty[String]
}
val flinkUDFJars =
if (!StringUtils.isBlank(properties.getProperty("flink.udf.jars", ""))) {
properties.getProperty("flink.udf.jars").split(",").toSeq
} else {
Seq.empty[String]
}
val flinkPackageJars =
if (properties.containsKey("flink.execution.packages")) {
if (!StringUtils.isBlank(properties.getProperty("flink.execution.packages", ""))) {
val packages = properties.getProperty("flink.execution.packages")
// the resolver result is split on ":" to obtain the individual jar paths
DependencyUtils.resolveMavenDependencies(null, packages, null, null, None).split(":").toSeq
} else {
Seq.empty[String]
}
flinkJars ++ flinkPackageJars
flinkJars ++ flinkPackageJars ++ flinkUDFJars
}
// Returns the jobManager held by this interpreter.
def getJobManager = this.jobManager
// Builds a class loader over the jar that flinkILoop.writeFilesToDisk() produces; the
// second constructor call below additionally includes every entry of userJars.
// NOTE(review): this span is taken from a diff view and shows the previous and the current
// constructor call next to each other — confirm against the checked-out file.
def getFlinkScalaShellLoader: ClassLoader = {
val userCodeJarFile = this.flinkILoop.writeFilesToDisk();
new URLClassLoader(Array(userCodeJarFile.toURL))
new URLClassLoader(Array(userCodeJarFile.toURL) ++ userJars.map(e => new File(e).toURL))
}
/**
 * Creates a new URLClassLoader over all entries of `userJars`.
 *
 * Each path is converted with `toURI.toURL` instead of the deprecated `File.toURL`, which
 * does not escape characters that are illegal in URLs (for example spaces in a path).
 */
private def getFlinkClassLoader: ClassLoader = {
  val jarUrls = userJars.map(jarPath => new File(jarPath).toURI.toURL).toArray
  new URLClassLoader(jarUrls)
}
// Returns the ZeppelinContext instance (z) held by this interpreter.
def getZeppelinContext = this.z
@ -465,10 +747,6 @@ class FlinkScalaInterpreter(val properties: Properties) {
// Returns the flinkILoop held by this interpreter.
def getFlinkILoop = flinkILoop
// use blink planner by default
def getPlanner = properties.getProperty("zeppelin.flink.planner", "blink")
def getStEnvSetting = stEnvSetting
}

View file

@ -18,17 +18,23 @@
package org.apache.zeppelin.flink
import java.io.IOException
import java.util.concurrent.atomic.AtomicInteger
import com.google.common.collect.Maps
import org.apache.flink.api.scala.DataSet
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.internal.TableImpl
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.table.api.scala.BatchTableEnvironment
import org.apache.flink.table.api.{Table, TableEnvironment, TableUtils}
import org.apache.flink.table.api.scala.{BatchTableEnvironment, StreamTableEnvironment}
import org.apache.flink.types.Row
import org.apache.flink.util.StringUtils
import org.apache.zeppelin.annotation.ZeppelinApi
import org.apache.zeppelin.display.AngularObjectWatcher
import org.apache.zeppelin.display.ui.OptionInput.ParamOption
import org.apache.zeppelin.flink.util.TableUtil
import org.apache.zeppelin.interpreter.{BaseZeppelinContext, InterpreterContext, InterpreterHookRegistry, ResultMessages}
import org.apache.zeppelin.flink.sql.{AppendStreamSqlJob, SingleRowStreamSqlJob, UpdateStreamSqlJob}
import org.apache.zeppelin.interpreter.{InterpreterContext, InterpreterHookRegistry, ResultMessages, ZeppelinContext}
import org.apache.zeppelin.tabledata.TableDataUtils
import scala.collection.{JavaConversions, Seq}
@ -36,10 +42,11 @@ import scala.collection.{JavaConversions, Seq}
/**
* ZeppelinContext for Flink
*/
class FlinkZeppelinContext(val btenv: TableEnvironment,
class FlinkZeppelinContext(val flinkInterpreter: FlinkScalaInterpreter,
val hooks2: InterpreterHookRegistry,
val maxResult2: Int) extends BaseZeppelinContext(hooks2, maxResult2) {
val maxResult2: Int) extends ZeppelinContext(hooks2, maxResult2) {
private val SQL_INDEX = new AtomicInteger(0)
private var currentSql: String = _
private val interpreterClassMap = Map(
@ -62,61 +69,100 @@ class FlinkZeppelinContext(val btenv: TableEnvironment,
override def getInterpreterClassMap: _root_.java.util.Map[String, String] =
JavaConversions.mapAsJavaMap(interpreterClassMap)
override def showData(obj: Any, maxResult: Int): String = {
def showTable(columnsNames: Array[String], rows: Seq[Row]): String = {
val columnNames = obj.asInstanceOf[Table].getSchema.getFieldNames
val builder: StringBuilder = new StringBuilder("%table ")
builder.append(columnNames.mkString("\t"))
builder.append("\n")
val isLargerThanMaxResult = rows.size > maxResult
var displayRows = rows
if (isLargerThanMaxResult) {
displayRows = rows.take(maxResult)
}
for (row <- displayRows) {
var i = 0;
while (i < row.getArity) {
builder.append(row.getField(i))
i += 1
if (i != row.getArity) {
builder.append("\t");
}
// Renders the given rows as Zeppelin "%table" output: a tab separated header line followed by
// one tab separated line per row. At most maxResult rows are rendered; when more rows were
// passed in, the exceeds-limit message is appended. The output ends with "%text " so that
// following output is not put into the table.
// NOTE(review): this span is taken from a diff view and shows lines of the previous and the
// current implementation next to each other — confirm against the checked-out file.
private def showTable(columnsNames: Array[String], rows: Seq[Row]): String = {
val builder = new java.lang.StringBuilder("%table ")
builder.append(columnsNames.mkString("\t"))
builder.append("\n")
val isLargerThanMaxResult = rows.size > maxResult
var displayRows = rows
if (isLargerThanMaxResult) {
displayRows = rows.take(maxResult)
}
for (row <- displayRows) {
var i = 0;
while (i < row.getArity) {
// expand array if the column is array
builder.append(TableDataUtils.normalizeColumn(StringUtils.arrayAwareToString(row.getField(i))))
i += 1
// a tab goes between fields but not after the last one
if (i != row.getArity) {
builder.append("\t");
}
builder.append("\n")
}
if (isLargerThanMaxResult) {
builder.append("\n")
builder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"))
}
// append %text at the end, otherwise the following output will be put in table as well.
builder.append("\n%text ")
builder.toString()
builder.append("\n")
}
if (isLargerThanMaxResult) {
builder.append("\n")
// NOTE(review): the message names "zeppelin.spark.maxResult" although the Flink interpreter
// reads its limit from "zeppelin.flink.maxResult" — confirm which property should be named.
builder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, "zeppelin.spark.maxResult"))
}
// append %text at the end, otherwise the following output will be put in table as well.
builder.append("\n%text ")
builder.toString()
}
// Converts `obj` into displayable text: a DataSet is turned into a Table through the "flink"
// batch table environment and rendered with showTable, a Table is collected and rendered with
// showTable, and anything else falls back to obj.toString.
// NOTE(review): this span is taken from a diff view and shows lines of the previous and the
// current implementation next to each other — confirm against the checked-out file.
override def showData(obj: Any, maxResult: Int): String = {
if (obj.isInstanceOf[DataSet[_]]) {
val ds = obj.asInstanceOf[DataSet[_]]
val env = btenv.asInstanceOf[BatchTableEnvironment]
val table = env.fromDataSet(ds)
val btenv = flinkInterpreter.getBatchTableEnvironment("flink").asInstanceOf[BatchTableEnvironment]
val table = btenv.fromDataSet(ds)
val columnNames: Array[String] = table.getSchema.getFieldNames
val dsRows: DataSet[Row] = env.toDataSet[Row](table)
val dsRows: DataSet[Row] = btenv.toDataSet[Row](table)
// one row more than maxResult is fetched so that showTable can detect the overflow
showTable(columnNames, dsRows.first(maxResult + 1).collect())
} else if (obj.isInstanceOf[Table]) {
if (btenv.isInstanceOf[BatchTableEnvironment]) {
val table = obj.asInstanceOf[Table]
val columnNames: Array[String] = table.getSchema.getFieldNames
val dsRows: DataSet[Row] = btenv.asInstanceOf[BatchTableEnvironment].toDataSet[Row](table)
showTable(columnNames, dsRows.first(maxResult + 1).collect())
} else {
var rows = TableUtil.collect(obj.asInstanceOf[TableImpl], currentSql)
val columnNames = obj.asInstanceOf[Table].getSchema.getFieldNames
showTable(columnNames, rows)
}
val rows = JavaConversions.asScalaBuffer(TableUtils.collectToList(obj.asInstanceOf[TableImpl])).toSeq
val columnNames = obj.asInstanceOf[Table].getSchema.getFieldNames
showTable(columnNames, rows)
} else {
obj.toString
}
}
/**
 * Renders a table through the "flink" batch table environment: the table is converted to a
 * DataSet of rows and the first `maxResult + 1` rows are passed to showTable.
 *
 * NOTE(review): the environment comes from getJavaBatchTableEnvironment and is cast to
 * BatchTableEnvironment, while showData obtains its environment from
 * getBatchTableEnvironment("flink") — confirm that this cast is intended.
 *
 * @param table the table to render
 * @return the output produced by showTable
 */
def showFlinkTable(table: Table): String = {
  val header: Array[String] = table.getSchema.getFieldNames
  val flinkBatchEnv = flinkInterpreter.getJavaBatchTableEnvironment("flink")
    .asInstanceOf[BatchTableEnvironment]
  val rowSet: DataSet[Row] = flinkBatchEnv.toDataSet[Row](table)
  val firstRows = rowSet.first(maxResult + 1).collect()
  showTable(header, firstRows)
}
/**
 * Renders a table by collecting its rows with TableUtils.collectToList and passing them,
 * together with the field names of the table schema, to showTable.
 *
 * @param table the table to render; it is cast to TableImpl for the collection step
 * @return the output produced by showTable
 */
def showBlinkTable(table: Table): String = {
  val collected = TableUtils.collectToList(table.asInstanceOf[TableImpl])
  val rowSeq = JavaConversions.asScalaBuffer(collected).toSeq
  val header = table.getSchema.getFieldNames
  showTable(header, rowSeq)
}
/**
 * Runs `table` through the stream sql job that matches `streamType`.
 *
 * Before the job is started every entry of `configs` is copied into the local properties of
 * the current InterpreterContext, and a table name is derived from the paragraph id and a
 * running index.
 *
 * @param table      the table to run
 * @param streamType "single", "append" or "update" (compared case-insensitively)
 * @param configs    extra entries for the local properties of the current paragraph
 * @throws IOException when `streamType` is none of the supported values
 */
def show(table: Table, streamType: String, configs: Map[String, String] = Map.empty): Unit = {
  val stenv = flinkInterpreter.getStreamTableEnvironment()
  val context = InterpreterContext.get()
  for (entry <- configs) {
    context.getLocalProperties.put(entry._1, entry._2)
  }
  val paragraphPart = context.getParagraphId.replace("-", "_")
  val tableName = "UnnamedTable_" + paragraphPart + "_" + SQL_INDEX.getAndIncrement()

  streamType match {
    case kind if kind.equalsIgnoreCase("single") =>
      val singleJob = new SingleRowStreamSqlJob(flinkInterpreter.getStreamExecutionEnvironment,
        stenv, flinkInterpreter.getJobManager, context, flinkInterpreter.getDefaultParallelism)
      singleJob.run(table, tableName)
    case kind if kind.equalsIgnoreCase("append") =>
      val appendJob = new AppendStreamSqlJob(flinkInterpreter.getStreamExecutionEnvironment,
        stenv, flinkInterpreter.getJobManager, context, flinkInterpreter.getDefaultParallelism)
      appendJob.run(table, tableName)
    case kind if kind.equalsIgnoreCase("update") =>
      val updateJob = new UpdateStreamSqlJob(flinkInterpreter.getStreamExecutionEnvironment,
        stenv, flinkInterpreter.getJobManager, context, flinkInterpreter.getDefaultParallelism)
      updateJob.run(table, tableName)
    case _ =>
      throw new IOException("Unrecognized stream type: " + streamType)
  }
}
/**
 * Called by python. Overload of `show` that accepts the configuration as a java.util.Map;
 * the map is converted to an immutable Scala map and handed to the Scala overload.
 *
 * @param table      the table to run
 * @param streamType stream type, passed on unchanged
 * @param configs    configuration entries, passed on after conversion
 */
def show(table: Table, streamType: String, configs: java.util.Map[String, String]): Unit = {
  val scalaConfigs = JavaConversions.mapAsScalaMap(configs).toMap
  show(table, streamType, scalaConfigs)
}
// Overload of select without a default value: delegates to select(name, null, options).
@ZeppelinApi
def select(name: String, options: Seq[(Any, String)]): Any = select(name, null, options)

View file

@ -1,40 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink
import org.apache.flink.runtime.jobgraph.JobGraph
import org.slf4j.{Logger, LoggerFactory}
/**
 * Submits a JobGraph to the cluster wrapped in `cluster`.
 *
 * NOTE(review): `jobName` and `classLoader` are accepted but not read by this class; the
 * submission to the cluster client uses the context class loader of the calling thread —
 * confirm that this is intended.
 *
 * @param cluster     either a MiniCluster or a ClusterClient, or None
 * @param jobGraph    the job graph to submit
 * @param jobName     not used by this class
 * @param classLoader not used by this class
 */
class SqlJobRunner(cluster: types.ClusterType,
                   jobGraph: JobGraph,
                   jobName: String,
                   classLoader: ClassLoader) {

  lazy val LOGGER: Logger = LoggerFactory.getLogger(getClass)

  /** Submits the job graph; when no cluster is available an error is logged instead. */
  def run(): Unit = cluster match {
    case None =>
      LOGGER.error("Unable to run SqlJobRunner")
    case Some(target) =>
      target match {
        case Left(miniCluster) =>
          miniCluster.submitJob(jobGraph)
        case Right(yarnCluster) =>
          yarnCluster.submitJob(jobGraph, Thread.currentThread().getContextClassLoader)
      }
  }
}

View file

@ -1,26 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink
import org.apache.flink.client.program.ClusterClient
import org.apache.flink.runtime.minicluster.MiniCluster
// Type aliases shared inside the org.apache.zeppelin.flink package.
object types {
// A cluster handle: Left holds a MiniCluster, Right holds a ClusterClient, and None means
// that no cluster handle is held.
type ClusterType = Option[Either[MiniCluster, ClusterClient[_]]]
}

View file

@ -1,135 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink.util
import _root_.java.util.{UUID, ArrayList => JArrayList}
import org.apache.calcite.rel.RelNode
import org.apache.flink.api.common.accumulators.SerializedListAccumulator
import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.common.typeutils.TypeSerializer
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api.internal.{TableEnvironmentImpl, TableImpl}
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.table.planner.calcite.{FlinkRelBuilder, FlinkTypeFactory}
import org.apache.flink.table.planner.delegation.PlannerBase
import org.apache.flink.table.planner.plan.schema.TimeIndicatorRelDataType
import org.apache.flink.table.planner.sinks.{CollectRowTableSink, CollectTableSink}
import org.apache.flink.table.runtime.types.TypeInfoLogicalTypeConverter
import org.apache.flink.table.types.logical.TimestampType
import org.apache.flink.table.types.utils.TypeConversions.fromDataTypeToLegacyInfo
import org.apache.flink.types.Row
import org.apache.flink.util.AbstractID
import _root_.scala.collection.JavaConversions._
import _root_.scala.collection.JavaConverters._
// Helpers that execute a Table and bring its rows back to the client. The planner internals
// (rel builder, execution environment) are reached through reflection on PlannerBase.
object TableUtil {
/**
* Returns a collection that contains all rows in this Table.
*
* Note: The difference between print() and collect() is
* - print() prints data on workers and collect() collects data to the client.
* - You have to call TableEnvironment.execute() to run the job for print(), while collect()
* calls execute automatically.
*/
def collect(table: TableImpl): Seq[Row] = collectSink(table, new CollectRowTableSink, None)
// Same as collect(table), but the job is executed under the given job name.
def collect(table: TableImpl, jobName: String): Seq[Row] =
collectSink(table, new CollectRowTableSink, Option.apply(jobName))
// Collects the table into elements of type T, using `t` as the type information of the sink.
// A null jobName results in the default job name being used.
def collectAsT[T](table: TableImpl, t: TypeInformation[_], jobName: String = null): Seq[T] =
collectSink(
table,
new CollectTableSink(_ => t.asInstanceOf[TypeInformation[T]]), Option(jobName))
// Configures `sink` with the field names and field types of the table and then runs the
// collect job.
def collectSink[T](
table: TableImpl, sink: CollectTableSink[T], jobName: Option[String] = None): Seq[T] = {
// get schema information of table
val relNode = toRelNode(table)
val rowType = relNode.getRowType
val fieldNames = rowType.getFieldNames.asScala.toArray
val fieldTypes = rowType.getFieldList.map { field =>
val `type` = field.getType match {
// converts `TIME ATTRIBUTE(ROWTIME)`/`TIME ATTRIBUTE(PROCTIME)` to `TIMESTAMP(3)` for sink
case _: TimeIndicatorRelDataType =>
relNode.getCluster
.getTypeFactory.asInstanceOf[FlinkTypeFactory]
.createFieldTypeFromLogicalType(new TimestampType(false, 3))
case t => t
}
FlinkTypeFactory.toLogicalType(`type`)
}.toArray
val configuredSink = sink.configure(
fieldNames, fieldTypes.map(TypeInfoLogicalTypeConverter.fromLogicalTypeToTypeInfo))
collect(table.getTableEnvironment,
table, configuredSink.asInstanceOf[CollectTableSink[T]], jobName)
}
/**
* Converts operation tree in the given table to a RelNode tree.
*/
def toRelNode(table: Table): RelNode = {
// the planner of the table's environment is expected to be a PlannerBase
val plannerBase = table.asInstanceOf[TableImpl]
.getTableEnvironment.asInstanceOf[TableEnvironmentImpl]
.getPlanner.asInstanceOf[PlannerBase]
// getRelBuilder is invoked through reflection
val method = classOf[PlannerBase].getMethod("getRelBuilder")
method.setAccessible(true)
method.invoke(plannerBase).asInstanceOf[FlinkRelBuilder]
.queryOperation(table.getQueryOperation).build()
}
// Registers `sink` under a random name, inserts `table` into it, executes the job and
// deserializes the accumulator result that is stored under the sink's id.
def collect[T](
tEnv: TableEnvironment,
table: Table,
sink: CollectTableSink[T],
jobName: Option[String]): Seq[T] = {
// getExecEnv is invoked through reflection to obtain the StreamExecutionEnvironment
val method = classOf[PlannerBase].getMethod("getExecEnv")
method.setAccessible(true)
val execEnv = method.invoke(tEnv.asInstanceOf[TableEnvironmentImpl]
.getPlanner).asInstanceOf[StreamExecutionEnvironment]
val typeSerializer = fromDataTypeToLegacyInfo(sink.getConsumedDataType)
.asInstanceOf[TypeInformation[T]]
.createSerializer(execEnv.getConfig)
// the id passed to sink.init is also the key used to read the accumulator result below
val id = new AbstractID().toString
sink.init(typeSerializer.asInstanceOf[TypeSerializer[T]], id)
val sinkName = UUID.randomUUID().toString
// workaround, otherwise it won't find the sink properly
val originalCatalog = tEnv.getCurrentCatalog
val originalDatabase = tEnv.getCurrentDatabase
try {
tEnv.useCatalog("default_catalog")
tEnv.useDatabase("default_database")
tEnv.registerTableSink(sinkName, sink)
tEnv.insertInto(table, sinkName)
val res = tEnv.execute(jobName.getOrElse("sql collect job"))
val accResult: JArrayList[Array[Byte]] = res.getAccumulatorResult(id)
SerializedListAccumulator.deserializeList(accResult, typeSerializer)
} finally {
// restore the catalog and database that were current before the workaround
tEnv.useCatalog(originalCatalog)
tEnv.useDatabase(originalDatabase)
}
}
}

View file

@ -1,118 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.Test;
import java.io.IOException;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
/**
 * Shared batch SQL tests. Subclasses supply the planner name, which is written to the
 * "zeppelin.flink.planner" property before the interpreters are created.
 */
public abstract class BatchSqlInterpreterTest extends FlinkSqlInterpreterTest {

  /** Returns the planner name this test class runs with. */
  protected abstract String getPlanner();

  /** Extends the parent's properties with the planner chosen by the subclass. */
  @Override
  protected Properties getFlinkProperties() throws IOException {
    Properties flinkProps = super.getFlinkProperties();
    flinkProps.setProperty("zeppelin.flink.planner", getPlanner());
    return flinkProps;
  }

  /** Creates the batch flavour of the SQL interpreter. */
  @Override
  protected FlinkSqlInterrpeter createFlinkSqlInterpreter(Properties properties) {
    return new FlinkBatchSqlInterpreter(properties);
  }

  /**
   * Registers a two-row DataSet as table_1 and selects it through the SQL interpreter.
   * The test body is skipped when the planner is "blink".
   */
  @Test
  public void testBatchSQL() throws InterpreterException {
    boolean blinkPlanner = getPlanner().equals("blink");
    if (blinkPlanner) {
      return;
    }

    InterpreterResult defineResult = flinkInterpreter.interpret(
        "val ds = benv.fromElements((1, \"jeff\"), (2, \"andy\"))", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, defineResult.code());

    InterpreterResult registerResult = flinkInterpreter.interpret(
        "btenv.registerDataSet(\"table_1\", ds, 'a, 'b)", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, registerResult.code());

    // Select the default catalog and database before running the query.
    sqlInterpreter.flinkInterpreter.getBatchTableEnvironment().useCatalog("default_catalog");
    sqlInterpreter.flinkInterpreter.getBatchTableEnvironment().useDatabase("default_database");

    InterpreterResult queryResult = sqlInterpreter.interpret(
        "select * from default_catalog.default_database.table_1", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, queryResult.code());
    assertEquals("a\tb\n1\tjeff\n2\tandy\n", appendOutput);
  }

  // Not run: the @Test annotation is commented out.
  //@Test
  public void testHiveTable() throws InterpreterException {
    // hiveShell.execute("create table hive_table (id int, name string)");
    InterpreterResult queryResult =
        sqlInterpreter.interpret("select * from hive_table", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, queryResult.code());
  }

  // Not run: the @Test annotation is commented out.
  // The test body is skipped when the planner is "flink".
  //@Test
  public void testInsertInto() throws InterpreterException {
    boolean flinkPlanner = getPlanner().equals("flink");
    if (flinkPlanner) {
      return;
    }
    // hiveShell.execute("create table table_inserted (id int, name string)");
    // hiveShell.executeQuery("show tables");

    InterpreterResult defineResult = flinkInterpreter.interpret(
        "val ds = benv.fromElements((1, \"jeff\"), (2, \"andy\"))", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, defineResult.code());

    InterpreterResult registerResult = flinkInterpreter.interpret(
        "btenv.registerDataSet(\"table_2\", ds, 'a, 'b)", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, registerResult.code());

    InterpreterResult insertResult = sqlInterpreter.interpret(
        "insert into table_inserted select * from default_catalog.default_database.table_2",
        getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, insertResult.code());
  }

  // Not run: the @Test annotation is commented out.
  //@Test
  public void testUDF() throws InterpreterException {
    String udfSource = "class AddOne extends ScalarFunction {\n"
        + " def eval(a: Int): Int = a + 1\n"
        + "}";
    InterpreterResult defineResult =
        flinkInterpreter.interpret(udfSource, getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, defineResult.code());

    InterpreterResult registerResult = flinkInterpreter.interpret(
        "btenv.registerFunction(\"addOne\", new $AddOne())", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, registerResult.code());

    InterpreterResult insertResult = sqlInterpreter.interpret(
        "INSERT INTO dest SELECT addOne(int_col) FROM source", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, insertResult.code());
  }
}

View file

@ -18,10 +18,376 @@
package org.apache.zeppelin.flink;
public class FlinkBatchSqlInterpreterTest extends BatchSqlInterpreterTest {
import org.apache.commons.io.FileUtils;
import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.flink.table.api.config.OptimizerConfigOptions;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.Properties;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;
public class FlinkBatchSqlInterpreterTest extends SqlInterpreterTest {
@Override
protected String getPlanner() {
return "flink";
protected FlinkSqlInterrpeter createFlinkSqlInterpreter(Properties properties) {
return new FlinkBatchSqlInterpreter(properties);
}
/**
 * Runs `show tables` and several `select` statements against a Hive table, including
 * selects that call a Scala UDF and Python UDFs registered through the Flink, PyFlink
 * and IPyFlink interpreters, and finally checks that `show tables` still lists only
 * the source table.
 */
@Test
public void testSelect() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");
  hiveShell.execute("insert into source_table values(1, 'a'), (2, 'b')");

  // verify show tables
  InterpreterContext context = getInterpreterContext();
  InterpreterResult result =
      sqlInterpreter.interpret("show tables", context);
  List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals(resultMessages.get(0).toString(),
      "table\nsource_table\n", resultMessages.get(0).getData());

  // verify select from
  context = getInterpreterContext();
  result =
      sqlInterpreter.interpret("select * from source_table", context);
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals("id\tname\n1\ta\n2\tb\n", resultMessages.get(0).getData());

  // define scala udf
  result = flinkInterpreter.interpret(
      "class AddOne extends ScalarFunction {\n" +
          " def eval(a: Int): Int = a + 1\n" +
          "}", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  result = flinkInterpreter.interpret("btenv.registerFunction(\"addOne\", new AddOne())",
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // select which use scala udf
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("SELECT addOne(id) as add_one FROM source_table", context);
  // the paragraph output is used as the failure message to ease debugging
  assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals("add_one\n2\n3\n", resultMessages.get(0).getData());

  // define python udf via PyFlinkInterpreter
  // (the method body must be indented deeper than its `def` to be valid Python)
  result = pyFlinkInterpreter.interpret(
      "class PythonUpper(ScalarFunction):\n" +
          "  def eval(self, s):\n" +
          "    return s.upper()", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  result = pyFlinkInterpreter.interpret("bt_env.register_function(\"python_upper\", " +
          "udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))",
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // select which use python udf
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("SELECT python_upper(name) as name FROM source_table", context);
  assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals("name\nA\nB\n", resultMessages.get(0).getData());

  // define python udf via IPyFlinkInterpreter
  result = iPyFlinkInterpreter.interpret(
      "class IPythonUpper(ScalarFunction):\n" +
          "  def eval(self, s):\n" +
          "    return s.upper()", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  result = iPyFlinkInterpreter.interpret("bt_env.register_function(\"ipython_upper\", " +
          "udf(IPythonUpper(), DataTypes.STRING(), DataTypes.STRING()))",
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // select which use python udf registered via IPyFlinkInterpreter
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("SELECT ipython_upper(name) as name FROM source_table", context);
  assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals("name\nA\nB\n", resultMessages.get(0).getData());

  // after these select queries, `show tables` should still show only one source table,
  // other temporary tables should not be displayed.
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("show tables", context);
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, resultMessages.size());
  assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
  assertEquals(resultMessages.get(0).toString(),
      "table\nsource_table\n", resultMessages.get(0).getData());
}
/**
 * Inserts the rows of a Hive table into a filesystem CSV sink table, reads them back
 * with a select, and checks that a second insert into the same sink is reported as an
 * error whose output contains "already exists".
 */
@Test
public void testInsertInto() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");
  hiveShell.execute("insert into source_table values(1, 'a'), (2, 'b')");

  // Obtain a unique path, then remove the directory so the path does not exist
  // when the sink table is declared.
  File sinkDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(sinkDir);

  String sinkDdl =
      "CREATE TABLE sink_table (\n"
          + "id int,\n"
          + "name string) WITH (\n"
          + "'format.field-delimiter'=',',\n"
          + "'connector.type'='filesystem',\n"
          + "'format.derive-schema'='true',\n"
          + "'connector.path'='" + sinkDir.getAbsolutePath() + "',\n"
          + "'format.type'='csv'\n"
          + ");";
  InterpreterResult createResult = sqlInterpreter.interpret(sinkDdl, getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, createResult.code());

  // insert into
  InterpreterContext insertContext = getInterpreterContext();
  InterpreterResult insertResult = sqlInterpreter.interpret(
      "insert into sink_table select * from source_table", insertContext);
  assertEquals(InterpreterResult.Code.SUCCESS, insertResult.code());
  List<InterpreterResultMessage> insertMessages =
      insertContext.out.toInterpreterResultMessage();
  assertEquals("Insertion successfully.\n", insertMessages.get(0).getData());

  // verify insert into via select from sink_table
  InterpreterContext selectContext = getInterpreterContext();
  InterpreterResult selectResult =
      sqlInterpreter.interpret("select * from sink_table", selectContext);
  assertEquals(InterpreterResult.Code.SUCCESS, selectResult.code());
  List<InterpreterResultMessage> selectMessages =
      selectContext.out.toInterpreterResultMessage();
  assertEquals("id\tname\n1\ta\n2\tb\n", selectMessages.get(0).getData());

  // insert into again will fail
  InterpreterContext retryContext = getInterpreterContext();
  InterpreterResult retryResult = sqlInterpreter.interpret(
      "insert into sink_table select * from source_table", retryContext);
  assertEquals(InterpreterResult.Code.ERROR, retryResult.code());
  String retryOutput = retryContext.out.toInterpreterResultMessage().get(0).getData();
  assertTrue(retryOutput, retryOutput.contains("already exists"));

  // The remainder of this test is commented out and does not run.
  // insert overwrite into
  // context = getInterpreterContext();
  // result = sqlInterpreter.interpret(
  // "insert overwrite dest_table select id + 1, name from source_table", context);
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  // resultMessages = context.out.toInterpreterResultMessage();
  // assertEquals("Insertion successfully.\n", resultMessages.get(0).getData());
  //
  // // verify insert into via select from the dest_table
  // context = getInterpreterContext();
  // result = sqlInterpreter.interpret(
  // "select * from dest_table", context);
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  // resultMessages = context.out.toInterpreterResultMessage();
  // assertEquals("id\tname\n2\ta\n3\tb\n", resultMessages.get(0).getData());
  //
  // // define scala udf
  // result = flinkInterpreter.interpret(
  // "class AddOne extends ScalarFunction {\n" +
  // " def eval(a: Int): Int = a + 1\n" +
  // "}", getInterpreterContext());
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  //
  // result = flinkInterpreter.interpret("btenv.registerFunction(\"addOne\", new AddOne())",
  // getInterpreterContext());
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  //
  // // insert into dest_table2 using udf
  // destDir = Files.createTempDirectory("flink_test").toFile();
  // FileUtils.deleteDirectory(destDir);
  // result = sqlInterpreter.interpret(
  // "CREATE TABLE dest_table2 (\n" +
  // "id int,\n" +
  // "name string" +
  // ") WITH (\n" +
  // "'format.field-delimiter'=',',\n" +
  // "'connector.type'='filesystem',\n" +
  // "'format.derive-schema'='true',\n" +
  // "'connector.path'='" + destDir.getAbsolutePath() + "',\n" +
  // "'format.type'='csv'\n" +
  // ");", getInterpreterContext());
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  //
  // context = getInterpreterContext();
  // result = sqlInterpreter.interpret(
  // "insert into dest_table2 select addOne(id), name from source_table", context);
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  // resultMessages = context.out.toInterpreterResultMessage();
  // assertEquals("Insertion successfully.\n", resultMessages.get(0).getData());
  //
  // // verify insert into via select from the dest table
  // context = getInterpreterContext();
  // result = sqlInterpreter.interpret(
  // "select * from dest_table2", context);
  // assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  // resultMessages = context.out.toInterpreterResultMessage();
  // assertEquals("id\tname\n2\ta\n3\tb\n", resultMessages.get(0).getData());
}
/**
 * Runs `set <key>=<value>;` followed by an insert in a single paragraph and checks the
 * outcome: valid table options succeed and an unknown option produces an error.
 * After each successful paragraph the test asserts that the table environment reports
 * the option's default value.
 */
@Test
public void testSetTableConfig() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");
  hiveShell.execute("insert into source_table values(1, 'a'), (2, 'b')");

  // Obtain a unique path, then remove the directory so the path does not exist
  // when the sink table is declared.
  File destDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(destDir);
  InterpreterResult result = sqlInterpreter.interpret(
      "CREATE TABLE sink_table (\n" +
          "id int,\n" +
          "name string" +
          ") WITH (\n" +
          "'format.field-delimiter'=',',\n" +
          "'connector.type'='filesystem',\n" +
          "'format.derive-schema'='true',\n" +
          "'connector.path'='" + destDir.getAbsolutePath() + "',\n" +
          "'format.type'='csv'\n" +
          ");", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // set parallelism then insert into
  InterpreterContext context = getInterpreterContext();
  result = sqlInterpreter.interpret(
      "set table.exec.resource.default-parallelism=10;" +
          "insert into sink_table select * from source_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("Insertion successfully.\n", resultMessages.get(0).getData());
  // the option is expected to hold its default value once the paragraph has finished
  assertEquals(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM.defaultValue(),
      sqlInterpreter.tbenv.getConfig().getConfiguration().get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM));

  // set then insert into
  // Remove the previous output first. File.delete() cannot remove a non-empty directory
  // and its failure would go unnoticed, so delete recursively instead.
  FileUtils.deleteQuietly(destDir);
  context = getInterpreterContext();
  result = sqlInterpreter.interpret(
      "set table.optimizer.source.predicate-pushdown-enabled=false;" +
          "insert into sink_table select * from source_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("Insertion successfully.\n", resultMessages.get(0).getData());
  assertEquals(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM.defaultValue(),
      sqlInterpreter.tbenv.getConfig().getConfiguration().get(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM));
  assertEquals(OptimizerConfigOptions.TABLE_OPTIMIZER_SOURCE_PREDICATE_PUSHDOWN_ENABLED.defaultValue(),
      sqlInterpreter.tbenv.getConfig().getConfiguration().get(OptimizerConfigOptions.TABLE_OPTIMIZER_SOURCE_PREDICATE_PUSHDOWN_ENABLED));

  // invalid config
  FileUtils.deleteQuietly(destDir);
  context = getInterpreterContext();
  result = sqlInterpreter.interpret(
      "set table.invalid_config=false;" +
          "insert into sink_table select * from source_table", context);
  assertEquals(InterpreterResult.Code.ERROR, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertTrue(resultMessages.get(0).getData(),
      resultMessages.get(0).getData().contains("table.invalid_config is not a valid table/sql config"));
}
/**
 * Runs two `insert into` statements in one paragraph, first as separate statements and
 * then with the "runAsOne" local property set to "true", and verifies the content of
 * both sink tables after each run.
 */
@Test
public void testMultipleInsertInto() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");
  hiveShell.execute("insert into source_table values(1, 'a'), (2, 'b')");

  // Obtain a unique path, then remove the directory so the path does not exist
  // when the sink table is declared.
  File destDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(destDir);
  InterpreterResult result = sqlInterpreter.interpret(
      "CREATE TABLE sink_table (\n" +
          "id int,\n" +
          "name string" +
          ") WITH (\n" +
          "'format.field-delimiter'=',',\n" +
          "'connector.type'='filesystem',\n" +
          "'format.derive-schema'='true',\n" +
          "'connector.path'='" + destDir.getAbsolutePath() + "',\n" +
          "'format.type'='csv'\n" +
          ");", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  File destDir2 = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(destDir2);
  result = sqlInterpreter.interpret(
      "CREATE TABLE sink_table2 (\n" +
          "id int,\n" +
          "name string" +
          ") WITH (\n" +
          "'format.field-delimiter'=',',\n" +
          "'connector.type'='filesystem',\n" +
          "'format.derive-schema'='true',\n" +
          "'connector.path'='" + destDir2.getAbsolutePath() + "',\n" +
          "'format.type'='csv'\n" +
          ");", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // insert into
  InterpreterContext context = getInterpreterContext();
  result = sqlInterpreter.interpret(
      "insert into sink_table select * from source_table;insert into sink_table2 select * from source_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
  // one success line per statement
  assertEquals("Insertion successfully.\nInsertion successfully.\n", resultMessages.get(0).getData());

  // verify insert into via select from sink_table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("select * from sink_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("id\tname\n1\ta\n2\tb\n", resultMessages.get(0).getData());

  context = getInterpreterContext();
  result = sqlInterpreter.interpret("select * from sink_table2", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("id\tname\n1\ta\n2\tb\n", resultMessages.get(0).getData());

  // insert into (runAsOne)
  // Remove the previous outputs first. File.delete() cannot remove a non-empty directory
  // and its failure would go unnoticed, so delete recursively instead.
  FileUtils.deleteQuietly(destDir);
  FileUtils.deleteQuietly(destDir2);

  context = getInterpreterContext();
  context.getLocalProperties().put("runAsOne", "true");
  result = sqlInterpreter.interpret(
      "insert into sink_table select * from source_table;insert into sink_table2 select * from source_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  // a single success line is expected when both statements run with runAsOne
  assertEquals("Insertion successfully.\n", resultMessages.get(0).getData());

  // verify insert into via select from sink_table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("select * from sink_table", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("id\tname\n1\ta\n2\tb\n", resultMessages.get(0).getData());

  context = getInterpreterContext();
  result = sqlInterpreter.interpret("select * from sink_table2", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals("id\tname\n1\ta\n2\tb\n", resultMessages.get(0).getData());
}
}

View file

@ -25,9 +25,8 @@ import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessageOutput;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.junit.After;
@ -35,7 +34,6 @@ import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
@ -49,13 +47,6 @@ import static org.mockito.Mockito.mock;
public class FlinkInterpreterTest {
private FlinkInterpreter interpreter;
private InterpreterContext context;
// catch the streaming output in onAppend
private volatile String output = "";
// catch the interpreter output in onUpdate
private List<InterpreterResultMessageOutput> messageOutput;
@Before
public void setUp() throws InterpreterException {
@ -68,62 +59,83 @@ public class FlinkInterpreterTest {
InterpreterGroup intpGroup = new InterpreterGroup();
interpreter.setInterpreterGroup(intpGroup);
interpreter.open();
context = InterpreterContext.builder()
.setIntpEventClient(mock(RemoteInterpreterEventClient.class))
.build();
InterpreterContext.set(context);
}
@After
public void tearDown() throws InterpreterException {
interpreter.close();
if (interpreter != null) {
interpreter.close();
}
}
@Test
public void testBasicScala() throws InterpreterException {
InterpreterResult result = interpreter.interpret("val a=\"hello world\"",
getInterpreterContext());
public void testScalaBasic() throws InterpreterException, IOException {
InterpreterContext context = getInterpreterContext();
InterpreterResult result = interpreter.interpret("val a=\"hello world\"", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("a: String = hello world\n", output);
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertEquals("a: String = hello world\n", resultMessages.get(0).getData());
result = interpreter.interpret("print(a)", getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("print(a)", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("hello world", output);
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertEquals("hello world", resultMessages.get(0).getData());
// java stdout
result = interpreter.interpret("System.out.print(a)", getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("System.out.print(a)", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("hello world", output);
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertEquals("hello world", resultMessages.get(0).getData());
// java stderr
context = getInterpreterContext();
result = interpreter.interpret("System.err.print(a)", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertEquals("hello world", resultMessages.get(0).getData());
// incomplete
result = interpreter.interpret("println(a", getInterpreterContext());
assertEquals(InterpreterResult.Code.INCOMPLETE, result.code());
// syntax error
result = interpreter.interpret("println(b)", getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("println(b)", context);
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertTrue(output.contains("not found: value b"));
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TEXT, resultMessages.get(0).getType());
assertTrue(resultMessages.get(0).getData(),
resultMessages.get(0).getData().contains("not found: value b"));
// multiple line
result = interpreter.interpret("\"123\".\ntoInt", getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("\"123\".\ntoInt", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// single line comment
result = interpreter.interpret("/*comment here*/", getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("/*comment here*/", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
result = interpreter.interpret("/*comment here*/\nprint(\"hello world\")",
getInterpreterContext());
context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// multiple line comment
result = interpreter.interpret("/*line 1 \n line 2*/",
getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("/*line 1 \n line 2*/", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// test function
result = interpreter.interpret("def add(x:Int, y:Int)\n{ return x+y }",
getInterpreterContext());
context = getInterpreterContext();
result = interpreter.interpret("def add(x:Int, y:Int)\n{ return x+y }", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = interpreter.interpret("print(add(1,2))", getInterpreterContext());
@ -140,14 +152,14 @@ public class FlinkInterpreterTest {
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// case class
// result = interpreter.interpret(
// context = getInterpreterContext();result = interpreter.interpret(
// "case class WC(word: String, count: Int)\n" +
// "val wordCounts = benv.fromElements(\n" +
// "WC(\"hello\", 1),\n" +
// "WC(\"world\", 2),\n" +
// "WC(\"world\", 8))\n" +
// "wordCounts.collect()",
// getInterpreterContext());
// context);
// assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
@ -195,11 +207,26 @@ public class FlinkInterpreterTest {
}
@Test
public void testCompletion() throws InterpreterException {
InterpreterResult result = interpreter.interpret("val a=\"hello world\"",
getInterpreterContext());
/**
 * Creates a three-row DataSet and checks that `z.show(data)` emits it as a TABLE
 * message with the tuple fields as columns.
 */
public void testZShow() throws InterpreterException, IOException {
  // show dataset
  InterpreterContext defineContext = getInterpreterContext();
  InterpreterResult defineResult = interpreter.interpret(
      "val data = benv.fromElements((1, \"jeff\"), (2, \"andy\"), (3, \"james\"))",
      defineContext);
  assertEquals(InterpreterResult.Code.SUCCESS, defineResult.code());

  InterpreterContext showContext = getInterpreterContext();
  InterpreterResult showResult = interpreter.interpret("z.show(data)", showContext);
  // the paragraph output is used as the failure message to ease debugging
  String rawOutput = new String(showContext.out.toByteArray());
  assertEquals(rawOutput, InterpreterResult.Code.SUCCESS, showResult.code());

  List<InterpreterResultMessage> messages = showContext.out.toInterpreterResultMessage();
  InterpreterResultMessage firstMessage = messages.get(0);
  assertEquals(InterpreterResult.Type.TABLE, firstMessage.getType());
  assertEquals("_1\t_2\n1\tjeff\n2\tandy\n3\tjames\n", firstMessage.getData());
}
@Test
public void testCompletion() throws InterpreterException {
InterpreterContext context = getInterpreterContext();
InterpreterResult result = interpreter.interpret("val a=\"hello world\"", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals("a: String = hello world\n", output);
List<InterpreterCompletion> completions = interpreter.completion("a.", 2,
getInterpreterContext());
@ -211,93 +238,59 @@ public class FlinkInterpreterTest {
@Test
public void testBatchWordCount() throws InterpreterException, IOException {
InterpreterContext context = getInterpreterContext();
InterpreterResult result = interpreter.interpret(
"val data = benv.fromElements(\"hello world\", \"hello flink\", \"hello hadoop\")",
getInterpreterContext());
context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
result = interpreter.interpret(
"data.flatMap(line => line.split(\"\\\\s\"))\n" +
" .map(w => (w, 1))\n" +
" .groupBy(0)\n" +
" .sum(1)\n" +
" .print()", getInterpreterContext());
" .print()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
String[] expectedCounts = {"(hello,3)", "(world,1)", "(flink,1)", "(hadoop,1)"};
Arrays.sort(expectedCounts);
String[] counts = output.split("\n");
String[] counts = context.out.toInterpreterResultMessage().get(0).getData().split("\n");
Arrays.sort(counts);
assertArrayEquals(expectedCounts, counts);
}
@Test
public void testStreamWordCount() throws InterpreterException {
public void testStreamWordCount() throws InterpreterException, IOException {
InterpreterContext context = getInterpreterContext();
InterpreterResult result = interpreter.interpret(
"val data = senv.fromElements(\"hello world\", \"hello flink\", \"hello hadoop\")",
getInterpreterContext());
context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
result = interpreter.interpret(
"data.flatMap(line => line.split(\"\\\\s\"))\n" +
" .map(w => (w, 1))\n" +
" .keyBy(0)\n" +
" .sum(1)\n" +
" .print()\n" +
"senv.execute()", getInterpreterContext());
"senv.execute()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
String[] expectedCounts = {"(hello,3)", "(world,1)", "(flink,1)", "(hadoop,1)"};
String output = context.out.toInterpreterResultMessage().get(0).getData();
for (String expectedCount : expectedCounts) {
assertTrue(output, output.contains(expectedCount));
}
}
//@Test
/**
 * Defines a Scala scalar function, registers it on `stenv` as "myupper", and applies it
 * to a table built from a two-element collection.
 */
public void testStreamUDF() throws InterpreterException {
  String defineAndRegisterUdf =
      "class MyUpper extends ScalarFunction {\n"
          + " def eval(str: String) = str.toUpperCase\n"
          + "}\n"
          + "stenv.registerFunction(\"myupper\", new MyUpper())";
  InterpreterResult registerResult =
      interpreter.interpret(defineAndRegisterUdf, getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, registerResult.code());

  String applyUdf =
      "val data = Seq(\"Hello\", \"Flink\")\n"
          + "val source = senv.fromCollection(data).toTable(stenv, 'word)\n"
          + "source.select(\"myupper(word)\").print()";
  InterpreterResult applyResult = interpreter.interpret(applyUdf, getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, applyResult.code());
}
private InterpreterContext getInterpreterContext() {
output = "";
messageOutput = new ArrayList<>();
InterpreterContext context = InterpreterContext.builder()
.setAngularObjectRegistry(new AngularObjectRegistry("flink", null))
.setIntpEventClient(mock(RemoteInterpreterEventClient.class))
.build();
context.out = new InterpreterOutput(
new InterpreterOutputListener() {
@Override
public void onUpdateAll(InterpreterOutput out) {
}
@Override
public void onAppend(int index, InterpreterResultMessageOutput out, byte[] line) {
try {
output = out.toInterpreterResultMessage().getData();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void onUpdate(int index, InterpreterResultMessageOutput out) {
messageOutput.add(out);
}
});
return context;
return InterpreterContext.builder()
.setInterpreterOut(new InterpreterOutput(null))
.setAngularObjectRegistry(new AngularObjectRegistry("flink", null))
.setIntpEventClient(mock(RemoteInterpreterEventClient.class))
.setInterpreterOut(new InterpreterOutput(null))
.build();
}
}

View file

@ -1,368 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink;
import com.google.common.io.Files;
//import com.klarna.hiverunner.HiveShell;
//import com.klarna.hiverunner.annotations.HiveSQL;
import org.apache.commons.io.IOUtils;
//import org.apache.flink.connectors.hive.FlinkStandaloneHiveRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessageOutput;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.junit.After;
import org.junit.Before;
//import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
//import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
//@RunWith(FlinkStandaloneHiveRunner.class)
public abstract class FlinkSqlInterpreterTest {
private static final Logger LOGGER = LoggerFactory.getLogger(FlinkSqlInterpreterTest.class);
// Three sample rows of four string cells each (integer-like, decimal-like, text, boolean-like).
protected static final String[][] INPUT_DATA = {
{"1", "1.1", "hello world", "true"},
{"2", "2.3", "hello flink", "true"},
{"3", "3.2", "hello hadoop", "false"},
};
// Interpreters under test; both are created and opened in setUp().
protected FlinkInterpreter flinkInterpreter;
protected FlinkSqlInterrpeter sqlInterpreter;
// Data of the message most recently reported to onAppend; reset to "" by getInterpreterContext().
protected volatile String appendOutput = "";
// Type of the message most recently reported to onAppend.
protected volatile InterpreterResult.Type appendOutputType;
// Message output most recently reported to onUpdate.
protected InterpreterResultMessageOutput updatedOutput;
// Disabled HiveRunner wiring, kept for reference:
// @HiveSQL(files = {})
// protected static HiveShell hiveShell;
/**
 * Assembles the properties used to construct both interpreters in {@code setUp()}:
 * Hive disabled, blink planner, 32 for managed memory size, Hive version 2.3.4 and
 * a {@code HIVE_CONF_DIR} that points at a freshly created temp directory.
 *
 * @return the populated property set
 * @throws IOException declared for subclasses/overrides; not thrown by the visible code
 */
protected Properties getFlinkProperties() throws IOException {
  // Fresh temp directory; nothing is written into it because the hive-site.xml
  // export below is disabled.
  // hiveShell.getHiveConf().writeXml(new FileWriter(new File(hiveConfDir, "hive-site.xml")));
  File hiveConfDir = Files.createTempDir();

  Properties flinkProps = new Properties();
  flinkProps.setProperty("zeppelin.flink.enableHive", "false");
  flinkProps.setProperty("zeppelin.flink.planner", "blink");
  flinkProps.setProperty("taskmanager.managed.memory.size", "32");
  flinkProps.setProperty("zeppelin.flink.hive.version", "2.3.4");
  flinkProps.setProperty("HIVE_CONF_DIR", hiveConfDir.getAbsolutePath());
  return flinkProps;
}
/**
 * Creates the Flink and Flink SQL interpreters from the same properties, places
 * both in one interpreter group under session "session_1", and opens them
 * (Flink interpreter first, SQL interpreter second).
 */
@Before
public void setUp() throws InterpreterException, IOException {
Properties p = getFlinkProperties();
flinkInterpreter = new FlinkInterpreter(p);
// The concrete SQL interpreter is chosen by the subclass.
sqlInterpreter = createFlinkSqlInterpreter(p);
InterpreterGroup intpGroup = new InterpreterGroup();
flinkInterpreter.setInterpreterGroup(intpGroup);
sqlInterpreter.setInterpreterGroup(intpGroup);
// Same session id for both interpreters.
intpGroup.addInterpreterToSession(flinkInterpreter, "session_1");
intpGroup.addInterpreterToSession(sqlInterpreter, "session_1");
flinkInterpreter.open();
sqlInterpreter.open();
// Disabled Hive bootstrap (would recreate and select database test_db):
// hiveShell.execute("drop database if exists test_db CASCADE");
// hiveShell.execute("create database test_db");
// hiveShell.execute("use test_db");
// InterpreterResult result = sqlInterpreter.interpret("use database test_db",
// getInterpreterContext());
// assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
/**
 * Closes the Flink interpreter after each test.
 * NOTE(review): sqlInterpreter is opened in setUp() but not closed here; confirm
 * whether closing flinkInterpreter is sufficient.
 */
@After
public void tearDown() throws InterpreterException {
flinkInterpreter.close();
}
/**
 * Factory hook: subclasses return the SQL interpreter variant under test.
 * setUp() calls this with the same properties used for the FlinkInterpreter.
 */
protected abstract FlinkSqlInterrpeter createFlinkSqlInterpreter(Properties properties);
/**
 * Walks through database and table lifecycle statements (show/create/use/drop) and
 * checks the result code plus the type and text captured by the onAppend listener
 * after each step. The JUnit annotation below is commented out.
 * NOTE(review): the expectations include a database named test_db, whose creation
 * in setUp() is commented out.
 */
//@Test
public void testDatabases() throws InterpreterException {
// Initial catalog content.
InterpreterResult result = sqlInterpreter.interpret("show databases",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, appendOutputType);
assertEquals("database\ndefault\ntest_db\n", appendOutput);
// Create db1 and switch to it.
result = sqlInterpreter.interpret("create database db1",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TEXT, appendOutputType);
assertEquals("Database has been created.\n", appendOutput);
result = sqlInterpreter.interpret("use db1",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// A new database has no tables: only the header line is expected.
result = sqlInterpreter.interpret("show tables",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, appendOutputType);
assertEquals("table\n", appendOutput);
// Create one table in db1 and verify it is listed.
result = sqlInterpreter.interpret(
"CREATE TABLE source (msg INT) with (type='csv', path='/tmp')",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = sqlInterpreter.interpret("show tables",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, appendOutputType);
assertEquals("table\nsource\n", appendOutput);
// Back in the default database the new table must not be visible.
result = sqlInterpreter.interpret("use `default`",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = sqlInterpreter.interpret("show tables",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, appendOutputType);
assertEquals("table\n", appendOutput);
// Dropping a non-empty database is expected to fail with a descriptive message.
result = sqlInterpreter.interpret("drop database db1",
getInterpreterContext());
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertTrue(result.message().get(0).getData(),
result.message().get(0).getData().contains("Database db1 is not empty"));
// After the table is removed, the drop is expected to succeed.
result = sqlInterpreter.interpret("drop table db1.source",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = sqlInterpreter.interpret("drop database db1",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// The database list is expected to match the initial listing again.
result = sqlInterpreter.interpret("show databases",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, appendOutputType);
assertEquals("database\ndefault\ntest_db\n", appendOutput);
}
/**
 * Checks "describe database" and "describe <table>": creates hive.db1 and a CSV
 * table named source, then asserts that the describe output captured by the
 * onAppend listener mentions the expected names. The JUnit annotation below is
 * commented out.
 */
//@Test
public void testDescribe() throws InterpreterException {
InterpreterResult result = sqlInterpreter.interpret("create database hive.db1",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TEXT, appendOutputType);
assertEquals("Database has been created.\n", appendOutput);
result = sqlInterpreter.interpret("describe database hive.db1", getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TEXT, appendOutputType);
assertTrue(appendOutput, appendOutput.contains("db1"));
//TODO(zjffdu) hive and flink share the same namespace for db.
// (The disabled block below still refers to outputType/output rather than the
// appendOutputType/appendOutput fields used by the active assertions.)
// result = sqlInterpreter.interpret("create database flink.db1",
// getInterpreterContext());
// assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// assertEquals(InterpreterResult.Type.TEXT, outputType);
// assertEquals("Database has been created.\n", output);
//
// result = sqlInterpreter.interpret("describe database flink.db1",
// getInterpreterContext());
// assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// assertEquals(InterpreterResult.Type.TEXT, outputType);
// assertTrue(output, output.contains("db1"));
result = sqlInterpreter.interpret(
"CREATE TABLE source (int_col INT, double_col double, varchar_col varchar, " +
"bool_col boolean) with (type='csv', path='/tmp')",
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// TODO(zjffdu) this is a Calcite bug: a table name that is a keyword has to be
// quoted, so the statement below wraps `source` in backticks.
result = sqlInterpreter.interpret("describe `source`", getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TEXT, appendOutputType);
assertTrue(appendOutput, appendOutput.contains("name: int_col"));
}
/**
 * Clears {@code appendOutput} and returns a new context whose output reports back
 * into this test's fields:
 * <ul>
 *   <li>onAppend stores the message type and data in
 *       {@code appendOutputType} / {@code appendOutput};</li>
 *   <li>onUpdate stores the message output in {@code updatedOutput};</li>
 *   <li>onUpdateAll only prints an empty line to stdout.</li>
 * </ul>
 */
protected InterpreterContext getInterpreterContext() {
  appendOutput = "";

  InterpreterContext ctx = InterpreterContext.builder()
      .setInterpreterOut(new InterpreterOutput(null))
      .setAngularObjectRegistry(new AngularObjectRegistry("flink", null))
      .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
      .build();

  InterpreterOutputListener recorder = new InterpreterOutputListener() {
    @Override
    public void onUpdateAll(InterpreterOutput out) {
      System.out.println();
    }

    @Override
    public void onAppend(int index, InterpreterResultMessageOutput out, byte[] line) {
      try {
        appendOutputType = out.toInterpreterResultMessage().getType();
        appendOutput = out.toInterpreterResultMessage().getData();
      } catch (IOException e) {
        // Only reported; the fields keep whatever was assigned before the failure.
        e.printStackTrace();
      }
    }

    @Override
    public void onUpdate(int index, InterpreterResultMessageOutput out) {
      updatedOutput = out;
    }
  };

  // Swap the placeholder output set through the builder for the recording one.
  ctx.out = new InterpreterOutput(recorder);
  return ctx;
}
/**
 * Writes {@code data} to a newly created temp file named
 * {@code zeppelin-flink-input*.csv}.
 *
 * The stream is managed by try-with-resources, so it is closed on every path and
 * an exception thrown while closing cannot mask the exception from the write.
 *
 * @param data text to write (passed unchanged to {@code IOUtils.write})
 * @return the temp file that was written
 * @throws IOException if the file cannot be created, written or closed
 */
public static File createInputFile(String data) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".csv");
  try (FileOutputStream out = new FileOutputStream(file)) {
    IOUtils.write(data, out);
  }
  return file;
}
/**
 * Writes {@code data} as comma-separated rows to a newly created temp file named
 * {@code zeppelin-flink-input*.csv}. Each row is terminated by {@code "\n"},
 * including the last one.
 *
 * Differences from a naive implementation that matter to callers:
 * <ul>
 *   <li>an empty {@code data} array yields an empty file instead of failing on
 *       {@code data[0]};</li>
 *   <li>each row is written with its own length (identical output for
 *       rectangular input);</li>
 *   <li>{@code PrintWriter} never throws on write errors, so its error flag is
 *       checked and turned into an {@code IOException}.</li>
 * </ul>
 *
 * @param data rows of cells to write
 * @return the temp file that was written
 * @throws IOException if the file cannot be created or a write error occurred
 */
public static File createInputFile(String[][] data) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".csv");
  try (PrintWriter writer = new PrintWriter(new FileOutputStream(file))) {
    for (String[] row : data) {
      for (int col = 0; col < row.length; ++col) {
        if (col > 0) {
          writer.print(",");
        }
        writer.print(row[col]);
      }
      // TODO(zjffdu) This is a bug of CSV Sink of Flink, it always put
      // line separator at the end
      // which is not necessary.
      writer.print("\n");
    }
    // checkError() flushes and reports any failure PrintWriter swallowed so far.
    if (writer.checkError()) {
      throw new IOException("Failed to write input file " + file.getAbsolutePath());
    }
  }
  return file;
}
/**
 * Writes {@code values} into a new ORC file with schema {@code struct<msg:int>}
 * and snappy compression configured via {@code orc.compress}.
 *
 * The writer is closed in a {@code finally} block so it is released even when
 * adding a batch fails.
 *
 * @param values integers to store, one per row
 * @return the file whose path the ORC writer was created with
 * @throws IOException if the temp file or the ORC writer fails
 */
public File createORCFile(int[] values) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".orc");
  // Only the unique name is kept: the empty placeholder is removed before the
  // ORC writer is pointed at the path.
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.set("orc.compress", "snappy");
  TypeDescription schema = TypeDescription.fromString("struct<msg:int>");
  Writer writer = OrcFile.createWriter(path,
      OrcFile.writerOptions(conf)
          .setSchema(schema));
  try {
    VectorizedRowBatch batch = schema.createRowBatch();
    LongColumnVector x = (LongColumnVector) batch.cols[0];
    for (int value : values) {
      int row = batch.size++;
      x.vector[row] = value;
      // If the batch is full, write it out and start over.
      if (batch.size == batch.getMaxSize()) {
        writer.addRowBatch(batch);
        batch.reset();
      }
    }
    // Flush the last, partially filled batch.
    if (batch.size != 0) {
      writer.addRowBatch(batch);
      batch.reset();
    }
  } finally {
    writer.close();
  }
  return file;
}
/**
 * Writes {@code values} into a new uncompressed Parquet file whose schema has a
 * single required {@code int32_field} column.
 *
 * The writer is managed by try-with-resources so it is closed even when a
 * {@code write} call fails.
 *
 * @param values integers to store, one group per value
 * @param version Parquet writer version passed to the writer
 * @return the file whose path the Parquet writer was created with
 * @throws IOException if the temp file or the Parquet writer fails
 */
public File createParquetFile(int[] values,
    ParquetProperties.WriterVersion version) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".par");
  // Only the unique name is kept: the empty placeholder is removed before the
  // Parquet writer is pointed at the path.
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  MessageType schema = MessageTypeParser.parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(
      path,
      new GroupWriteSupport(),
      CompressionCodecName.UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf)) {
    for (int value : values) {
      writer.write(f.newGroup()
          .append("int32_field", value));
    }
  }
  return file;
}
}

View file

@ -17,19 +17,24 @@
*/
package org.apache.zeppelin.flink;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.Properties;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;
public class FlinkStreamSqlInterpreterTest extends FlinkSqlInterpreterTest {
public class FlinkStreamSqlInterpreterTest extends SqlInterpreterTest {
@Override
protected FlinkSqlInterrpeter createFlinkSqlInterpreter(Properties properties) {
@ -39,54 +44,249 @@ public class FlinkStreamSqlInterpreterTest extends FlinkSqlInterpreterTest {
@Test
public void testSingleStreamSql() throws IOException, InterpreterException {
String initStreamScalaScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript,
getInterpreterContext());
InterpreterContext context = getInterpreterContext();
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
context = getInterpreterContext();
context.getLocalProperties().put("type", "single");
context.getLocalProperties().put("template", "Total Count: {1} <br/> {0}");
result = sqlInterpreter.interpret("select max(rowtime), count(1) " +
"from default_catalog.default_database.log", context);
"from log", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.HTML, updatedOutput.toInterpreterResultMessage().getType());
assertTrue(updatedOutput.toInterpreterResultMessage().getData(),
!updatedOutput.toInterpreterResultMessage().getData().isEmpty());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.HTML, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(),
resultMessages.get(0).getData().contains("Total Count"));
}
@Test
public void testRetractStreamSql() throws IOException, InterpreterException {
public void testSingleStreamTableApi() throws IOException, InterpreterException {
String initStreamScalaScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
InterpreterContext context = getInterpreterContext();
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = getInterpreterContext();
String code = "val table = stenv.sqlQuery(\"select max(rowtime), count(1) from log\")\nz.show(table,streamType=\"single\", configs = Map(\"template\" -> \"Total Count: {1} <br/> {0}\"))";
result = flinkInterpreter.interpret(code, context);
assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.HTML, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(),
resultMessages.get(0).getData().contains("Total Count"));
context = getInterpreterContext();
result = sqlInterpreter.interpret("show tables", context);
assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertEquals("table\nlog\n", resultMessages.get(0).getData());
}
@Test
public void testUpdateStreamSql() throws IOException, InterpreterException {
String initStreamScalaScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript,
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("type", "retract");
context.getLocalProperties().put("type", "update");
result = sqlInterpreter.interpret("select url, count(1) as pv from " +
"default_catalog.default_database.log group by url", context);
"log group by url", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE,
updatedOutput.toInterpreterResultMessage().getType());
assertTrue(updatedOutput.toInterpreterResultMessage().getData(),
!updatedOutput.toInterpreterResultMessage().getData().isEmpty());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(),
resultMessages.get(0).getData().contains("url\tpv\n"));
}
@Test
public void testTimeSeriesStreamSql() throws IOException, InterpreterException {
public void testUpdateStreamTableApi() throws IOException, InterpreterException {
String initStreamScalaScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript,
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("type", "ts");
String code = "val table = stenv.sqlQuery(\"select url, count(1) as pv from log group by url\")\nz.show(table, streamType=\"update\")";
result = flinkInterpreter.interpret(code, context);
assertEquals(new String(context.out.toByteArray()), InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(),
resultMessages.get(0).getData().contains("url\tpv\n"));
}
@Test
public void testAppendStreamSql() throws IOException, InterpreterException {
String initStreamScalaScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
InterpreterResult result = flinkInterpreter.interpret(initStreamScalaScript,
getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
InterpreterContext context = getInterpreterContext();
context.getLocalProperties().put("type", "append");
result = sqlInterpreter.interpret("select TUMBLE_START(rowtime, INTERVAL '5' SECOND) as " +
"start_time, url, count(1) as pv from default_catalog.default_database.log group by " +
"start_time, url, count(1) as pv from log group by " +
"TUMBLE(rowtime, INTERVAL '5' SECOND), url", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE,
updatedOutput.toInterpreterResultMessage().getType());
assertTrue(updatedOutput.toInterpreterResultMessage().getData(),
!updatedOutput.toInterpreterResultMessage().getData().isEmpty());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertTrue(resultMessages.toString(),
resultMessages.get(0).getData().contains("url\tpv\n"));
}
/**
 * Runs the stream init script, then executes a tumbling-window aggregation through
 * the Table API and renders it with {@code z.show(..., streamType="append")}.
 * Expects a TABLE result whose data contains the {@code url\tpv} header.
 */
@Test
public void testAppendStreamTableApi() throws IOException, InterpreterException {
  String initScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
  InterpreterResult outcome = flinkInterpreter.interpret(initScript,
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  InterpreterContext showContext = getInterpreterContext();
  String windowQuery = "select TUMBLE_START(rowtime, INTERVAL '5' SECOND) as "
      + "start_time, url, count(1) as pv from log group by "
      + "TUMBLE(rowtime, INTERVAL '5' SECOND), url";
  String showScript = "val table = stenv.sqlQuery(\"" + windowQuery + "\")\n"
      + "z.show(table, streamType=\"append\")";
  outcome = flinkInterpreter.interpret(showScript, showContext);
  // On failure, the raw interpreter output is used as the assertion message.
  assertEquals(new String(showContext.out.toByteArray()),
      InterpreterResult.Code.SUCCESS, outcome.code());

  List<InterpreterResultMessage> messages = showContext.out.toInterpreterResultMessage();
  assertEquals(InterpreterResult.Type.TABLE, messages.get(0).getType());
  assertTrue(messages.toString(),
      messages.get(0).getData().contains("url\tpv\n"));
}
/**
 * Runs the stream init script, registers a Scala scalar function as
 * {@code myupper} through the Flink interpreter, and uses it from an "update"
 * type streaming SQL query. Only the result codes are asserted.
 */
@Test
public void testStreamUDF() throws IOException, InterpreterException {
  String initScript = IOUtils.toString(getClass().getResource("/init_stream.scala"));
  InterpreterResult outcome = flinkInterpreter.interpret(initScript,
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  String registerUdfScript = "class MyUpper extends ScalarFunction {\n"
      + " def eval(a: String): String = a.toUpperCase()\n"
      + "}\n"
      + "stenv.registerFunction(\"myupper\", new MyUpper())";
  outcome = flinkInterpreter.interpret(registerUdfScript, getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  InterpreterContext sqlContext = getInterpreterContext();
  sqlContext.getLocalProperties().put("type", "update");
  String udfQuery = "select myupper(url), count(1) as pv from "
      + "log group by url";
  outcome = sqlInterpreter.interpret(udfQuery, sqlContext);
  // On failure, the raw interpreter output is used as the assertion message.
  assertEquals(new String(sqlContext.out.toByteArray()),
      InterpreterResult.Code.SUCCESS, outcome.code());
  // Disabled checks on the streamed output:
  // assertEquals(InterpreterResult.Type.TABLE,
  // updatedOutput.toInterpreterResultMessage().getType());
  // assertTrue(updatedOutput.toInterpreterResultMessage().getData(),
  // !updatedOutput.toInterpreterResultMessage().getData().isEmpty());
}
/**
 * Creates {@code source_table} through {@code hiveShell} and a filesystem CSV sink
 * {@code dest_table} through the SQL interpreter, runs an {@code insert into ...
 * select}, and finally checks that {@code show tables} returns exactly one TABLE
 * message listing {@code dest_table} and {@code source_table}.
 */
@Test
public void testInsertInto() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");

  // Obtain a unique directory name, then remove the directory again so the
  // sink path does not exist when the table is created.
  File sinkDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(sinkDir);

  String createSinkDdl = "CREATE TABLE dest_table (\n"
      + "id int,\n"
      + "name string"
      + ") WITH (\n"
      + "'format.field-delimiter'=',',\n"
      + "'connector.type'='filesystem',\n"
      + "'format.derive-schema'='true',\n"
      + "'connector.path'='" + sinkDir.getAbsolutePath() + "',\n"
      + "'format.type'='csv'\n"
      + ");";
  InterpreterResult outcome = sqlInterpreter.interpret(createSinkDdl,
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  outcome = sqlInterpreter.interpret(
      "insert into dest_table select * from source_table",
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  // `show tables` must list only the two tables created above; no temporary
  // tables may show up.
  InterpreterContext showTablesContext = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show tables", showTablesContext);
  List<InterpreterResultMessage> messages =
      showTablesContext.out.toInterpreterResultMessage();
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
  assertEquals(1, messages.size());
  assertEquals(InterpreterResult.Type.TABLE, messages.get(0).getType());
  assertEquals(messages.get(0).toString(),
      "table\ndest_table\nsource_table\n", messages.get(0).getData());
}
/**
 * Creates two filesystem CSV sinks ({@code dest_table}, {@code dest_table2}) and
 * submits two {@code insert into} statements in one paragraph with the local
 * property {@code runAsOne=true}. Every interpreter call must return SUCCESS.
 */
@Test
public void testMultipleInsertInto() throws InterpreterException, IOException {
  hiveShell.execute("create table source_table (id int, name string)");

  // Column list and connector options shared by both sink definitions; only the
  // table name and the connector path differ.
  final String ddlUpToPath = "id int,\n"
      + "name string"
      + ") WITH (\n"
      + "'format.field-delimiter'=',',\n"
      + "'connector.type'='filesystem',\n"
      + "'format.derive-schema'='true',\n"
      + "'connector.path'='";
  final String ddlAfterPath = "',\n"
      + "'format.type'='csv'\n"
      + ");";

  // First sink: unique directory name, directory itself removed again.
  File firstSinkDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(firstSinkDir);
  InterpreterResult outcome = sqlInterpreter.interpret(
      "CREATE TABLE dest_table (\n" + ddlUpToPath
          + firstSinkDir.getAbsolutePath() + ddlAfterPath,
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  // Second sink, same recipe.
  File secondSinkDir = Files.createTempDirectory("flink_test").toFile();
  FileUtils.deleteDirectory(secondSinkDir);
  outcome = sqlInterpreter.interpret(
      "CREATE TABLE dest_table2 (\n" + ddlUpToPath
          + secondSinkDir.getAbsolutePath() + ddlAfterPath,
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  InterpreterContext runAsOneContext = getInterpreterContext();
  runAsOneContext.getLocalProperties().put("runAsOne", "true");
  outcome = sqlInterpreter.interpret(
      "insert into dest_table select * from source_table;insert into dest_table2 select * from source_table",
      runAsOneContext);
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
}
/**
 * Submits a Kafka-backed {@code CREATE TABLE} statement that declares a watermark
 * on {@code event_ts} and expects a single TEXT message reading
 * "Table has been created.".
 */
@Test
public void testCreateTableWithWaterMark() throws InterpreterException, IOException {
  InterpreterContext ddlContext = getInterpreterContext();

  // create table
  String createTableDdl = "CREATE TABLE sink_kafka (\n"
      + " status STRING,\n"
      + " direction STRING,\n"
      + " event_ts TIMESTAMP(3),\n"
      + " WATERMARK FOR event_ts AS event_ts - INTERVAL '5' SECOND\n"
      + ") WITH (\n"
      + " 'connector.type' = 'kafka', \n"
      + " 'connector.version' = 'universal', \n"
      + " 'connector.topic' = 'generated.events2',\n"
      + " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n"
      + " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n"
      + " 'connector.properties.group.id' = 'testGroup',\n"
      + " 'format.type'='json',\n"
      + " 'update-mode' = 'append'\n"
      + ")\n";
  InterpreterResult outcome = sqlInterpreter.interpret(createTableDdl, ddlContext);
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());

  List<InterpreterResultMessage> messages = ddlContext.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals(InterpreterResult.Type.TEXT, messages.get(0).getType());
  assertEquals("Table has been created.\n", messages.get(0).getData());
}
}

View file

@ -19,30 +19,37 @@ package org.apache.zeppelin.flink;
import com.google.common.io.Files;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.apache.zeppelin.python.IPythonInterpreterTest;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
public class IPyFlinkInterpreterTest {
public class IPyFlinkInterpreterTest extends IPythonInterpreterTest {
private InterpreterGroup intpGroup;
private Interpreter interpreter;
private RemoteInterpreterEventClient mockIntpEventClient =
mock(RemoteInterpreterEventClient.class);
private LazyOpenInterpreter flinkScalaInterpreter;
protected Properties initIntpProperties() {
Properties p = new Properties();
@ -54,17 +61,18 @@ public class IPyFlinkInterpreterTest {
return p;
}
@Override
protected void startInterpreter(Properties properties) throws InterpreterException {
InterpreterContext context = getInterpreterContext();
context.setIntpEventClient(mockIntpEventClient);
InterpreterContext.set(context);
LazyOpenInterpreter flinkInterpreter = new LazyOpenInterpreter(
this.flinkScalaInterpreter = new LazyOpenInterpreter(
new FlinkInterpreter(properties));
intpGroup = new InterpreterGroup();
intpGroup.put("session_1", new ArrayList<Interpreter>());
intpGroup.get("session_1").add(flinkInterpreter);
flinkInterpreter.setInterpreterGroup(intpGroup);
intpGroup.get("session_1").add(flinkScalaInterpreter);
flinkScalaInterpreter.setInterpreterGroup(intpGroup);
LazyOpenInterpreter pyFlinkInterpreter =
new LazyOpenInterpreter(new PyFlinkInterpreter(properties));
@ -90,14 +98,33 @@ public class IPyFlinkInterpreterTest {
}
@Test
public void testIPyFlink() throws InterpreterException {
testBatchPyFlink(interpreter);
testStreamPyFlink(interpreter);
public void testBatchIPyFlink() throws InterpreterException, IOException {
testBatchPyFlink(interpreter, flinkScalaInterpreter);
}
public static void testBatchPyFlink(Interpreter interpreter) throws InterpreterException {
InterpreterContext context = createInterpreterContext(mock(RemoteInterpreterEventClient.class));
InterpreterResult result = interpreter.interpret(
/**
 * Delegates to the testStreamPyFlink helper, passing this class's
 * {@code interpreter} and {@code flinkScalaInterpreter} fields.
 */
@Test
public void testStreamIPyFlink() throws InterpreterException, IOException {
testStreamPyFlink(interpreter, flinkScalaInterpreter);
}
/**
 * Delegates to the two-argument testSingleStreamTableApi helper, passing this
 * class's {@code interpreter} and {@code flinkScalaInterpreter} fields.
 */
@Test
public void testSingleStreamTableApi() throws InterpreterException, IOException {
testSingleStreamTableApi(interpreter, flinkScalaInterpreter);
}
/**
 * Delegates to the two-argument testUpdateStreamTableApi helper, passing this
 * class's {@code interpreter} and {@code flinkScalaInterpreter} fields.
 */
@Test
public void testUpdateStreamTableApi() throws InterpreterException, IOException {
testUpdateStreamTableApi(interpreter, flinkScalaInterpreter);
}
/**
 * Delegates to the two-argument testAppendStreamTableApi helper, passing this
 * class's {@code interpreter} and {@code flinkScalaInterpreter} fields.
 */
@Test
public void testAppendStreamTableApi() throws InterpreterException, IOException {
testAppendStreamTableApi(interpreter, flinkScalaInterpreter);
}
public static void testBatchPyFlink(Interpreter pyflinkInterpreter, Interpreter flinkScalaInterpreter) throws InterpreterException, IOException {
InterpreterContext context = createInterpreterContext();
InterpreterResult result = pyflinkInterpreter.interpret(
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
@ -124,46 +151,207 @@ public class IPyFlinkInterpreterTest {
"bt_env.execute(\"batch_job\")"
, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// use group by
context = createInterpreterContext();
result = pyflinkInterpreter.interpret(
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
"sink_path = tempfile.gettempdir() + '/streaming.csv'\n" +
"if os.path.exists(sink_path):\n" +
" if os.path.isfile(sink_path):\n" +
" os.remove(sink_path)\n" +
" else:\n" +
" shutil.rmtree(sink_path)\n" +
"b_env.set_parallelism(1)\n" +
"t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" +
"bt_env.connect(FileSystem().path(sink_path)) \\\n" +
" .with_format(OldCsv()\n" +
" .field_delimiter(',')\n" +
" .field(\"a\", DataTypes.STRING())\n" +
" .field(\"b\", DataTypes.BIGINT())\n" +
" .field(\"c\", DataTypes.BIGINT())) \\\n" +
" .with_schema(Schema()\n" +
" .field(\"a\", DataTypes.STRING())\n" +
" .field(\"b\", DataTypes.BIGINT())\n" +
" .field(\"c\", DataTypes.BIGINT())) \\\n" +
" .register_table_sink(\"batch_sink4\")\n" +
"t.group_by(\"c\").select(\"c, sum(a), count(b)\").insert_into(\"batch_sink4\")\n" +
"bt_env.execute(\"batch_job4\")"
, context);
assertEquals(result.toString(),InterpreterResult.Code.SUCCESS, result.code());
// use scala udf in pyflink
// define scala udf
result = flinkScalaInterpreter.interpret(
"class AddOne extends ScalarFunction {\n" +
" def eval(a: java.lang.Long): String = a + \"\1\"\n" +
"}", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = flinkScalaInterpreter.interpret("btenv.registerFunction(\"addOne\", new AddOne())",
context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
context = createInterpreterContext();
result = pyflinkInterpreter.interpret(
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
"sink_path = tempfile.gettempdir() + '/streaming.csv'\n" +
"if os.path.exists(sink_path):\n" +
" if os.path.isfile(sink_path):\n" +
" os.remove(sink_path)\n" +
" else:\n" +
" shutil.rmtree(sink_path)\n" +
"b_env.set_parallelism(1)\n" +
"t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" +
"bt_env.connect(FileSystem().path(sink_path)) \\\n" +
" .with_format(OldCsv()\n" +
" .field_delimiter(',')\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .with_schema(Schema()\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .register_table_sink(\"batch_sink3\")\n" +
"t.select(\"a, addOne(a), c\").insert_into(\"batch_sink3\")\n" +
"bt_env.execute(\"batch_job3\")"
, context);
assertEquals(result.toString(),InterpreterResult.Code.SUCCESS, result.code());
// z.show
context = createInterpreterContext();
result = pyflinkInterpreter.interpret(
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
"sink_path = tempfile.gettempdir() + '/streaming.csv'\n" +
"if os.path.exists(sink_path):\n" +
" if os.path.isfile(sink_path):\n" +
" os.remove(sink_path)\n" +
" else:\n" +
" shutil.rmtree(sink_path)\n" +
"b_env.set_parallelism(1)\n" +
"t = bt_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" +
"z.show(t)"
, context);
assertEquals(result.toString(),InterpreterResult.Code.SUCCESS, result.code());
List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
assertEquals(new String(context.out.toByteArray()), 1, resultMessages.size());
assertEquals(new String(context.out.toByteArray()), InterpreterResult.Type.TABLE, resultMessages.get(0).getType());
assertEquals(new String(context.out.toByteArray()), "a\tb\tc\n1\thi\thello\n2\thi\thello\n", resultMessages.get(0).getData());
}
public static void testStreamPyFlink(Interpreter interpreter) throws InterpreterException {
InterpreterContext context = createInterpreterContext(mock(RemoteInterpreterEventClient.class));
// Checks that starting the interpreter with a non-existent Python executable
// fails with an InterpreterException whose stack trace mentions
// "No such file or directory".
@Override
public void testIPythonFailToLaunch() throws InterpreterException {
// NOTE(review): presumably stops the interpreter started by setUp so a fresh one
// can be launched with different properties - confirm against tearDown().
tearDown();
Properties properties = initIntpProperties();
// Point the PyFlink python executable at a command that does not exist.
properties.setProperty("zeppelin.pyflink.python", "invalid_python");
try {
startInterpreter(properties);
fail("Should not be able to start IPyFlinkInterpreter");
} catch (InterpreterException e) {
// Search the whole stack trace rather than only e.getMessage(); the trace is
// also used as the assertion failure message for easier diagnosis.
String exceptionMsg = ExceptionUtils.getStackTrace(e);
assertTrue(exceptionMsg, exceptionMsg.contains("No such file or directory"));
}
}
public static void testStreamPyFlink(Interpreter interpreter, Interpreter flinkScalaInterpreter) throws InterpreterException, IOException {
InterpreterContext context = createInterpreterContext();
InterpreterResult result = interpreter.interpret(
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
"sink_path = tempfile.gettempdir() + '/streaming.csv'\n" +
"if os.path.exists(sink_path):\n" +
" if os.path.isfile(sink_path):\n" +
" os.remove(sink_path)\n" +
" else:\n" +
" shutil.rmtree(sink_path)\n" +
"s_env.set_parallelism(1)\n" +
"t = st_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" +
"st_env.connect(FileSystem().path(sink_path)) \\\n" +
" .with_format(OldCsv()\n" +
" .field_delimiter(',')\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .with_schema(Schema()\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .register_table_sink(\"stream_sink\")\n" +
"t.select(\"a + 1, b, c\").insert_into(\"stream_sink\")\n" +
"st_env.execute(\"stream_job\")"
"import tempfile\n" +
"import os\n" +
"import shutil\n" +
"sink_path = tempfile.gettempdir() + '/streaming.csv'\n" +
"if os.path.exists(sink_path):\n" +
" if os.path.isfile(sink_path):\n" +
" os.remove(sink_path)\n" +
" else:\n" +
" shutil.rmtree(sink_path)\n" +
"s_env.set_parallelism(1)\n" +
"t = st_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])\n" +
"st_env.connect(FileSystem().path(sink_path)) \\\n" +
" .with_format(OldCsv()\n" +
" .field_delimiter(',')\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .with_schema(Schema()\n" +
" .field(\"a\", DataTypes.BIGINT())\n" +
" .field(\"b\", DataTypes.STRING())\n" +
" .field(\"c\", DataTypes.STRING())) \\\n" +
" .register_table_sink(\"stream_sink\")\n" +
"t.select(\"a + 1, b, c\").insert_into(\"stream_sink\")\n" +
"st_env.execute(\"stream_job\")"
, context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
private static InterpreterContext createInterpreterContext(
RemoteInterpreterEventClient mockRemoteEventClient) {
/**
 * Runs {@code /init_stream.scala} on the Scala interpreter, then issues a
 * {@code stream_type='single'} {@code z.show} from the Python interpreter and
 * checks that the first output message is HTML containing "Total Count".
 *
 * @param interpreter the Python-side interpreter that runs the query
 * @param flinkScalaInterpreter the Scala interpreter that runs the init script
 */
public static void testSingleStreamTableApi(Interpreter interpreter,
    Interpreter flinkScalaInterpreter) throws IOException, InterpreterException {
  // NOTE(review): the init script presumably registers the `log` table queried below.
  String initScript =
      IOUtils.toString(IPyFlinkInterpreterTest.class.getResource("/init_stream.scala"));
  InterpreterContext scalaContext = createInterpreterContext();
  InterpreterResult scalaResult = flinkScalaInterpreter.interpret(initScript, scalaContext);
  assertEquals(InterpreterResult.Code.SUCCESS, scalaResult.code());

  InterpreterContext pyContext = createInterpreterContext();
  String pyCode = "table = st_env.sql_query('select max(rowtime), count(1) from log')\nz.show(table,stream_type='single',template = 'Total Count: {1} <br/> {0}')";
  InterpreterResult pyResult = interpreter.interpret(pyCode, pyContext);
  // The captured interpreter output doubles as the failure message.
  assertEquals(new String(pyContext.out.toByteArray()),
      InterpreterResult.Code.SUCCESS, pyResult.code());

  List<InterpreterResultMessage> messages = pyContext.out.toInterpreterResultMessage();
  InterpreterResultMessage firstMessage = messages.get(0);
  assertEquals(InterpreterResult.Type.HTML, firstMessage.getType());
  assertTrue(messages.toString(), firstMessage.getData().contains("Total Count"));
}
/**
 * Runs {@code /init_stream.scala} on the Scala interpreter, then issues a
 * {@code stream_type='update'} {@code z.show} from the Python interpreter and
 * checks that the first output message is a table with a "url\tpv" header.
 *
 * @param interpreter the Python-side interpreter that runs the query
 * @param flinkScalaInterpreter the Scala interpreter that runs the init script
 */
public static void testUpdateStreamTableApi(Interpreter interpreter,
    Interpreter flinkScalaInterpreter) throws IOException, InterpreterException {
  // NOTE(review): the init script presumably registers the `log` table queried below.
  String initScript =
      IOUtils.toString(IPyFlinkInterpreterTest.class.getResource("/init_stream.scala"));
  InterpreterContext scalaContext = createInterpreterContext();
  InterpreterResult scalaResult = flinkScalaInterpreter.interpret(initScript, scalaContext);
  assertEquals(InterpreterResult.Code.SUCCESS, scalaResult.code());

  InterpreterContext pyContext = createInterpreterContext();
  String pyCode = "table = st_env.sql_query('select url, count(1) as pv from log group by url')\nz.show(table,stream_type='update')";
  InterpreterResult pyResult = interpreter.interpret(pyCode, pyContext);
  // The captured interpreter output doubles as the failure message.
  assertEquals(new String(pyContext.out.toByteArray()),
      InterpreterResult.Code.SUCCESS, pyResult.code());

  List<InterpreterResultMessage> messages = pyContext.out.toInterpreterResultMessage();
  InterpreterResultMessage firstMessage = messages.get(0);
  assertEquals(InterpreterResult.Type.TABLE, firstMessage.getType());
  assertTrue(messages.toString(), firstMessage.getData().contains("url\tpv\n"));
}
/**
 * Runs {@code /init_stream.scala} on the Scala interpreter, then issues a
 * tumbling-window query with {@code stream_type='append'} from the Python
 * interpreter and checks that the first output message is a table whose data
 * contains "url\tpv\n".
 *
 * @param interpreter the Python-side interpreter that runs the query
 * @param flinkScalaInterpreter the Scala interpreter that runs the init script
 */
public static void testAppendStreamTableApi(Interpreter interpreter,
    Interpreter flinkScalaInterpreter) throws IOException, InterpreterException {
  // NOTE(review): the init script presumably registers the `log` table queried below.
  String initScript =
      IOUtils.toString(IPyFlinkInterpreterTest.class.getResource("/init_stream.scala"));
  InterpreterContext scalaContext = createInterpreterContext();
  InterpreterResult scalaResult = flinkScalaInterpreter.interpret(initScript, scalaContext);
  assertEquals(InterpreterResult.Code.SUCCESS, scalaResult.code());

  InterpreterContext pyContext = createInterpreterContext();
  String pyCode = "table = st_env.sql_query(\"select TUMBLE_START(rowtime, INTERVAL '5' SECOND) as " +
      "start_time, url, count(1) as pv from log group by " +
      "TUMBLE(rowtime, INTERVAL '5' SECOND), url\")\nz.show(table,stream_type='append')";
  InterpreterResult pyResult = interpreter.interpret(pyCode, pyContext);
  // The captured interpreter output doubles as the failure message.
  assertEquals(new String(pyContext.out.toByteArray()),
      InterpreterResult.Code.SUCCESS, pyResult.code());

  List<InterpreterResultMessage> messages = pyContext.out.toInterpreterResultMessage();
  InterpreterResultMessage firstMessage = messages.get(0);
  assertEquals(InterpreterResult.Type.TABLE, firstMessage.getType());
  assertTrue(messages.toString(), firstMessage.getData().contains("url\tpv\n"));
}
private static InterpreterContext createInterpreterContext() {
return InterpreterContext.builder()
.setNoteId("noteId")
.setParagraphId("paragraphId")
.setIntpEventClient(mockRemoteEventClient)
.setInterpreterOut(new InterpreterOutput(null))
.setIntpEventClient(mock(RemoteInterpreterEventClient.class))
.build();
}

View file

@ -19,16 +19,21 @@ package org.apache.zeppelin.flink;
import com.google.common.io.Files;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessageOutput;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.apache.zeppelin.python.PythonInterpreterTest;
import org.junit.Test;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Properties;
@ -40,6 +45,16 @@ public class PyFlinkInterpreterTest extends PythonInterpreterTest {
private RemoteInterpreterEventClient mockRemoteEventClient =
mock(RemoteInterpreterEventClient.class);
private Interpreter flinkScalaInterpreter;
private Interpreter streamSqlInterpreter;
private Interpreter batchSqlInterpreter;
// catch the streaming appendOutput in onAppend
protected volatile String appendOutput = "";
protected volatile InterpreterResult.Type appendOutputType;
// catch the flinkInterpreter appendOutput in onUpdate
protected InterpreterResultMessageOutput updatedOutput;
@Override
public void setUp() throws InterpreterException {
Properties properties = new Properties();
@ -52,28 +67,33 @@ public class PyFlinkInterpreterTest extends PythonInterpreterTest {
// create interpreter group
intpGroup = new InterpreterGroup();
intpGroup.put("note", new LinkedList<Interpreter>());
intpGroup.put("session_1", new LinkedList<>());
InterpreterContext context = InterpreterContext.builder()
.setInterpreterOut(new InterpreterOutput(null))
.setIntpEventClient(mockRemoteEventClient)
.build();
InterpreterContext.set(context);
LazyOpenInterpreter flinkInterpreter =
new LazyOpenInterpreter(new FlinkInterpreter(properties));
intpGroup.get("note").add(flinkInterpreter);
flinkInterpreter.setInterpreterGroup(intpGroup);
flinkScalaInterpreter = new LazyOpenInterpreter(new FlinkInterpreter(properties));
intpGroup.get("session_1").add(flinkScalaInterpreter);
flinkScalaInterpreter.setInterpreterGroup(intpGroup);
LazyOpenInterpreter iPyFlinkInterpreter =
new LazyOpenInterpreter(new IPyFlinkInterpreter(properties));
intpGroup.get("note").add(iPyFlinkInterpreter);
intpGroup.get("session_1").add(iPyFlinkInterpreter);
iPyFlinkInterpreter.setInterpreterGroup(intpGroup);
interpreter = new LazyOpenInterpreter(new PyFlinkInterpreter(properties));
intpGroup.get("note").add(interpreter);
intpGroup.get("session_1").add(interpreter);
interpreter.setInterpreterGroup(intpGroup);
streamSqlInterpreter = new LazyOpenInterpreter(new FlinkStreamSqlInterpreter(properties));
batchSqlInterpreter = new LazyOpenInterpreter(new FlinkBatchSqlInterpreter(properties));
intpGroup.get("session_1").add(streamSqlInterpreter);
intpGroup.get("session_1").add(batchSqlInterpreter);
streamSqlInterpreter.setInterpreterGroup(intpGroup);
batchSqlInterpreter.setInterpreterGroup(intpGroup);
interpreter.open();
}
@ -85,19 +105,59 @@ public class PyFlinkInterpreterTest extends PythonInterpreterTest {
}
@Test
public void testPyFlink() throws InterpreterException {
IPyFlinkInterpreterTest.testBatchPyFlink(interpreter);
IPyFlinkInterpreterTest.testStreamPyFlink(interpreter);
public void testBatchPyFlink() throws InterpreterException, IOException {
IPyFlinkInterpreterTest.testBatchPyFlink(interpreter, flinkScalaInterpreter);
}
private static InterpreterContext createInterpreterContext(
RemoteInterpreterEventClient mockRemoteEventClient) {
return InterpreterContext.builder()
.setNoteId("noteId")
.setParagraphId("paragraphId")
.setIntpEventClient(mockRemoteEventClient)
// Delegates to IPyFlinkInterpreterTest.testStreamPyFlink with this class's
// `interpreter` and `flinkScalaInterpreter`.
@Test
public void testStreamIPyFlink() throws InterpreterException, IOException {
IPyFlinkInterpreterTest.testStreamPyFlink(interpreter, flinkScalaInterpreter);
}
// Delegates to IPyFlinkInterpreterTest.testSingleStreamTableApi with this class's
// `interpreter` and `flinkScalaInterpreter`.
@Test
public void testSingleStreamTableApi() throws InterpreterException, IOException {
IPyFlinkInterpreterTest.testSingleStreamTableApi(interpreter, flinkScalaInterpreter);
}
// Delegates to IPyFlinkInterpreterTest.testUpdateStreamTableApi with this class's
// `interpreter` and `flinkScalaInterpreter`.
@Test
public void testUpdateStreamTableApi() throws InterpreterException, IOException {
IPyFlinkInterpreterTest.testUpdateStreamTableApi(interpreter, flinkScalaInterpreter);
}
// Delegates to IPyFlinkInterpreterTest.testAppendStreamTableApi with this class's
// `interpreter` and `flinkScalaInterpreter`.
@Test
public void testAppendStreamTableApi() throws InterpreterException, IOException {
IPyFlinkInterpreterTest.testAppendStreamTableApi(interpreter, flinkScalaInterpreter);
}
protected InterpreterContext getInterpreterContext() {
appendOutput = "";
InterpreterContext context = InterpreterContext.builder()
.setInterpreterOut(new InterpreterOutput(null))
.setAngularObjectRegistry(new AngularObjectRegistry("flink", null))
.setIntpEventClient(mockRemoteEventClient)
.build();
}
context.out = new InterpreterOutput(
new InterpreterOutputListener() {
@Override
public void onUpdateAll(InterpreterOutput out) {
System.out.println();
}
@Override
public void onAppend(int index, InterpreterResultMessageOutput out, byte[] line) {
try {
appendOutputType = out.toInterpreterResultMessage().getType();
appendOutput = out.toInterpreterResultMessage().getData();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void onUpdate(int index, InterpreterResultMessageOutput out) {
updatedOutput = out;
}
});
return context;
}
}

View file

@ -0,0 +1,472 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.flink;
import com.google.common.io.Files;
import com.klarna.hiverunner.HiveShell;
import com.klarna.hiverunner.annotations.HiveSQL;
import org.apache.commons.io.IOUtils;
import org.apache.flink.connectors.hive.FlinkStandaloneHiveRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterEventClient;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import java.util.Properties;
import static org.apache.zeppelin.interpreter.InterpreterResult.Code;
import static org.apache.zeppelin.interpreter.InterpreterResult.Type;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
@RunWith(FlinkStandaloneHiveRunner.class)
public abstract class SqlInterpreterTest {
private static final Logger LOGGER = LoggerFactory.getLogger(SqlInterpreterTest.class);
protected static final String[][] INPUT_DATA = {
{"1", "1.1", "hello world", "true"},
{"2", "2.3", "hello flink", "true"},
{"3", "3.2", "hello hadoop", "false"},
};
protected FlinkInterpreter flinkInterpreter;
protected IPyFlinkInterpreter iPyFlinkInterpreter;
protected PyFlinkInterpreter pyFlinkInterpreter;
protected FlinkSqlInterrpeter sqlInterpreter;
@HiveSQL(files = {})
protected static HiveShell hiveShell;
/**
 * Builds the properties used to construct every interpreter under test.
 *
 * <p>Besides the fixed Flink/Zeppelin settings, this writes the configuration of
 * the injected {@code hiveShell} to {@code hive-site.xml} inside a fresh temporary
 * directory and publishes that directory as {@code HIVE_CONF_DIR}.
 *
 * @return the interpreter properties
 * @throws IOException if {@code hive-site.xml} cannot be written
 */
protected Properties getFlinkProperties() throws IOException {
  Properties p = new Properties();
  p.setProperty("zeppelin.flink.enableHive", "true");
  p.setProperty("taskmanager.managed.memory.size", "32");
  p.setProperty("zeppelin.flink.hive.version", "2.3.4");
  p.setProperty("zeppelin.pyflink.useIPython", "false");
  File hiveConfDir = Files.createTempDir();
  // Close the writer explicitly: this guarantees the XML is flushed to disk before
  // the interpreters read it and releases the file handle after every test.
  try (FileWriter hiveSiteWriter = new FileWriter(new File(hiveConfDir, "hive-site.xml"))) {
    hiveShell.getHiveConf().writeXml(hiveSiteWriter);
  }
  p.setProperty("HIVE_CONF_DIR", hiveConfDir.getAbsolutePath());
  return p;
}
/**
 * Creates the Flink, SQL, IPyFlink and PyFlink interpreters, registers them in a
 * single interpreter group under session "session_1", opens them, and recreates
 * the {@code test_db} database that every test starts in.
 */
@Before
public void setUp() throws InterpreterException, IOException {
// All four interpreters are constructed from the same Properties instance.
Properties p = getFlinkProperties();
flinkInterpreter = new FlinkInterpreter(p);
iPyFlinkInterpreter = new IPyFlinkInterpreter(p);
pyFlinkInterpreter = new PyFlinkInterpreter(p);
// The concrete SQL interpreter is supplied by the subclass.
sqlInterpreter = createFlinkSqlInterpreter(p);
InterpreterGroup intpGroup = new InterpreterGroup();
flinkInterpreter.setInterpreterGroup(intpGroup);
sqlInterpreter.setInterpreterGroup(intpGroup);
iPyFlinkInterpreter.setInterpreterGroup(intpGroup);
pyFlinkInterpreter.setInterpreterGroup(intpGroup);
intpGroup.addInterpreterToSession(flinkInterpreter, "session_1");
intpGroup.addInterpreterToSession(sqlInterpreter, "session_1");
intpGroup.addInterpreterToSession(iPyFlinkInterpreter, "session_1");
intpGroup.addInterpreterToSession(pyFlinkInterpreter, "session_1");
// A context is installed via InterpreterContext.set before any interpreter is opened.
InterpreterContext.set(getInterpreterContext());
flinkInterpreter.open();
sqlInterpreter.open();
iPyFlinkInterpreter.open();
pyFlinkInterpreter.open();
// Start every test from an empty test_db: CASCADE also removes anything left in
// it by a previous test.
hiveShell.execute("drop database if exists test_db CASCADE");
hiveShell.execute("create database test_db");
hiveShell.execute("use test_db");
// Switch the SQL interpreter to test_db as well, so unqualified names in the
// tests resolve there.
InterpreterResult result = sqlInterpreter.interpret("use test_db",
getInterpreterContext());
assertEquals(Code.SUCCESS, result.code());
}
/**
 * Closes each interpreter field that is non-null, in the order Flink, SQL,
 * IPyFlink, PyFlink.
 */
@After
public void tearDown() throws InterpreterException {
// NOTE(review): the null checks presumably cover a setUp that failed part-way,
// leaving some fields unassigned.
if (flinkInterpreter != null) {
flinkInterpreter.close();
}
if (sqlInterpreter != null) {
sqlInterpreter.close();
}
if (iPyFlinkInterpreter != null) {
iPyFlinkInterpreter.close();
}
if (pyFlinkInterpreter != null) {
pyFlinkInterpreter.close();
}
}
/**
 * Supplies the concrete SQL interpreter under test; setUp calls this with the
 * properties returned by getFlinkProperties().
 *
 * @param properties the interpreter properties
 * @return the SQL interpreter that the tests in this class exercise
 */
protected abstract FlinkSqlInterrpeter createFlinkSqlInterpreter(Properties properties);
/**
 * Exercises database-level statements: listing, creating, switching and dropping
 * databases, including the refusal to drop a database that still contains a table.
 */
@Test
public void testDatabases() throws InterpreterException, IOException {
  // Initially only `default` and `test_db` are listed.
  InterpreterContext ctx = getInterpreterContext();
  InterpreterResult outcome = sqlInterpreter.interpret("show databases", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  List<InterpreterResultMessage> messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("database\ndefault\ntest_db\n", messages.get(0).getData());

  // Creating a database reports a plain-text confirmation.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("create database db1", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TEXT, messages.get(0).getType());
  assertEquals("Database has been created.\n", messages.get(0).getData());

  // Switch to the new database; it has no tables yet.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("use db1", ctx);
  assertEquals(Code.SUCCESS, outcome.code());

  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show tables", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("table\n", messages.get(0).getData());

  // A table created while db1 is current is listed there ...
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("CREATE TABLE source (msg INT)", ctx);
  assertEquals(Code.SUCCESS, outcome.code());

  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show tables", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("table\nsource\n", messages.get(0).getData());

  // ... but not after switching to the default database.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("use default", ctx);
  assertEquals(Code.SUCCESS, outcome.code());

  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show tables", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("table\n", messages.get(0).getData());

  // Dropping a database that still contains a table must fail.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("drop database db1", ctx);
  assertEquals(Code.ERROR, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertTrue(messages.get(0).getData(),
      messages.get(0).getData().contains("is not empty"));

  // Drop the table first, then the database.
  outcome = sqlInterpreter.interpret("drop table db1.source",
      getInterpreterContext());
  assertEquals(Code.SUCCESS, outcome.code());
  outcome = sqlInterpreter.interpret("drop database db1",
      getInterpreterContext());
  assertEquals(Code.SUCCESS, outcome.code());

  // The dropped database is no longer listed.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show databases", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("database\ndefault\ntest_db\n", messages.get(0).getData());
}
/**
 * Exercises table-level statements: create, describe and drop, plus the error
 * paths for describing or dropping a table that does not exist.
 */
@Test
public void testTable() throws InterpreterException, IOException {
  // create table
  InterpreterContext context = getInterpreterContext();
  InterpreterResult result = sqlInterpreter.interpret(
      "CREATE TABLE source_table (int_col INT, double_col double, " +
          "varchar_col varchar, bool_col boolean)",
      context);
  assertEquals(Code.SUCCESS, result.code());
  List<InterpreterResultMessage> resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals(Type.TEXT, resultMessages.get(0).getType());
  assertEquals("Table has been created.\n", resultMessages.get(0).getData());

  // describe table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("describe source_table", context);
  assertEquals(Code.SUCCESS, result.code());
  // Refresh the messages BEFORE asserting on their count; asserting first would
  // only re-check the output of the previous CREATE TABLE statement.
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals(Type.TABLE, resultMessages.get(0).getType());
  assertEquals("Column\tType\n" +
          "int_col\tINT\n" +
          "double_col\tDOUBLE\n" +
          "varchar_col\tSTRING\n" +
          "bool_col\tBOOLEAN\n"
      , resultMessages.get(0).getData());

  // describe unknown table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("describe unknown_table", context);
  assertEquals(Code.ERROR, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertTrue(resultMessages.toString(),
      resultMessages.get(0).getData().contains("Table `unknown_table` was not found."));

  // drop unknown table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("drop table unknown_table", context);
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(Code.ERROR, result.code());
  assertEquals(1, resultMessages.size());
  assertTrue(resultMessages.toString(),
      resultMessages.get(0).getData().contains("does not exist in"));

  // drop table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("drop table source_table", context);
  assertEquals(Code.SUCCESS, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertEquals("Table has been dropped.\n", resultMessages.get(0).getData());

  // describe the dropped table
  context = getInterpreterContext();
  result = sqlInterpreter.interpret("describe source_table", context);
  assertEquals(Code.ERROR, result.code());
  resultMessages = context.out.toInterpreterResultMessage();
  assertEquals(1, resultMessages.size());
  assertTrue(resultMessages.get(0).getData(),
      resultMessages.get(0).getData().contains("Table `source_table` was not found"));
}
/**
 * Exercises view statements: creating a view over a table, rejecting a duplicate
 * view, listing it alongside the table, and dropping it.
 */
@Test
public void testView() throws InterpreterException, IOException {
  // Create the table that the view selects from.
  InterpreterContext ctx = getInterpreterContext();
  InterpreterResult outcome = sqlInterpreter.interpret(
      "CREATE TABLE source_table (int_col INT, double_col double, " +
          "varchar_col varchar, bool_col boolean)" +
          " WITH (\n" +
          "'format.field-delimiter'='\\n',\n" +
          "'connector.type'='filesystem',\n" +
          "'format.derive-schema'='true',\n" +
          "'connector.path'='hdfs:///tmp/bank.csv',\n" +
          "'format.type'='csv'\n" +
          ");",
      ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  List<InterpreterResultMessage> messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals(Type.TEXT, messages.get(0).getType());
  assertEquals("Table has been created.\n", messages.get(0).getData());

  // Create the view.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("create view my_view as select int_col from source_table", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals(Type.TEXT, messages.get(0).getType());
  assertEquals("View has been created.\n", messages.get(0).getData());

  // Creating the same view a second time is an error.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("create view my_view as select int_col from source_table", ctx);
  assertEquals(Code.ERROR, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals(Type.TEXT, messages.get(0).getType());
  assertTrue(messages.get(0).getData(), messages.get(0).getData().contains("already exists"));

  // "show tables" lists the view together with the table.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("show tables", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(Type.TABLE, messages.get(0).getType());
  assertEquals("table\nmy_view\nsource_table\n", messages.get(0).getData());

  // Drop the view.
  ctx = getInterpreterContext();
  outcome = sqlInterpreter.interpret("drop view my_view", ctx);
  assertEquals(Code.SUCCESS, outcome.code());
  messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  assertEquals("View has been dropped.\n", messages.get(0).getData());
}
/**
 * Checks that an unparseable statement yields an ERROR result with a single text
 * message that echoes the statement and lists the available commands.
 */
@Test
public void testInvalidSql() throws InterpreterException, IOException {
  InterpreterContext ctx = getInterpreterContext();
  InterpreterResult outcome = sqlInterpreter.interpret("Invalid sql", ctx);
  assertEquals(Code.ERROR, outcome.code());

  List<InterpreterResultMessage> messages = ctx.out.toInterpreterResultMessage();
  assertEquals(1, messages.size());
  InterpreterResultMessage onlyMessage = messages.get(0);
  assertEquals(Type.TEXT, onlyMessage.getType());
  // The message text is also the failure description, so a mismatch shows what
  // was actually returned.
  assertTrue(onlyMessage.getData(),
      onlyMessage.getData().contains("Invalid Sql statement: Invalid sql"));
  assertTrue(onlyMessage.getData(),
      onlyMessage.getData().contains("The following commands are available"));
}
/**
 * Creates a fresh interpreter context for a single statement: paragraph id
 * "paragraphId", a new listener-less {@link InterpreterOutput}, a "flink" angular
 * object registry and a mocked event client.
 *
 * @return a new context; each call returns an independent output buffer
 */
protected InterpreterContext getInterpreterContext() {
  // The output is set exactly once; a second setInterpreterOut call would only
  // replace this instance with another identical one.
  return InterpreterContext.builder()
      .setParagraphId("paragraphId")
      .setInterpreterOut(new InterpreterOutput(null))
      .setAngularObjectRegistry(new AngularObjectRegistry("flink", null))
      .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
      .build();
}
/**
 * Writes {@code data} verbatim to a new temporary file named
 * {@code zeppelin-flink-input*.csv}.
 *
 * @param data the file content
 * @return the newly created file
 * @throws IOException if the file cannot be created or written
 */
public static File createInputFile(String data) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".csv");
  // try-with-resources closes the stream on every path and, unlike a manual
  // finally block, does not let a failing close() hide the original exception.
  try (FileOutputStream out = new FileOutputStream(file)) {
    IOUtils.write(data, out);
  }
  return file;
}
/**
 * Writes {@code data} as comma-separated rows to a new temporary file named
 * {@code zeppelin-flink-input*.csv}.
 *
 * <p>Cells are joined with "," and every row, including the last one, is
 * terminated with "\n". An empty {@code data} array produces an empty file.
 *
 * @param data the rows to write; each inner array is one line
 * @return the newly created file
 * @throws IOException if the file cannot be created or written
 */
public static File createInputFile(String[][] data) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".csv");
  try (PrintWriter writer = new PrintWriter(new FileOutputStream(file))) {
    for (String[] row : data) {
      // Each row is joined using its own length, so an empty array no longer
      // fails on data[0]. A null cell is written as "null", as print(String) does.
      writer.print(String.join(",", row));
      // TODO(zjffdu) The trailing line separator after the last row is kept on
      // purpose: per the original note it mirrors Flink's CSV sink, which always
      // ends with a line separator even though it is not necessary.
      writer.print("\n");
    }
    // PrintWriter swallows IOExceptions; surface them instead of silently
    // returning a truncated file.
    if (writer.checkError()) {
      throw new IOException("Failed to write input file " + file.getAbsolutePath());
    }
  }
  return file;
}
/**
 * Writes {@code values} to a new snappy-compressed ORC file with the schema
 * {@code struct<msg:int>}.
 *
 * @param values the values for the {@code msg} column, one row per element
 * @return the ORC file that was written
 * @throws IOException if the file cannot be created or written
 */
public File createORCFile(int[] values) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".orc");
  // createTempFile leaves an empty file behind; it is removed so that only the
  // unique path is reused. NOTE(review): presumably the ORC writer refuses to
  // overwrite an existing file - confirm against OrcFile.WriterOptions.
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.set("orc.compress", "snappy");
  TypeDescription schema = TypeDescription.fromString("struct<msg:int>");
  Writer writer = OrcFile.createWriter(path,
      OrcFile.writerOptions(conf)
          .setSchema(schema));
  // try/finally so the writer is closed even when writing a batch fails.
  try {
    VectorizedRowBatch batch = schema.createRowBatch();
    LongColumnVector x = (LongColumnVector) batch.cols[0];
    for (int i = 0; i < values.length; ++i) {
      int row = batch.size++;
      x.vector[row] = values[i];
      // If the batch is full, write it out and start over.
      if (batch.size == batch.getMaxSize()) {
        writer.addRowBatch(batch);
        batch.reset();
      }
    }
    // Write whatever is left in the last, partially filled batch.
    if (batch.size != 0) {
      writer.addRowBatch(batch);
      batch.reset();
    }
  } finally {
    writer.close();
  }
  return file;
}
/**
 * Writes {@code values} to a new uncompressed Parquet file whose schema has a
 * single required {@code int32_field} column.
 *
 * @param values the values for {@code int32_field}, one record per element
 * @param version the Parquet writer version to use
 * @return the Parquet file that was written
 * @throws IOException if the file cannot be created or written
 */
public File createParquetFile(int[] values,
    ParquetProperties.WriterVersion version) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".par");
  // createTempFile leaves an empty file behind; it is removed so that only the
  // unique path is reused by the Parquet writer below.
  file.delete();
  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  MessageType schema = MessageTypeParser.parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  // try-with-resources closes the writer even when a write fails part-way.
  try (ParquetWriter<Group> writer = new ParquetWriter<Group>(
      path,
      new GroupWriteSupport(),
      CompressionCodecName.UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf)) {
    for (int i = 0; i < values.length; i++) {
      writer.write(f.newGroup()
          .append("int32_field", values[i]));
    }
  }
  return file;
}
}

View file

@ -14,7 +14,7 @@ val data = senv.addSource(new SourceFunction[(Long, String)] with ListCheckpoint
var count: Long = 0
// startTime is 2018/1/1
var startTime: Long = new java.util.Date(2018 - 1900,0,1).getTime
var sleepInterval = 1000
var sleepInterval = 100
override def run(ctx: SourceFunction.SourceContext[(Long, String)]): Unit = {
val lock = ctx.getCheckpointLock

View file

@ -21,3 +21,6 @@ log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
log4j.logger.org.apache.hive=WARN
log4j.logger.org.apache.flink=WARN

View file

@ -19,9 +19,9 @@ name = HiveLog4j2
packages = org.apache.hadoop.hive.ql.log
# list of properties
property.hive.log.level = INFO
property.hive.log.level = WARN
property.hive.root.logger = console
property.hive.perflogger.log.level = INFO
property.hive.perflogger.log.level = WARN
# list of all appenders
appenders = console
@ -52,13 +52,13 @@ logger.JPOX.name = JPOX
logger.JPOX.level = ERROR
logger.flink.name = org.apache.zeppelin.flink
logger.flink.level = DEBUG
logger.flink.level = INFO
logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
logger.PerfLogger.level = ${sys:hive.perflogger.log.level}
logger.PerfLogger.level = WARN
# root logger
rootLogger.level = ${sys:hive.log.level}
rootLogger.appenderRefs = root
rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
rootLogger.appenderRef.root.ref = WARN

View file

@ -14,7 +14,7 @@
*/
package org.apache.zeppelin.geode;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.geode.cache.client.ClientCache;
import org.apache.geode.cache.client.ClientCacheFactory;
import org.apache.geode.cache.query.QueryService;

View file

@ -18,7 +18,6 @@ package org.apache.zeppelin.groovy;
import groovy.lang.Closure;
import groovy.xml.MarkupBuilder;
import org.apache.thrift.TException;
import org.apache.zeppelin.annotation.ZeppelinApi;
import org.apache.zeppelin.display.AngularObject;
import org.apache.zeppelin.display.AngularObjectRegistry;
@ -240,7 +239,7 @@ public class GObject extends groovy.lang.GroovyObjectSupport {
}
@SuppressWarnings("unchecked")
public void angularBind(String name, Object o, String noteId) throws TException {
public void angularBind(String name, Object o, String noteId) {
z.angularBind(name, o, noteId);
}
@ -251,7 +250,7 @@ public class GObject extends groovy.lang.GroovyObjectSupport {
* @param name name of the variable
* @param o value
*/
public void angularBind(String name, Object o) throws TException {
public void angularBind(String name, Object o) {
angularBind(name, o, interpreterContext.getNoteId());
}

View file

@ -17,7 +17,7 @@
package org.apache.zeppelin.groovy;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import java.util.List;
@ -26,7 +26,7 @@ import java.util.Map;
/**
* ZeppelinContext for Groovy
*/
public class GroovyZeppelinContext extends BaseZeppelinContext {
public class GroovyZeppelinContext extends ZeppelinContext {
public GroovyZeppelinContext(InterpreterHookRegistry hooks, int maxResult) {
super(hooks, maxResult);

View file

@ -18,7 +18,7 @@
package org.apache.zeppelin.helium;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import java.util.List;
@ -27,7 +27,7 @@ import java.util.Map;
/**
* ZeppelinContext for DevInterpreter
*/
public class DevZeppelinContext extends BaseZeppelinContext {
public class DevZeppelinContext extends ZeppelinContext {
public DevZeppelinContext(InterpreterHookRegistry hooks, int maxResult) {
super(hooks, maxResult);
}

View file

@ -92,7 +92,6 @@
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>${plugin.scala.version}</version>
<configuration>
<scalaVersion>${ignite.scala.version}</scalaVersion>
</configuration>

View file

@ -235,7 +235,6 @@
<hadoop.common.version>2.7.2</hadoop.common.version>
<h2.version>1.4.190</h2.version>
<commons.dbcp2.version>2.0.1</commons.dbcp2.version>
<commons-lang3.version>3.7</commons-lang3.version>
<!--test library versions-->
<mockrunner.jdbc.version>1.0.8</mockrunner.jdbc.version>
@ -259,7 +258,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang3.version}</version>
</dependency>
<dependency>

View file

@ -32,8 +32,9 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.alias.CredentialProvider;
import org.apache.hadoop.security.alias.CredentialProviderFactory;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.zeppelin.interpreter.util.SqlSplitter;
import org.apache.zeppelin.tabledata.TableDataUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -140,7 +141,16 @@ public class JDBCInterpreter extends KerberosInterpreter {
private static final String DBCP_STRING = "jdbc:apache:commons:dbcp:";
private static final String MAX_ROWS_KEY = "zeppelin.jdbc.maxRows";
private final HashMap<String, Properties> basePropretiesMap;
private static final Set<String> PRESTO_PROPERTIES = new HashSet<>(Arrays.asList(
"user", "password",
"socksProxy", "httpProxy", "clientTags", "applicationNamePrefix", "accessToken",
"SSL", "SSLKeyStorePath", "SSLKeyStorePassword", "SSLTrustStorePath",
"SSLTrustStorePassword", "KerberosRemoteServiceName", "KerberosPrincipal",
"KerberosUseCanonicalHostname", "KerberosServicePrincipalPattern",
"KerberosConfigPath", "KerberosKeytabPath", "KerberosCredentialCachePath",
"extraCredentials", "roles", "sessionProperties"));
private final HashMap<String, Properties> basePropertiesMap;
private final HashMap<String, JDBCUserConfigurations> jdbcUserConfigurationsMap;
private final HashMap<String, SqlCompleter> sqlCompletersMap;
@ -152,13 +162,13 @@ public class JDBCInterpreter extends KerberosInterpreter {
public JDBCInterpreter(Properties property) {
super(property);
jdbcUserConfigurationsMap = new HashMap<>();
basePropretiesMap = new HashMap<>();
basePropertiesMap = new HashMap<>();
sqlCompletersMap = new HashMap<>();
maxLineResults = MAX_LINE_DEFAULT;
}
@Override
public BaseZeppelinContext getZeppelinContext() {
public ZeppelinContext getZeppelinContext() {
return null;
}
@ -179,7 +189,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
public HashMap<String, Properties> getPropertiesMap() {
return basePropretiesMap;
return basePropertiesMap;
}
@Override
@ -192,20 +202,20 @@ public class JDBCInterpreter extends KerberosInterpreter {
logger.debug("key: {}, value: {}", keyValue[0], keyValue[1]);
Properties prefixProperties;
if (basePropretiesMap.containsKey(keyValue[0])) {
prefixProperties = basePropretiesMap.get(keyValue[0]);
if (basePropertiesMap.containsKey(keyValue[0])) {
prefixProperties = basePropertiesMap.get(keyValue[0]);
} else {
prefixProperties = new Properties();
basePropretiesMap.put(keyValue[0].trim(), prefixProperties);
basePropertiesMap.put(keyValue[0].trim(), prefixProperties);
}
prefixProperties.put(keyValue[1].trim(), getProperty(propertyKey));
}
}
Set<String> removeKeySet = new HashSet<>();
for (String key : basePropretiesMap.keySet()) {
for (String key : basePropertiesMap.keySet()) {
if (!COMMON_KEY.equals(key)) {
Properties properties = basePropretiesMap.get(key);
Properties properties = basePropertiesMap.get(key);
if (!properties.containsKey(DRIVER_KEY) || !properties.containsKey(URL_KEY)) {
logger.error("{} will be ignored. {}.{} and {}.{} is mandatory.",
key, DRIVER_KEY, key, key, URL_KEY);
@ -215,9 +225,9 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
for (String key : removeKeySet) {
basePropretiesMap.remove(key);
basePropertiesMap.remove(key);
}
logger.debug("JDBC PropretiesMap: {}", basePropretiesMap);
logger.debug("JDBC PropretiesMap: {}", basePropertiesMap);
setMaxLineResults();
setMaxRows();
@ -237,9 +247,9 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
private void setMaxLineResults() {
if (basePropretiesMap.containsKey(COMMON_KEY) &&
basePropretiesMap.get(COMMON_KEY).containsKey(MAX_LINE_KEY)) {
maxLineResults = Integer.valueOf(basePropretiesMap.get(COMMON_KEY).getProperty(MAX_LINE_KEY));
if (basePropertiesMap.containsKey(COMMON_KEY) &&
basePropertiesMap.get(COMMON_KEY).containsKey(MAX_LINE_KEY)) {
maxLineResults = Integer.valueOf(basePropertiesMap.get(COMMON_KEY).getProperty(MAX_LINE_KEY));
}
}
@ -345,9 +355,9 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
private boolean existAccountInBaseProperty(String propertyKey) {
return basePropretiesMap.get(propertyKey).containsKey(USER_KEY) &&
!isEmpty((String) basePropretiesMap.get(propertyKey).get(USER_KEY)) &&
basePropretiesMap.get(propertyKey).containsKey(PASSWORD_KEY);
return basePropertiesMap.get(propertyKey).containsKey(USER_KEY) &&
!isEmpty((String) basePropertiesMap.get(propertyKey).get(USER_KEY)) &&
basePropertiesMap.get(propertyKey).containsKey(PASSWORD_KEY);
}
private UsernamePassword getUsernamePassword(InterpreterContext interpreterContext,
@ -383,14 +393,14 @@ public class JDBCInterpreter extends KerberosInterpreter {
String user = interpreterContext.getAuthenticationInfo().getUser();
JDBCUserConfigurations jdbcUserConfigurations = getJDBCConfiguration(user);
if (basePropretiesMap.get(propertyKey).containsKey(USER_KEY) &&
!basePropretiesMap.get(propertyKey).getProperty(USER_KEY).isEmpty()) {
String password = getPassword(basePropretiesMap.get(propertyKey));
if (basePropertiesMap.get(propertyKey).containsKey(USER_KEY) &&
!basePropertiesMap.get(propertyKey).getProperty(USER_KEY).isEmpty()) {
String password = getPassword(basePropertiesMap.get(propertyKey));
if (!isEmpty(password)) {
basePropretiesMap.get(propertyKey).setProperty(PASSWORD_KEY, password);
basePropertiesMap.get(propertyKey).setProperty(PASSWORD_KEY, password);
}
}
jdbcUserConfigurations.setPropertyMap(propertyKey, basePropretiesMap.get(propertyKey));
jdbcUserConfigurations.setPropertyMap(propertyKey, basePropertiesMap.get(propertyKey));
if (existAccountInBaseProperty(propertyKey)) {
return;
}
@ -406,7 +416,19 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
private void createConnectionPool(String url, String user, String propertyKey,
Properties properties) throws SQLException, ClassNotFoundException {
Properties properties) throws SQLException, ClassNotFoundException, IOException {
String driverClass = properties.getProperty(DRIVER_KEY);
if (driverClass != null && (driverClass.equals("com.facebook.presto.jdbc.PrestoDriver")
|| driverClass.equals("io.prestosql.jdbc.PrestoDriver"))) {
// Only add valid properties otherwise presto won't work.
for (String key : properties.stringPropertyNames()) {
if (!PRESTO_PROPERTIES.contains(key)) {
properties.remove(key);
}
}
}
ConnectionFactory connectionFactory =
new DriverManagerConnectionFactory(url, properties);
@ -419,14 +441,14 @@ public class JDBCInterpreter extends KerberosInterpreter {
ObjectPool connectionPool = new GenericObjectPool(poolableConnectionFactory);
poolableConnectionFactory.setPool(connectionPool);
Class.forName(properties.getProperty(DRIVER_KEY));
Class.forName(driverClass);
PoolingDriver driver = new PoolingDriver();
driver.registerPool(propertyKey + user, connectionPool);
getJDBCConfiguration(user).saveDBDriverPool(propertyKey, driver);
}
private Connection getConnectionFromPool(String url, String user, String propertyKey,
Properties properties) throws SQLException, ClassNotFoundException {
Properties properties) throws SQLException, ClassNotFoundException, IOException {
String jdbcDriver = getJDBCDriverName(user, propertyKey);
if (!getJDBCConfiguration(user).isConnectionInDBDriverPool(propertyKey)) {
@ -439,7 +461,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
throws ClassNotFoundException, SQLException, InterpreterException, IOException {
final String user = interpreterContext.getAuthenticationInfo().getUser();
Connection connection;
if (propertyKey == null || basePropretiesMap.get(propertyKey) == null) {
if (propertyKey == null || basePropertiesMap.get(propertyKey) == null) {
return null;
}
@ -464,7 +486,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
getProperty("zeppelin.jdbc.auth.kerberos.proxy.enable"))) {
connection = getConnectionFromPool(connectionUrl, user, propertyKey, properties);
} else {
if (basePropretiesMap.get(propertyKey).containsKey("proxy.user.property")) {
if (basePropertiesMap.get(propertyKey).containsKey("proxy.user.property")) {
connection = getConnectionFromPool(connectionUrl, user, propertyKey, properties);
} else {
UserGroupInformation ugi = null;
@ -504,7 +526,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
StringBuilder connectionUrl = new StringBuilder(url);
if (user != null && !user.equals("anonymous") &&
basePropretiesMap.get(propertyKey).containsKey("proxy.user.property")) {
basePropertiesMap.get(propertyKey).containsKey("proxy.user.property")) {
Integer lastIndexOfUrl = connectionUrl.indexOf("?");
if (lastIndexOfUrl == -1) {
@ -512,9 +534,9 @@ public class JDBCInterpreter extends KerberosInterpreter {
}
logger.info("Using proxy user as :" + user);
logger.info("Using proxy property for user as :" +
basePropretiesMap.get(propertyKey).getProperty("proxy.user.property"));
basePropertiesMap.get(propertyKey).getProperty("proxy.user.property"));
connectionUrl.insert(lastIndexOfUrl, ";" +
basePropretiesMap.get(propertyKey).getProperty("proxy.user.property") + "=" + user + ";");
basePropertiesMap.get(propertyKey).getProperty("proxy.user.property") + "=" + user + ";");
} else if (user != null && !user.equals("anonymous") && url.contains("hive")) {
logger.warn("User impersonation for hive has changed please refer: http://zeppelin.apache" +
".org/docs/latest/interpreter/jdbc.html#apache-hive");
@ -566,9 +588,11 @@ public class JDBCInterpreter extends KerberosInterpreter {
msg.append(TAB);
}
if (StringUtils.isNotEmpty(md.getColumnLabel(i))) {
msg.append(removeTablePrefix(replaceReservedChars(md.getColumnLabel(i))));
msg.append(removeTablePrefix(replaceReservedChars(
TableDataUtils.normalizeColumn(md.getColumnLabel(i)))));
} else {
msg.append(removeTablePrefix(replaceReservedChars(md.getColumnName(i))));
msg.append(removeTablePrefix(replaceReservedChars(
TableDataUtils.normalizeColumn(md.getColumnName(i)))));
}
}
msg.append(NEWLINE);
@ -588,7 +612,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
} else {
resultValue = resultSet.getString(i);
}
msg.append(replaceReservedChars(resultValue));
msg.append(replaceReservedChars(TableDataUtils.normalizeColumn(resultValue)));
if (i != md.getColumnCount()) {
msg.append(TAB);
}
@ -605,7 +629,7 @@ public class JDBCInterpreter extends KerberosInterpreter {
public InterpreterResult executePrecode(InterpreterContext interpreterContext) {
InterpreterResult interpreterResult = null;
for (String propertyKey : basePropretiesMap.keySet()) {
for (String propertyKey : basePropertiesMap.keySet()) {
String precode = getProperty(String.format("%s.precode", propertyKey));
if (StringUtils.isNotBlank(precode)) {
interpreterResult = executeSql(propertyKey, precode, interpreterContext);

View file

@ -21,7 +21,7 @@ import static org.junit.Assert.assertTrue;
import com.google.common.base.Joiner;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

View file

@ -28,7 +28,7 @@ data:
# The default value is 'local.zeppelin-project.org', which points to 127.0.0.1, so `kubectl port-forward zeppelin-server` lets you connect through localhost.
# If you have an ingress controller configured to connect to the `zeppelin-server` service and a domain name for it (with a wildcard subdomain pointing to the same address), you can replace the serviceDomain field with your own domain.
serviceDomain: local.zeppelin-project.org:8080
sparkContainerImage: spark:2.4.0
sparkContainerImage: spark:2.4.5
nginx.conf: |
daemon off;
worker_processes auto;
@ -118,8 +118,10 @@ spec:
configMapKeyRef:
name: zeppelin-server-conf
key: serviceDomain
- name: MASTER # default value of master property for spark interpreter.
- name: MASTER # default value of 'master' property for spark interpreter.
value: k8s://https://kubernetes.default.svc
- name: SPARK_HOME # default value of 'SPARK_HOME' property for spark interpreter.
value: /spark
# volumeMounts:
# - name: zeppelin-server-notebook-volume # configure this to persist notebook
# mountPath: /zeppelin/notebook

Some files were not shown because too many files have changed in this diff Show more