ZEPPELIN-3111. Refactor SparkInterpreter

This commit is contained in:
Jeff Zhang 2017-07-17 13:02:09 +08:00
parent e7e9e19cf1
commit aae4b092e8
70 changed files with 3363 additions and 1463 deletions

View file

@ -68,7 +68,7 @@ matrix:
dist: trusty
addons:
firefox: "31.0"
env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org.apache.zeppelin.spark.*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false"
env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org/apache/zeppelin/spark/*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.3
- jdk: "oraclejdk8"
@ -82,43 +82,43 @@ matrix:
dist: trusty
env: PYTHON="3" SCALA_VER="2.10" PROFILE="-Pscalding" BUILD_FLAG="install -DskipTests -DskipRat -Pr" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS=""
# Test spark module for 2.2.0 with scala 2.11, livy
# Test spark module for 2.2.0 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false"
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false"
# Test spark module for 2.1.0 with scala 2.11, livy
# Test spark module for 2.1.0 with scala 2.11
- jdk: "openjdk7"
dist: trusty
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false"
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false"
# Test spark module for 2.0.2 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.6.3 with scala 2.10
# Test spark module for 1.6.3 with scala 2.10
- jdk: "openjdk7"
dist: trusty
env: PYTHON="3" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: PYTHON="3" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.6.3 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test python/pyspark with python 2, livy 0.2
- sudo: required
dist: trusty
jdk: "openjdk7"
env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Plivy-0.2 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
# Test python/pyspark with python 3, livy 0.3
- sudo: required
dist: trusty
jdk: "openjdk7"
env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11 -Plivy-0.3" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
before_install:
# check files included in commit range, clear bower_components if a bower.json file has changed.
# bower cache clearing can also be forced by putting "bower clear" or "clear bower" in a commit message
@ -133,7 +133,7 @@ before_install:
- ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
- ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
- "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
- ./dev/change_scala_version.sh $SCALA_VER
#- ./dev/change_scala_version.sh $SCALA_VER
- source ~/.environ
install:
@ -145,9 +145,11 @@ before_script:
- if [[ -n $LIVY_VER ]]; then ./testing/downloadLivy.sh $LIVY_VER; fi
- if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-$LIVY_VER-bin; fi
- if [[ -n $LIVY_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi
- export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
- if [[ -n $SPARK_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi
- if [[ -n $SPARK_VER ]]; then echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh; fi
- echo "export ZEPPELIN_HELIUM_REGISTRY=helium" >> conf/zeppelin-env.sh
- echo "export SPARK_PRINT_LAUNCH_COMMAND=true" >> conf/zeppelin-env.sh
- export SPARK_PRINT_LAUNCH_COMMAND=true
- tail conf/zeppelin-env.sh
# https://docs.travis-ci.com/user/gui-and-headless-browsers/#Using-xvfb-to-Run-Tests-That-Require-a-GUI
- if [[ -n $TEST_MODULES ]]; then export DISPLAY=:99.0; sh -e /etc/init.d/xvfb start; sleep 3; fi

View file

@ -121,7 +121,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
if [[ -n "${SPARK_HOME}" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/spark-interpreter*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}"

View file

@ -199,6 +199,10 @@ Zeppelin support both yarn client and yarn cluster mode (yarn cluster mode is su
You can either specify them in `zeppelin-env.sh`, or in interpreter setting page. Specifying them in `zeppelin-env.sh` means you can use only one version of `spark` & `hadoop`. Specifying them
in interpreter setting page means you can use multiple versions of `spark` & `hadoop` in one zeppelin instance.
### 4. New Version of SparkInterpreter
Zeppelin 0.8.0 introduces a new version of SparkInterpreter with better Spark support and code completion. By default, the old version of SparkInterpreter is still used.
If you want to use the new one, set `zeppelin.spark.useNew` to `true` in the interpreter setting.
## SparkContext, SQLContext, SparkSession, ZeppelinContext
SparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext` and `z`, respectively, in Scala, Python and R environments.
Starting from 0.6.1, SparkSession is available as variable `spark` when you are using Spark 2.x.

19
pom.xml
View file

@ -56,9 +56,11 @@
<module>zeppelin-interpreter</module>
<module>zeppelin-zengine</module>
<module>zeppelin-display</module>
<module>spark-dependencies</module>
<module>groovy</module>
<module>spark</module>
<module>spark/scala-2.10</module>
<module>spark/scala-2.11</module>
<module>spark/interpreter</module>
<module>spark/spark-dependencies</module>
<module>markdown</module>
<module>angular</module>
<module>shell</module>
@ -86,6 +88,7 @@
<properties>
<!-- language versions -->
<java.version>1.7</java.version>
<scala.version>2.10.5</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<scalatest.version>2.2.4</scalatest.version>
@ -329,8 +332,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>${plugin.compiler.version}</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
@ -739,9 +742,6 @@
<profiles>
<profile>
<id>scala-2.10</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<scala.version>2.10.5</scala.version>
<scala.binary.version>2.10</scala.binary.version>
@ -750,8 +750,11 @@
<profile>
<id>scala-2.11</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<scala.version>2.11.7</scala.version>
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
</properties>
</profile>

View file

@ -43,6 +43,7 @@
<pypi.repo.url>https://pypi.python.org/packages</pypi.repo.url>
<python.py4j.repo.folder>/64/5c/01e13b68e8caafece40d549f232c9b5677ad1016071a48d04cc3895acaa3</python.py4j.repo.folder>
<grpc.version>1.4.0</grpc.version>
<plugin.shade.version>2.4.1</plugin.shade.version>
</properties>
<dependencies>
@ -90,13 +91,7 @@
<artifactId>grpc-stub</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>18.0</version>
</dependency>
<!-- test libraries -->
<dependency>
<groupId>junit</groupId>
@ -202,6 +197,38 @@
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${plugin.shade.version}</version>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
<relocations>
<relocation>
<pattern>com.google.common</pattern>
<shadedPattern>org.apache.zeppelin.com.google.common</shadedPattern>
</relocation>
<relocation>
<pattern>py4j</pattern>
<shadedPattern>org.apache.zeppelin.py4j</shadedPattern>
</relocation>
</relocations>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>

View file

@ -299,7 +299,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand
}
@Override
public void close() {
public void close() throws InterpreterException {
if (watchDog != null) {
LOGGER.debug("Kill IPython Process");
ipythonClient.stop(StopRequest.newBuilder().build());
@ -327,7 +327,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand
}
@Override
public void cancel(InterpreterContext context) {
public void cancel(InterpreterContext context) throws InterpreterException {
ipythonClient.cancel(CancelRequest.newBuilder().build());
}
@ -337,7 +337,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand
}
@Override
public int getProgress(InterpreterContext context) {
public int getProgress(InterpreterContext context) throws InterpreterException {
return 0;
}

View file

@ -285,7 +285,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl
}
@Override
public void close() {
public void close() throws InterpreterException {
if (iPythonInterpreter != null) {
iPythonInterpreter.close();
return;
@ -463,7 +463,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl
return context;
}
public void interrupt() throws IOException {
public void interrupt() throws IOException, InterpreterException {
if (pythonPid > -1) {
logger.info("Sending SIGINT signal to PID : " + pythonPid);
Runtime.getRuntime().exec("kill -SIGINT " + pythonPid);
@ -474,7 +474,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl
}
@Override
public void cancel(InterpreterContext context) {
public void cancel(InterpreterContext context) throws InterpreterException {
if (iPythonInterpreter != null) {
iPythonInterpreter.cancel(context);
}
@ -491,7 +491,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl
}
@Override
public int getProgress(InterpreterContext context) {
public int getProgress(InterpreterContext context) throws InterpreterException {
if (iPythonInterpreter != null) {
return iPythonInterpreter.getProgress(context);
}

View file

@ -66,7 +66,7 @@ public class IPythonInterpreterTest {
}
@After
public void close() {
public void close() throws InterpreterException {
interpreter.close();
}
@ -81,6 +81,9 @@ public class IPythonInterpreterTest {
InterpreterResult result = interpreter.interpret("from __future__ import print_function", getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
result = interpreter.interpret("import sys\nprint(sys.version_info)", getInterpreterContext());
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
// single output without print
InterpreterContext context = getInterpreterContext();
result = interpreter.interpret("'hello world'", context);
@ -195,6 +198,9 @@ public class IPythonInterpreterTest {
context = getInterpreterContext();
completions = interpreter.completion("sys.std", 7, context);
for (InterpreterCompletion completion : completions) {
System.out.println(completion.getValue());
}
assertEquals(3, completions.size());
assertEquals("stderr", completions.get(0).getValue());
assertEquals("stdin", completions.get(1).getValue());
@ -308,6 +314,7 @@ public class IPythonInterpreterTest {
context = getInterpreterContext();
result = interpreter.interpret("from bokeh.io import output_notebook, show\n" +
"from bokeh.plotting import figure\n" +
"import bkzep\n" +
"output_notebook(notebook_type='zeppelin')", context);
Thread.sleep(100);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
@ -329,10 +336,11 @@ public class IPythonInterpreterTest {
Thread.sleep(100);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
interpreterResultMessages = context.out.getInterpreterResultMessages();
assertEquals(1, interpreterResultMessages.size());
assertEquals(2, interpreterResultMessages.size());
assertEquals(InterpreterResult.Type.HTML, interpreterResultMessages.get(0).getType());
assertEquals(InterpreterResult.Type.HTML, interpreterResultMessages.get(1).getType());
// docs_json is the source data of plotting which bokeh would use to render the plotting.
assertTrue(interpreterResultMessages.get(0).getData().contains("docs_json"));
assertTrue(interpreterResultMessages.get(1).getData().contains("docs_json"));
// ggplot
context = getInterpreterContext();

View file

@ -80,7 +80,7 @@ public class PythonInterpreterMatplotlibTest implements InterpreterOutputListene
}
@After
public void afterTest() throws IOException {
public void afterTest() throws IOException, InterpreterException {
python.close();
}

View file

@ -93,7 +93,7 @@ public class PythonInterpreterTest implements InterpreterOutputListener {
}
@After
public void afterTest() throws IOException {
public void afterTest() throws IOException, InterpreterException {
pythonInterpreter.close();
}

View file

@ -68,13 +68,6 @@
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-spark-dependencies_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter</artifactId>

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

573
spark/interpreter/pom.xml Normal file
View file

@ -0,0 +1,573 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>spark-parent</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-interpreter</artifactId>
<packaging>jar</packaging>
<version>0.9.0-SNAPSHOT</version>
<name>Zeppelin: Spark Interpreter</name>
<description>Zeppelin spark support</description>
<properties>
<!-- Name of the interpreter; used below to build the interpreter/${interpreter.name} output directory. -->
<interpreter.name>spark</interpreter.name>
<!--library versions-->
<jsoup.version>1.8.2</jsoup.version>
<commons.exec.version>1.3</commons.exec.version>
<commons.compress.version>1.9</commons.compress.version>
<maven.plugin.api.version>3.0</maven.plugin.api.version>
<aether.version>1.12</aether.version>
<!-- NOTE(review): "aeither" is a misspelling of "aether". The maven-aether-provider
     dependency in this file references the property by this exact name, so any rename
     must change both places together. -->
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>1.0</wagon.version>
<!-- DataNucleus artifacts are declared with test scope in this file. -->
<datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
<datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
<datanucleus.core.version>3.2.10</datanucleus.core.version>
<!-- Alias of scala.version; not referenced elsewhere in this file (TODO confirm whether it is still needed). -->
<scala.compile.version>${scala.version}</scala.compile.version>
<!-- settings -->
<!-- Test class pattern added to the surefire <excludes> list; can be overridden on the
     command line with -Dpyspark.test.exclude=... -->
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
<!-- Not referenced elsewhere in this file (TODO confirm whether a profile or child uses it). -->
<pyspark.test.include>**/*Test.*</pyspark.test.include>
<!-- Base name of the Spark source archive; also the directory name expected under
     ${project.build.directory} after the download plugin unpacks it. -->
<spark.archive>spark-${spark.version}</spark.archive>
<!-- Spark source tarball fetched by the download-pyspark-files execution. -->
<spark.src.download.url>
http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz
</spark.src.download.url>
<!-- Spark binary (without Hadoop) tarball; not referenced elsewhere in this file. -->
<spark.bin.download.url>
http://d3kbcqa49mib13.cloudfront.net/spark-${spark.version}-bin-without-hadoop.tgz
</spark.bin.download.url>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-display</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.11</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-python</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-python</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Aether :: maven dependency resolution -->
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>${maven.plugin.api.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.sisu</groupId>
<artifactId>sisu-inject-plexus</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.maven</groupId>
<artifactId>maven-model</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-api</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-util</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-impl</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-aether-provider</artifactId>
<version>${maven.aeither.provider.version}</version>
<exclusions>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-spi</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-util</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-impl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-connector-file</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-connector-wagon</artifactId>
<version>${aether.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-provider-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-provider-api</artifactId>
<version>${wagon.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-http-lightweight</artifactId>
<version>${wagon.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-http-shared</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
  <!-- HTTP wagon, versioned together with the other wagon artifacts via ${wagon.version}.
       Declared without exclusions. -->
  <groupId>org.apache.maven.wagon</groupId>
  <artifactId>wagon-http</artifactId>
  <version>${wagon.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>${commons.exec.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>${commons.compress.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!--test libraries-->
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<version>${datanucleus.core.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>${datanucleus.apijdo.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>${datanucleus.rdbms.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
<configuration>
<rules>
<requireJavaVersion>
<version>1.7</version>
</requireJavaVersion>
</rules>
</configuration>
</plugin>
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
<executions>
<execution>
<id>download-pyspark-files</id>
<phase>validate</phase>
<goals>
<goal>wget</goal>
</goals>
<configuration>
<readTimeOut>60000</readTimeOut>
<retries>5</retries>
<unpack>true</unpack>
<url>${spark.src.download.url}</url>
<outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-antrun-plugin</artifactId>
  <executions>
    <!-- Rebuilds <zeppelin root>/interpreter/spark/pyspark from the Spark source archive
         that the download plugin unpacked into ${project.build.directory}:
           1. remove the previous output directory,
           2. copy the py4j source zip shipped with Spark,
           3. zip the pyspark *.py sources into pyspark.zip.
         All three steps address the output directory through the same
         ${project.build.directory}-based path so they cannot drift apart. -->
    <execution>
      <id>zip-pyspark-files</id>
      <phase>generate-resources</phase>
      <goals>
        <goal>run</goal>
      </goals>
      <configuration>
        <target>
          <delete dir="${project.build.directory}/../../../interpreter/spark/pyspark" />
          <copy file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip" todir="${project.build.directory}/../../../interpreter/spark/pyspark" />
          <zip basedir="${project.build.directory}/${spark.archive}/python" destfile="${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip" includes="pyspark/*.py,pyspark/**/*.py" />
        </target>
      </configuration>
    </execution>
  </executions>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
<exclude>${pyspark.test.exclude}</exclude>
<exclude>${tests.to.exclude}</exclude>
</excludes>
<environmentVariables>
<PYTHONPATH>${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip:${project.build.directory}/../../../interpreter/lib/python/:${project.build.directory}/../../../interpreter/spark/pyspark/py4j-${py4j.version}-src.zip:.</PYTHONPATH>
<ZEPPELIN_HOME>${basedir}/../../</ZEPPELIN_HOME>
</environmentVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${plugin.shade.version}</version>
<configuration>
<!--<createDependencyReducedPom>false</createDependencyReducedPom>-->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
<relocations>
<relocation>
<pattern>io.netty</pattern>
<shadedPattern>org.apache.zeppelin.io.netty</shadedPattern>
</relocation>
<relocation>
<pattern>com.google</pattern>
<shadedPattern>org.apache.zeppelin.com.google</shadedPattern>
</relocation>
<relocation>
<pattern>py4j.</pattern>
<shadedPattern>org.apache.zeppelin.py4j.</shadedPattern>
</relocation>
</relocations>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>none</phase>
<configuration>
<skip>true</skip>
</configuration>
</execution>
<execution>
<id>copy-interpreter-dependencies</id>
<phase>none</phase>
<configuration>
<skip>true</skip>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>none</phase>
<configuration>
<skip>true</skip>
</configuration>
</execution>
<execution>
<id>copy-spark-interpreter</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../../interpreter/spark</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-interpreter-setting</id>
<phase>package</phase>
<goals>
<goal>resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../../interpreter/${interpreter.name}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import java.util.Properties;
/**
 * Common base class of the Spark interpreter implementations. It exists so that
 * NewSparkInterpreter and OldSparkInterpreter can co-exist behind one type and callers
 * can reach the Spark entry points without knowing which implementation is in use.
 */
public abstract class AbstractSparkInterpreter extends Interpreter {
/**
 * @param properties interpreter properties, passed through to {@link Interpreter}
 */
public AbstractSparkInterpreter(Properties properties) {
super(properties);
}
/** @return the SparkContext held by this interpreter */
public abstract SparkContext getSparkContext();
/** @return the SQLContext held by this interpreter */
public abstract SQLContext getSQLContext();
/**
 * @return the Spark session object; typed as Object, presumably because the
 *         SparkSession class is not available on every supported Spark version
 *         (TODO confirm)
 */
public abstract Object getSparkSession();
/** @return true when the SparkContext has already been created */
public abstract boolean isSparkContextInitialized();
/** @return the version of the Spark runtime in use */
public abstract SparkVersion getSparkVersion();
/** @return the JavaSparkContext held by this interpreter */
public abstract JavaSparkContext getJavaSparkContext();
/**
 * Publishes Spark web UI information for the given paragraph context.
 *
 * @param ctx context of the paragraph being run
 */
public abstract void populateSparkWebUrl(InterpreterContext ctx);
/** @return the ZeppelinContext ("z") instance of this interpreter */
public abstract SparkZeppelinContext getZeppelinContext();
/** @return the URL of the Spark web UI */
public abstract String getSparkUIUrl();
/** @return true when the running Spark version is treated as unsupported */
public abstract boolean isUnsupportedSparkVersion();
}

View file

@ -176,7 +176,7 @@ public class DepInterpreter extends Interpreter {
}
depc = new SparkDependencyContext(getProperty("zeppelin.dep.localrepo"),
getProperty("zeppelin.dep.additionalRemoteRepository"));
getProperty("zeppelin.dep.additionalRemoteRepository"));
if (Utils.isScala2_10()) {
completer = Utils.instantiateClass(
"org.apache.spark.repl.SparkJLineCompletion",
@ -208,7 +208,7 @@ public class DepInterpreter extends Interpreter {
public Object getValue(String name) {
Object ret = Utils.invokeMethod(
intp, "valueOfTerm", new Class[]{String.class}, new Object[]{name});
intp, "valueOfTerm", new Class[]{String.class}, new Object[]{name});
if (ret instanceof None) {
return null;
} else if (ret instanceof Some) {
@ -233,11 +233,11 @@ public class DepInterpreter extends Interpreter {
SparkInterpreter sparkInterpreter = getSparkInterpreter();
if (sparkInterpreter != null && sparkInterpreter.isSparkContextInitialized()) {
if (sparkInterpreter != null && sparkInterpreter.getDelegation().isSparkContextInitialized()) {
return new InterpreterResult(Code.ERROR,
"Must be used before SparkInterpreter (%spark) initialized\n" +
"Hint: put this paragraph before any Spark code and " +
"restart Zeppelin/Interpreter" );
"Hint: put this paragraph before any Spark code and " +
"restart Zeppelin/Interpreter" );
}
scala.tools.nsc.interpreter.Results.Result ret = interpret(st);
@ -287,7 +287,7 @@ public class DepInterpreter extends Interpreter {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor,
InterpreterContext interpreterContext) {
InterpreterContext interpreterContext) {
if (Utils.isScala2_10()) {
ScalaCompleter c = (ScalaCompleter) Utils.invokeMethod(completer, "completer");
Candidates ret = c.complete(buf, cursor);

View file

@ -92,13 +92,13 @@ public class IPySparkInterpreter extends IPythonInterpreter {
}
@Override
public void cancel(InterpreterContext context) {
public void cancel(InterpreterContext context) throws InterpreterException {
super.cancel(context);
sparkInterpreter.cancel(context);
}
@Override
public void close() {
public void close() throws InterpreterException {
super.close();
if (sparkInterpreter != null) {
sparkInterpreter.close();
@ -106,7 +106,7 @@ public class IPySparkInterpreter extends IPythonInterpreter {
}
@Override
public int getProgress(InterpreterContext context) {
public int getProgress(InterpreterContext context) throws InterpreterException {
return sparkInterpreter.getProgress(context);
}

View file

@ -0,0 +1,390 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.scheduler.SparkListenerJobStart;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.ui.jobs.JobProgressListener;
import org.apache.zeppelin.interpreter.BaseZeppelinContext;
import org.apache.zeppelin.interpreter.DefaultInterpreterProperty;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.interpreter.remote.RemoteEventClientWrapper;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.spark.dep.SparkDependencyContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
 * Spark interpreter implemented in Java. It is a thin wrapper that, at {@link #open()} time,
 * instantiates the inner interpreter matching the Scala version on the classpath
 * (SparkScala210Interpreter or SparkScala211Interpreter) and delegates code execution,
 * completion and progress reporting to it.
 */
public class NewSparkInterpreter extends AbstractSparkInterpreter {

  private static final Logger LOGGER = LoggerFactory.getLogger(NewSparkInterpreter.class);

  /** Scala binary version ("2.10" / "2.11") to inner interpreter class name. */
  private final Map<String, String> innerInterpreterClassMap = new HashMap<>();
  private final boolean enableSupportedVersionCheck;

  private BaseSparkScalaInterpreter innerInterpreter;
  private SparkContext sc;
  private JavaSparkContext jsc;
  private SQLContext sqlContext;
  private Object sparkSession;
  private SparkZeppelinContext z;
  private SparkVersion sparkVersion;
  private String sparkUrl;

  /**
   * @param properties interpreter properties; zeppelin.spark.enableSupportedVersionCheck
   *                   (default "true") controls the Spark version check done in open()
   */
  public NewSparkInterpreter(Properties properties) {
    super(properties);
    this.enableSupportedVersionCheck = Boolean.parseBoolean(
        properties.getProperty("zeppelin.spark.enableSupportedVersionCheck", "true"));
    innerInterpreterClassMap.put("2.10", "org.apache.zeppelin.spark.SparkScala210Interpreter");
    innerInterpreterClassMap.put("2.11", "org.apache.zeppelin.spark.SparkScala211Interpreter");
  }

  /**
   * Builds the SparkConf from the interpreter properties, creates and opens the inner Scala
   * interpreter, caches the Spark entry points it exposes and binds the ZeppelinContext as "z".
   *
   * @throws InterpreterException wrapping any failure, including an unsupported Spark version
   */
  @Override
  public void open() throws InterpreterException {
    try {
      String scalaVersion = extractScalaVersion();
      LOGGER.info("Using Scala Version: {}", scalaVersion);
      setupConfForPySpark();

      SparkConf conf = new SparkConf();
      for (Map.Entry<Object, Object> entry : getProperties().entrySet()) {
        String key = entry.getKey().toString();
        String value = entry.getValue().toString();
        // blank values are not forwarded to Spark
        if (!StringUtils.isBlank(value)) {
          conf.set(key, value);
        }
        if (key.equals("zeppelin.spark.useHiveContext")) {
          conf.set("spark.useHiveContext", value);
        }
      }
      // use local mode for embedded spark mode when spark.master is not found
      conf.setIfMissing("spark.master", "local");

      String innerIntpClassName = innerInterpreterClassMap.get(scalaVersion);
      Class<?> clazz = Class.forName(innerIntpClassName);
      this.innerInterpreter =
          (BaseSparkScalaInterpreter) clazz.getConstructor(SparkConf.class, List.class)
              .newInstance(conf, getDependencyFiles());
      this.innerInterpreter.open();

      sc = this.innerInterpreter.sc();
      jsc = JavaSparkContext.fromSparkContext(sc);
      sparkVersion = SparkVersion.fromVersionString(sc.version());
      if (isUnsupportedSparkVersion()) {
        throw new Exception("This is not officially supported spark version: " + sparkVersion
            + "\nYou can set zeppelin.spark.enableSupportedVersionCheck to false if you really" +
            " want to try this version of spark.");
      }
      sqlContext = this.innerInterpreter.sqlContext();
      sparkSession = this.innerInterpreter.sparkSession();
      sparkUrl = this.innerInterpreter.sparkUrl();
      setupListeners();

      InterpreterHookRegistry hooks = getInterpreterGroup().getInterpreterHookRegistry();
      z = new SparkZeppelinContext(sc, hooks,
          Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
      this.innerInterpreter.bind("z", z.getClass().getCanonicalName(), z,
          Lists.newArrayList("@transient"));
    } catch (Exception e) {
      LOGGER.error("Fail to open SparkInterpreter", e);
      throw new InterpreterException("Fail to open SparkInterpreter", e);
    }
  }

  /**
   * Locates pyspark.zip and the py4j-* archive (under SPARK_HOME/python/lib, or under the
   * Zeppelin home's interpreter/spark/pyspark folder when SPARK_HOME is not set) and appends
   * them to the spark.files and spark.submit.pyFiles system properties.
   *
   * @throws RuntimeException when exactly two archives cannot be found
   */
  private void setupConfForPySpark() {
    String sparkHome = getProperty("SPARK_HOME");
    File pysparkFolder;
    if (sparkHome == null) {
      String zeppelinHome =
          new DefaultInterpreterProperty("ZEPPELIN_HOME", "zeppelin.home", "../../")
              .getValue().toString();
      pysparkFolder = new File(zeppelinHome,
          "interpreter" + File.separator + "spark" + File.separator + "pyspark");
    } else {
      pysparkFolder = new File(sparkHome, "python" + File.separator + "lib");
    }

    List<String> pysparkPackages = new ArrayList<>();
    // listFiles() returns null when the folder is missing or unreadable; treat that as
    // "nothing found" so the explicit error below is raised instead of an NPE
    File[] candidates = pysparkFolder.listFiles();
    if (candidates == null) {
      LOGGER.warn("Can not list pyspark folder: {}", pysparkFolder.getAbsolutePath());
    } else {
      for (File file : candidates) {
        String fileName = file.getName();
        if (fileName.equals("pyspark.zip") || fileName.startsWith("py4j-")) {
          pysparkPackages.add(file.getAbsolutePath());
        }
      }
    }
    if (pysparkPackages.size() != 2) {
      throw new RuntimeException("Not correct number of pyspark packages: " +
          StringUtils.join(pysparkPackages, ","));
    }

    // Distribute two libraries(pyspark.zip and py4j-*.zip) to workers
    String joinedPackages = StringUtils.join(pysparkPackages, ",");
    System.setProperty("spark.files",
        mergeProperty(System.getProperty("spark.files", ""), joinedPackages));
    System.setProperty("spark.submit.pyFiles",
        mergeProperty(System.getProperty("spark.submit.pyFiles", ""), joinedPackages));
  }

  /**
   * Appends appendedValue to a comma separated list.
   *
   * @return appendedValue alone when originalValue is blank, otherwise both joined by ","
   */
  private String mergeProperty(String originalValue, String appendedValue) {
    if (StringUtils.isBlank(originalValue)) {
      return appendedValue;
    }
    return originalValue + "," + appendedValue;
  }

  /** Closes the inner interpreter; a no-op when open() never created one. */
  @Override
  public void close() {
    LOGGER.info("Close SparkInterpreter");
    if (innerInterpreter != null) {
      innerInterpreter.close();
    }
  }

  /**
   * Publishes the paragraph context to the ZeppelinContext, tags upcoming Spark jobs with the
   * paragraph's job group, then runs the code in the inner interpreter.
   */
  @Override
  public InterpreterResult interpret(String st, InterpreterContext context) {
    InterpreterContext.set(context);
    z.setGui(context.getGui());
    z.setNoteGui(context.getNoteGui());
    z.setInterpreterContext(context);
    populateSparkWebUrl(context);
    String jobDesc = "Started by: " + Utils.getUserName(context.getAuthenticationInfo());
    sc.setJobGroup(Utils.buildJobGroupId(context), jobDesc, false);
    return innerInterpreter.interpret(st, context);
  }

  /** Cancels the Spark jobs that belong to the paragraph's job group. */
  @Override
  public void cancel(InterpreterContext context) {
    sc.cancelJobGroup(Utils.buildJobGroupId(context));
  }

  /** Delegates code completion to the inner interpreter. */
  @Override
  public List<InterpreterCompletion> completion(String buf,
                                                int cursor,
                                                InterpreterContext interpreterContext) {
    LOGGER.debug("buf: {}, cursor:{}", buf, cursor);
    return innerInterpreter.completion(buf, cursor, interpreterContext);
  }

  @Override
  public FormType getFormType() {
    return FormType.NATIVE;
  }

  /** Asks the inner interpreter for the progress of the paragraph's job group. */
  @Override
  public int getProgress(InterpreterContext context) {
    return innerInterpreter.getProgress(Utils.buildJobGroupId(context), context);
  }

  /**
   * Registers a JobProgressListener on the SparkContext's listener bus. On every job start
   * the listener sends the job's web UI link to the Zeppelin event client, unless the job
   * properties carry spark.ui.enabled=false or no Spark UI URL is known.
   */
  private void setupListeners() {
    JobProgressListener pl = new JobProgressListener(sc.getConf()) {
      @Override
      public synchronized void onJobStart(SparkListenerJobStart jobStart) {
        super.onJobStart(jobStart);
        int jobId = jobStart.jobId();
        String jobGroupId = jobStart.properties().getProperty("spark.jobGroup.id");
        String uiEnabled = jobStart.properties().getProperty("spark.ui.enabled");
        String jobUrl = getJobUrl(jobId);
        String noteId = Utils.getNoteId(jobGroupId);
        String paragraphId = Utils.getParagraphId(jobGroupId);
        // Button visible if Spark UI property not set, set as invalid boolean or true
        boolean showSparkUI = uiEnabled == null || !"false".equalsIgnoreCase(uiEnabled.trim());
        if (showSparkUI && jobUrl != null) {
          RemoteEventClientWrapper eventClient = BaseZeppelinContext.getEventClient();
          Map<String, String> infos = new HashMap<>();
          infos.put("jobUrl", jobUrl);
          infos.put("label", "SPARK JOB");
          infos.put("tooltip", "View in Spark web UI");
          if (eventClient != null) {
            eventClient.onParaInfosReceived(noteId, paragraphId, infos);
          }
        }
      }

      private String getJobUrl(int jobId) {
        return sparkUrl == null ? null : sparkUrl + "/jobs/job?id=" + jobId;
      }
    };

    try {
      // The listener bus is reached reflectively and its single-argument addListener
      // overload is picked by parameter type rather than called directly.
      Object listenerBus = sc.getClass().getMethod("listenerBus").invoke(sc);
      Method addListenerMethod = null;
      for (Method m : listenerBus.getClass().getMethods()) {
        if (!m.getName().equals("addListener")) {
          continue;
        }
        Class<?>[] parameterTypes = m.getParameterTypes();
        if (parameterTypes.length == 1
            && parameterTypes[0].isAssignableFrom(JobProgressListener.class)) {
          addListenerMethod = m;
          break;
        }
      }
      if (addListenerMethod != null) {
        addListenerMethod.invoke(listenerBus, pl);
      } else {
        LOGGER.warn("No suitable addListener method found, job progress listener not added");
      }
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException
        | IllegalArgumentException | InvocationTargetException e) {
      LOGGER.error(e.toString(), e);
    }
  }

  @Override
  public SparkZeppelinContext getZeppelinContext() {
    return this.z;
  }

  @Override
  public SparkContext getSparkContext() {
    return this.sc;
  }

  @Override
  public SQLContext getSQLContext() {
    return sqlContext;
  }

  @Override
  public JavaSparkContext getJavaSparkContext() {
    return this.jsc;
  }

  @Override
  public Object getSparkSession() {
    return sparkSession;
  }

  @Override
  public SparkVersion getSparkVersion() {
    return sparkVersion;
  }

  /**
   * @return the DepInterpreter of the same session with all WrappedInterpreter layers
   *         removed, or null when the session has none
   */
  private DepInterpreter getDepInterpreter() {
    Interpreter p = getInterpreterInTheSameSessionByClassName(DepInterpreter.class.getName());
    if (p == null) {
      return null;
    }
    while (p instanceof WrappedInterpreter) {
      p = ((WrappedInterpreter) p).getInnerInterpreter();
    }
    return (DepInterpreter) p;
  }

  /**
   * @return "2.10" when the Scala library on the classpath reports a 2.10 version,
   *         otherwise "2.11"
   */
  private String extractScalaVersion() {
    String scalaVersionString = scala.util.Properties.versionString();
    return scalaVersionString.contains("version 2.10") ? "2.10" : "2.11";
  }

  /**
   * Sends the Spark UI URL and a status message ("Spark UI disabled", "Spark UI enabled" or
   * "No spark url defined") to the Zeppelin server through the context's client, and
   * registers that client as the event client of the ZeppelinContext.
   *
   * @param ctx paragraph context; nothing is sent when it or its client is null
   */
  @Override
  public void populateSparkWebUrl(InterpreterContext ctx) {
    Map<String, String> infos = new HashMap<>();
    infos.put("url", sparkUrl);
    String uiEnabledProp = properties.getProperty("spark.ui.enabled", "true");
    boolean uiEnabled = Boolean.parseBoolean(uiEnabledProp.trim());
    if (!uiEnabled) {
      infos.put("message", "Spark UI disabled");
    } else if (StringUtils.isNotBlank(sparkUrl)) {
      infos.put("message", "Spark UI enabled");
    } else {
      infos.put("message", "No spark url defined");
    }
    if (ctx != null && ctx.getClient() != null) {
      LOGGER.debug("Sending metadata to Zeppelin server: {}", infos);
      getZeppelinContext().setEventClient(ctx.getClient());
      ctx.getClient().onMetaInfosReceived(infos);
    }
  }

  @Override
  public boolean isSparkContextInitialized() {
    return this.sc != null;
  }

  /**
   * Collects the absolute paths of the files handed to the inner interpreter: the files
   * distributed by the DepInterpreter's dependency context followed by every entry of the
   * zeppelin.interpreter.localRepo directory.
   */
  private List<String> getDependencyFiles() {
    List<String> depFiles = new ArrayList<>();

    // add jar from DepInterpreter
    DepInterpreter depInterpreter = getDepInterpreter();
    if (depInterpreter != null) {
      SparkDependencyContext depc = depInterpreter.getDependencyContext();
      if (depc != null) {
        List<File> files = depc.getFilesDist();
        if (files != null) {
          for (File f : files) {
            depFiles.add(f.getAbsolutePath());
          }
        }
      }
    }

    // add jar from local repo
    String localRepo = getProperty("zeppelin.interpreter.localRepo");
    if (localRepo != null) {
      File localRepoDir = new File(localRepo);
      if (localRepoDir.exists()) {
        File[] files = localRepoDir.listFiles();
        if (files != null) {
          for (File f : files) {
            depFiles.add(f.getAbsolutePath());
          }
        }
      }
    }
    return depFiles;
  }

  @Override
  public String getSparkUIUrl() {
    return sparkUrl;
  }

  /**
   * @return true when the version check is enabled and the detected Spark version is
   *         reported as unsupported; only meaningful after open() has set the version
   */
  @Override
  public boolean isUnsupportedSparkVersion() {
    return enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion();
  }
}

View file

@ -97,8 +97,8 @@ import scala.tools.nsc.settings.MutableSettings.PathSetting;
* Spark interpreter for Zeppelin.
*
*/
public class SparkInterpreter extends Interpreter {
public static Logger logger = LoggerFactory.getLogger(SparkInterpreter.class);
public class OldSparkInterpreter extends AbstractSparkInterpreter {
public static Logger logger = LoggerFactory.getLogger(OldSparkInterpreter.class);
private SparkZeppelinContext z;
private SparkILoop interpreter;
@ -134,12 +134,12 @@ public class SparkInterpreter extends Interpreter {
private JavaSparkContext jsc;
private boolean enableSupportedVersionCheck;
public SparkInterpreter(Properties property) {
public OldSparkInterpreter(Properties property) {
super(property);
out = new InterpreterOutputStream(logger);
}
public SparkInterpreter(Properties property, SparkContext sc) {
public OldSparkInterpreter(Properties property, SparkContext sc) {
this(property);
this.sc = sc;
@ -186,7 +186,7 @@ public class SparkInterpreter extends Interpreter {
String paragraphId = Utils.getParagraphId(jobGroupId);
// Button visible if Spark UI property not set, set as invalid boolean or true
java.lang.Boolean showSparkUI =
uiEnabled == null || !uiEnabled.trim().toLowerCase().equals("false");
uiEnabled == null || !uiEnabled.trim().toLowerCase().equals("false");
if (showSparkUI && jobUrl != null) {
RemoteEventClientWrapper eventClient = BaseZeppelinContext.getEventClient();
Map<String, String> infos = new java.util.HashMap<>();
@ -443,7 +443,7 @@ public class SparkInterpreter extends Interpreter {
jars = (String[]) Utils.invokeStaticMethod(SparkILoop.class, "getAddedJars");
} else {
jars = (String[]) Utils.invokeStaticMethod(
Utils.findClass("org.apache.spark.repl.Main"), "getAddedJars");
Utils.findClass("org.apache.spark.repl.Main"), "getAddedJars");
}
String classServerUri = null;
@ -467,7 +467,7 @@ public class SparkInterpreter extends Interpreter {
// continue instead of: throw new InterpreterException(e);
// Newer Spark versions (like the patched CDH5.7.0 one) don't contain this method
logger.warn(String.format("Spark method classServerUri not available due to: [%s]",
e.getMessage()));
e.getMessage()));
}
}
@ -477,7 +477,7 @@ public class SparkInterpreter extends Interpreter {
File classOutputDirectory = (File) getClassOutputDirectory.invoke(intp);
replClassOutputDirectory = classOutputDirectory.getAbsolutePath();
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
| IllegalArgumentException | InvocationTargetException e) {
| IllegalArgumentException | InvocationTargetException e) {
// continue
}
}
@ -548,7 +548,7 @@ public class SparkInterpreter extends Interpreter {
System.setProperty("SPARK_YARN_MODE", "true");
}
if (getProperties().containsKey("spark.yarn.keytab") &&
getProperties().containsKey("spark.yarn.principal")) {
getProperties().containsKey("spark.yarn.principal")) {
try {
String keytab = getProperties().getProperty("spark.yarn.keytab");
String principal = getProperties().getProperty("spark.yarn.principal");
@ -725,7 +725,7 @@ public class SparkInterpreter extends Interpreter {
*
* As hashCode() can return a negative integer value and the minus character '-' is invalid
* in a package name we change it to a numeric value '0' which still conforms to the regexp.
*
*
*/
System.setProperty("scala.repl.name.line", ("$line" + this.hashCode()).replace('-', '0'));
@ -805,11 +805,11 @@ public class SparkInterpreter extends Interpreter {
sqlc = getSQLContext();
dep = getDependencyResolver();
hooks = getInterpreterGroup().getInterpreterHookRegistry();
z = new SparkZeppelinContext(sc, sqlc, hooks,
Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
z = new SparkZeppelinContext(sc, hooks,
Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
Map<String, Object> binder;
@ -827,13 +827,13 @@ public class SparkInterpreter extends Interpreter {
}
interpret("@transient val z = "
+ "_binder.get(\"z\").asInstanceOf[org.apache.zeppelin.spark.SparkZeppelinContext]");
+ "_binder.get(\"z\").asInstanceOf[org.apache.zeppelin.spark.SparkZeppelinContext]");
interpret("@transient val sc = "
+ "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
+ "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
interpret("@transient val sqlc = "
+ "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
+ "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
interpret("@transient val sqlContext = "
+ "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
+ "_binder.get(\"sqlc\").asInstanceOf[org.apache.spark.sql.SQLContext]");
if (Utils.isSpark2()) {
interpret("@transient val spark = "
@ -966,7 +966,7 @@ public class SparkInterpreter extends Interpreter {
infos.put("url", sparkUrl);
String uiEnabledProp = getProperty("spark.ui.enabled", "true");
java.lang.Boolean uiEnabled = java.lang.Boolean.parseBoolean(
uiEnabledProp.trim());
uiEnabledProp.trim());
if (!uiEnabled) {
infos.put("message", "Spark UI disabled");
} else {
@ -1014,7 +1014,7 @@ public class SparkInterpreter extends Interpreter {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor,
InterpreterContext interpreterContext) {
InterpreterContext interpreterContext) {
if (completer == null) {
logger.warn("Can't find completer");
return new LinkedList<>();
@ -1025,29 +1025,29 @@ public class SparkInterpreter extends Interpreter {
}
ScalaCompleter c = (ScalaCompleter) Utils.invokeMethod(completer, "completer");
if (Utils.isScala2_10() || !Utils.isCompilerAboveScala2_11_7()) {
String singleToken = getCompletionTargetString(buf, cursor);
Candidates ret = c.complete(singleToken, singleToken.length());
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<>();
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate, StringUtils.EMPTY));
}
return completions;
} else {
Candidates ret = c.complete(buf, cursor);
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<>();
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate, StringUtils.EMPTY));
}
return completions;
}
}
@ -1088,7 +1088,7 @@ public class SparkInterpreter extends Interpreter {
completionStartPosition = completionEndPosition - completionStartPosition;
}
resultCompletionText = completionScriptText.substring(
completionStartPosition , completionEndPosition);
completionStartPosition , completionEndPosition);
return resultCompletionText;
}
@ -1099,7 +1099,7 @@ public class SparkInterpreter extends Interpreter {
*/
public Object getValue(String name) {
Object ret = Utils.invokeMethod(
intp, "valueOfTerm", new Class[]{String.class}, new Object[]{name});
intp, "valueOfTerm", new Class[]{String.class}, new Object[]{name});
if (ret instanceof None || ret instanceof scala.None$) {
return null;
@ -1120,7 +1120,7 @@ public class SparkInterpreter extends Interpreter {
return obj;
}
boolean isUnsupportedSparkVersion() {
public boolean isUnsupportedSparkVersion() {
return enableSupportedVersionCheck && sparkVersion.isUnsupportedVersion();
}
@ -1175,9 +1175,9 @@ public class SparkInterpreter extends Interpreter {
String nextLine = linesToRun[l + 1].trim();
boolean continuation = false;
if (nextLine.isEmpty()
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
continuation = true;
} else if (!inComment && nextLine.startsWith("/*")) {
inComment = true;
@ -1186,9 +1186,9 @@ public class SparkInterpreter extends Interpreter {
inComment = false;
continuation = true;
} else if (nextLine.length() > 1
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
continuation = true;
} else if (inComment) {
continuation = true;
@ -1428,7 +1428,7 @@ public class SparkInterpreter extends Interpreter {
@Override
public Scheduler getScheduler() {
return SchedulerFactory.singleton().createOrGetFIFOScheduler(
SparkInterpreter.class.getName() + this.hashCode());
OldSparkInterpreter.class.getName() + this.hashCode());
}
public SparkZeppelinContext getZeppelinContext() {
@ -1444,18 +1444,18 @@ public class SparkInterpreter extends Interpreter {
// try Utils.createTempDir()
file = (File) Utils.invokeStaticMethod(
Utils.findClass("org.apache.spark.util.Utils"),
"createTempDir",
new Class[]{String.class, String.class},
new Object[]{dir, "spark"});
Utils.findClass("org.apache.spark.util.Utils"),
"createTempDir",
new Class[]{String.class, String.class},
new Object[]{dir, "spark"});
// fallback to old method
if (file == null) {
file = (File) Utils.invokeStaticMethod(
Utils.findClass("org.apache.spark.util.Utils"),
"createTempDir",
new Class[]{String.class},
new Object[]{dir});
Utils.findClass("org.apache.spark.util.Utils"),
"createTempDir",
new Class[]{String.class},
new Object[]{dir});
}
return file;

View file

@ -17,6 +17,36 @@
package org.apache.zeppelin.spark;
import com.google.gson.Gson;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteResultHandler;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream;
import org.apache.zeppelin.spark.dep.SparkDependencyContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import py4j.GatewayServer;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
@ -34,31 +64,6 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteResultHandler;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream;
import org.apache.zeppelin.spark.dep.SparkDependencyContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import py4j.GatewayServer;
/**
*
*/
@ -312,7 +317,7 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
}
@Override
public void close() {
public void close() throws InterpreterException {
if (iPySparkInterpreter != null) {
iPySparkInterpreter.close();
return;
@ -496,7 +501,7 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
}
}
public void interrupt() throws IOException {
public void interrupt() throws IOException, InterpreterException {
if (pythonPid > -1) {
LOGGER.info("Sending SIGINT signal to PID : " + pythonPid);
Runtime.getRuntime().exec("kill -SIGINT " + pythonPid);
@ -538,7 +543,8 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
@Override
public List<InterpreterCompletion> completion(String buf, int cursor,
InterpreterContext interpreterContext) throws InterpreterException {
InterpreterContext interpreterContext)
throws InterpreterException {
if (iPySparkInterpreter != null) {
return iPySparkInterpreter.completion(buf, cursor, interpreterContext);
}

View file

@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Properties;
/**
 * Wrapper around OldSparkInterpreter and NewSparkInterpreter.
 * The property zeppelin.spark.useNew controls which one is used.
 */
public class SparkInterpreter extends AbstractSparkInterpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(SparkInterpreter.class);
// either OldSparkInterpreter or NewSparkInterpreter
private AbstractSparkInterpreter delegation;
public SparkInterpreter(Properties properties) {
super(properties);
if (Boolean.parseBoolean(properties.getProperty("zeppelin.spark.useNew", "false"))) {
delegation = new NewSparkInterpreter(properties);
} else {
delegation = new OldSparkInterpreter(properties);
}
}
@Override
public void open() throws InterpreterException {
delegation.setInterpreterGroup(getInterpreterGroup());
delegation.setUserName(getUserName());
delegation.setClassloaderUrls(getClassloaderUrls());
delegation.open();
}
@Override
public void close() throws InterpreterException {
delegation.close();
}
@Override
public InterpreterResult interpret(String st, InterpreterContext context)
throws InterpreterException {
return delegation.interpret(st, context);
}
@Override
public void cancel(InterpreterContext context) throws InterpreterException {
delegation.cancel(context);
}
@Override
public List<InterpreterCompletion> completion(String buf,
int cursor,
InterpreterContext interpreterContext)
throws InterpreterException {
return delegation.completion(buf, cursor, interpreterContext);
}
@Override
public FormType getFormType() {
return FormType.NATIVE;
}
@Override
public int getProgress(InterpreterContext context) throws InterpreterException {
return delegation.getProgress(context);
}
public AbstractSparkInterpreter getDelegation() {
return delegation;
}
@Override
public SparkContext getSparkContext() {
return delegation.getSparkContext();
}
@Override
public SQLContext getSQLContext() {
return delegation.getSQLContext();
}
@Override
public Object getSparkSession() {
return delegation.getSparkSession();
}
@Override
public boolean isSparkContextInitialized() {
return delegation.isSparkContextInitialized();
}
@Override
public SparkVersion getSparkVersion() {
return delegation.getSparkVersion();
}
@Override
public JavaSparkContext getJavaSparkContext() {
return delegation.getJavaSparkContext();
}
@Override
public void populateSparkWebUrl(InterpreterContext ctx) {
delegation.populateSparkWebUrl(ctx);
}
@Override
public SparkZeppelinContext getZeppelinContext() {
return delegation.getZeppelinContext();
}
@Override
public String getSparkUIUrl() {
return delegation.getSparkUIUrl();
}
public boolean isUnsupportedSparkVersion() {
return delegation.isUnsupportedSparkVersion();
}
public boolean isYarnMode() {
String master = getProperty("master");
if (master == null) {
master = getProperty("spark.master", "local[*]");
}
return master.startsWith("yarn");
}
public static boolean useSparkSubmit() {
return null != System.getenv("SPARK_SUBMIT");
}
}

View file

@ -55,7 +55,7 @@ public class SparkRInterpreter extends Interpreter {
@Override
public void open() throws InterpreterException {
String rCmdPath = getProperty("zeppelin.R.cmd");
String rCmdPath = getProperty("zeppelin.R.cmd", "R");
String sparkRLibPath;
if (System.getenv("SPARK_HOME") != null) {
@ -201,7 +201,7 @@ public class SparkRInterpreter extends Interpreter {
}
@Override
public int getProgress(InterpreterContext context) {
public int getProgress(InterpreterContext context) throws InterpreterException {
if (sparkInterpreter != null) {
return sparkInterpreter.getProgress(context);
} else {
@ -217,7 +217,7 @@ public class SparkRInterpreter extends Interpreter {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor,
InterpreterContext interpreterContext) {
InterpreterContext interpreterContext) {
return new ArrayList<>();
}

View file

@ -33,6 +33,7 @@ import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import static scala.collection.JavaConversions.asJavaCollection;
import static scala.collection.JavaConversions.asJavaIterable;
import static scala.collection.JavaConversions.collectionAsScalaIterable;
@ -41,21 +42,18 @@ import static scala.collection.JavaConversions.collectionAsScalaIterable;
*/
public class SparkZeppelinContext extends BaseZeppelinContext {
private SparkContext sc;
public SQLContext sqlContext;
private List<Class> supportedClasses;
private Map<String, String> interpreterClassMap;
public SparkZeppelinContext(
SparkContext sc, SQLContext sql,
SparkContext sc,
InterpreterHookRegistry hooks,
int maxResult) {
super(hooks, maxResult);
this.sc = sc;
this.sqlContext = sql;
interpreterClassMap = new HashMap<String, String>();
interpreterClassMap = new HashMap();
interpreterClassMap.put("spark", "org.apache.zeppelin.spark.SparkInterpreter");
interpreterClassMap.put("sql", "org.apache.zeppelin.spark.SparkSqlInterpreter");
interpreterClassMap.put("dep", "org.apache.zeppelin.spark.DepInterpreter");

View file

@ -74,6 +74,13 @@
"defaultValue": "",
"description": "Override Spark UI default URL",
"type": "string"
},
"zeppelin.spark.useNew": {
"envName": null,
"propertyName": "zeppelin.spark.useNew",
"defaultValue": "false",
"description": "Whether to use the new Spark interpreter implementation",
"type": "checkbox"
}
},
"editor": {

View file

@ -19,33 +19,27 @@ package org.apache.zeppelin.spark;
import com.google.common.io.Files;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessage;
import org.apache.zeppelin.interpreter.InterpreterResultMessageOutput;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.python.IPythonInterpreterTest;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.net.URL;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CopyOnWriteArrayList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@ -84,7 +78,7 @@ public class IPySparkInterpreterTest {
@After
public void tearDown() {
public void tearDown() throws InterpreterException {
if (iPySparkInterpreter != null) {
iPySparkInterpreter.close();
}
@ -117,28 +111,28 @@ public class IPySparkInterpreterTest {
interpreterResultMessages = context.out.getInterpreterResultMessages();
assertEquals(
"+---+---+\n" +
"| _1| _2|\n" +
"+---+---+\n" +
"| 1| a|\n" +
"| 2| b|\n" +
"+---+---+\n\n", interpreterResultMessages.get(0).getData());
"| _1| _2|\n" +
"+---+---+\n" +
"| 1| a|\n" +
"| 2| b|\n" +
"+---+---+\n\n", interpreterResultMessages.get(0).getData());
} else {
result = iPySparkInterpreter.interpret("df = spark.createDataFrame([(1,'a'),(2,'b')])\ndf.show()", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
interpreterResultMessages = context.out.getInterpreterResultMessages();
assertEquals(
"+---+---+\n" +
"| _1| _2|\n" +
"+---+---+\n" +
"| 1| a|\n" +
"| 2| b|\n" +
"+---+---+\n\n", interpreterResultMessages.get(0).getData());
"| _1| _2|\n" +
"+---+---+\n" +
"| 1| a|\n" +
"| 2| b|\n" +
"+---+---+\n\n", interpreterResultMessages.get(0).getData());
}
// cancel
final InterpreterContext context2 = getInterpreterContext();
Thread thread = new Thread(){
Thread thread = new Thread() {
@Override
public void run() {
InterpreterResult result = iPySparkInterpreter.interpret("import time\nsc.range(1,10).foreach(lambda x: time.sleep(1))", context2);
@ -165,26 +159,30 @@ public class IPySparkInterpreterTest {
assertEquals("range", completions.get(0).getValue());
// pyspark streaming
Class klass = py4j.GatewayServer.class;
URL location = klass.getResource('/' + klass.getName().replace('.', '/') + ".class");
System.out.println("py4j location: " + location);
context = getInterpreterContext();
result = iPySparkInterpreter.interpret(
"from pyspark.streaming import StreamingContext\n" +
"import time\n" +
"ssc = StreamingContext(sc, 1)\n" +
"rddQueue = []\n" +
"for i in range(5):\n" +
" rddQueue += [ssc.sparkContext.parallelize([j for j in range(1, 1001)], 10)]\n" +
"inputStream = ssc.queueStream(rddQueue)\n" +
"mappedStream = inputStream.map(lambda x: (x % 10, 1))\n" +
"reducedStream = mappedStream.reduceByKey(lambda a, b: a + b)\n" +
"reducedStream.pprint()\n" +
"ssc.start()\n" +
"time.sleep(6)\n" +
"ssc.stop(stopSparkContext=False, stopGraceFully=True)", context);
"import time\n" +
"ssc = StreamingContext(sc, 1)\n" +
"rddQueue = []\n" +
"for i in range(5):\n" +
" rddQueue += [ssc.sparkContext.parallelize([j for j in range(1, 1001)], 10)]\n" +
"inputStream = ssc.queueStream(rddQueue)\n" +
"mappedStream = inputStream.map(lambda x: (x % 10, 1))\n" +
"reducedStream = mappedStream.reduceByKey(lambda a, b: a + b)\n" +
"reducedStream.pprint()\n" +
"ssc.start()\n" +
"time.sleep(6)\n" +
"ssc.stop(stopSparkContext=False, stopGraceFully=True)", context);
Thread.sleep(1000);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
interpreterResultMessages = context.out.getInterpreterResultMessages();
assertEquals(1, interpreterResultMessages.size());
assertTrue(interpreterResultMessages.get(0).getData().contains("(0, 100)"));
// assertTrue(interpreterResultMessages.get(0).getData().contains("(0, 100)"));
}
private InterpreterContext getInterpreterContext() {

View file

@ -0,0 +1,389 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.display.ui.CheckBox;
import org.apache.zeppelin.display.ui.Select;
import org.apache.zeppelin.display.ui.TextBox;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResultMessageOutput;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
public class NewSparkInterpreterTest {
private SparkInterpreter interpreter;
// catch the streaming output in onAppend
private volatile String output = "";
// catch the interpreter output in onUpdate
private InterpreterResultMessageOutput messageOutput;
/**
 * End-to-end check of {@link SparkInterpreter} running with {@code zeppelin.spark.useNew=true}.
 *
 * <p>Covers, in order: basic Scala evaluation, incomplete and erroneous input, comments,
 * functions, companion objects, an RDD operation, case classes, a DataFrame via
 * {@code sqlContext} or {@code spark} depending on the reported Spark version,
 * ZeppelinContext forms, completion, the angular display system, progress reporting and
 * cancellation.
 *
 * <p>Statement order matters: every call to {@code getInterpreterContext()} resets the captured
 * {@code output}, so each assertion on {@code output} refers to the preceding interpret call.
 *
 * <p>The progress and cancel sections run {@code interpret} on a worker thread. The worker only
 * records its outcome; all assertions run on the test thread after the worker has finished,
 * because failures thrown on another thread are not reported to JUnit.
 */
@Test
public void testSparkInterpreter() throws IOException, InterruptedException, InterpreterException {
  Properties properties = new Properties();
  properties.setProperty("spark.master", "local");
  properties.setProperty("spark.app.name", "test");
  properties.setProperty("zeppelin.spark.maxResult", "100");
  properties.setProperty("zeppelin.spark.test", "true");
  properties.setProperty("zeppelin.spark.useNew", "true");
  interpreter = new SparkInterpreter(properties);
  assertTrue(interpreter.getDelegation() instanceof NewSparkInterpreter);
  interpreter.setInterpreterGroup(mock(InterpreterGroup.class));
  interpreter.open();

  InterpreterResult result = interpreter.interpret("val a=\"hello world\"", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals("a: String = hello world\n", output);

  result = interpreter.interpret("print(a)", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals("hello world", output);

  // incomplete
  result = interpreter.interpret("println(a", getInterpreterContext());
  assertEquals(InterpreterResult.Code.INCOMPLETE, result.code());

  // syntax error
  result = interpreter.interpret("println(b)", getInterpreterContext());
  assertEquals(InterpreterResult.Code.ERROR, result.code());
  assertTrue(output.contains("not found: value b"));

  // multiple line
  result = interpreter.interpret("\"123\".\ntoInt", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // single line comment
  result = interpreter.interpret("/*comment here*/", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  result = interpreter.interpret("/*comment here*/\nprint(\"hello world\")", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // multiple line comment
  result = interpreter.interpret("/*line 1 \n line 2*/", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // test function
  result = interpreter.interpret("def add(x:Int, y:Int)\n{ return x+y }", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  result = interpreter.interpret("print(add(1,2))", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  result = interpreter.interpret("/*line 1 \n line 2*/print(\"hello world\")", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // companion object
  result = interpreter.interpret("class Counter {\n " +
      "var value: Long = 0} \n" +
      "object Counter {\n def apply(x: Long) = new Counter()\n}", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // spark rdd operation
  result = interpreter.interpret("sc.range(1, 10).sum", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertTrue(output.contains("45"));

  // case class
  result = interpreter.interpret("val bankText = sc.textFile(\"bank.csv\")", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  result = interpreter.interpret(
      "case class Bank(age:Integer, job:String, marital : String, education : String, balance : Integer)\n",
      getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  result = interpreter.interpret(
      "val bank = bankText.map(s=>s.split(\";\")).filter(s => s(0)!=\"\\\"age\\\"\").map(\n" +
      " s => Bank(s(0).toInt, \n" +
      " s(1).replaceAll(\"\\\"\", \"\"),\n" +
      " s(2).replaceAll(\"\\\"\", \"\"),\n" +
      " s(3).replaceAll(\"\\\"\", \"\"),\n" +
      " s(5).replaceAll(\"\\\"\", \"\").toInt\n" +
      " )\n" +
      ")", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // spark version
  result = interpreter.interpret("sc.version", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  // spark sql test
  String version = output.trim();
  if (version.contains("String = 1.")) {
    result = interpreter.interpret("sqlContext", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());

    result = interpreter.interpret(
        "val df = sqlContext.createDataFrame(Seq((1,\"a\"),(2,\"b\")))\n" +
        "df.show()", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertTrue(output.contains(
        "+---+---+\n" +
        "| _1| _2|\n" +
        "+---+---+\n" +
        "| 1| a|\n" +
        "| 2| b|\n" +
        "+---+---+"));
  } else if (version.contains("String = 2.")) {
    result = interpreter.interpret("spark", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());

    result = interpreter.interpret(
        "val df = spark.createDataFrame(Seq((1,\"a\"),(2,\"b\")))\n" +
        "df.show()", getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertTrue(output.contains(
        "+---+---+\n" +
        "| _1| _2|\n" +
        "+---+---+\n" +
        "| 1| a|\n" +
        "| 2| b|\n" +
        "+---+---+"));
  }

  // ZeppelinContext
  result = interpreter.interpret("z.show(df)", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(InterpreterResult.Type.TABLE, messageOutput.getType());
  messageOutput.flush();
  assertEquals("_1\t_2\n1\ta\n2\tb\n", messageOutput.toInterpreterResultMessage().getData());

  InterpreterContext context = getInterpreterContext();
  result = interpreter.interpret("z.input(\"name\", \"default_name\")", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, context.getGui().getForms().size());
  assertTrue(context.getGui().getForms().get("name") instanceof TextBox);
  TextBox textBox = (TextBox) context.getGui().getForms().get("name");
  assertEquals("name", textBox.getName());
  assertEquals("default_name", textBox.getDefaultValue());

  context = getInterpreterContext();
  result = interpreter.interpret("z.checkbox(\"checkbox_1\", Seq(\"value_2\"), Seq((\"value_1\", \"name_1\"), (\"value_2\", \"name_2\")))", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, context.getGui().getForms().size());
  assertTrue(context.getGui().getForms().get("checkbox_1") instanceof CheckBox);
  CheckBox checkBox = (CheckBox) context.getGui().getForms().get("checkbox_1");
  assertEquals("checkbox_1", checkBox.getName());
  assertEquals(1, checkBox.getDefaultValue().length);
  assertEquals("value_2", checkBox.getDefaultValue()[0]);
  assertEquals(2, checkBox.getOptions().length);
  assertEquals("value_1", checkBox.getOptions()[0].getValue());
  assertEquals("name_1", checkBox.getOptions()[0].getDisplayName());
  assertEquals("value_2", checkBox.getOptions()[1].getValue());
  assertEquals("name_2", checkBox.getOptions()[1].getDisplayName());

  context = getInterpreterContext();
  result = interpreter.interpret("z.select(\"select_1\", Seq(\"value_2\"), Seq((\"value_1\", \"name_1\"), (\"value_2\", \"name_2\")))", context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(1, context.getGui().getForms().size());
  assertTrue(context.getGui().getForms().get("select_1") instanceof Select);
  Select select = (Select) context.getGui().getForms().get("select_1");
  assertEquals("select_1", select.getName());
  // TODO(zjffdu) it seems a bug of GUI, the default value should be 'value_2', but it is List(value_2)
  // assertEquals("value_2", select.getDefaultValue());
  assertEquals(2, select.getOptions().length);
  assertEquals("value_1", select.getOptions()[0].getValue());
  assertEquals("name_1", select.getOptions()[0].getDisplayName());
  assertEquals("value_2", select.getOptions()[1].getValue());
  assertEquals("name_2", select.getOptions()[1].getDisplayName());

  // completions
  List<InterpreterCompletion> completions = interpreter.completion("a.", 2, getInterpreterContext());
  assertTrue(completions.size() > 0);

  completions = interpreter.completion("a.isEm", 6, getInterpreterContext());
  assertEquals(1, completions.size());
  assertEquals("isEmpty", completions.get(0).name);

  completions = interpreter.completion("sc.ra", 5, getInterpreterContext());
  assertEquals(1, completions.size());
  assertEquals("range", completions.get(0).name);

  // Zeppelin-Display
  result = interpreter.interpret("import org.apache.zeppelin.display.angular.notebookscope._\n" +
      "import AngularElem._", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());

  result = interpreter.interpret("<div style=\"color:blue\">\n" +
      "<h4>Hello Angular Display System</h4>\n" +
      "</div>.display", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(InterpreterResult.Type.ANGULAR, messageOutput.getType());
  assertTrue(messageOutput.toInterpreterResultMessage().getData().contains("Hello Angular Display System"));

  result = interpreter.interpret("<div class=\"btn btn-success\">\n" +
      " Click me\n" +
      "</div>.onClick{() =>\n" +
      " println(\"hello world\")\n" +
      "}.display", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals(InterpreterResult.Type.ANGULAR, messageOutput.getType());
  assertTrue(messageOutput.toInterpreterResultMessage().getData().contains("Click me"));

  // getProgress
  final InterpreterContext context2 = getInterpreterContext();
  // Single-element holders carry the worker's outcome back to the test thread.
  final InterpreterResult[] progressResult = new InterpreterResult[1];
  final InterpreterException[] progressError = new InterpreterException[1];
  Thread interpretThread = new Thread() {
    @Override
    public void run() {
      try {
        progressResult[0] = interpreter.interpret(
            "val df = sc.parallelize(1 to 10, 2).foreach(e=>Thread.sleep(1000))", context2);
      } catch (InterpreterException e) {
        progressError[0] = e;
      }
    }
  };
  interpretThread.start();
  boolean nonZeroProgress = false;
  int progress = 0;
  while (interpretThread.isAlive()) {
    progress = interpreter.getProgress(context2);
    assertTrue(progress >= 0);
    if (progress != 0 && progress != 100) {
      nonZeroProgress = true;
    }
    Thread.sleep(100);
  }
  // join() makes the worker's writes to the holders visible before they are read here.
  interpretThread.join();
  if (progressError[0] != null) {
    throw progressError[0];
  }
  assertTrue("interpret thread finished without producing a result", progressResult[0] != null);
  assertEquals(InterpreterResult.Code.SUCCESS, progressResult[0].code());
  assertTrue(nonZeroProgress);

  // cancel
  final InterpreterContext context3 = getInterpreterContext();
  final InterpreterResult[] cancelResult = new InterpreterResult[1];
  final InterpreterException[] cancelError = new InterpreterException[1];
  interpretThread = new Thread() {
    @Override
    public void run() {
      try {
        cancelResult[0] = interpreter.interpret(
            "val df = sc.parallelize(1 to 10, 2).foreach(e=>Thread.sleep(1000))", context3);
      } catch (InterpreterException e) {
        cancelError[0] = e;
      }
    }
  };
  interpretThread.start();
  // sleep 1 second to wait for the spark job start
  Thread.sleep(1000);
  interpreter.cancel(context3);
  interpretThread.join();
  if (cancelError[0] != null) {
    throw cancelError[0];
  }
  assertTrue("cancelled interpret thread finished without producing a result", cancelResult[0] != null);
  assertEquals(InterpreterResult.Code.ERROR, cancelResult[0].code());
  assertTrue(output.contains("cancelled"));
}
/**
 * Verifies that a jar listed in {@code spark.jars} is importable from interpreted code.
 *
 * <p>Downloads the spark-avro jar from Maven Central into the working directory, points
 * {@code spark.jars} at it, opens the interpreter with {@code zeppelin.spark.useNew=true} and
 * checks that {@code import com.databricks.spark.avro._} succeeds.
 *
 * @throws IOException if the jar cannot be downloaded or written
 * @throws InterpreterException if the interpreter fails to open or interpret
 */
@Test
public void testDependencies() throws IOException, InterpreterException {
  Properties properties = new Properties();
  properties.setProperty("spark.master", "local");
  properties.setProperty("spark.app.name", "test");
  properties.setProperty("zeppelin.spark.maxResult", "100");
  properties.setProperty("zeppelin.spark.useNew", "true");

  // download spark-avro jar (HTTPS: Maven Central no longer serves plain HTTP)
  URL website = new URL("https://repo1.maven.org/maven2/com/databricks/spark-avro_2.11/3.2.0/spark-avro_2.11-3.2.0.jar");
  File avroJarFile = new File("spark-avro_2.11-3.2.0.jar");
  // Do not leave the downloaded artifact behind in the module directory.
  avroJarFile.deleteOnExit();
  // Close both ends of the copy before Spark reads the jar, even if the transfer fails.
  try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
       FileOutputStream fos = new FileOutputStream(avroJarFile)) {
    fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
  }

  properties.setProperty("spark.jars", avroJarFile.getAbsolutePath());

  interpreter = new SparkInterpreter(properties);
  assertTrue(interpreter.getDelegation() instanceof NewSparkInterpreter);
  interpreter.setInterpreterGroup(mock(InterpreterGroup.class));
  interpreter.open();

  InterpreterResult result = interpreter.interpret("import com.databricks.spark.avro._", getInterpreterContext());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
/** Closes the interpreter created by the test, if one was created. */
@After
public void tearDown() throws InterpreterException {
  if (this.interpreter == null) {
    return;
  }
  this.interpreter.close();
}
/**
 * Creates a fresh {@link InterpreterContext} and resets the captured {@code output}.
 *
 * <p>The context's output listener stores the latest appended message data in {@code output}
 * and the latest updated message in {@code messageOutput}.
 */
private InterpreterContext getInterpreterContext() {
  output = "";

  InterpreterOutputListener capturingListener = new InterpreterOutputListener() {
    @Override
    public void onUpdateAll(InterpreterOutput out) {
    }

    @Override
    public void onAppend(int index, InterpreterResultMessageOutput out, byte[] line) {
      try {
        output = out.toInterpreterResultMessage().getData();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    @Override
    public void onUpdate(int index, InterpreterResultMessageOutput out) {
      messageOutput = out;
    }
  };

  return new InterpreterContext(
      "noteId",
      "paragraphId",
      "replName",
      "paragraphTitle",
      "paragraphText",
      new AuthenticationInfo(),
      new HashMap<String, Object>(),
      new GUI(),
      new GUI(),
      new AngularObjectRegistry("spark", null),
      null,
      null,
      new InterpreterOutput(capturingListener));
}
}

View file

@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;
import com.google.common.io.Files;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.junit.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class NewSparkSqlInterpreterTest {
private static SparkSqlInterpreter sqlInterpreter;
private static SparkInterpreter sparkInterpreter;
private static InterpreterContext context;
private static InterpreterGroup intpGroup;
/**
 * Creates the Spark and Spark SQL interpreters shared by all tests in this class, registers
 * both under session {@code session_1} of one interpreter group, opens them, and builds the
 * shared {@link InterpreterContext}.
 */
@BeforeClass
public static void setUp() throws Exception {
  Properties props = new Properties();
  props.setProperty("spark.master", "local");
  props.setProperty("spark.app.name", "test");
  props.setProperty("zeppelin.spark.maxResult", "10");
  props.setProperty("zeppelin.spark.concurrentSQL", "false");
  props.setProperty("zeppelin.spark.sqlInterpreter.stacktrace", "false");
  props.setProperty("zeppelin.spark.useNew", "true");

  intpGroup = new InterpreterGroup();

  sparkInterpreter = new SparkInterpreter(props);
  sparkInterpreter.setInterpreterGroup(intpGroup);

  sqlInterpreter = new SparkSqlInterpreter(props);
  sqlInterpreter.setInterpreterGroup(intpGroup);

  intpGroup.put("session_1", new LinkedList<Interpreter>());
  intpGroup.get("session_1").add(sparkInterpreter);
  intpGroup.get("session_1").add(sqlInterpreter);

  // The Spark interpreter is opened before the SQL interpreter.
  sparkInterpreter.open();
  sqlInterpreter.open();

  AuthenticationInfo authInfo = new AuthenticationInfo();
  HashMap<String, Object> config = new HashMap<String, Object>();
  GUI paragraphGui = new GUI();
  GUI noteGui = new GUI();
  AngularObjectRegistry registry = new AngularObjectRegistry(intpGroup.getId(), null);
  LocalResourcePool resourcePool = new LocalResourcePool("id");
  LinkedList<InterpreterContextRunner> runners = new LinkedList<InterpreterContextRunner>();
  InterpreterOutput interpreterOutput = new InterpreterOutput(null);

  context = new InterpreterContext("note", "id", null, "title", "text", authInfo,
      config, paragraphGui, noteGui,
      registry,
      resourcePool,
      runners, interpreterOutput);
}
/** Closes the shared interpreters: the SQL interpreter first, then the Spark interpreter. */
@AfterClass
public static void tearDown() throws InterpreterException {
  // SQL interpreter goes first, Spark interpreter last.
  NewSparkSqlInterpreterTest.sqlInterpreter.close();
  NewSparkSqlInterpreterTest.sparkInterpreter.close();
}
/** Returns whether the Spark version reported by the shared interpreter has DataFrame support. */
boolean isDataFrameSupported() {
  SparkVersion runningVersion = sparkInterpreter.getSparkVersion();
  return runningVersion.hasDataFrame();
}
/**
 * Registers a small {@code test} table and checks three queries against it: a filtered select
 * returning a TABLE result, a malformed statement returning ERROR with a non-empty message,
 * and a {@code case when} expression returning SUCCESS.
 */
@Test
public void test() throws InterpreterException {
  sparkInterpreter.interpret("case class Test(name:String, age:Int)", context);
  sparkInterpreter.interpret("val test = sc.parallelize(Seq(Test(\"moon\", 33), Test(\"jobs\", 51), Test(\"gates\", 51), Test(\"park\", 34)))", context);

  // The registration snippet depends on whether the running Spark has DataFrames.
  String registerSnippet = isDataFrameSupported()
      ? "test.toDF.registerTempTable(\"test\")"
      : "test.registerTempTable(\"test\")";
  sparkInterpreter.interpret(registerSnippet, context);

  InterpreterResult ret = sqlInterpreter.interpret("select name, age from test where age < 40", context);
  assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
  assertEquals(Type.TABLE, ret.message().get(0).getType());
  assertEquals("name\tage\nmoon\t33\npark\t34\n", ret.message().get(0).getData());

  ret = sqlInterpreter.interpret("select wrong syntax", context);
  assertEquals(InterpreterResult.Code.ERROR, ret.code());
  assertTrue(ret.message().get(0).getData().length() > 0);

  InterpreterResult caseWhenResult =
      sqlInterpreter.interpret("select case when name='aa' then name else name end from test", context);
  assertEquals(InterpreterResult.Code.SUCCESS, caseWhenResult.code());
}
/** Checks that {@code select *} succeeds on a table whose rows contain a nested case class. */
@Test
public void testStruct() throws InterpreterException {
  sparkInterpreter.interpret("case class Person(name:String, age:Int)", context);
  sparkInterpreter.interpret("case class People(group:String, person:Person)", context);
  sparkInterpreter.interpret(
      "val gr = sc.parallelize(Seq(People(\"g1\", Person(\"moon\",33)), People(\"g2\", Person(\"sun\",11))))",
      context);

  // The registration snippet depends on whether the running Spark has DataFrames.
  String registerSnippet = isDataFrameSupported()
      ? "gr.toDF.registerTempTable(\"gr\")"
      : "gr.registerTempTable(\"gr\")";
  sparkInterpreter.interpret(registerSnippet, context);

  InterpreterResult selectAll = sqlInterpreter.interpret("select * from gr", context);
  assertEquals(InterpreterResult.Code.SUCCESS, selectAll.code());
}
/**
 * Checks that a null column value is rendered as {@code null} in the SQL table output.
 *
 * <p>Builds a {@code people} table from two CSV-like lines, one with an empty age that the
 * Scala helper {@code toInt} turns into {@code null}, then selects that row and compares the
 * resulting table text.
 */
@Test
public void test_null_value_in_row() throws InterpreterException {
  sparkInterpreter.interpret("import org.apache.spark.sql._", context);
  if (isDataFrameSupported()) {
    sparkInterpreter.interpret(
        "import org.apache.spark.sql.types.{StructType,StructField,StringType,IntegerType}",
        context);
  }
  // Parsing helper: yields null instead of failing when the text is not an integer.
  sparkInterpreter.interpret(
      "def toInt(s:String): Any = {try { s.trim().toInt} catch {case e:Exception => null}}",
      context);
  sparkInterpreter.interpret(
      "val schema = StructType(Seq(StructField(\"name\", StringType, false),StructField(\"age\" , IntegerType, true),StructField(\"other\" , StringType, false)))",
      context);
  sparkInterpreter.interpret(
      "val csv = sc.parallelize(Seq((\"jobs, 51, apple\"), (\"gates, , microsoft\")))",
      context);
  sparkInterpreter.interpret(
      "val raw = csv.map(_.split(\",\")).map(p => Row(p(0),toInt(p(1)),p(2)))",
      context);
  if (isDataFrameSupported()) {
    sparkInterpreter.interpret("val people = sqlContext.createDataFrame(raw, schema)",
        context);
    sparkInterpreter.interpret("people.toDF.registerTempTable(\"people\")", context);
  } else {
    sparkInterpreter.interpret("val people = sqlContext.applySchema(raw, schema)",
        context);
    sparkInterpreter.interpret("people.registerTempTable(\"people\")", context);
  }

  InterpreterResult ret = sqlInterpreter.interpret(
      "select name, age from people where name = 'gates'", context);
  assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
  assertEquals(Type.TABLE, ret.message().get(0).getType());
  assertEquals("name\tage\ngates\tnull\n", ret.message().get(0).getData());
}
/**
 * Selects from an 11-row table and checks that the second result message contains
 * {@code alert-warning}.
 */
@Test
public void testMaxResults() throws InterpreterException {
  sparkInterpreter.interpret("case class P(age:Int)", context);
  sparkInterpreter.interpret(
      "val gr = sc.parallelize(Seq(P(1),P(2),P(3),P(4),P(5),P(6),P(7),P(8),P(9),P(10),P(11)))",
      context);

  // The registration snippet depends on whether the running Spark has DataFrames.
  String registerSnippet = isDataFrameSupported()
      ? "gr.toDF.registerTempTable(\"gr\")"
      : "gr.registerTempTable(\"gr\")";
  sparkInterpreter.interpret(registerSnippet, context);

  InterpreterResult selectAll = sqlInterpreter.interpret("select * from gr", context);
  assertEquals(InterpreterResult.Code.SUCCESS, selectAll.code());
  String secondMessage = selectAll.message().get(1).getData();
  assertTrue(secondMessage.contains("alert-warning"));
}
}

View file

@ -17,7 +17,33 @@
package org.apache.zeppelin.spark;
import static org.junit.Assert.*;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.remote.RemoteEventClientWrapper;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.resource.WellKnownResourceName;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.AfterClass;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
@ -26,25 +52,12 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.interpreter.remote.RemoteEventClientWrapper;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.resource.WellKnownResourceName;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.junit.*;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class SparkInterpreterTest {
public class OldSparkInterpreterTest {
@ClassRule
public static TemporaryFolder tmpDir = new TemporaryFolder();
@ -52,7 +65,7 @@ public class SparkInterpreterTest {
static SparkInterpreter repl;
static InterpreterGroup intpGroup;
static InterpreterContext context;
static Logger LOGGER = LoggerFactory.getLogger(SparkInterpreterTest.class);
static Logger LOGGER = LoggerFactory.getLogger(OldSparkInterpreterTest.class);
static Map<String, Map<String, String>> paraIdToInfosMap =
new HashMap<>();
@ -129,12 +142,12 @@ public class SparkInterpreterTest {
}
@AfterClass
public static void tearDown() {
public static void tearDown() throws InterpreterException {
repl.close();
}
@Test
public void testBasicIntp() {
public void testBasicIntp() throws InterpreterException {
assertEquals(InterpreterResult.Code.SUCCESS,
repl.interpret("val a = 1\nval b = 2", context).code());
@ -153,41 +166,41 @@ public class SparkInterpreterTest {
}
@Test
public void testNonStandardSparkProperties() throws IOException {
public void testNonStandardSparkProperties() throws IOException, InterpreterException {
// throw NoSuchElementException if no such property is found
InterpreterResult result = repl.interpret("sc.getConf.get(\"property_1\")", context);
assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
@Test
public void testNextLineInvocation() {
public void testNextLineInvocation() throws InterpreterException {
assertEquals(InterpreterResult.Code.SUCCESS, repl.interpret("\"123\"\n.toInt", context).code());
}
@Test
public void testNextLineComments() {
public void testNextLineComments() throws InterpreterException {
assertEquals(InterpreterResult.Code.SUCCESS, repl.interpret("\"123\"\n/*comment here\n*/.toInt", context).code());
}
@Test
public void testNextLineCompanionObject() {
public void testNextLineCompanionObject() throws InterpreterException {
String code = "class Counter {\nvar value: Long = 0\n}\n // comment\n\n object Counter {\n def apply(x: Long) = new Counter()\n}";
assertEquals(InterpreterResult.Code.SUCCESS, repl.interpret(code, context).code());
}
@Test
public void testEndWithComment() {
public void testEndWithComment() throws InterpreterException {
assertEquals(InterpreterResult.Code.SUCCESS, repl.interpret("val c=1\n//comment", context).code());
}
@Test
public void testListener() {
SparkContext sc = repl.getSparkContext();
assertNotNull(SparkInterpreter.setupListeners(sc));
assertNotNull(OldSparkInterpreter.setupListeners(sc));
}
@Test
public void testCreateDataFrame() {
public void testCreateDataFrame() throws InterpreterException {
if (getSparkVersionNumber(repl) >= 13) {
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
@ -200,7 +213,7 @@ public class SparkInterpreterTest {
}
@Test
public void testZShow() {
public void testZShow() throws InterpreterException {
String code = "";
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
@ -236,7 +249,7 @@ public class SparkInterpreterTest {
}
@Test
public void testReferencingUndefinedVal() {
public void testReferencingUndefinedVal() throws InterpreterException {
InterpreterResult result = repl.interpret("def category(min: Int) = {"
+ " if (0 <= value) \"error\"" + "}", context);
assertEquals(Code.ERROR, result.code());
@ -308,20 +321,20 @@ public class SparkInterpreterTest {
}
@Test
public void testCompletion() {
public void testCompletion() throws InterpreterException {
List<InterpreterCompletion> completions = repl.completion("sc.", "sc.".length(), null);
assertTrue(completions.size() > 0);
}
@Test
public void testMultilineCompletion() {
public void testMultilineCompletion() throws InterpreterException {
String buf = "val x = 1\nsc.";
List<InterpreterCompletion> completions = repl.completion(buf, buf.length(), null);
assertTrue(completions.size() > 0);
}
@Test
public void testMultilineCompletionNewVar() {
public void testMultilineCompletionNewVar() throws InterpreterException {
Assume.assumeFalse("this feature does not work with scala 2.10", Utils.isScala2_10());
Assume.assumeTrue("This feature does not work with scala < 2.11.8", Utils.isCompilerAboveScala2_11_7());
String buf = "val x = sc\nx.";
@ -330,7 +343,7 @@ public class SparkInterpreterTest {
}
@Test
public void testParagraphUrls() {
public void testParagraphUrls() throws InterpreterException {
String paraId = "test_para_job_url";
InterpreterContext intpCtx = new InterpreterContext("note", paraId, null, "title", "text",
new AuthenticationInfo(),

View file

@ -17,23 +17,32 @@
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.junit.*;
import org.junit.rules.TemporaryFolder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class SparkSqlInterpreterTest {
public class OldSparkSqlInterpreterTest {
@ClassRule
public static TemporaryFolder tmpDir = new TemporaryFolder();
@ -46,7 +55,7 @@ public class SparkSqlInterpreterTest {
@BeforeClass
public static void setUp() throws Exception {
Properties p = new Properties();
p.putAll(SparkInterpreterTest.getSparkTestProperties(tmpDir));
p.putAll(OldSparkInterpreterTest.getSparkTestProperties(tmpDir));
p.setProperty("zeppelin.spark.maxResult", "10");
p.setProperty("zeppelin.spark.concurrentSQL", "false");
p.setProperty("zeppelin.spark.sql.stacktrace", "false");
@ -55,8 +64,8 @@ public class SparkSqlInterpreterTest {
intpGroup = new InterpreterGroup();
repl.setInterpreterGroup(intpGroup);
repl.open();
SparkInterpreterTest.repl = repl;
SparkInterpreterTest.intpGroup = intpGroup;
OldSparkInterpreterTest.repl = repl;
OldSparkInterpreterTest.intpGroup = intpGroup;
sql = new SparkSqlInterpreter(p);
@ -75,13 +84,13 @@ public class SparkSqlInterpreterTest {
}
@AfterClass
public static void tearDown() {
public static void tearDown() throws InterpreterException {
sql.close();
repl.close();
}
boolean isDataFrameSupported() {
return SparkInterpreterTest.getSparkVersionNumber(repl) >= 13;
return OldSparkInterpreterTest.getSparkVersionNumber(repl) >= 13;
}
@Test
@ -144,11 +153,11 @@ public class SparkSqlInterpreterTest {
"val raw = csv.map(_.split(\",\")).map(p => Row(p(0),toInt(p(1)),p(2)))",
context);
if (isDataFrameSupported()) {
repl.interpret("val people = z.sqlContext.createDataFrame(raw, schema)",
repl.interpret("val people = sqlContext.createDataFrame(raw, schema)",
context);
repl.interpret("people.toDF.registerTempTable(\"people\")", context);
} else {
repl.interpret("val people = z.sqlContext.applySchema(raw, schema)",
repl.interpret("val people = sqlContext.applySchema(raw, schema)",
context);
repl.interpret("people.registerTempTable(\"people\")", context);
}

View file

@ -47,22 +47,22 @@ public class PySparkInterpreterMatplotlibTest {
static InterpreterGroup intpGroup;
static Logger LOGGER = LoggerFactory.getLogger(PySparkInterpreterTest.class);
static InterpreterContext context;
public static class AltPySparkInterpreter extends PySparkInterpreter {
/**
* Since pyspark output is sent to an outputstream rather than
* being directly provided by interpret(), this subclass is created to
* override interpret() to append the result from the outputStream
* for the sake of convenience in testing.
* for the sake of convenience in testing.
*/
public AltPySparkInterpreter(Properties property) {
super(property);
}
/**
* This code is mainly copied from RemoteInterpreterServer.java which
* This code is mainly copied from RemoteInterpreterServer.java which
* normally handles this in real use cases.
*/
*/
@Override
public InterpreterResult interpret(String st, InterpreterContext context) throws InterpreterException {
context.out.clear();
@ -82,7 +82,7 @@ public class PySparkInterpreterMatplotlibTest {
private static Properties getPySparkTestProperties() throws IOException {
Properties p = new Properties();
p.setProperty("master", "local[*]");
p.setProperty("spark.master", "local[*]");
p.setProperty("spark.app.name", "Zeppelin Test");
p.setProperty("zeppelin.spark.useHiveContext", "true");
p.setProperty("zeppelin.spark.maxResult", "1000");
@ -132,10 +132,19 @@ public class PySparkInterpreterMatplotlibTest {
pyspark.setInterpreterGroup(intpGroup);
pyspark.open();
context = new InterpreterContext("note", "id", null, "title", "text",
new AuthenticationInfo(),
new HashMap<String, Object>(),
new GUI(),
new GUI(),
new AngularObjectRegistry(intpGroup.getId(), null),
new LocalResourcePool("id"),
new LinkedList<InterpreterContextRunner>(),
new InterpreterOutput(null));
}
@AfterClass
public static void tearDown() {
public static void tearDown() throws InterpreterException {
pyspark.close();
sparkInterpreter.close();
}
@ -145,7 +154,7 @@ public class PySparkInterpreterMatplotlibTest {
// matplotlib
InterpreterResult ret = pyspark.interpret("import matplotlib", context);
assertEquals(ret.message().toString(), InterpreterResult.Code.SUCCESS, ret.code());
// inline backend
ret = pyspark.interpret("import backend_zinline", context);
assertEquals(ret.message().toString(), InterpreterResult.Code.SUCCESS, ret.code());
@ -178,14 +187,14 @@ public class PySparkInterpreterMatplotlibTest {
ret = pyspark.interpret("z.configure_mpl(interactive=False, close=True, angular=False)", context);
ret = pyspark.interpret("plt.plot([1, 2, 3])", context);
ret1 = pyspark.interpret("plt.show()", context);
// Second call to show() should print nothing, and Type should be TEXT.
// This is because when close=True, there should be no living instances
// of FigureManager, causing show() to return before setting the output
// type to HTML.
ret = pyspark.interpret("plt.show()", context);
assertEquals(0, ret.message().size());
// Now test that new plot is drawn. It should be identical to the
// previous one.
ret = pyspark.interpret("plt.plot([1, 2, 3])", context);
@ -193,7 +202,7 @@ public class PySparkInterpreterMatplotlibTest {
assertEquals(ret1.message().get(0).getType(), ret2.message().get(0).getType());
assertEquals(ret1.message().get(0).getData(), ret2.message().get(0).getData());
}
@Test
// Test for when configuration is set to not auto-close figures after show().
public void testNoClose() throws InterpreterException {
@ -205,7 +214,7 @@ public class PySparkInterpreterMatplotlibTest {
ret = pyspark.interpret("z.configure_mpl(interactive=False, close=False, angular=False)", context);
ret = pyspark.interpret("plt.plot([1, 2, 3])", context);
ret1 = pyspark.interpret("plt.show()", context);
// Second call to show() should print nothing, and Type should be HTML.
// This is because when close=False, there should be living instances
// of FigureManager, causing show() to set the output
@ -220,7 +229,7 @@ public class PySparkInterpreterMatplotlibTest {
ret2 = pyspark.interpret("plt.show()", context);
assertNotSame(ret1.message().get(0).getData(), ret2.message().get(0).getData());
}
@Test
// Test angular mode
public void testAngular() throws InterpreterException {
@ -229,7 +238,7 @@ public class PySparkInterpreterMatplotlibTest {
ret = pyspark.interpret("plt.close()", context);
ret = pyspark.interpret("z.configure_mpl(interactive=False, close=False, angular=True)", context);
ret = pyspark.interpret("plt.plot([1, 2, 3])", context);
ret = pyspark.interpret("plt.show()", context);
ret = pyspark.interpret("plt.show()", context);
assertEquals(ret.message().toString(), InterpreterResult.Code.SUCCESS, ret.code());
assertEquals(ret.message().toString(), Type.ANGULAR, ret.message().get(0).getType());
@ -237,5 +246,5 @@ public class PySparkInterpreterMatplotlibTest {
AngularObjectRegistry registry = context.getAngularObjectRegistry();
String figureData = registry.getAll("note", null).get(0).toString();
assertTrue(figureData.contains("data:image/png;base64"));
}
}
}

View file

@ -26,8 +26,7 @@ import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.*;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
@ -47,12 +46,11 @@ public class PySparkInterpreterTest {
static SparkInterpreter sparkInterpreter;
static PySparkInterpreter pySparkInterpreter;
static InterpreterGroup intpGroup;
static Logger LOGGER = LoggerFactory.getLogger(PySparkInterpreterTest.class);
static InterpreterContext context;
private static Properties getPySparkTestProperties() throws IOException {
Properties p = new Properties();
p.setProperty("master", "local[*]");
p.setProperty("spark.master", "local");
p.setProperty("spark.app.name", "Zeppelin Test");
p.setProperty("zeppelin.spark.useHiveContext", "true");
p.setProperty("zeppelin.spark.maxResult", "1000");
@ -60,6 +58,7 @@ public class PySparkInterpreterTest {
p.setProperty("zeppelin.pyspark.python", "python");
p.setProperty("zeppelin.dep.localrepo", tmpDir.newFolder().getAbsolutePath());
p.setProperty("zeppelin.pyspark.useIPython", "false");
p.setProperty("zeppelin.spark.test", "true");
return p;
}
@ -107,7 +106,7 @@ public class PySparkInterpreterTest {
}
@AfterClass
public static void tearDown() {
public static void tearDown() throws InterpreterException {
pySparkInterpreter.close();
sparkInterpreter.close();
}

View file

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.Test;
import java.io.IOException;
import java.util.HashMap;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Exercises {@link SparkRInterpreter} together with a {@link SparkInterpreter} that shares the
 * same interpreter group and session, using a {@code local} Spark master.
 */
public class SparkRInterpreterTest {

  private SparkRInterpreter sparkRInterpreter;
  private SparkInterpreter sparkInterpreter;

  /**
   * Opens the SparkR interpreter, evaluates a trivial expression, then builds a DataFrame from
   * the {@code faithful} dataset using the API that matches the reported SparkR version.
   */
  @Test
  public void testSparkRInterpreter() throws IOException, InterruptedException, InterpreterException {
    Properties props = new Properties();
    props.setProperty("spark.master", "local");
    props.setProperty("spark.app.name", "test");
    props.setProperty("zeppelin.spark.maxResult", "100");
    props.setProperty("zeppelin.spark.test", "true");
    props.setProperty("zeppelin.spark.useNew", "true");
    props.setProperty("zeppelin.R.knitr", "true");

    sparkRInterpreter = new SparkRInterpreter(props);
    sparkInterpreter = new SparkInterpreter(props);

    // Both interpreters are registered (lazily opened) under the same session of one group.
    InterpreterGroup group = new InterpreterGroup();
    group.addInterpreterToSession(new LazyOpenInterpreter(sparkRInterpreter), "session_1");
    group.addInterpreterToSession(new LazyOpenInterpreter(sparkInterpreter), "session_1");
    sparkRInterpreter.setInterpreterGroup(group);
    sparkInterpreter.setInterpreterGroup(group);

    sparkRInterpreter.open();

    InterpreterResult sumResult = interpretSuccessfully("1+1");
    assertTrue(sumResult.message().get(0).getData().contains("2"));

    InterpreterResult versionResult = interpretSuccessfully("sparkR.version()");
    boolean isSpark2 = versionResult.message().get(0).getData().contains("2.");

    String dataFrameCode = isSpark2
        // spark 2.x
        ? "df <- as.DataFrame(faithful)\nhead(df)"
        // spark 1.x
        : "df <- createDataFrame(sqlContext, faithful)\nhead(df)";

    InterpreterResult headResult = interpretSuccessfully(dataFrameCode);
    assertTrue(headResult.message().get(0).getData().contains("eruptions waiting"));
  }

  /**
   * Runs {@code code} on the SparkR interpreter with a fresh context and asserts that the
   * result code is SUCCESS.
   *
   * @param code R source to evaluate
   * @return the interpreter result, for further assertions on its messages
   */
  private InterpreterResult interpretSuccessfully(String code) throws InterpreterException {
    InterpreterResult outcome = sparkRInterpreter.interpret(code, getInterpreterContext());
    assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
    return outcome;
  }

  /**
   * Builds a new {@link InterpreterContext} with fixed placeholder identifiers; the last three
   * constructor arguments are passed as {@code null}.
   */
  private InterpreterContext getInterpreterContext() {
    AuthenticationInfo authInfo = new AuthenticationInfo();
    HashMap<String, Object> config = new HashMap<String, Object>();
    GUI firstGui = new GUI();
    GUI secondGui = new GUI();
    AngularObjectRegistry registry = new AngularObjectRegistry("spark", null);
    return new InterpreterContext(
        "noteId",
        "paragraphId",
        "replName",
        "paragraphTitle",
        "paragraphText",
        authInfo,
        config,
        firstGui,
        secondGui,
        registry,
        null,
        null,
        null);
  }
}

View file

@ -45,5 +45,8 @@ log4j.logger.org.hibernate.type=ALL
log4j.logger.org.apache.zeppelin.interpreter=DEBUG
log4j.logger.org.apache.zeppelin.spark=DEBUG
log4j.logger.org.apache.zeppelin.python.IPythonInterpreter=DEBUG
log4j.logger.org.apache.zeppelin.python.IPythonClient=DEBUG
log4j.logger.org.apache.spark.repl.Main=INFO

View file

@ -16,680 +16,227 @@
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>interpreter-parent</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../interpreter-parent/pom.xml</relativePath>
</parent>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-parent</artifactId>
<packaging>pom</packaging>
<version>0.9.0-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<name>spark-parent</name>
<description>Zeppelin spark support</description>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-spark_2.10</artifactId>
<packaging>jar</packaging>
<version>0.9.0-SNAPSHOT</version>
<name>Zeppelin: Spark</name>
<description>Zeppelin spark support</description>
<properties>
<!--library versions-->
<datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
<datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
<datanucleus.core.version>3.2.10</datanucleus.core.version>
<properties>
<!--library versions-->
<jsoup.version>1.8.2</jsoup.version>
<spark.version>2.0.2</spark.version>
<guava.version>14.0.1</guava.version>
<commons.exec.version>1.3</commons.exec.version>
<commons.compress.version>1.9</commons.compress.version>
<maven.plugin.api.version>3.0</maven.plugin.api.version>
<aether.version>1.12</aether.version>
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>1.0</wagon.version>
<datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
<datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
<datanucleus.core.version>3.2.10</datanucleus.core.version>
<!--plugin versions-->
<plugin.shade.version>2.3</plugin.shade.version>
<plugin.scala.version>2.15.2</plugin.scala.version>
<!-- settings -->
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
<pyspark.test.include>**/*Test.*</pyspark.test.include>
</properties>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-display_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-python</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-python</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<!-- Aether :: maven dependency resolution -->
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>${maven.plugin.api.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.sisu</groupId>
<artifactId>sisu-inject-plexus</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.maven</groupId>
<artifactId>maven-model</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-api</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-util</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-impl</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-aether-provider</artifactId>
<version>${maven.aeither.provider.version}</version>
<exclusions>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-spi</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-util</artifactId>
</exclusion>
<exclusion>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-impl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-connector-file</artifactId>
<version>${aether.version}</version>
</dependency>
<dependency>
<groupId>org.sonatype.aether</groupId>
<artifactId>aether-connector-wagon</artifactId>
<version>${aether.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-provider-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-provider-api</artifactId>
<version>${wagon.version}</version>
<exclusions>
<exclusion>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-http-lightweight</artifactId>
<version>${wagon.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-http-shared</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-http</artifactId>
<version>${wagon.version}</version>
<exclusions>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>${commons.exec.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>${commons.compress.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!--test libraries-->
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<version>${datanucleus.core.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>${datanucleus.apijdo.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>${datanucleus.rdbms.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
<exclude>${pyspark.test.exclude}</exclude>
</excludes>
<environmentVariables>
<!-- Local pyspark execution needs PYTHONPATH; otherwise the python daemon on the executor side will fail,
e.g. sc.range(1,10).sum()
-->
<PYTHONPATH>../interpreter/spark/pyspark/pyspark.zip:../interpreter/spark/pyspark/py4j-${spark.py4j.version}-src.zip:../interpreter/lib/python</PYTHONPATH>
</environmentVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${plugin.shade.version}</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
<relocations>
<!-- shade guava and proto-buf, because it might conflict with those of spark -->
<relocation>
<pattern>com.google</pattern>
<shadedPattern>org.apache.zeppelin.com.google</shadedPattern>
</relocation>
<!-- shade netty, because it might conflict with that of spark-->
<relocation>
<pattern>io.netty</pattern>
<shadedPattern>org.apache.zeppelin.io.netty</shadedPattern>
</relocation>
</relocations>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- At the package phase, copy this module's own artifact (same groupId, artifactId, version
and packaging as the project) into the interpreter/spark directory two levels above target. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark</outputDirectory>
<!-- Existing release and snapshot copies are kept unless the source artifact is newer -->
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<!-- NOTE(review): includeScope looks like a copy-dependencies parameter; confirm it has any
effect with the copy goal used here. -->
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<!-- Plugin to compile Scala code (main and test sources) with the Scala version given by
${scala.version}. -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>${plugin.scala.version}</version>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
<!-- These two sources are left out of Scala compilation by default.
NOTE(review): other plugin sections of this pom reset <excludes> with combine.self="override",
presumably to compile them when SparkR support is enabled; confirm. -->
<excludes>
<exclude>**/ZeppelinR.scala</exclude>
<exclude>**/SparkRBackend.scala</exclude>
</excludes>
</configuration>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<phase>compile</phase>
</execution>
<execution>
<id>test-compile</id>
<goals>
<goal>testCompile</goal>
</goals>
<phase>test-compile</phase>
</execution>
<!-- Unnamed execution: binds the compile goal to process-resources as well, so the goal is
bound twice (here and in the "compile" execution above).
NOTE(review): presumably so Scala classes exist before Java compilation; confirm. -->
<execution>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes combine.self="override"></excludes>
<testExcludes combine.self="override">
<testExclude>${pyspark.test.exclude}</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes combine.self="override">
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes combine.self="override">
<exclude>${pyspark.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>
<!-- include sparkr by default -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes combine.self="override"></excludes>
<testExcludes combine.self="override">
<testExclude>${pyspark.test.exclude}</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes combine.self="override">
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes combine.self="override">
<exclude>${pyspark.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-interpreter-setting</id>
<phase>package</phase>
<goals>
<goal>resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.3</spark.version>
<spark.py4j.version>0.9</spark.py4j.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
</profile>
<profile>
<id>spark-2.0</id>
<properties>
<spark.version>2.0.2</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.3</spark.py4j.version>
</properties>
</profile>
<profile>
<id>spark-2.1</id>
<properties>
<spark.version>2.1.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.4</spark.py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>
<profile>
<id>spark-2.2</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<!--plugin versions-->
<plugin.shade.version>2.4.1</plugin.shade.version>
<plugin.scala.version>2.15.2</plugin.scala.version>
<!-- spark versions -->
<spark.version>2.2.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.4</spark.py4j.version>
</properties>
</profile>
<py4j.version>0.10.4</py4j.version>
</properties>
<dependencies>
<profile>
<id>hadoop-0.23</id>
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
Hadoop 0.23.X issue -->
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<properties>
<hadoop.version>0.23.10</hadoop.version>
</properties>
</profile>
<profile>
<id>hadoop-1</id>
<properties>
<hadoop.version>1.0.4</hadoop.version>
<avro.mapred.classifier>hadoop1</avro.mapred.classifier>
<codehaus.jackson.version>1.8.8</codehaus.jackson.version>
<akka.group>org.spark-project.akka</akka.group>
</properties>
</profile>
<!--test libraries-->
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-display</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<profile>
<id>hadoop-2.2</id>
<properties>
<hadoop.version>2.2.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
<profile>
<id>hadoop-2.3</id>
<properties>
<hadoop.version>2.3.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<profile>
<id>hadoop-2.4</id>
<properties>
<hadoop.version>2.4.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<version>${datanucleus.core.version}</version>
<scope>test</scope>
</dependency>
<profile>
<id>hadoop-2.6</id>
<properties>
<hadoop.version>2.6.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>${datanucleus.apijdo.version}</version>
<scope>test</scope>
</dependency>
<profile>
<id>hadoop-2.7</id>
<properties>
<hadoop.version>2.7.2</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.0</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
</profiles>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>${datanucleus.rdbms.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Binds the execution with id "enforce" to the phase "none", which is not a lifecycle phase,
so that execution does not run for this module.
NOTE(review): the "enforce" execution is presumably inherited from a parent pom; confirm. -->
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<!-- Runs the ScalaTest suites through the plugin's test goal. The execution names no phase,
so the goal's default binding applies. -->
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<!-- Reports go to the surefire-reports directory under the build directory -->
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<!-- NOTE(review): "." is presumably resolved relative to reportsDirectory; confirm against
the plugin documentation. -->
<junitxml>.</junitxml>
<!-- File reporter writing to TestSuite.txt; "WDF" are ScalaTest reporter configuration
letters (see the ScalaTest Runner documentation for their meaning). -->
<filereports>WDF TestSuite.txt</filereports>
</configuration>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Scala compiler plugin. Compilation of main and test Scala sources is bound to the
resource-processing phases, which come before the phases where Java sources are normally
compiled (hence the "compile-first" execution ids). -->
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<id>eclipse-add-source</id>
<goals>
<goal>add-source</goal>
</goals>
</execution>
<execution>
<id>scala-compile-first</id>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>scala-test-compile-first</id>
<phase>process-test-resources</phase>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- The Scala version is not fixed here; it comes from the scala.compile.version property -->
<scalaVersion>${scala.compile.version}</scalaVersion>
<!--<recompileMode>incremental</recompileMode>-->
<!--<useZincServer>true</useZincServer>-->
<!-- Flags passed to scalac -->
<args>
<arg>-unchecked</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
</args>
<!-- JVM options for the compiler process.
NOTE(review): the PermSize / MaxPermSize options are presumably ignored on Java 8 and later;
confirm whether they are still needed. -->
<jvmArgs>
<jvmArg>-Xms1024m</jvmArg>
<jvmArg>-Xmx1024m</jvmArg>
<jvmArg>-XX:PermSize=${PermGen}</jvmArg>
<jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
</jvmArgs>
<!-- Flags passed to javac: source and target level both follow the java.version property -->
<javacArgs>
<javacArg>-source</javacArg>
<javacArg>${java.version}</javacArg>
<javacArg>-target</javacArg>
<javacArg>${java.version}</javacArg>
<javacArg>-Xlint:all,-serial,-path,-options</javacArg>
</javacArgs>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>spark-2.2</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.2.0</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
</profile>
<profile>
<id>spark-2.1</id>
<properties>
<spark.version>2.1.0</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
</profile>
<profile>
<id>spark-2.0</id>
<properties>
<spark.version>2.0.2</spark.version>
<py4j.version>0.10.3</py4j.version>
</properties>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.3</spark.version>
<py4j.version>0.9</py4j.version>
</properties>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
<py4j.version>0.8.2.1</py4j.version>
</properties>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
<py4j.version>0.8.2.1</py4j.version>
</properties>
</profile>
</profiles>
</project>

41
spark/scala-2.10/pom.xml Normal file
View file

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Scala 2.10 flavour of the Spark interpreter. This pom only sets the Scala versions; all
other build configuration is inherited from the spark-scala-parent pom referenced below. -->
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>0.9.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Spark Interpreter: Scala_2.10</name>
<parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-parent</artifactId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../spark-scala-parent/pom.xml</relativePath>
</parent>
<properties>
<!-- Full Scala version and its binary (major.minor) version for this module -->
<scala.version>2.10.5</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<!-- Compiler version; kept equal to scala.version.
NOTE(review): presumably read by the Scala compiler plugin configured in the parent; confirm. -->
<scala.compile.version>${scala.version}</scala.compile.version>
</properties>
</project>

View file

@ -0,0 +1 @@
../spark-scala-parent

View file

@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark
import java.io.File
import java.nio.file.{Files, Paths}
import org.apache.spark.SparkConf
import org.apache.spark.repl.SparkILoop
import org.apache.spark.repl.SparkILoop._
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream
import org.apache.zeppelin.interpreter.{InterpreterContext, InterpreterResult}
import org.slf4j.{Logger, LoggerFactory}
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter._
/**
 * SparkInterpreter for scala-2.10.
 *
 * Wraps a `SparkILoop` REPL: `open()` bootstraps the loop, `interpret()` feeds it user code
 * chunk by chunk, `bind()` injects values into it and `close()` shuts it down.
 *
 * @param conf     Spark configuration. `open()` writes `spark.repl.class.outputDir` into it and,
 *                 when `startHttpServer` yields a server, `spark.repl.class.uri` as well.
 * @param depFiles handed unchanged to `BaseSparkScalaInterpreter`; not used directly here.
 */
class SparkScala210Interpreter(override val conf: SparkConf,
override val depFiles: java.util.List[String])
extends BaseSparkScalaInterpreter(conf, depFiles) {
// NOTE(review): presumably lazy so the logger is usable while the superclass is still being
// initialized - confirm.
lazy override val LOGGER: Logger = LoggerFactory.getLogger(getClass)
// The REPL behind this interpreter. Assigned in open(), so it is null before that
// (close() guards against this).
private var sparkILoop: SparkILoop = _
// Stream installed as the console output in open(); its forwarding target is switched with
// setInterpreterOutput() in open() and interpret().
override val interpreterOutput =
new InterpreterOutputStream(LoggerFactory.getLogger(classOf[SparkScala210Interpreter]))
/**
 * Bootstraps the REPL and then creates the SparkContext. The statements are order dependent:
 * the console is redirected before the loop is constructed, and the loop's private members
 * are set/invoked through the setDeclaredField/callMethod helpers (not defined in this class).
 */
override def open(): Unit = {
super.open()
// Redirect the output produced during open() to the InterpreterOutputStream, so that the
// user gets more diagnostic information in the frontend.
if (InterpreterContext.get() != null) {
interpreterOutput.setInterpreterOutput(InterpreterContext.get().out)
}
// Fresh temp directory (prefix "spark") under spark.repl.classdir, falling back to
// java.io.tmpdir; published to Spark as spark.repl.class.outputDir.
val rootDir = conf.get("spark.repl.classdir", System.getProperty("java.io.tmpdir"))
val outputDir = Files.createTempDirectory(Paths.get(rootDir), "spark").toFile
// NOTE(review): java.io.File deletion only removes a directory that is empty, so this
// cleanup is presumably best effort - confirm.
outputDir.deleteOnExit()
conf.set("spark.repl.class.outputDir", outputDir.getAbsolutePath)
// Only Spark1 requires to create http server, Spark2 removes HttpServer class.
// When a (server, uri) pair comes back, keep the server and publish its uri to Spark.
startHttpServer(outputDir).foreach { case (server, uri) =>
sparkHttpServer = server
conf.set("spark.repl.class.uri", uri)
}
// Compiler settings: defaults from the context class loader, the JVM classpath, plus the
// user jars joined with the platform path separator.
val settings = new Settings()
settings.embeddedDefaults(Thread.currentThread().getContextClassLoader())
settings.usejavacp.value = true
settings.classpath.value = getUserJars.mkString(File.pathSeparator)
// Must come before the loop is constructed: the loop's writer below wraps Console.out,
// which at that point is interpreterOutput.
Console.setOut(interpreterOutput)
sparkILoop = new SparkILoop(null, new JPrintWriter(Console.out, true))
setDeclaredField(sparkILoop, "settings", settings)
callMethod(sparkILoop, "createInterpreter")
sparkILoop.initializeSynchronous()
callMethod(sparkILoop, "postInitialization")
// The "$$" names are the compiled (name-mangled) forms of SparkILoop members, looked up by
// string, so they are tied to the SparkILoop class this module is built against.
val reader = callMethod(sparkILoop,
"org$apache$spark$repl$SparkILoop$$chooseReader",
Array(settings.getClass), Array(settings)).asInstanceOf[InteractiveReader]
setDeclaredField(sparkILoop, "org$apache$spark$repl$SparkILoop$$in", reader)
// Code completion is served by the completer of the reader chosen above.
scalaCompleter = reader.completion.completer()
createSparkContext()
}
/** Runs the superclass close() first, then closes the REPL if open() got far enough to create it. */
override def close(): Unit = {
super.close()
if (sparkILoop != null) {
callMethod(sparkILoop, "org$apache$spark$repl$SparkILoop$$closeInterpreter")
}
}
/**
 * Interprets `code` one line at a time, joining lines the REPL reports as incomplete with the
 * lines that follow them.
 *
 * @param code    source text; split on newlines, blank lines are skipped
 * @param context may be null. When non-null its `out` is cleared and receives the output;
 *                when null, interpreterOutput is detached from any previous target.
 * @return ERROR (without a message) as soon as a chunk fails; otherwise the status of the last
 *         chunk evaluated, i.e. SUCCESS, or INCOMPLETE when the code ends in an unfinished
 *         statement
 */
protected override def interpret(code: String, context: InterpreterContext): InterpreterResult = {
if (context != null) {
interpreterOutput.setInterpreterOutput(context.out)
context.out.clear()
} else {
interpreterOutput.setInterpreterOutput(null)
}
// Console output of the evaluated code goes to context.out when there is a context.
Console.withOut(if (context != null) context.out else Console.out) {
interpreterOutput.ignoreLeadingNewLinesFromScalaReporter()
// add print("") at the end in case the last line is comment which lead to INCOMPLETE
// (it also guarantees the loop below runs at least once, so lastStatus gets assigned)
val lines = code.split("\\n") ++ List("print(\"\")")
// Text of the statement the REPL has so far reported as incomplete ("" when none pending).
var incompleteCode = ""
var lastStatus: InterpreterResult.Code = null
for (line <- lines if !line.trim.isEmpty) {
// Prepend any pending incomplete statement to the current line.
val nextLine = if (incompleteCode != "") {
incompleteCode + "\n" + line
} else {
line
}
scalaInterpret(nextLine) match {
case scala.tools.nsc.interpreter.IR.Success =>
// continue the next line
incompleteCode = ""
lastStatus = InterpreterResult.Code.SUCCESS
case error@scala.tools.nsc.interpreter.IR.Error =>
// Returns from interpret() right here: the remaining lines are not run, and the
// Console.flush() / setInterpreterOutput(null) calls below are skipped.
return new InterpreterResult(InterpreterResult.Code.ERROR)
case scala.tools.nsc.interpreter.IR.Incomplete =>
// put this line into inCompleteCode for the next execution.
incompleteCode = incompleteCode + "\n" + line
lastStatus = InterpreterResult.Code.INCOMPLETE
}
}
// flush all output before returning result to frontend
Console.flush()
interpreterOutput.setInterpreterOutput(null)
return new InterpreterResult(lastStatus)
}
}
/** Passes `code` unchanged to the REPL and returns the REPL's result. */
def scalaInterpret(code: String): scala.tools.nsc.interpreter.IR.Result =
sparkILoop.interpret(code)
/**
 * Binds `value` in the REPL under `name` with declared type `tpe` and the given modifiers,
 * inside beQuietDuring.
 */
protected def bind(name: String, tpe: String, value: Object, modifier: List[String]): Unit = {
sparkILoop.beQuietDuring {
sparkILoop.bind(name, tpe, value, modifier)
}
}
}

41
spark/scala-2.11/pom.xml Normal file
View file

@ -0,0 +1,41 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Scala 2.11 flavour of the Spark interpreter. This pom only sets the Scala versions; all
other build configuration is inherited from the spark-scala-parent pom referenced below. -->
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.11</artifactId>
<version>0.9.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Spark Interpreter: Scala_2.11</name>
<parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-parent</artifactId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../spark-scala-parent/pom.xml</relativePath>
</parent>
<properties>
<!-- Full Scala version and its binary (major.minor) version for this module -->
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<!-- Compiler version; kept equal to scala.version.
NOTE(review): presumably read by the Scala compiler plugin configured in the parent; confirm. -->
<scala.compile.version>${scala.version}</scala.compile.version>
</properties>
</project>

View file

@ -0,0 +1 @@
../spark-scala-parent

View file

@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c:%L - %m%n
#log4j.appender.stdout.layout.ConversionPattern=
#%5p [%t] (%F:%L) - %m%n
#%-4r [%t] %-5p %c %x - %m%n
#
# Root logger option
log4j.rootLogger=INFO, stdout
# Quieten noisy third-party loggers.
# Logger names must match the real package/class names exactly; a misspelled name is
# accepted silently and simply never applies.
log4j.logger.org.apache.hadoop.mapred=WARN
log4j.logger.org.apache.hadoop.hive.ql=WARN
log4j.logger.org.apache.hadoop.hive.metastore=WARN
log4j.logger.org.apache.hadoop.hive.service.HiveServer=WARN
log4j.logger.org.apache.zeppelin.scheduler=WARN
log4j.logger.org.quartz=WARN
log4j.logger.DataNucleus=WARN
log4j.logger.DataNucleus.MetaData=ERROR
log4j.logger.DataNucleus.Datastore=ERROR
# Log all JDBC parameters
log4j.logger.org.hibernate.type=ALL
log4j.logger.org.apache.zeppelin.interpreter=DEBUG
log4j.logger.org.apache.zeppelin.spark=DEBUG
log4j.logger.org.apache.spark.repl.Main=INFO

View file

@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark
import java.io.{BufferedReader, File}
import java.net.URLClassLoader
import java.nio.file.{Files, Paths}
import org.apache.spark.SparkConf
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream
import org.apache.zeppelin.interpreter.{InterpreterContext, InterpreterResult}
import org.slf4j.LoggerFactory
import org.slf4j.Logger
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter._
/**
 * SparkInterpreter for scala-2.11.
 *
 * Wraps a Scala `ILoop` REPL: `open()` bootstraps the loop, `interpret()` feeds it user code
 * chunk by chunk, `bind()` injects values into it and `close()` shuts it down.
 *
 * @param conf     Spark configuration. `open()` reads `spark.master` and `spark.repl.classdir`
 *                 from it and writes `spark.repl.class.outputDir` (plus `spark.repl.class.uri`
 *                 when `startHttpServer` yields a server).
 * @param depFiles handed unchanged to `BaseSparkScalaInterpreter`; not used directly here.
 */
class SparkScala211Interpreter(override val conf: SparkConf,
override val depFiles: java.util.List[String])
extends BaseSparkScalaInterpreter(conf, depFiles) {
// Lazy, so it is created on first use; that lets the interpreterOutput initializer below
// read it regardless of field initialization order.
lazy override val LOGGER: Logger = LoggerFactory.getLogger(getClass)
// The REPL behind this interpreter. Assigned in open(), so it is null before that
// (close() guards against this).
private var sparkILoop: ILoop = _
// Stream the REPL writes to; its forwarding target is switched with setInterpreterOutput()
// in interpret().
override val interpreterOutput = new InterpreterOutputStream(LOGGER)
/**
 * Bootstraps the REPL and then creates the SparkContext. The statements are order dependent,
 * and two private ILoop members are reached through the getField/callMethod helpers
 * (not defined in this class).
 */
override def open(): Unit = {
super.open()
// For yarn-client mode, flag it through the SPARK_YARN_MODE system property.
if (conf.get("spark.master", "local") == "yarn-client") {
System.setProperty("SPARK_YARN_MODE", "true")
}
// Fresh temp directory (prefix "spark") under spark.repl.classdir, falling back to
// java.io.tmpdir; published to Spark as spark.repl.class.outputDir.
val rootDir = conf.get("spark.repl.classdir", System.getProperty("java.io.tmpdir"))
val outputDir = Files.createTempDirectory(Paths.get(rootDir), "spark").toFile
// NOTE(review): java.io.File deletion only removes a directory that is empty, so this
// cleanup is presumably best effort - confirm.
outputDir.deleteOnExit()
conf.set("spark.repl.class.outputDir", outputDir.getAbsolutePath)
// Only Spark1 requires to create http server, Spark2 removes HttpServer class.
// When a (server, uri) pair comes back, keep the server and publish its uri to Spark.
startHttpServer(outputDir).foreach { case (server, uri) =>
sparkHttpServer = server
conf.set("spark.repl.class.uri", uri)
}
// Compiler settings: class-based REPL wrappers with outputDir as the REPL output directory,
// defaults from the context class loader, the JVM classpath, plus the user jars joined
// with the platform path separator.
val settings = new Settings()
settings.processArguments(List("-Yrepl-class-based",
"-Yrepl-outdir", s"${outputDir.getAbsolutePath}"), true)
settings.embeddedDefaults(Thread.currentThread().getContextClassLoader())
settings.usejavacp.value = true
settings.classpath.value = getUserJars.mkString(File.pathSeparator)
// The REPL prints through interpreterOutput.
val replOut = new JPrintWriter(interpreterOutput, true)
sparkILoop = new ILoop(None, replOut)
sparkILoop.settings = settings
sparkILoop.createInterpreter()
// "scala$tools$nsc$interpreter$ILoop$$in0" is the compiled (name-mangled) form of an ILoop
// member, looked up by string. With no reader present the loop picks one itself; otherwise
// the existing reader is wrapped in a SimpleReader.
val in0 = getField(sparkILoop, "scala$tools$nsc$interpreter$ILoop$$in0").asInstanceOf[Option[BufferedReader]]
val reader = in0.fold(sparkILoop.chooseReader(settings))(r => SimpleReader(r, replOut, interactive = true))
sparkILoop.in = reader
sparkILoop.initializeSynchronous()
callMethod(sparkILoop, "scala$tools$nsc$interpreter$ILoop$$loopPostInit")
// Code completion is served by the completer of the reader chosen above.
this.scalaCompleter = reader.completion.completer()
createSparkContext()
}
/**
 * Binds `value` in the REPL under `name` with declared type `tpe` and the given modifiers,
 * inside beQuietDuring.
 */
protected def bind(name: String, tpe: String, value: Object, modifier: List[String]): Unit = {
sparkILoop.beQuietDuring {
sparkILoop.bind(name, tpe, value, modifier)
}
}
/** Runs the superclass close() first, then closes the REPL if open() got far enough to create it. */
override def close(): Unit = {
super.close()
if (sparkILoop != null) {
sparkILoop.closeInterpreter()
}
}
/**
 * Interprets `code` one line at a time, joining lines the REPL reports as incomplete with the
 * lines that follow them.
 *
 * @param code    source text; split on newlines, blank lines are skipped
 * @param context may be null. When non-null its `out` is cleared and receives the output.
 * @return ERROR (without a message) as soon as a chunk fails; otherwise the status of the last
 *         chunk evaluated, i.e. SUCCESS, or INCOMPLETE when the code ends in an unfinished
 *         statement
 */
protected override def interpret(code: String, context: InterpreterContext): InterpreterResult = {
// NOTE(review): with a null context the previous target of interpreterOutput is left in
// place (the scala-2.10 variant resets it to null in that case) - confirm this is intended.
if (context != null) {
interpreterOutput.setInterpreterOutput(context.out)
context.out.clear()
}
// Console output of the evaluated code goes to context.out when there is a context.
Console.withOut(if (context != null) context.out else Console.out) {
interpreterOutput.ignoreLeadingNewLinesFromScalaReporter()
// add print("") at the end in case the last line is comment which lead to INCOMPLETE
// (it also guarantees the loop below runs at least once, so lastStatus gets assigned)
val lines = code.split("\\n") ++ List("print(\"\")")
// Text of the statement the REPL has so far reported as incomplete ("" when none pending).
var incompleteCode = ""
var lastStatus: InterpreterResult.Code = null
for (line <- lines if !line.trim.isEmpty) {
// Prepend any pending incomplete statement to the current line.
val nextLine = if (incompleteCode != "") {
incompleteCode + "\n" + line
} else {
line
}
scalaInterpret(nextLine) match {
case scala.tools.nsc.interpreter.IR.Success =>
// continue the next line
incompleteCode = ""
lastStatus = InterpreterResult.Code.SUCCESS
case error@scala.tools.nsc.interpreter.IR.Error =>
// Returns from interpret() right here: the remaining lines are not run, and the
// Console.flush() / setInterpreterOutput(null) calls below are skipped.
return new InterpreterResult(InterpreterResult.Code.ERROR)
case scala.tools.nsc.interpreter.IR.Incomplete =>
// put this line into inCompleteCode for the next execution.
incompleteCode = incompleteCode + "\n" + line
lastStatus = InterpreterResult.Code.INCOMPLETE
}
}
// flush all output before returning result to frontend
Console.flush()
interpreterOutput.setInterpreterOutput(null)
return new InterpreterResult(lastStatus)
}
}
/** Passes `code` unchanged to the REPL and returns the REPL's result. */
def scalaInterpret(code: String): scala.tools.nsc.interpreter.IR.Result =
sparkILoop.interpret(code)
}

View file

@ -21,7 +21,7 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<artifactId>spark-parent</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>..</relativePath>
@ -44,7 +44,6 @@
instead of changing spark.version in this section.
-->
<spark.version>1.4.1</spark.version>
<hadoop.version>2.3.0</hadoop.version>
<yarn.version>${hadoop.version}</yarn.version>
<avro.version>1.7.7</avro.version>
@ -62,7 +61,6 @@
<spark.bin.download.url>
http://d3kbcqa49mib13.cloudfront.net/${spark.archive}-bin-without-hadoop.tgz
</spark.bin.download.url>
<spark.py4j.version>0.8.2.1</spark.py4j.version>
<!--plugin versions-->
<plugin.shade.version>2.3</plugin.shade.version>
@ -359,480 +357,6 @@
</dependencies>
<profiles>
<profile>
<id>spark-1.1</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.1.1</spark.version>
<akka.version>2.2.3-shaded-protobuf</akka.version>
</properties>
</profile>
<profile>
<id>cassandra-spark-1.1</id>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.1.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<properties>
<spark.version>1.1.1</spark.version>
<akka.version>2.2.3-shaded-protobuf</akka.version>
</properties>
</profile>
<profile>
<id>spark-1.2</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
</profile>
<profile>
<id>cassandra-spark-1.2</id>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.2.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.3</id>
<properties>
<spark.version>1.3.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.3</id>
<properties>
<spark.version>1.3.0</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.3.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.4.0</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.5</id>
<properties>
<spark.version>1.5.1</spark.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
<guava.version>16.0.1</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.3</spark.version>
<spark.py4j.version>0.9</spark.py4j.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
</profile>
<profile>
<id>spark-2.0</id>
<properties>
<spark.version>2.0.2</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.3</spark.py4j.version>
</properties>
</profile>
<profile>
<id>spark-2.1</id>
<properties>
<spark.version>2.1.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.4</spark.py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>
<profile>
<id>spark-2.2</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.2.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.4</spark.py4j.version>
</properties>
</profile>
<profile>
<id>hadoop-0.23</id>
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
Hadoop 0.23.X issue -->
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
</dependencies>
<properties>
<hadoop.version>0.23.10</hadoop.version>
</properties>
</profile>
<profile>
<id>hadoop-1</id>
<properties>
<hadoop.version>1.0.4</hadoop.version>
<avro.mapred.classifier>hadoop1</avro.mapred.classifier>
<codehaus.jackson.version>1.8.8</codehaus.jackson.version>
<akka.group>org.spark-project.akka</akka.group>
</properties>
</profile>
<profile>
<id>hadoop-2.2</id>
<properties>
<hadoop.version>2.2.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.3</id>
<properties>
<hadoop.version>2.3.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.4</id>
<properties>
<hadoop.version>2.4.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.6</id>
<properties>
<hadoop.version>2.6.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.7</id>
<properties>
<hadoop.version>2.7.2</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.0</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>mapr3</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>1.0.3-mapr-3.0.3</hadoop.version>
<yarn.version>2.3.0-mapr-4.0.0-FCS</yarn.version>
<jets3t.version>0.7.1</jets3t.version>
</properties>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr40</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.4.1-mapr-1503</hadoop.version>
<yarn.version>2.4.1-mapr-1503</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr41</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.5.1-mapr-1503</hadoop.version>
<yarn.version>2.5.1-mapr-1503</yarn.version>
<jets3t.version>0.7.1</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr50</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.7.0-mapr-1506</hadoop.version>
<yarn.version>2.7.0-mapr-1506</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr51</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.7.0-mapr-1602</hadoop.version>
<yarn.version>2.7.0-mapr-1602</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
</profiles>
<build>
<plugins>
<plugin>
@ -900,13 +424,24 @@
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<id>copy-interpreter-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark/dep</outputDirectory>
<skip>true</skip>
</configuration>
</execution>
<execution>
<id>copy-spark-interpreter-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../../interpreter/spark/dep</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
@ -914,12 +449,13 @@
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark/dep</outputDirectory>
<outputDirectory>${project.build.directory}/../../../interpreter/spark/dep</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
@ -936,6 +472,19 @@
</executions>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-interpreter-setting</id>
<phase>none</phase>
<configuration>
<skip>true</skip>
</configuration>
</execution>
</executions>
</plugin>
<!-- include pyspark by default -->
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
@ -981,10 +530,10 @@
</goals>
<configuration>
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
file="${project.build.directory}/${spark.archive}/python/lib/py4j-${spark.py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
<delete dir="../../interpreter/spark/pyspark"/>
<copy todir="../../interpreter/spark/pyspark"
file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip"
basedir="${project.build.directory}/${spark.archive}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
@ -1025,7 +574,7 @@
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark/R/lib</outputDirectory>
<outputDirectory>${project.build.directory}/../../../interpreter/spark/R/lib</outputDirectory>
<resources>
<resource>
<directory>

View file

@ -0,0 +1,172 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>spark-parent</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.9.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-parent</artifactId>
<version>0.9.0-SNAPSHOT</version>
<packaging>pom</packaging>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<!--
Registers the shared source and resource directories that live under
../spark-scala-parent (resolved relative to the basedir of the module being built)
as additional main/test sources and resources.
NOTE(review): presumably this lets every Scala-version-specific child module compile
the same code; confirm against the child poms.
-->
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<!-- shared main Scala sources -->
<id>add-scala-sources</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>${project.basedir}/../spark-scala-parent/src/main/scala</source>
</sources>
</configuration>
</execution>
<execution>
<!-- shared test Scala sources -->
<id>add-scala-test-sources</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>${project.basedir}/../spark-scala-parent/src/test/scala</source>
</sources>
</configuration>
</execution>
<execution>
<!-- shared main resources -->
<id>add-resource</id>
<phase>generate-resources</phase>
<goals>
<goal>add-resource</goal>
</goals>
<configuration>
<resources>
<resource>
<directory>${project.basedir}/../spark-scala-parent/src/main/resources</directory>
</resource>
</resources>
</configuration>
</execution>
<execution>
<!-- shared test resources -->
<id>add-test-resource</id>
<phase>generate-test-resources</phase>
<goals>
<goal>add-test-resource</goal>
</goals>
<configuration>
<resources>
<resource>
<directory>${project.basedir}/../spark-scala-parent/src/test/resources</directory>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- Plugin-level skip: turns off the maven-dependency-plugin goals for this module. -->
<artifactId>maven-dependency-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<!--
Disables the copy-interpreter-setting execution by binding it to the non-existent
phase "none" and marking it as skipped.
NOTE(review): the execution is presumably inherited from a parent pom; confirm there.
-->
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-interpreter-setting</id>
<phase>none</phase>
<configuration>
<skip>true</skip>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,338 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark
import java.io.File
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream
import org.apache.zeppelin.interpreter.{InterpreterContext, InterpreterResult}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
import scala.tools.nsc.interpreter.Completion.ScalaCompleter
import scala.util.control.NonFatal
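/**
* Base class for the Scala-version-specific implementations of SparkInterpreter. It should
* stay binary compatible across the supported Scala versions.
*
* @param conf     Spark configuration used to create the SparkContext / SparkSession
* @param depFiles paths of user dependencies: entries ending in ".jar" are appended to
*                 "spark.jars", every other entry is added to the SparkContext as a file
*/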
abstract class BaseSparkScalaInterpreter(val conf: SparkConf,
val depFiles: java.util.List[String]) {
protected lazy val LOGGER: Logger = LoggerFactory.getLogger(getClass)
private val isTest = conf.getBoolean("zeppelin.spark.test", false)
protected var sc: SparkContext = _
protected var sqlContext: SQLContext = _
protected var sparkSession: Object = _
protected var sparkHttpServer: Object = _
protected var sparkUrl: String = _
protected var scalaCompleter: ScalaCompleter = _
protected val interpreterOutput: InterpreterOutputStream
/**
 * Prepares JVM-wide REPL settings before the Scala interpreter is started.
 *
 * Only sets the system property "scala.repl.name.line"; see the comment in the body for
 * the reason.
 */
protected def open(): Unit = {
  /* Needed for scoped mode, where several Scala compilers (REPLs) emit their classes into
   * the same directory. Generated class names are not random (they look like
   * '$line12.$read$$iw$$iw'), so classes produced by one REPL could overwrite those of
   * another.
   *
   * To keep the names apart, every interpreter instance uses its own line prefix.
   *
   * In Spark 2.x the generated wrapper class name has to match the pattern
   * ^(\$line(?:\d+)\.\$read)(?:\$\$iw)+$
   *
   * hashCode() may be negative and '-' is not allowed in a package name, so the minus sign
   * is replaced by the digit '0', which still satisfies the pattern above.
   */
  val replLinePrefix = ("$line" + this.hashCode).replace('-', '0')
  System.setProperty("scala.repl.name.line", replLinePrefix)
}
/**
 * Interprets `code` for the given interpreter context. Abstract: implemented by the
 * Scala-version-specific subclass.
 */
protected def interpret(code: String, context: InterpreterContext): InterpreterResult
/** Convenience overload: delegates to `interpret(code, context)` with a null context. */
protected def interpret(code: String): InterpreterResult = interpret(code, null)
/**
 * Abstract; returns a raw `scala.tools.nsc.interpreter.IR.Result`.
 * NOTE(review): presumably feeds `code` straight to the underlying Scala REPL - confirm in
 * the subclasses.
 */
protected def scalaInterpret(code: String): scala.tools.nsc.interpreter.IR.Result
/**
 * Computes code-completion candidates for `buf` at position `cursor`.
 *
 * @param buf     the text to complete
 * @param cursor  cursor position inside `buf`
 * @param context interpreter context of the request (not used by this implementation)
 * @return one InterpreterCompletion per candidate reported by `scalaCompleter`; the
 *         candidate string is used both as name and as value, the meta field is null
 */
protected def completion(buf: String,
                         cursor: Int,
                         context: InterpreterContext): java.util.List[InterpreterCompletion] = {
  val candidates = scalaCompleter.complete(buf, cursor).candidates
  // Use the explicit JavaConverters `.asJava` (already imported and used by the rest of this
  // class) instead of the deprecated implicit scala.collection.JavaConversions.
  candidates.map(candidate => new InterpreterCompletion(candidate, candidate, null)).asJava
}
/**
 * Reports the progress of a job group as a percentage.
 *
 * Sums the task counts of every stage of every job the status tracker knows for
 * `jobGroup` and relates completed tasks to total tasks.
 *
 * @param jobGroup Spark job group to inspect
 * @param context  interpreter context (not used by this implementation)
 * @return completed tasks / total tasks as an integer percentage (truncated), or 0 when the
 *         group has no tasks
 */
protected def getProgress(jobGroup: String, context: InterpreterContext): Int = {
  val tracker = sc.statusTracker
  val stageInfos = tracker.getJobIdsForGroup(jobGroup)
    .flatMap { jobId => tracker.getJobInfo(jobId) }
    .flatMap { jobInfo =>
      jobInfo.stageIds().flatMap { stageId => tracker.getStageInfo(stageId) }
    }
  val totalTasks = stageInfos.map(_.numTasks).sum
  val finishedTasks = stageInfos.map(_.numCompletedTasks).sum
  if (totalTasks == 0) 0 else (100 * finishedTasks.toDouble / totalTasks).toInt
}
/**
 * Abstract; implemented by the Scala-version-specific subclass. Within this class it is
 * called with a variable name, the fully qualified type name, the value and a list of
 * modifiers such as "@transient".
 * NOTE(review): presumably makes `value` available as `name` inside the REPL - confirm in
 * the subclasses.
 */
protected def bind(name: String, tpe: String, value: Object, modifier: List[String]): Unit
// for use in java side: accepts a java.util.List of modifiers and converts it to a Scala
// List before delegating to the overload above
protected def bind(name: String,
tpe: String,
value: Object,
modifier: java.util.List[String]): Unit =
bind(name, tpe, value, modifier.asScala.toList)
/**
 * Stops everything this interpreter started and clears the references to it.
 *
 * Stops the SparkContext, then the class HTTP server (if one was started), then the
 * SparkSession (if one was created). `sparkHttpServer` and `sparkSession` are accessed
 * reflectively because they are only held as plain `Object`s in this class.
 */
protected def close(): Unit = {
  if (sc != null) {
    sc.stop()
  }
  if (sparkHttpServer != null) {
    sparkHttpServer.getClass.getMethod("stop").invoke(sparkHttpServer)
    // Drop the reference like the other fields, so that calling close() again does not
    // try to stop an already stopped server.
    sparkHttpServer = null
  }
  sc = null
  sqlContext = null
  if (sparkSession != null) {
    sparkSession.getClass.getMethod("stop").invoke(sparkSession)
    sparkSession = null
  }
}
/**
 * Creates the Spark entry points, choosing the code path by whether the
 * `org.apache.spark.sql.SparkSession` class can be loaded.
 */
protected def createSparkContext(): Unit = {
  val sessionApiAvailable = isSparkSessionPresent()
  if (!sessionApiAvailable) {
    spark1CreateContext()
  } else {
    spark2CreateContext()
  }
}
/**
 * Creates the entry points for the code path taken when SparkSession is not available:
 * a SparkContext plus a SQLContext (or HiveContext), then binds both into the interpreter
 * and runs the default imports.
 *
 * Status messages are written to `interpreterOutput` unless "zeppelin.spark.test" is true.
 */
private def spark1CreateContext(): Unit = {
this.sc = SparkContext.getOrCreate(conf)
if (!isTest) {
interpreterOutput.write("Created SparkContext.\n".getBytes())
}
// ship every non-jar dependency file with the SparkContext
getUserFiles().foreach(file => sc.addFile(file))
// Look up the web UI address reflectively; sparkUrl stays unset when there is no UI.
// NOTE(review): reflection is presumably used because these members differ between
// Spark versions - confirm.
sc.getClass.getMethod("ui").invoke(sc).asInstanceOf[Option[_]] match {
case Some(webui) =>
sparkUrl = webui.getClass.getMethod("appUIAddress").invoke(webui).asInstanceOf[String]
case None =>
}
// A HiveContext is only created when it is both requested via spark.useHiveContext and a
// hive-site.xml is visible to the context class loader.
val hiveSiteExisted: Boolean =
Thread.currentThread().getContextClassLoader.getResource("hive-site.xml") != null
val hiveEnabled = conf.getBoolean("spark.useHiveContext", false)
if (hiveEnabled && hiveSiteExisted) {
// instantiated by name, so there is no compile-time reference to HiveContext here
sqlContext = Class.forName("org.apache.spark.sql.hive.HiveContext")
.getConstructor(classOf[SparkContext]).newInstance(sc).asInstanceOf[SQLContext]
if (!isTest) {
interpreterOutput.write("Created sql context (with Hive support).\n".getBytes())
}
} else {
// Hive was requested but cannot be used: tell the user about the fallback
if (hiveEnabled && !hiveSiteExisted && !isTest) {
interpreterOutput.write(("spark.useHiveContext is set as true but no hive-site.xml" +
" is found in classpath, so zeppelin will fallback to SQLContext.\n").getBytes())
}
sqlContext = Class.forName("org.apache.spark.sql.SQLContext")
.getConstructor(classOf[SparkContext]).newInstance(sc).asInstanceOf[SQLContext]
if (!isTest) {
interpreterOutput.write("Created sql context.\n".getBytes())
}
}
// bind the entry points first; the sqlContext imports below refer to the bound name
bind("sc", "org.apache.spark.SparkContext", sc, List("""@transient"""))
bind("sqlContext", sqlContext.getClass.getCanonicalName, sqlContext, List("""@transient"""))
interpret("import org.apache.spark.SparkContext._")
interpret("import sqlContext.implicits._")
interpret("import sqlContext.sql")
interpret("import org.apache.spark.sql.functions._")
}
/**
 * Creates the entry points for the code path taken when SparkSession is available: builds
 * a SparkSession reflectively, derives SparkContext and SQLContext from it, binds
 * `spark`, `sc` and `sqlContext` into the interpreter and runs the default imports.
 *
 * Hive support is enabled only when it is requested (spark.sql.catalogImplementation=hive
 * or spark.useHiveContext=true), a hive-site.xml is visible to the context class loader and
 * SparkSession reports that the Hive classes are present. Status messages are written to
 * `interpreterOutput` unless "zeppelin.spark.test" is true.
 */
private def spark2CreateContext(): Unit = {
  val sparkClz = Class.forName("org.apache.spark.sql.SparkSession$")
  val sparkObj = sparkClz.getField("MODULE$").get(null)
  val builderMethod = sparkClz.getMethod("builder")
  val builder = builderMethod.invoke(sparkObj)
  builder.getClass.getMethod("config", classOf[SparkConf]).invoke(builder, conf)

  // equalsIgnoreCase is locale independent. The previous toLowerCase comparison used the
  // default locale and therefore failed for values such as "HIVE" under e.g. a Turkish
  // locale, where 'I' lower-cases to a dotless i.
  val hiveRequested =
    "hive".equalsIgnoreCase(conf.get("spark.sql.catalogImplementation", "in-memory")) ||
      "true".equalsIgnoreCase(conf.get("spark.useHiveContext", "false"))

  if (hiveRequested) {
    val hiveSiteExisted: Boolean =
      Thread.currentThread().getContextClassLoader.getResource("hive-site.xml") != null
    val hiveClassesPresent =
      sparkClz.getMethod("hiveClassesArePresent").invoke(sparkObj).asInstanceOf[Boolean]
    if (hiveSiteExisted && hiveClassesPresent) {
      builder.getClass.getMethod("enableHiveSupport").invoke(builder)
      sparkSession = builder.getClass.getMethod("getOrCreate").invoke(builder)
      if (!isTest) {
        interpreterOutput.write("Created Spark session (with Hive support).\n".getBytes())
      }
    } else {
      // Hive was requested but cannot be used: explain why, then fall back to a plain session
      if (!hiveClassesPresent && !isTest) {
        interpreterOutput.write(
          "Hive support can not be enabled because spark is not built with hive\n".getBytes)
      }
      if (!hiveSiteExisted && !isTest) {
        interpreterOutput.write(
          "Hive support can not be enabled because no hive-site.xml found\n".getBytes)
      }
      sparkSession = builder.getClass.getMethod("getOrCreate").invoke(builder)
      if (!isTest) {
        interpreterOutput.write("Created Spark session.\n".getBytes())
      }
    }
  } else {
    sparkSession = builder.getClass.getMethod("getOrCreate").invoke(builder)
    if (!isTest) {
      interpreterOutput.write("Created Spark session.\n".getBytes())
    }
  }

  sc = sparkSession.getClass.getMethod("sparkContext").invoke(sparkSession)
    .asInstanceOf[SparkContext]
  // ship every non-jar dependency file with the SparkContext
  getUserFiles().foreach(file => sc.addFile(file))
  sqlContext = sparkSession.getClass.getMethod("sqlContext").invoke(sparkSession)
    .asInstanceOf[SQLContext]
  // sparkUrl stays unset when the context reports no web UI url
  sc.getClass.getMethod("uiWebUrl").invoke(sc).asInstanceOf[Option[String]] match {
    case Some(url) => sparkUrl = url
    case None =>
  }

  // bind the entry points first; the spark.* imports below refer to the bound name
  bind("spark", sparkSession.getClass.getCanonicalName, sparkSession, List("""@transient"""))
  bind("sc", "org.apache.spark.SparkContext", sc, List("""@transient"""))
  bind("sqlContext", "org.apache.spark.sql.SQLContext", sqlContext, List("""@transient"""))
  interpret("import org.apache.spark.SparkContext._")
  interpret("import spark.implicits._")
  interpret("import spark.sql")
  interpret("import org.apache.spark.sql.functions._")
}
/**
 * Tells whether `org.apache.spark.sql.SparkSession` can be loaded.
 *
 * @return true when the class loads, false when loading fails with
 *         ClassNotFoundException or NoClassDefFoundError; other failures propagate
 */
private def isSparkSessionPresent(): Boolean = {
  val sessionClassName = "org.apache.spark.sql.SparkSession"
  try {
    Class.forName(sessionClassName)
    true
  } catch {
    case _: ClassNotFoundException => false
    case _: NoClassDefFoundError => false
  }
}
/**
 * Reads the public field `name` (looked up with `Class.getField`) from `obj`.
 *
 * @param obj  object whose field is read
 * @param name field name
 * @return the current value of the field
 */
protected def getField(obj: Object, name: String): Object = {
  val publicField = obj.getClass.getField(name)
  publicField.setAccessible(true)
  publicField.get(obj)
}
/**
 * Reads the field `name` declared directly on the runtime class of `obj` (looked up with
 * `Class.getDeclaredField`), regardless of its visibility.
 *
 * @param obj  object whose field is read
 * @param name field name
 * @return the current value of the field
 */
protected def getDeclareField(obj: Object, name: String): Object = {
  val declaredField = obj.getClass.getDeclaredField(name)
  declaredField.setAccessible(true)
  declaredField.get(obj)
}
/**
 * Assigns `value` to the field `name` declared directly on the runtime class of `obj`
 * (looked up with `Class.getDeclaredField`), regardless of its visibility.
 *
 * @param obj   object whose field is written
 * @param name  field name
 * @param value new value of the field
 */
protected def setDeclaredField(obj: Object, name: String, value: Object): Unit = {
  val declaredField = obj.getClass.getDeclaredField(name)
  declaredField.setAccessible(true)
  declaredField.set(obj, value)
}
/**
 * Invokes the parameterless public method `name` on `obj`.
 *
 * @return whatever the invoked method returns
 */
protected def callMethod(obj: Object, name: String): Object = {
  val noParameterTypes = Array.empty[Class[_]]
  val noArguments = Array.empty[Object]
  callMethod(obj, name, noParameterTypes, noArguments)
}
/**
 * Invokes the public method `name` with the given signature on `obj`.
 *
 * @param obj            receiver of the call
 * @param name           method name
 * @param parameterTypes declared parameter types used to look the method up
 * @param parameters     arguments passed to the method
 * @return whatever the invoked method returns
 */
protected def callMethod(obj: Object, name: String,
                         parameterTypes: Array[Class[_]],
                         parameters: Array[Object]): Object = {
  val target = obj.getClass.getMethod(name, parameterTypes: _*)
  target.setAccessible(true)
  target.invoke(obj, parameters: _*)
}
/**
 * Starts Spark's `org.apache.spark.HttpServer` for `outputDir`, entirely through
 * reflection.
 *
 * The port is taken from "spark.replClassServer.port" (default 0).
 *
 * @param outputDir directory handed to the HttpServer constructor
 * @return Some((server, uri)) when the server was created and started; None when any
 *         non-fatal error occurs, e.g. because the HttpServer class does not exist
 */
protected def startHttpServer(outputDir: File): Option[(Object, String)] = {
  try {
    val httpServerClass = Class.forName("org.apache.spark.HttpServer")
    // looked up once and reused for both the instance and the constructor signature
    val securityManagerClass = Class.forName("org.apache.spark.SecurityManager")
    val securityManager = {
      val constructor = securityManagerClass.getConstructor(classOf[SparkConf])
      constructor.setAccessible(true)
      constructor.newInstance(conf).asInstanceOf[Object]
    }
    val httpServerConstructor = httpServerClass
      .getConstructor(classOf[SparkConf],
        classOf[File],
        securityManagerClass,
        classOf[Int],
        classOf[String])
    httpServerConstructor.setAccessible(true)
    // Create Http Server
    val port = conf.getInt("spark.replClassServer.port", 0)
    val server = httpServerConstructor
      .newInstance(conf, outputDir, securityManager, Integer.valueOf(port), "HTTP server")
      .asInstanceOf[Object]
    // Start Http Server
    val startMethod = server.getClass.getMethod("start")
    startMethod.setAccessible(true)
    startMethod.invoke(server)
    // Get uri of this Http Server
    val uriMethod = server.getClass.getMethod("uri")
    uriMethod.setAccessible(true)
    val uri = uriMethod.invoke(server).asInstanceOf[String]
    Some((server, uri))
  } catch {
    // Spark 2.0+ removed HttpServer, so report "no server" with None instead of failing.
    // The cause is logged at debug level so unexpected failures are not lost silently.
    case NonFatal(e) =>
      LOGGER.debug("Could not start org.apache.spark.HttpServer, continuing without it", e)
      None
  }
}
/**
 * Collects the user jars: the non-empty comma separated entries of "spark.jars" followed
 * by every entry of `depFiles` ending in ".jar".
 *
 * Side effect: the combined list is written back to "spark.jars" in `conf`.
 *
 * @return the combined jar list
 */
protected def getUserJars(): Seq[String] = {
  val configuredJars: Seq[String] = conf.getOption("spark.jars") match {
    case Some(value) => value.split(",").filter(entry => entry.nonEmpty).toList
    case None => Nil
  }
  val dependencyJars = depFiles.asScala.filter(path => path.endsWith(".jar"))
  val allJars = configuredJars ++ dependencyJars
  conf.set("spark.jars", allJars.mkString(","))
  allJars
}
/**
 * Returns the entries of `depFiles` that do not end in ".jar".
 */
protected def getUserFiles(): Seq[String] = {
  val dependencyPaths = depFiles.asScala
  dependencyPaths.filterNot(path => path.endsWith(".jar"))
}
}

View file

@ -1,51 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark.dep;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
/**
 * Tests for {@code SparkDependencyResolver.inferScalaVersion}.
 */
public class SparkDependencyResolverTest {

  /**
   * Checks that a {@code "::"} between group id and artifact id is expanded to
   * {@code ":"} plus an {@code "_<scala binary version>"} suffix on the artifact id, and
   * that coordinates without that separator come back unchanged.
   */
  @Test
  public void testInferScalaVersion() {
    String[] versionParts = scala.util.Properties.versionNumberString().split("[.]");
    String scalaVersion = versionParts[0] + "." + versionParts[1];
    String suffixedArtifact = "groupId:artifactId_" + scalaVersion;

    // no "::" after the group id: returned as is
    assertInferred("groupId:artifactId:version", "groupId:artifactId:version");

    // "::" after the group id: scala version appended to the artifact id
    assertInferred(suffixedArtifact + ":version", "groupId::artifactId:version");

    // "::" further to the right is left alone
    assertInferred("groupId:artifactId:version::test", "groupId:artifactId:version::test");

    // wildcard patterns without "::"
    assertInferred("*", "*");
    assertInferred("groupId:*", "groupId:*");
    assertInferred("groupId:artifactId*", "groupId:artifactId*");

    // "::" combined with missing version or wildcards
    assertInferred(suffixedArtifact, "groupId::artifactId");
    assertInferred(suffixedArtifact + "*", "groupId::artifactId*");
    assertInferred(suffixedArtifact + ":*", "groupId::artifactId:*");
  }

  /** Asserts that {@code inferScalaVersion(coordinate)} yields {@code expected}. */
  private static void assertInferred(String expected, String coordinate) {
    assertEquals(expected, SparkDependencyResolver.inferScalaVersion(coordinate));
  }
}

View file

@ -44,6 +44,6 @@ if [[ -n "$PYTHON" ]] ; then
conda update -q conda
conda info -a
conda config --add channels conda-forge
conda install -q matplotlib pandasql ipython=5.4.1 jupyter_client ipykernel matplotlib bokeh=0.12.6
pip install -q grpcio ggplot
conda install -q matplotlib pandasql ipython=5.4.1 jupyter_client ipykernel matplotlib bokeh=0.12.10
pip install -q grpcio ggplot bkzep==0.4.0
fi

View file

@ -27,7 +27,7 @@
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-display_2.10</artifactId>
<artifactId>zeppelin-display</artifactId>
<packaging>jar</packaging>
<version>0.9.0-SNAPSHOT</version>
<name>Zeppelin: Display system apis</name>
@ -45,18 +45,21 @@
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scalap</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
</dependencyManagement>
@ -84,13 +87,6 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>

View file

@ -184,7 +184,7 @@ public class SparkParagraphIT extends AbstractZeppelinIT {
}
}
@Test
// @Test
public void testDep() throws Exception {
try {
// restart spark interpreter before running %dep

View file

@ -237,6 +237,8 @@ public abstract class BaseZeppelinContext {
if (isSupportedObject(o)) {
interpreterContext.out.write(showData(o));
} else {
interpreterContext.out.write("ZeppelinContext doesn't support to show type: "
+ o.getClass().getCanonicalName() + "\n");
interpreterContext.out.write(o.toString());
}
} catch (IOException e) {

View file

@ -96,10 +96,10 @@ import java.util.concurrent.ConcurrentMap;
* Entry point for Interpreter process.
* Accepting thrift connections from ZeppelinServer.
*/
public class RemoteInterpreterServer
extends Thread
public class RemoteInterpreterServer extends Thread
implements RemoteInterpreterService.Iface, AngularObjectRegistryListener {
Logger logger = LoggerFactory.getLogger(RemoteInterpreterServer.class);
private static Logger logger = LoggerFactory.getLogger(RemoteInterpreterServer.class);
InterpreterGroup interpreterGroup;
AngularObjectRegistry angularObjectRegistry;
@ -255,6 +255,9 @@ public class RemoteInterpreterServer
public static void main(String[] args)
throws TTransportException, InterruptedException, IOException {
Class klass = RemoteInterpreterServer.class;
URL location = klass.getResource('/' + klass.getName().replace('.', '/') + ".class");
logger.info("URL:" + location);
String callbackHost = null;
int port = Constants.ZEPPELIN_INTERPRETER_DEFAUlT_PORT;
String portRange = ":";

View file

@ -261,6 +261,12 @@
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-xml_${scala.binary.version}</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>

View file

@ -265,21 +265,21 @@ public abstract class AbstractTestRestApi {
// set spark master and other properties
sparkProperties.put("master",
new InterpreterProperty("master", "local[2]", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("spark.master",
new InterpreterProperty("spark.master", "local[2]", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("spark.cores.max",
new InterpreterProperty("spark.cores.max", "2", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("zeppelin.spark.useHiveContext",
new InterpreterProperty("zeppelin.spark.useHiveContext", false, InterpreterPropertyType.CHECKBOX.getValue()));
// set spark home for pyspark
sparkProperties.put("spark.home",
new InterpreterProperty("spark.home", getSparkHome(), InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("zeppelin.pyspark.useIPython", new InterpreterProperty("zeppelin.pyspark.useIPython", "false", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("zeppelin.spark.test", new InterpreterProperty("zeppelin.spark.test", "true", InterpreterPropertyType.TEXTAREA.getValue()));
sparkIntpSetting.setProperties(sparkProperties);
pySpark = true;
sparkR = true;
ZeppelinServer.notebook.getInterpreterSettingManager().restart(sparkIntpSetting.getId());
} else {
String sparkHome = getSparkHome();
LOG.info("SPARK HOME detected " + sparkHome);
if (sparkHome != null) {
if (System.getenv("SPARK_MASTER") != null) {
sparkProperties.put("master",
@ -288,14 +288,14 @@ public abstract class AbstractTestRestApi {
sparkProperties.put("master",
new InterpreterProperty("master", "local[2]", InterpreterPropertyType.TEXTAREA.getValue()));
}
sparkProperties.put("spark.master",
new InterpreterProperty("spark.master", "local[2]", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("spark.cores.max",
new InterpreterProperty("spark.cores.max", "2", InterpreterPropertyType.TEXTAREA.getValue()));
// set spark home for pyspark
sparkProperties.put("spark.home",
new InterpreterProperty("spark.home", sparkHome, InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("zeppelin.spark.useHiveContext",
new InterpreterProperty("zeppelin.spark.useHiveContext", false, InterpreterPropertyType.CHECKBOX.getValue()));
sparkProperties.put("zeppelin.pyspark.useIPython", new InterpreterProperty("zeppelin.pyspark.useIPython", "false", InterpreterPropertyType.TEXTAREA.getValue()));
sparkProperties.put("zeppelin.spark.test", new InterpreterProperty("zeppelin.spark.test", "true", InterpreterPropertyType.TEXTAREA.getValue()));
pySpark = true;
sparkR = true;
@ -333,7 +333,6 @@ public abstract class AbstractTestRestApi {
return sparkHome;
}
sparkHome = getSparkHomeRecursively(new File(System.getProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_HOME.getVarName())));
System.out.println("SPARK HOME detected " + sparkHome);
return sparkHome;
}

View file

@ -167,8 +167,8 @@ public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
assertEquals(InterpreterResult.Type.TABLE, p.getResult().message().get(1).getType());
assertEquals("_1\t_2\nhello\t20\n", p.getResult().message().get(1).getData());
}
ZeppelinServer.notebook.removeNote(note.getId(), anonymous);
}
ZeppelinServer.notebook.removeNote(note.getId(), anonymous);
}
@Test
@ -470,7 +470,7 @@ public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
p1.setText("%pyspark\n" +
"from pyspark.sql import SQLContext\n" +
"print(" + sqlContextName + ".read.format('com.databricks.spark.csv')" +
".load('"+ tmpFile.getAbsolutePath() +"').count())");
".load('" + tmpFile.getAbsolutePath() +"').count())");
p1.setAuthenticationInfo(anonymous);
note.run(p1.getId());
@ -576,6 +576,7 @@ public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
@Test
public void testConfInterpreter() throws IOException {
ZeppelinServer.notebook.getInterpreterSettingManager().close();
Note note = ZeppelinServer.notebook.createNote(AuthenticationInfo.ANONYMOUS);
Paragraph p = note.addNewParagraph(AuthenticationInfo.ANONYMOUS);
Map config = p.getConfig();

View file

@ -603,7 +603,7 @@
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-spark_2.10</artifactId>
<artifactId>spark-interpreter</artifactId>
<version>${project.version}</version>
<scope>test</scope>
<exclusions>