Add python dependencies to .travis.yml

This commit is contained in:
Alex Goodman 2016-11-08 13:52:33 -08:00
parent e8cea41c61
commit b6b88be386
10 changed files with 404 additions and 10 deletions

View file

@ -66,16 +66,22 @@ matrix:
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
# Test python/pyspark with python2
- jdk: "oraclejdk7"
env: PYTHON="2.7" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -pl spark,python -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python -Dtest=org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
# Test python/pyspark with python3
- jdk: "oraclejdk7"
env: PYTHON="3.5" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -pl spark,python -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python -Dtest=org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
before_install:
- echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit'" >> ~/.mavenrc
- ./testing/install_external_dependencies.sh
- ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
- ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
- mkdir -p ~/R
- echo 'R_LIBS=~/R' > ~/.Renviron
- R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
- export R_LIBS='~/R'
- "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
- ./dev/change_scala_version.sh $SCALA_VER
- source ~/.environ
install:
- mvn -Dorg.slf4j.simpleLogger.defaultLogLevel=warn $BUILD_FLAG $PROFILE -B
@ -99,4 +105,3 @@ after_failure:
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out
- cat zeppelin-web/npm-debug.log
- cat spark-*/logs/*

View file

@ -93,6 +93,7 @@ class FigureCanvasZInline(FigureCanvasAgg):
# Express the image as bytes
buf = BytesIO()
self.print_figure(buf, **kwargs)
fmt = fmt.encode()
byte_str = b"data:image/%s;base64," %fmt
byte_str += base64.b64encode(buf.getvalue())

View file

@ -57,6 +57,7 @@ public class PythonInterpreter extends Interpreter {
private Boolean py4JisInstalled = false;
private InterpreterContext context;
private Pattern errorInLastLine = Pattern.compile(".*(Error|Exception): .*$");
private String pythonPath;
private int maxResult;
PythonProcess process = null;
@ -74,6 +75,8 @@ public class PythonInterpreter extends Interpreter {
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
}
// Add zeppelin-bundled libs to PYTHONPATH
setPythonPath("../interpreter/lib/python:$PYTHONPATH");
LOG.info("Starting Python interpreter ---->");
LOG.info("Python path is set to:" + property.getProperty(ZEPPELIN_PYTHON));
@ -198,13 +201,17 @@ public class PythonInterpreter extends Interpreter {
return null;
}
/**
 * Stores the python search path used when the python process is created.
 *
 * @param pythonPath search path string that getPythonProcess() hands to
 *                   the PythonProcess constructor
 */
public void setPythonPath(String pythonPath) {
this.pythonPath = pythonPath;
}
public PythonProcess getPythonProcess() {
if (process == null) {
String binPath = getProperty(ZEPPELIN_PYTHON);
if (pythonCommand != null) {
binPath = pythonCommand;
}
return new PythonProcess(binPath);
return new PythonProcess(binPath, pythonPath);
} else {
return process;
}

View file

@ -42,10 +42,12 @@ public class PythonProcess {
Process process;
private String binPath;
private String pythonPath;
private long pid;
public PythonProcess(String binPath) {
/**
 * Creates a process wrapper; only stores its arguments.
 *
 * @param binPath    python command line prefix used by open() to build the
 *                   command to launch
 * @param pythonPath value for the PYTHONPATH environment variable of the
 *                   launched process; may be null
 *                   (NOTE(review): open() appears to skip setting PYTHONPATH
 *                   in that case - confirm against the full method)
 */
public PythonProcess(String binPath, String pythonPath) {
this.binPath = binPath;
this.pythonPath = pythonPath;
}
public void open() throws IOException {
@ -65,6 +67,9 @@ public class PythonProcess {
cmd = binPath + " -iu";
}
builder = new ProcessBuilder("bash", "-c", cmd);
if (pythonPath != null) {
builder.environment().put("PYTHONPATH", pythonPath);
}
}
builder.redirectErrorStream(true);

View file

@ -102,7 +102,7 @@ public class PythonInterpreterWithPythonInstalledTest {
realPython.open();
//when
InterpreterResult ret1 = realPython.interpret("print \"...\"", null);
InterpreterResult ret1 = realPython.interpret("print(\"...\")", null);
//then
//System.out.println("\nInterpreter response: \n" + ret.message());

View file

@ -38,6 +38,12 @@
<mockito.version>1.10.19</mockito.version>
<powermock.version>1.6.4</powermock.version>
<spark.version>2.0.1</spark.version>
<pyspark.test.exclude>
**/PySparkInterpreterMatplotlibTest.java
</pyspark.test.exclude>
<pyspark.test.include>
**/*Test.*
</pyspark.test.include>
</properties>
<dependencies>
@ -322,6 +328,7 @@
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
<exclude>${pyspark.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>
@ -431,11 +438,35 @@
<exclude>**/SparkRInterpreter.java</exclude>
</excludes>
<testExcludes>
<testExclude>${pyspark.test.exclude}</testExclude>
<testExclude>**/SparkRInterpreterTest.java</testExclude>
<testExclude>**/ZeppelinRTest.java</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ZeppelinR.scala</exclude>
<exclude>**/SparkRBackend.scala</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>${pyspark.test.include}</include>
</includes>
<excludes>
<exclude>${pyspark.test.exclude}</exclude>
<exclude>**/SparkRInterpreterTest.java</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
@ -580,7 +611,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes combine.self="override"></excludes>
<testExcludes combine.self="override"></testExcludes>
<testExcludes combine.self="override">
<testExclude>${pyspark.test.exclude}</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
@ -596,6 +629,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes combine.self="override">
<exclude>${pyspark.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>

View file

@ -177,7 +177,8 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
Map env = EnvironmentUtils.getProcEnvironment();
if (!env.containsKey("PYTHONPATH")) {
SparkConf conf = getSparkConf();
env.put("PYTHONPATH", conf.get("spark.submit.pyFiles").replaceAll(",", ":"));
env.put("PYTHONPATH", conf.get("spark.submit.pyFiles").replaceAll(",", ":") +
":../interpreter/lib/python");
}
return env;
}

View file

@ -29,6 +29,7 @@ from pyspark.broadcast import Broadcast
from pyspark.serializers import MarshalSerializer, PickleSerializer
import ast
import traceback
import warnings
# for back compatibility
from pyspark.sql import SQLContext, HiveContext, Row

View file

@ -0,0 +1,292 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;
import static org.junit.Assert.*;
/**
 * Tests matplotlib support in the PySpark interpreter: that the python
 * dependencies can be imported and that plt.show() produces the expected
 * output type and content for the different z.configure_mpl() settings.
 *
 * The Spark and PySpark interpreters are created once and shared between
 * test methods through static fields.
 */
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class PySparkInterpreterMatplotlibTest {
  public static SparkInterpreter sparkInterpreter;
  public static PySparkInterpreter pyspark;
  public static InterpreterGroup intpGroup;
  private File tmpDir;
  public static Logger LOGGER = LoggerFactory.getLogger(PySparkInterpreterMatplotlibTest.class);
  private InterpreterContext context;

  /**
   * Since pyspark output is sent to an outputstream rather than
   * being directly provided by interpret(), this subclass is created to
   * override interpret() to append the result from the outputStream
   * for the sake of convenience in testing.
   */
  public static class AltPySparkInterpreter extends PySparkInterpreter {

    public AltPySparkInterpreter(Properties property) {
      super(property);
    }

    /**
     * Runs the statement and merges whatever was written to context.out
     * with the message of the returned result.
     *
     * This code is mainly copied from RemoteInterpreterServer.java which
     * normally handles this in real use cases.
     *
     * @param st      python statement(s) to run
     * @param context interpreter context whose output stream is read and
     *                then cleared
     * @return result carrying the captured output followed by the original
     *         result message; when the original message is empty the type
     *         is taken from the output stream instead of the result
     * @throws InterpreterException if the output stream cannot be flushed
     *                              or read
     */
    @Override
    public InterpreterResult interpret(String st, InterpreterContext context) {
      InterpreterResult result = super.interpret(st, context);

      String message = "";
      Type outputType;
      byte[] interpreterOutput;
      try {
        context.out.flush();
        outputType = context.out.getType();
        interpreterOutput = context.out.toByteArray();
      } catch (IOException e) {
        throw new InterpreterException(e);
      }

      if (interpreterOutput != null && interpreterOutput.length > 0) {
        message = new String(interpreterOutput);
      }

      String interpreterResultMessage = result.message();

      InterpreterResult combinedResult;
      if (interpreterResultMessage != null && !interpreterResultMessage.isEmpty()) {
        message += interpreterResultMessage;
        combinedResult = new InterpreterResult(result.code(), result.type(), message);
      } else {
        combinedResult = new InterpreterResult(result.code(), outputType, message);
      }

      // Start the next statement with an empty output buffer.
      context.out.clear();
      return combinedResult;
    }
  }

  /**
   * Builds the interpreter properties shared by the Spark and PySpark
   * interpreters used in these tests.
   */
  public static Properties getPySparkTestProperties() {
    Properties p = new Properties();
    p.setProperty("master", "local[*]");
    p.setProperty("spark.app.name", "Zeppelin Test");
    p.setProperty("zeppelin.spark.useHiveContext", "true");
    p.setProperty("zeppelin.spark.maxResult", "1000");
    p.setProperty("zeppelin.spark.importImplicit", "true");
    p.setProperty("zeppelin.pyspark.python", "python");
    return p;
  }

  /**
   * Get spark version number as a numerical value.
   * eg. 1.1.x => 11, 1.2.x => 12, 1.3.x => 13 ...
   *
   * @return the version number, or 0 when the spark interpreter has not
   *         been created yet
   */
  public static int getSparkVersionNumber() {
    if (sparkInterpreter == null) {
      return 0;
    }

    String[] split = sparkInterpreter.getSparkContext().version().split("\\.");
    int version = Integer.parseInt(split[0]) * 10 + Integer.parseInt(split[1]);
    return version;
  }

  /**
   * Creates a fresh temp directory and interpreter context for each test;
   * the interpreters themselves are only created and opened on first use.
   */
  @Before
  public void setUp() throws Exception {
    tmpDir = new File(System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + System.currentTimeMillis());
    System.setProperty("zeppelin.dep.localrepo", tmpDir.getAbsolutePath() + "/local-repo");
    tmpDir.mkdirs();

    intpGroup = new InterpreterGroup();
    intpGroup.put("note", new LinkedList<Interpreter>());

    if (sparkInterpreter == null) {
      sparkInterpreter = new SparkInterpreter(getPySparkTestProperties());
      intpGroup.get("note").add(sparkInterpreter);
      sparkInterpreter.setInterpreterGroup(intpGroup);
      sparkInterpreter.open();
    }

    if (pyspark == null) {
      pyspark = new AltPySparkInterpreter(getPySparkTestProperties());
      intpGroup.get("note").add(pyspark);
      pyspark.setInterpreterGroup(intpGroup);
      pyspark.open();
    }

    context = new InterpreterContext("note", "id", "title", "text",
        new AuthenticationInfo(),
        new HashMap<String, Object>(),
        new GUI(),
        new AngularObjectRegistry(intpGroup.getId(), null),
        new LocalResourcePool("id"),
        new LinkedList<InterpreterContextRunner>(),
        new InterpreterOutput(new InterpreterOutputListener() {
          @Override
          public void onAppend(InterpreterOutput out, byte[] line) {
          }

          @Override
          public void onUpdate(InterpreterOutput out, byte[] output) {
          }
        }));
  }

  @After
  public void tearDown() throws Exception {
    delete(tmpDir);
  }

  /** Recursively deletes a file or directory; a missing path is ignored. */
  private void delete(File file) {
    if (file.isFile()) {
      file.delete();
    } else if (file.isDirectory()) {
      File[] files = file.listFiles();
      if (files != null) {
        for (File f : files) {
          delete(f);
        }
      }
      file.delete();
    }
  }

  /**
   * Runs the statements every plotting test starts with: import pyplot,
   * close any figure left over from a previous test, apply the given
   * z.configure_mpl() arguments and draw a simple line plot.
   *
   * @param configureArgs python argument list passed to z.configure_mpl()
   */
  private void preparePlot(String configureArgs) {
    pyspark.interpret("import matplotlib.pyplot as plt", context);
    pyspark.interpret("plt.close()", context);
    pyspark.interpret("z.configure_mpl(" + configureArgs + ")", context);
    pyspark.interpret("plt.plot([1, 2, 3])", context);
  }

  @Test
  public void dependenciesAreInstalled() {
    // matplotlib
    InterpreterResult ret = pyspark.interpret("import matplotlib", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());

    // inline backend
    ret = pyspark.interpret("import backend_zinline", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
  }

  /** Simple plot test. */
  @Test
  public void showPlot() {
    preparePlot("interactive=False");
    InterpreterResult ret = pyspark.interpret("plt.show()", context);

    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.HTML, ret.type());
    assertTrue(ret.message().contains("data:image/png;base64"));
    assertTrue(ret.message().contains("<div>"));
  }

  /** Test for when configuration is set to auto-close figures after show(). */
  @Test
  public void testClose() {
    preparePlot("interactive=False, close=True, angular=False");
    InterpreterResult ret1 = pyspark.interpret("plt.show()", context);

    // Second call to show() should print nothing, and Type should be TEXT.
    // This is because when close=True, there should be no living instances
    // of FigureManager, causing show() to return before setting the output
    // type to HTML.
    InterpreterResult ret = pyspark.interpret("plt.show()", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.TEXT, ret.type());
    assertEquals("", ret.message());

    // Now test that new plot is drawn. It should be identical to the
    // previous one.
    pyspark.interpret("plt.plot([1, 2, 3])", context);
    InterpreterResult ret2 = pyspark.interpret("plt.show()", context);
    assertEquals(ret1.message(), ret2.message());
  }

  /** Test for when configuration is set to not auto-close figures after show(). */
  @Test
  public void testNoClose() {
    preparePlot("interactive=False, close=False, angular=False");
    InterpreterResult ret1 = pyspark.interpret("plt.show()", context);

    // Second call to show() should print nothing, and Type should be HTML.
    // This is because when close=False, there should be living instances
    // of FigureManager, causing show() to set the output
    // type to HTML even though the figure is inactive.
    InterpreterResult ret = pyspark.interpret("plt.show()", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.HTML, ret.type());
    assertEquals("", ret.message());

    // Now test that plot can be reshown if it is updated. It should be
    // different from the previous one because it will plot the same line
    // again but in a different color.
    pyspark.interpret("plt.plot([1, 2, 3])", context);
    InterpreterResult ret2 = pyspark.interpret("plt.show()", context);
    assertNotEquals(ret1.message(), ret2.message());
  }

  /** Test angular mode. */
  @Test
  public void testAngular() {
    preparePlot("interactive=False, close=False, angular=True");
    InterpreterResult ret = pyspark.interpret("plt.show()", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.ANGULAR, ret.type());

    // Check if the figure data is in the Angular Object Registry
    AngularObjectRegistry registry = context.getAngularObjectRegistry();
    String figureData = registry.getAll("note", null).get(0).toString();
    assertTrue(figureData.contains("data:image/png;base64"));
  }
}

View file

@ -0,0 +1,48 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Script for installing R / Python dependencies for Travis CI.
#
# Inputs (environment variables set by the build matrix in .travis.yml):
#   PROFILE - Maven profile flags; R dependencies are installed when it
#             contains the -Pr or -Psparkr profile.
#   PYTHON  - python version under test ("2.7" selects Miniconda2, any other
#             non-empty value selects Miniconda3); when empty or unset the
#             Python dependencies are skipped.
#
# Everything later build steps need in their environment is appended to
# ~/.environ, which .travis.yml sources after running this script.

set -ev
# Make sure the file exists so that sourcing it never fails, even when
# neither the R nor the Python section below runs.
touch ~/.environ

# Install R dependencies if R profiles are used.
# PROFILE is padded with spaces so that a profile is matched as a whole word,
# including when it is the last flag on the line.
if [[ " ${PROFILE} " == *" -Pr "* ]] || [[ " ${PROFILE} " == *" -Psparkr "* ]] ; then
  mkdir -p ~/R
  echo "R_LIBS=~/R" > ~/.Renviron
  echo "export R_LIBS=~/R" >> ~/.environ
  source ~/.environ
  R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
fi

# Install Python dependencies for Python specific tests
if [[ -n "$PYTHON" ]] ; then
  if [[ "$PYTHON" == "2.7" ]] ; then
    wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
  else
    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
  fi
  bash miniconda.sh -b -p "$HOME/miniconda"
  # $HOME and $PATH are expanded now, so ~/.environ records the resolved
  # path with the miniconda bin directory in front.
  echo "export PATH='$HOME/miniconda/bin:$PATH'" >> ~/.environ
  source ~/.environ
  # Forget cached command locations so the freshly installed conda is found.
  hash -r
  conda config --set always_yes yes --set changeps1 no
  conda update -q conda
  conda info -a
  conda config --add channels conda-forge
  conda install -q matplotlib pandasql
fi