Spark/PySpark working

This commit is contained in:
Felix Cheung 2015-11-23 17:59:09 -08:00
parent b19546e9dd
commit f0c22071ff
5 changed files with 205 additions and 135 deletions

View file

@ -81,8 +81,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
# This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
else
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
@ -95,7 +98,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
addJarInDir "${INTERPRETER_DIR}/dep"
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"
if [[ -z "${PYTHONPATH}" ]]; then
export PYTHONPATH="${PYSPARKPATH}"

View file

@ -33,7 +33,7 @@
<name>Zeppelin: Spark dependencies</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<spark.version>1.4.1</spark.version>
@ -130,117 +130,117 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependencies>
<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
@ -489,7 +489,7 @@
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.5</id>
<properties>
@ -513,7 +513,27 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.5.2</spark.version>
<!-- TODO - no 1.6.0 version on maven repo yet...
<spark.version>1.6.0</spark.version>
<py4j.version>0.9</py4j.version>
-->
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
<!-- TODO - update to release build
<spark.download.url>http://people.apache.org/~pwendell/spark-releases/spark-v${spark.version}-preview2-bin/spark-${spark.version}.tgz</spark.download.url>
-->
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>hadoop-0.23</id>
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
@ -731,10 +751,6 @@
<profile>
<id>pyspark</id>
<properties>
<spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
</spark.download.url>
</properties>
<build>
<plugins>
<plugin>

View file

@ -33,16 +33,73 @@
<name>Zeppelin: Spark</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<spark.version>1.4.1</spark.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<hadoop.version>2.3.0</hadoop.version>
<py4j.version>0.8.2.1</py4j.version>
</properties>
<profiles>
<profile>
<id>vendor-repo</id>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
</profile>
<profile>
<id>spark-1.1</id>
<properties>
<spark.version>1.1.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.2</id>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.3</id>
<properties>
<spark.version>1.3.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.5.2</spark.version>
<!-- TODO - no 1.6.0 version on maven repo yet...
<spark.version>1.6.0</spark.version>
<py4j.version>0.9</py4j.version>
-->
</properties>
</profile>
</profiles>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
@ -72,7 +129,7 @@
<artifactId>guava</artifactId>
<version>14.0.1</version>
</dependency>
<!-- Aether :: maven dependency resolution -->
<dependency>
<groupId>org.apache.maven</groupId>
@ -294,18 +351,6 @@
</dependency>
</dependencies>
<profiles>
<profile>
<id>vendor-repo</id>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
</profile>
</profiles>
<build>
<plugins>
<plugin>
@ -397,7 +442,7 @@
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>

View file

@ -317,7 +317,8 @@ public class SparkInterpreter extends Interpreter {
"python" + File.separator + "lib");
}
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"};
//Only one of py4j-0.9-src.zip and py4j-0.8.2.1-src.zip should exist
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib : pythonLibs) {
File libFile = new File(pysparkPath, lib);

View file

@ -32,9 +32,10 @@ public class SparkVersion {
public static final SparkVersion SPARK_1_4_0 = SparkVersion.fromVersionString("1.4.0");
public static final SparkVersion SPARK_1_5_0 = SparkVersion.fromVersionString("1.5.0");
public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0");
public static final SparkVersion SPARK_1_7_0 = SparkVersion.fromVersionString("1.7.0");
public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_0_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_6_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_7_0;
private int version;
private String versionString;