This commit is contained in:
beeva-victorgarcia 2016-01-08 09:23:37 +01:00
commit f0ad06d565
10 changed files with 233 additions and 157 deletions

View file

@ -22,17 +22,24 @@ before_install:
- "sh -e /etc/init.d/xvfb start"
install:
- mvn package -DskipTests -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
- mvn package -DskipTests -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B
before_script:
-
script:
# spark 1.6
- mvn package -Pbuild-distr -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B
- ./testing/startSparkCluster.sh 1.6.0 2.3
- echo "export SPARK_HOME=`pwd`/spark-1.6.0-bin-hadoop2.3" > conf/zeppelin-env.sh
- mvn verify -Pusing-packaged-distr -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B
- ./testing/stopSparkCluster.sh 1.6.0 2.3
# spark 1.5
- mvn package -Pbuild-distr -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
- rm -rf `pwd`/interpreter/spark
- mvn package -DskipTests -Pspark-1.5 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
- ./testing/startSparkCluster.sh 1.5.2 2.3
- echo "export SPARK_HOME=`pwd`/spark-1.5.2-bin-hadoop2.3" > conf/zeppelin-env.sh
- mvn verify -Pusing-packaged-distr -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
- mvn package -Pspark-1.5 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
- ./testing/stopSparkCluster.sh 1.5.2 2.3
# spark 1.4
- rm -rf `pwd`/interpreter/spark

View file

@ -67,6 +67,7 @@ Set spark major version
Available profiles are
```
-Pspark-1.6
-Pspark-1.5
-Pspark-1.4
-Pspark-1.3
@ -134,13 +135,13 @@ Here're some examples:
```
# basic build
mvn clean package -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark
# spark-cassandra integration
mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests
# with CDH
mvn clean package -Pspark-1.2 -Dhadoop.version=2.5.0-cdh5.3.0 -Phadoop-2.4 -Pvendor-repo -DskipTests
mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests
# with MapR
mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests

View file

@ -81,8 +81,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
# This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
else
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
@ -95,7 +98,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
addJarInDir "${INTERPRETER_DIR}/dep"
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"
if [[ -z "${PYTHONPATH}" ]]; then
export PYTHONPATH="${PYSPARKPATH}"

View file

@ -33,7 +33,7 @@
<name>Zeppelin: Spark dependencies</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<spark.version>1.4.1</spark.version>
@ -130,117 +130,117 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
<exclusions>
<exclusion>
<groupId>asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
</exclusion>
<exclusion>
<groupId>org.jboss.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependencies>
<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
@ -489,7 +489,7 @@
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.5</id>
<properties>
@ -513,7 +513,21 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.0</spark.version>
<py4j.version>0.9</py4j.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>hadoop-0.23</id>
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
@ -731,10 +745,6 @@
<profile>
<id>pyspark</id>
<properties>
<spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
</spark.download.url>
</properties>
<build>
<plugins>
<plugin>

View file

@ -33,16 +33,70 @@
<name>Zeppelin: Spark</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<spark.version>1.4.1</spark.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<hadoop.version>2.3.0</hadoop.version>
<py4j.version>0.8.2.1</py4j.version>
</properties>
<profiles>
<profile>
<id>vendor-repo</id>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
</profile>
<profile>
<id>spark-1.1</id>
<properties>
<spark.version>1.1.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.2</id>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.3</id>
<properties>
<spark.version>1.3.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.0</spark.version>
<py4j.version>0.9</py4j.version>
</properties>
</profile>
</profiles>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
@ -72,7 +126,7 @@
<artifactId>guava</artifactId>
<version>14.0.1</version>
</dependency>
<!-- Aether :: maven dependency resolution -->
<dependency>
<groupId>org.apache.maven</groupId>
@ -294,18 +348,6 @@
</dependency>
</dependencies>
<profiles>
<profile>
<id>vendor-repo</id>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
</profile>
</profiles>
<build>
<plugins>
<plugin>
@ -397,7 +439,7 @@
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>

View file

@ -317,7 +317,8 @@ public class SparkInterpreter extends Interpreter {
"python" + File.separator + "lib");
}
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"};
//Only one of py4j-0.9-src.zip and py4j-0.8.2.1-src.zip should exist
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib : pythonLibs) {
File libFile = new File(pysparkPath, lib);

View file

@ -32,9 +32,10 @@ public class SparkVersion {
public static final SparkVersion SPARK_1_4_0 = SparkVersion.fromVersionString("1.4.0");
public static final SparkVersion SPARK_1_5_0 = SparkVersion.fromVersionString("1.5.0");
public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0");
public static final SparkVersion SPARK_1_7_0 = SparkVersion.fromVersionString("1.7.0");
public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_0_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_6_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_7_0;
private int version;
private String versionString;

View file

@ -295,30 +295,30 @@ public class ZeppelinContext extends HashMap<String, Object> {
try {
take = df.getClass().getMethod("take", int.class);
rows = (Object[]) take.invoke(df, maxResult + 1);
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
| IllegalArgumentException | InvocationTargetException | ClassCastException e) {
sc.clearJobGroup();
throw new InterpreterException(e);
}
String msg = null;
List<Attribute> columns = null;
// get field names
Method queryExecution;
QueryExecution qe;
try {
queryExecution = df.getClass().getMethod("queryExecution");
qe = (QueryExecution) queryExecution.invoke(df);
// Use reflection because of classname returned by queryExecution changes from
// Spark <1.5.2 org.apache.spark.sql.SQLContext$QueryExecution
// Spark 1.6.0> org.apache.spark.sql.hive.HiveContext$QueryExecution
Object qe = df.getClass().getMethod("queryExecution").invoke(df);
Object a = qe.getClass().getMethod("analyzed").invoke(qe);
scala.collection.Seq seq = (scala.collection.Seq) a.getClass().getMethod("output").invoke(a);
columns = (List<Attribute>) scala.collection.JavaConverters.seqAsJavaListConverter(seq)
.asJava();
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
| IllegalArgumentException | InvocationTargetException e) {
throw new InterpreterException(e);
}
List<Attribute> columns =
scala.collection.JavaConverters.asJavaListConverter(
qe.analyzed().output()).asJava();
String msg = null;
for (Attribute col : columns) {
if (msg == null) {
msg = col.name();

View file

@ -111,7 +111,7 @@ class PySparkCompletion:
def getGlobalCompletion(self):
objectDefList = []
try:
for completionItem in list(globals().iterkeys()):
for completionItem in list(globals().keys()):
objectDefList.append(completionItem)
except:
return None
@ -119,18 +119,20 @@ class PySparkCompletion:
return objectDefList
def getMethodCompletion(self, text_value):
objectDefList = []
execResult = locals()
if text_value == None:
return None
completion_target = text_value
try:
if len(completion_target) <= 0:
return None
if text_value[-1] == ".":
completion_target = text_value[:-1]
exec("%s = %s(%s)" % ("objectDefList", "dir", completion_target))
exec("{} = dir({})".format("objectDefList", completion_target), globals(), execResult)
except:
return None
else:
return objectDefList
return list(execResult['objectDefList'])
def getCompletion(self, text_value):
@ -147,9 +149,9 @@ class PySparkCompletion:
for completionItem in list(objectCompletionList):
completionList.add(completionItem)
if len(completionList) <= 0:
print ""
print("")
else:
print json.dumps(filter(lambda x : not re.match("^__.*", x), list(completionList)))
print(json.dumps(list(filter(lambda x : not re.match("^__.*", x), list(completionList)))))
output = Logger()

View file

@ -27,3 +27,8 @@ body {
.nv-controlsWrap {
display: block;
}
.paragraph-col .focused {
box-shadow: 0px 2px 7px rgba(0, 0, 0, 0.3);
border-color: white;
}