Merge branch 'master' of https://github.com/apache/incubator-zeppelin

2026-05-24 09:38:26 +00:00 · 2016-01-08 09:23:37 +01:00 · 2016-01-08 09:23:37 +01:00 · f0ad06d565
commit f0ad06d565
parent a0e0d54ec7 c54b8821fc
10 changed files with 233 additions and 157 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -22,17 +22,24 @@ before_install:
  - "sh -e /etc/init.d/xvfb start"

 install:
-  - mvn package -DskipTests -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
+  - mvn package -DskipTests -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B

 before_script:
  -

 script:
+ # spark 1.6
+  - mvn package -Pbuild-distr -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B
+  - ./testing/startSparkCluster.sh 1.6.0 2.3
+  - echo "export SPARK_HOME=`pwd`/spark-1.6.0-bin-hadoop2.3" > conf/zeppelin-env.sh
+  - mvn verify -Pusing-packaged-distr -Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding -B
+  - ./testing/stopSparkCluster.sh 1.6.0 2.3
 # spark 1.5
-  - mvn package -Pbuild-distr -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
+  - rm -rf `pwd`/interpreter/spark
+  - mvn package -DskipTests -Pspark-1.5 -Phadoop-2.3 -Ppyspark -B -pl 'zeppelin-interpreter,spark-dependencies,spark'
  - ./testing/startSparkCluster.sh 1.5.2 2.3
  - echo "export SPARK_HOME=`pwd`/spark-1.5.2-bin-hadoop2.3" > conf/zeppelin-env.sh
-  - mvn verify -Pusing-packaged-distr -Pspark-1.5 -Phadoop-2.3 -Ppyspark -Pscalding -B
+  - mvn package -Pspark-1.5 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,zeppelin-zengine,zeppelin-server' -Dtest=org.apache.zeppelin.rest.*Test -DfailIfNoTests=false
  - ./testing/stopSparkCluster.sh 1.5.2 2.3
 # spark 1.4
  - rm -rf `pwd`/interpreter/spark
--- a/README.md
+++ b/README.md
@ -67,6 +67,7 @@ Set spark major version
 Available profiles are

 ```
+-Pspark-1.6
 -Pspark-1.5
 -Pspark-1.4
 -Pspark-1.3
@ -134,13 +135,13 @@ Here're some examples:

 ```
 # basic build
-mvn clean package -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark
+mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark

 # spark-cassandra integration
 mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests

 # with CDH
-mvn clean package -Pspark-1.2 -Dhadoop.version=2.5.0-cdh5.3.0 -Phadoop-2.4 -Pvendor-repo -DskipTests
+mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests

 # with MapR
 mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@ -81,8 +81,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
    # This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
    ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}

+    pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
+    py4j=($pattern)
+    # pick the first match py4j zip - there should only be one
    export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-    export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
+    export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
  else
    # add Hadoop jars into classpath
    if [[ -n "${HADOOP_HOME}" ]]; then
@ -95,7 +98,11 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
    fi

    addJarInDir "${INTERPRETER_DIR}/dep"
-    PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
+
+    pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
+    py4j=($pattern)
+    # pick the first match py4j zip - there should only be one
+    PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"

    if [[ -z "${PYTHONPATH}" ]]; then
      export PYTHONPATH="${PYSPARKPATH}"
--- a/spark-dependencies/pom.xml
+++ b/spark-dependencies/pom.xml
@ -33,7 +33,7 @@
  <name>Zeppelin: Spark dependencies</name>
  <description>Zeppelin spark support</description>
  <url>http://zeppelin.incubator.apache.org</url>
-  
+

  <properties>
    <spark.version>1.4.1</spark.version>
@ -130,117 +130,117 @@
          </exclusion>
        </exclusions>
      </dependency>
-      <dependency>		
-        <groupId>org.apache.hadoop</groupId>		
-        <artifactId>hadoop-yarn-api</artifactId>		
-        <version>${yarn.version}</version>		
-        <exclusions>		
-          <exclusion>		
-            <groupId>asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.ow2.asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.jboss.netty</groupId>		
-            <artifactId>netty</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>commons-logging</groupId>		
-            <artifactId>commons-logging</artifactId>		
-          </exclusion>		
-        </exclusions>		
-      </dependency>		
-		
-      <dependency>		
-        <groupId>org.apache.hadoop</groupId>		
-        <artifactId>hadoop-yarn-common</artifactId>		
-        <version>${yarn.version}</version>		
-        <exclusions>		
-          <exclusion>		
-            <groupId>asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.ow2.asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.jboss.netty</groupId>		
-            <artifactId>netty</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>javax.servlet</groupId>		
-            <artifactId>servlet-api</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>commons-logging</groupId>		
-            <artifactId>commons-logging</artifactId>		
-          </exclusion>		
-        </exclusions>		
-      </dependency>		
-		
-      <dependency>		
-        <groupId>org.apache.hadoop</groupId>		
-        <artifactId>hadoop-yarn-server-web-proxy</artifactId>		
-        <version>${yarn.version}</version>		
-        <exclusions>		
-          <exclusion>		
-            <groupId>asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.ow2.asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.jboss.netty</groupId>		
-            <artifactId>netty</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>javax.servlet</groupId>		
-            <artifactId>servlet-api</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>commons-logging</groupId>		
-            <artifactId>commons-logging</artifactId>		
-          </exclusion>		
-        </exclusions>		
-      </dependency>		
-		
-      <dependency>		
-        <groupId>org.apache.hadoop</groupId>		
-        <artifactId>hadoop-yarn-client</artifactId>		
-        <version>${yarn.version}</version>		
-        <exclusions>		
-          <exclusion>		
-            <groupId>asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.ow2.asm</groupId>		
-            <artifactId>asm</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>org.jboss.netty</groupId>		
-            <artifactId>netty</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>javax.servlet</groupId>		
-            <artifactId>servlet-api</artifactId>		
-          </exclusion>		
-          <exclusion>		
-            <groupId>commons-logging</groupId>		
-            <artifactId>commons-logging</artifactId>		
-          </exclusion>		
-        </exclusions>		
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-api</artifactId>
+        <version>${yarn.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-common</artifactId>
+        <version>${yarn.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-server-web-proxy</artifactId>
+        <version>${yarn.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-yarn-client</artifactId>
+        <version>${yarn.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.jboss.netty</groupId>
+            <artifactId>netty</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>javax.servlet</groupId>
+            <artifactId>servlet-api</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+          </exclusion>
+        </exclusions>
      </dependency>
    </dependencies>
  </dependencyManagement>
-  
-  <dependencies>  
+
+  <dependencies>
    <!-- Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
@ -489,7 +489,7 @@
      <dependencies>
      </dependencies>
    </profile>
- 
+
    <profile>
      <id>cassandra-spark-1.5</id>
      <properties>
@ -513,7 +513,21 @@
        </dependency>
      </dependencies>
    </profile>
-   
+
+    <profile>
+      <id>spark-1.6</id>
+      <properties>
+        <spark.version>1.6.0</spark.version>
+        <py4j.version>0.9</py4j.version>
+        <akka.group>com.typesafe.akka</akka.group>
+        <akka.version>2.3.11</akka.version>
+        <protobuf.version>2.5.0</protobuf.version>
+      </properties>
+
+      <dependencies>
+      </dependencies>
+    </profile>
+
    <profile>
      <id>hadoop-0.23</id>
      <!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
@ -731,10 +745,6 @@

    <profile>
      <id>pyspark</id>
-      <properties>
-        <spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
-        </spark.download.url>
-      </properties>
      <build>
        <plugins>
          <plugin>
--- a/spark/pom.xml
+++ b/spark/pom.xml
@ -33,16 +33,70 @@
  <name>Zeppelin: Spark</name>
  <description>Zeppelin spark support</description>
  <url>http://zeppelin.incubator.apache.org</url>
-  
+
  <properties>
    <spark.version>1.4.1</spark.version>
    <scala.version>2.10.4</scala.version>
    <scala.binary.version>2.10</scala.binary.version>
-
    <hadoop.version>2.3.0</hadoop.version>
    <py4j.version>0.8.2.1</py4j.version>
  </properties>

+  <profiles>
+    <profile>
+      <id>vendor-repo</id>
+      <repositories>
+        <repository>
+          <id>cloudera</id>
+          <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+        </repository>
+      </repositories>
+    </profile>
+
+    <profile>
+      <id>spark-1.1</id>
+      <properties>
+        <spark.version>1.1.1</spark.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>spark-1.2</id>
+      <properties>
+        <spark.version>1.2.1</spark.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>spark-1.3</id>
+      <properties>
+        <spark.version>1.3.1</spark.version>
+      </properties>
+    </profile>
+
+   <profile>
+      <id>spark-1.4</id>
+      <properties>
+        <spark.version>1.4.1</spark.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>spark-1.5</id>
+      <properties>
+        <spark.version>1.5.2</spark.version>
+      </properties>
+    </profile>
+
+    <profile>
+      <id>spark-1.6</id>
+      <properties>
+        <spark.version>1.6.0</spark.version>
+        <py4j.version>0.9</py4j.version>
+      </properties>
+    </profile>
+  </profiles>
+
  <dependencies>
    <dependency>
      <groupId>org.slf4j</groupId>
@ -72,7 +126,7 @@
      <artifactId>guava</artifactId>
      <version>14.0.1</version>
    </dependency>
-    
+
    <!-- Aether :: maven dependency resolution -->
    <dependency>
      <groupId>org.apache.maven</groupId>
@ -294,18 +348,6 @@
    </dependency>
  </dependencies>

-  <profiles>
-    <profile>
-      <id>vendor-repo</id>
-      <repositories>
-        <repository>
-          <id>cloudera</id>
-          <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
-        </repository>
-      </repositories>
-    </profile>
-  </profiles>
-
  <build>
    <plugins>
      <plugin>
@ -397,7 +439,7 @@
              <overWriteReleases>false</overWriteReleases>
              <overWriteSnapshots>false</overWriteSnapshots>
              <overWriteIfNewer>true</overWriteIfNewer>
-              <includeScope>runtime</includeScope>              
+              <includeScope>runtime</includeScope>
              <artifactItems>
                <artifactItem>
                  <groupId>${project.groupId}</groupId>
--- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
@ -317,7 +317,8 @@ public class SparkInterpreter extends Interpreter {
          "python" + File.separator + "lib");
    }

-    String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"};
+    //Only one of py4j-0.9-src.zip and py4j-0.8.2.1-src.zip should exist
+    String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip"};
    ArrayList<String> pythonLibUris = new ArrayList<>();
    for (String lib : pythonLibs) {
      File libFile = new File(pysparkPath, lib);
--- a/spark/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
@ -32,9 +32,10 @@ public class SparkVersion {
  public static final SparkVersion SPARK_1_4_0 = SparkVersion.fromVersionString("1.4.0");
  public static final SparkVersion SPARK_1_5_0 = SparkVersion.fromVersionString("1.5.0");
  public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0");
+  public static final SparkVersion SPARK_1_7_0 = SparkVersion.fromVersionString("1.7.0");

  public static final SparkVersion MIN_SUPPORTED_VERSION =  SPARK_1_0_0;
-  public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_6_0;
+  public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_1_7_0;

  private int version;
  private String versionString;
--- a/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinContext.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinContext.java
@ -295,30 +295,30 @@ public class ZeppelinContext extends HashMap<String, Object> {
    try {
      take = df.getClass().getMethod("take", int.class);
      rows = (Object[]) take.invoke(df, maxResult + 1);
-
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException
        | IllegalArgumentException | InvocationTargetException | ClassCastException e) {
      sc.clearJobGroup();
      throw new InterpreterException(e);
    }

-    String msg = null;
-
+    List<Attribute> columns = null;
    // get field names
-    Method queryExecution;
-    QueryExecution qe;
    try {
-      queryExecution = df.getClass().getMethod("queryExecution");
-      qe = (QueryExecution) queryExecution.invoke(df);
+      // Use reflection because of classname returned by queryExecution changes from
+      // Spark <1.5.2 org.apache.spark.sql.SQLContext$QueryExecution
+      // Spark 1.6.0> org.apache.spark.sql.hive.HiveContext$QueryExecution
+      Object qe = df.getClass().getMethod("queryExecution").invoke(df);
+      Object a = qe.getClass().getMethod("analyzed").invoke(qe);
+      scala.collection.Seq seq = (scala.collection.Seq) a.getClass().getMethod("output").invoke(a);
+
+      columns = (List<Attribute>) scala.collection.JavaConverters.seqAsJavaListConverter(seq)
+                                                                 .asJava();
    } catch (NoSuchMethodException | SecurityException | IllegalAccessException
        | IllegalArgumentException | InvocationTargetException e) {
      throw new InterpreterException(e);
    }

-    List<Attribute> columns =
-        scala.collection.JavaConverters.asJavaListConverter(
-            qe.analyzed().output()).asJava();
-
+    String msg = null;
    for (Attribute col : columns) {
      if (msg == null) {
        msg = col.name();
--- a/spark/src/main/resources/python/zeppelin_pyspark.py
+++ b/spark/src/main/resources/python/zeppelin_pyspark.py
@ -111,7 +111,7 @@ class PySparkCompletion:
  def getGlobalCompletion(self):
    objectDefList = []
    try:
-      for completionItem in list(globals().iterkeys()):
+      for completionItem in list(globals().keys()):
        objectDefList.append(completionItem)
    except:
      return None
@ -119,18 +119,20 @@ class PySparkCompletion:
      return objectDefList

  def getMethodCompletion(self, text_value):
-    objectDefList = []
+    execResult = locals()
+    if text_value == None:
+      return None
    completion_target = text_value
    try:
      if len(completion_target) <= 0:
        return None
      if text_value[-1] == ".":
        completion_target = text_value[:-1]
-      exec("%s = %s(%s)" % ("objectDefList", "dir", completion_target))
+      exec("{} = dir({})".format("objectDefList", completion_target), globals(), execResult)
    except:
      return None
    else:
-      return objectDefList
+      return list(execResult['objectDefList'])


  def getCompletion(self, text_value):
@ -147,9 +149,9 @@ class PySparkCompletion:
        for completionItem in list(objectCompletionList):
          completionList.add(completionItem)
    if len(completionList) <= 0:
-      print ""
+      print("")
    else:
-      print json.dumps(filter(lambda x : not re.match("^__.*", x), list(completionList)))
+      print(json.dumps(list(filter(lambda x : not re.match("^__.*", x), list(completionList)))))


 output = Logger()
--- a/zeppelin-web/src/assets/styles/looknfeel/default.css
+++ b/zeppelin-web/src/assets/styles/looknfeel/default.css
@ -27,3 +27,8 @@ body {
 .nv-controlsWrap {
  display: block;
 }
+
+.paragraph-col .focused {
+  box-shadow: 0px 2px 7px rgba(0, 0, 0, 0.3);
+  border-color: white;
+}