Merge branch 'master' into ZEPPELIN-3825

2026-05-24 09:38:26 +00:00 · 2018-11-05 11:55:55 -05:00 · 2018-11-05 11:55:55 -05:00 · 15c8aa0fba
commit 15c8aa0fba
parent 84b8da4d5b d3028c21df
9 changed files with 40 additions and 78 deletions
--- a/README.md
+++ b/README.md
@ -15,15 +15,15 @@ Core feature:
   * Built-in Apache Spark support


-To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](https://zeppelin.apache.org)
+To know more about Zeppelin, visit our web site [https://zeppelin.apache.org](https://zeppelin.apache.org)


 ## Getting Started

 ### Install binary package
-Please go to [install](https://zeppelin.apache.org/docs/latest/install/install.html) to install Apache Zeppelin from binary package.
+Please go to [install](https://zeppelin.apache.org/docs/latest/quickstart/install.html) to install Apache Zeppelin from binary package.

 ### Build from source
-Please check [Build from source](https://zeppelin.apache.org/docs/latest/install/build.html) to build Zeppelin from source.
+Please check [Build from source](https://zeppelin.apache.org/docs/latest/setup/basics/how_to_build.html) to build Zeppelin from source.


--- a/dev/create_release.sh
+++ b/dev/create_release.sh
@ -106,8 +106,8 @@ function make_binary_release() {

 git_clone
 make_source_package
-make_binary_release all "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION}"
-make_binary_release netinst "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"
+make_binary_release all "-Pspark-2.3 -Phadoop-2.6 -Pscala-${SCALA_VERSION}"
+make_binary_release netinst "-Pspark-2.3 -Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"

 # remove non release files and dirs
 rm -rf "${WORKING_DIR}/zeppelin"
--- a/docs/setup/basics/how_to_build.md
+++ b/docs/setup/basics/how_to_build.md
@ -70,7 +70,7 @@ If you're unsure about the options, use the same commands that creates official
 # update all pom.xml to use scala 2.11
 ./dev/change_scala_version.sh 2.11
 # build zeppelin with all interpreters and include latest version of Apache spark support for local mode.
-mvn clean package -DskipTests -Pspark-2.0 -Phadoop-2.4 -Pr -Pscala-2.11
+mvn clean package -DskipTests -Pspark-2.3 -Phadoop-2.6 -Pscala-2.11
 ```

 #### 3. Done
@ -98,35 +98,25 @@ Set spark major version
 Available profiles are

 ```
+-Pspark-2.3
+-Pspark-2.2
 -Pspark-2.1
 -Pspark-2.0
 -Pspark-1.6
-Pspark-1.5
-Pspark-1.4
-Pcassandra-spark-1.5
-Pcassandra-spark-1.4
-Pcassandra-spark-1.3
-Pcassandra-spark-1.2
-Pcassandra-spark-1.1
 ```

 minor version can be adjusted by `-Dspark.version=x.x.x`


-##### `-Phadoop-[version]`
+##### `-Phadoop[version]`

-set hadoop major version
+set hadoop major version (default hadoop2)

 Available profiles are

 ```
-Phadoop-0.23
-Phadoop-1
-Phadoop-2.2
-Phadoop-2.3
-Phadoop-2.4
-Phadoop-2.6
-Phadoop-2.7
+-Phadoop2
+-Phadoop3
 ```

 minor version can be adjusted by `-Dhadoop.version=x.x.x`
@ -144,27 +134,13 @@ Available profiles are
 ##### `-Pr` (optional)

 enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
+Note that, this enables R interpreter which is different from sparkR included in Spark interpreter by default.

 ##### `-Pvendor-repo` (optional)

-enable 3rd party vendor repository (cloudera)
+enable 3rd party vendor repository (Cloudera, Hortonworks)


-##### `-Pmapr[version]` (optional)
-
-For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.6`, `-Pspark-2.0`, etc.) as needed.
-The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com
-
-Available profiles are
-
-```
-Pmapr3
-Pmapr40
-Pmapr41
-Pmapr50
-Pmapr51
-```
-
 #### -Pexamples (optional)

 Bulid examples under zeppelin-examples directory
@ -176,23 +152,17 @@ Here are some examples with several options:
 ```bash
 # build with spark-2.1, scala-2.11
 ./dev/change_scala_version.sh 2.11
-mvn clean package -Pspark-2.1 -Phadoop-2.4 -Pscala-2.11 -DskipTests
+mvn clean package -Pspark-2.1 -Pscala-2.11 -DskipTests

 # build with spark-2.0, scala-2.11
 ./dev/change_scala_version.sh 2.11
-mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pscala-2.11 -DskipTests
+mvn clean package -Pspark-2.0 -Pscala-2.11 -DskipTests

 # build with spark-1.6, scala-2.10
-mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests
-
-# spark-cassandra integration
-mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests -DskipTests
+mvn clean package -Pspark-1.6 -DskipTests

 # with CDH
-mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests
-
-# with MapR
-mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
+mvn clean package -Pspark-1.6 -Dhadoop.version=2.6.0-cdh5.5.0 -Pvendor-repo -DskipTests
 ```

 Ignite Interpreter
@ -350,10 +320,10 @@ mvn clean package -Pbuild-distr
 To build a distribution with specific profiles, run:

 ```sh
-mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4
+mvn clean package -Pbuild-distr -Pspark-2.3 -Phadoop-2.4
 ```

-The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions.  
+The profiles `-Pspark-2.3 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions.  

 The archive is generated under _`zeppelin-distribution/target`_ directory

--- a/flink/src/main/scala/org/apache/zeppelin/flink/FlinkScalaInterpreter.scala
+++ b/flink/src/main/scala/org/apache/zeppelin/flink/FlinkScalaInterpreter.scala
@ -213,10 +213,18 @@ class FlinkScalaInterpreter(val properties: Properties) {
    }
    if (cluster != null) {
      cluster match {
-        case Some(Left(Left(legacyMiniCluster))) => legacyMiniCluster.close()
-        case Some(Left(Right(newMiniCluster))) => newMiniCluster.close()
-        case Some(Right(yarnCluster)) => yarnCluster.shutdown()
-        case _ =>
+        case Some(Left(Left(legacyMiniCluster))) =>
+          LOGGER.info("Shutdown LegacyMiniCluster")
+          legacyMiniCluster.close()
+        case Some(Left(Right(newMiniCluster))) =>
+          LOGGER.info("Shutdown NewMiniCluster")
+          newMiniCluster.close()
+        case Some(Right(yarnCluster)) =>
+          LOGGER.info("Shutdown YarnCluster")
+          yarnCluster.shutDownCluster()
+          yarnCluster.shutdown()
+        case e =>
+          LOGGER.error("Unrecognized cluster type: " + e.getClass.getSimpleName)
      }
    }
  }
--- a/PySpark)_2C2AUG798.zpln
+++ b/PySpark)_2C2AUG798.zpln
@ -751,7 +751,7 @@
      "progressUpdateIntervalMs": 500
    }
  ],
-  "name": "Matplotlib (Python | PySpark)",
+  "name": "Matplotlib (Python, PySpark)",
  "id": "2C2AUG798",
  "angularObjects": {
    "2C6WUGPNH:shared_process": [],
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreterServer.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreterServer.java
@ -142,6 +142,8 @@ public class RemoteInterpreterServer extends Thread
                                 String interpreterGroupId,
                                 boolean isTest)
      throws TTransportException, IOException {
+    logger.info("Starting remote interpreter server on port {}, intpEventServerAddress: {}:{}", port,
+            intpEventServerHost, intpEventServerPort);
    if (null != intpEventServerHost) {
      this.intpEventServerHost = intpEventServerHost;
      if (!isTest) {
@ -171,7 +173,6 @@ public class RemoteInterpreterServer extends Thread
    }
    server = new TThreadPoolServer(
        new TThreadPoolServer.Args(serverTransport).processor(processor));
-    logger.info("Starting remote interpreter server on port {}", port);
    remoteWorksResponsePool = Collections.synchronizedMap(new HashMap<String, Object>());
  }

--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreterUtils.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreterUtils.java
@ -138,7 +138,7 @@ public class RemoteInterpreterUtils {
  public static String getInterpreterSettingId(String intpGrpId) {
    String settingId = null;
    if (intpGrpId != null) {
-      int indexOfColon = intpGrpId.indexOf(":");
+      int indexOfColon = intpGrpId.indexOf("-");
      settingId = intpGrpId.substring(0, indexOfColon);
    }
    return settingId;
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/RemoteInterpreterEventServer.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/RemoteInterpreterEventServer.java
@ -87,24 +87,6 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
    this.appListener = interpreterSettingManager.getAppEventListener();
  }

-  public void run() {
-    TServerSocket tSocket = null;
-    try {
-      tSocket = RemoteInterpreterUtils.createTServerSocket(portRange);
-      port = tSocket.getServerSocket().getLocalPort();
-      host = RemoteInterpreterUtils.findAvailableHostAddress();
-    } catch (IOException e1) {
-      throw new RuntimeException(e1);
-    }
-
-    LOGGER.info("InterpreterEventServer will start. Port: {}", port);
-    RemoteInterpreterEventService.Processor processor =
-        new RemoteInterpreterEventService.Processor(this);
-    this.thriftServer = new TThreadPoolServer(
-        new TThreadPoolServer.Args(tSocket).processor(processor));
-    this.thriftServer.serve();
-  }
-
  public void start() throws IOException {
    Thread startingThread = new Thread() {
      @Override
@ -118,7 +100,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
          throw new RuntimeException(e1);
        }

-        LOGGER.info("InterpreterEventServer will start. Port: {}", port);
+        LOGGER.info("InterpreterEventServer is starting at {}:{}", host, port);
        RemoteInterpreterEventService.Processor processor =
            new RemoteInterpreterEventService.Processor(RemoteInterpreterEventServer.this);
        thriftServer = new TThreadPoolServer(
@ -142,7 +124,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
    if (thriftServer != null && !thriftServer.isServing()) {
      throw new IOException("Fail to start InterpreterEventServer in 30 seconds.");
    }
-    LOGGER.info("InterpreterEventServer is started");
+    LOGGER.info("RemoteInterpreterEventServer is started");

    runner = new AppendOutputRunner(listener);
    appendFuture = appendService.scheduleWithFixedDelay(
@ -156,6 +138,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
    if (appendFuture != null) {
      appendFuture.cancel(true);
    }
+    LOGGER.info("RemoteInterpreterEventServer is stopped");
  }


@ -252,6 +235,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi

  @Override
  public void addAngularObject(String intpGroupId, String json) throws TException {
+    LOGGER.debug("Add AngularObject, interpreterGroupId: " + intpGroupId + ", json: " + json);
    AngularObject angularObject = AngularObject.fromJson(json);
    InterpreterGroup interpreterGroup =
        interpreterSettingManager.getInterpreterGroupById(intpGroupId);
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/scheduler/RemoteScheduler.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/scheduler/RemoteScheduler.java
@ -147,7 +147,6 @@ public class RemoteScheduler extends AbstractScheduler {

    public synchronized Status getStatus() {
      if (!remoteInterpreter.isOpened()) {
-        LOGGER.info("status:" + getLastStatus());
        return getLastStatus();
      }
      Status status = Status.valueOf(remoteInterpreter.getStatus(job.getId()));