Merge branch 'master' into ZEPPELIN-3825

This commit is contained in:
sanjaykumar 2018-11-05 11:55:55 -05:00
commit 15c8aa0fba
9 changed files with 40 additions and 78 deletions

View file

@ -15,15 +15,15 @@ Core feature:
* Built-in Apache Spark support
To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](https://zeppelin.apache.org)
To know more about Zeppelin, visit our web site [https://zeppelin.apache.org](https://zeppelin.apache.org)
## Getting Started
### Install binary package
Please go to [install](https://zeppelin.apache.org/docs/latest/install/install.html) to install Apache Zeppelin from binary package.
Please go to [install](https://zeppelin.apache.org/docs/latest/quickstart/install.html) to install Apache Zeppelin from binary package.
### Build from source
Please check [Build from source](https://zeppelin.apache.org/docs/latest/install/build.html) to build Zeppelin from source.
Please check [Build from source](https://zeppelin.apache.org/docs/latest/setup/basics/how_to_build.html) to build Zeppelin from source.

View file

@ -106,8 +106,8 @@ function make_binary_release() {
git_clone
make_source_package
make_binary_release all "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION}"
make_binary_release netinst "-Pspark-2.1 -Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"
make_binary_release all "-Pspark-2.3 -Phadoop-2.6 -Pscala-${SCALA_VERSION}"
make_binary_release netinst "-Pspark-2.3 -Phadoop-2.6 -Pscala-${SCALA_VERSION} -pl zeppelin-interpreter,zeppelin-zengine,:zeppelin-display_${SCALA_VERSION},:zeppelin-spark-dependencies_${SCALA_VERSION},:zeppelin-spark_${SCALA_VERSION},zeppelin-web,zeppelin-server,zeppelin-distribution -am"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"

View file

@ -70,7 +70,7 @@ If you're unsure about the options, use the same commands that creates official
# update all pom.xml to use scala 2.11
./dev/change_scala_version.sh 2.11
# build zeppelin with all interpreters and include latest version of Apache spark support for local mode.
mvn clean package -DskipTests -Pspark-2.0 -Phadoop-2.4 -Pr -Pscala-2.11
mvn clean package -DskipTests -Pspark-2.3 -Phadoop-2.6 -Pscala-2.11
```
#### 3. Done
@ -98,35 +98,25 @@ Set spark major version
Available profiles are
```
-Pspark-2.3
-Pspark-2.2
-Pspark-2.1
-Pspark-2.0
-Pspark-1.6
-Pspark-1.5
-Pspark-1.4
-Pcassandra-spark-1.5
-Pcassandra-spark-1.4
-Pcassandra-spark-1.3
-Pcassandra-spark-1.2
-Pcassandra-spark-1.1
```
minor version can be adjusted by `-Dspark.version=x.x.x`
##### `-Phadoop-[version]`
##### `-Phadoop[version]`
set hadoop major version
set hadoop major version (default hadoop2)
Available profiles are
```
-Phadoop-0.23
-Phadoop-1
-Phadoop-2.2
-Phadoop-2.3
-Phadoop-2.4
-Phadoop-2.6
-Phadoop-2.7
-Phadoop2
-Phadoop3
```
minor version can be adjusted by `-Dhadoop.version=x.x.x`
@ -144,27 +134,13 @@ Available profiles are
##### `-Pr` (optional)
enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration.
Note that, this enables R interpreter which is different from sparkR included in Spark interpreter by default.
##### `-Pvendor-repo` (optional)
enable 3rd party vendor repository (cloudera)
enable 3rd party vendor repository (Cloudera, Hortonworks)
##### `-Pmapr[version]` (optional)
For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.6`, `-Pspark-2.0`, etc.) as needed.
The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com
Available profiles are
```
-Pmapr3
-Pmapr40
-Pmapr41
-Pmapr50
-Pmapr51
```
#### -Pexamples (optional)
Bulid examples under zeppelin-examples directory
@ -176,23 +152,17 @@ Here are some examples with several options:
```bash
# build with spark-2.1, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.1 -Phadoop-2.4 -Pscala-2.11 -DskipTests
mvn clean package -Pspark-2.1 -Pscala-2.11 -DskipTests
# build with spark-2.0, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pscala-2.11 -DskipTests
mvn clean package -Pspark-2.0 -Pscala-2.11 -DskipTests
# build with spark-1.6, scala-2.10
mvn clean package -Pspark-1.6 -Phadoop-2.4 -DskipTests
# spark-cassandra integration
mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests -DskipTests
mvn clean package -Pspark-1.6 -DskipTests
# with CDH
mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests
# with MapR
mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
mvn clean package -Pspark-1.6 -Dhadoop.version=2.6.0-cdh5.5.0 -Pvendor-repo -DskipTests
```
Ignite Interpreter
@ -350,10 +320,10 @@ mvn clean package -Pbuild-distr
To build a distribution with specific profiles, run:
```sh
mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4
mvn clean package -Pbuild-distr -Pspark-2.3 -Phadoop-2.4
```
The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions.
The profiles `-Pspark-2.3 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions.
The archive is generated under _`zeppelin-distribution/target`_ directory

View file

@ -213,10 +213,18 @@ class FlinkScalaInterpreter(val properties: Properties) {
}
if (cluster != null) {
cluster match {
case Some(Left(Left(legacyMiniCluster))) => legacyMiniCluster.close()
case Some(Left(Right(newMiniCluster))) => newMiniCluster.close()
case Some(Right(yarnCluster)) => yarnCluster.shutdown()
case _ =>
case Some(Left(Left(legacyMiniCluster))) =>
LOGGER.info("Shutdown LegacyMiniCluster")
legacyMiniCluster.close()
case Some(Left(Right(newMiniCluster))) =>
LOGGER.info("Shutdown NewMiniCluster")
newMiniCluster.close()
case Some(Right(yarnCluster)) =>
LOGGER.info("Shutdown YarnCluster")
yarnCluster.shutDownCluster()
yarnCluster.shutdown()
case e =>
LOGGER.error("Unrecognized cluster type: " + e.getClass.getSimpleName)
}
}
}

View file

@ -751,7 +751,7 @@
"progressUpdateIntervalMs": 500
}
],
"name": "Matplotlib (Python | PySpark)",
"name": "Matplotlib (Python, PySpark)",
"id": "2C2AUG798",
"angularObjects": {
"2C6WUGPNH:shared_process": [],

View file

@ -142,6 +142,8 @@ public class RemoteInterpreterServer extends Thread
String interpreterGroupId,
boolean isTest)
throws TTransportException, IOException {
logger.info("Starting remote interpreter server on port {}, intpEventServerAddress: {}:{}", port,
intpEventServerHost, intpEventServerPort);
if (null != intpEventServerHost) {
this.intpEventServerHost = intpEventServerHost;
if (!isTest) {
@ -171,7 +173,6 @@ public class RemoteInterpreterServer extends Thread
}
server = new TThreadPoolServer(
new TThreadPoolServer.Args(serverTransport).processor(processor));
logger.info("Starting remote interpreter server on port {}", port);
remoteWorksResponsePool = Collections.synchronizedMap(new HashMap<String, Object>());
}

View file

@ -138,7 +138,7 @@ public class RemoteInterpreterUtils {
public static String getInterpreterSettingId(String intpGrpId) {
String settingId = null;
if (intpGrpId != null) {
int indexOfColon = intpGrpId.indexOf(":");
int indexOfColon = intpGrpId.indexOf("-");
settingId = intpGrpId.substring(0, indexOfColon);
}
return settingId;

View file

@ -87,24 +87,6 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
this.appListener = interpreterSettingManager.getAppEventListener();
}
public void run() {
TServerSocket tSocket = null;
try {
tSocket = RemoteInterpreterUtils.createTServerSocket(portRange);
port = tSocket.getServerSocket().getLocalPort();
host = RemoteInterpreterUtils.findAvailableHostAddress();
} catch (IOException e1) {
throw new RuntimeException(e1);
}
LOGGER.info("InterpreterEventServer will start. Port: {}", port);
RemoteInterpreterEventService.Processor processor =
new RemoteInterpreterEventService.Processor(this);
this.thriftServer = new TThreadPoolServer(
new TThreadPoolServer.Args(tSocket).processor(processor));
this.thriftServer.serve();
}
public void start() throws IOException {
Thread startingThread = new Thread() {
@Override
@ -118,7 +100,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
throw new RuntimeException(e1);
}
LOGGER.info("InterpreterEventServer will start. Port: {}", port);
LOGGER.info("InterpreterEventServer is starting at {}:{}", host, port);
RemoteInterpreterEventService.Processor processor =
new RemoteInterpreterEventService.Processor(RemoteInterpreterEventServer.this);
thriftServer = new TThreadPoolServer(
@ -142,7 +124,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
if (thriftServer != null && !thriftServer.isServing()) {
throw new IOException("Fail to start InterpreterEventServer in 30 seconds.");
}
LOGGER.info("InterpreterEventServer is started");
LOGGER.info("RemoteInterpreterEventServer is started");
runner = new AppendOutputRunner(listener);
appendFuture = appendService.scheduleWithFixedDelay(
@ -156,6 +138,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
if (appendFuture != null) {
appendFuture.cancel(true);
}
LOGGER.info("RemoteInterpreterEventServer is stopped");
}
@ -252,6 +235,7 @@ public class RemoteInterpreterEventServer implements RemoteInterpreterEventServi
@Override
public void addAngularObject(String intpGroupId, String json) throws TException {
LOGGER.debug("Add AngularObject, interpreterGroupId: " + intpGroupId + ", json: " + json);
AngularObject angularObject = AngularObject.fromJson(json);
InterpreterGroup interpreterGroup =
interpreterSettingManager.getInterpreterGroupById(intpGroupId);

View file

@ -147,7 +147,6 @@ public class RemoteScheduler extends AbstractScheduler {
public synchronized Status getStatus() {
if (!remoteInterpreter.isOpened()) {
LOGGER.info("status:" + getLastStatus());
return getLastStatus();
}
Status status = Status.valueOf(remoteInterpreter.getStatus(job.getId()));