ZEPPELIN-783: pyspark&spark cache under .spark-distr, but unpack to root

This commit is contained in:
Alexander Bezzubov 2016-04-01 19:15:10 +09:00
parent d4ef96d18f
commit 5d0eb2dcd9
7 changed files with 47 additions and 17 deletions

10
pom.xml
View file

@ -241,6 +241,7 @@
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
@ -249,6 +250,7 @@
<target>1.7</target>
</configuration>
</plugin>
<!-- Test coverage plugin -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
@ -270,6 +272,7 @@
</execution>
</executions>
</plugin>
<!-- Checkstyle plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@ -641,6 +644,13 @@
</lifecycleMappingMetadata>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
</plugin>
</plugins>
</pluginManagement>
</build>

View file

@ -50,7 +50,11 @@
<akka.group>org.spark-project.akka</akka.group>
<akka.version>2.3.4-spark</akka.version>
<spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
<spark.archive>spark-${spark.version}</spark.archive>
<spark.download.url>
http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
</spark.download.url>
<spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache>
<py4j.version>0.8.2.1</py4j.version>
</properties>
@ -787,12 +791,12 @@
</goals>
<configuration>
<url>${spark.download.url}</url>
<unpack>true</unpack>
<outputDirectory>${project.build.directory}/../../.spark-dist</outputDirectory>
<outputDirectory>${spark.dist.cache}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
@ -801,18 +805,33 @@
<directory>${basedir}/../python/build</directory>
</fileset>
<fileset>
<directory>${project.build.directory}/../../.spark-dist</directory>
<directory>${project.build.directory}/spark-dist</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>download-and-zip-pyspark-files</id>
<id>unzip-pyspark-files</id>
<phase>validate</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<untar src="${spark.dist.cache}/${spark.archive}.tgz"
dest="${project.build.directory}/spark-dist"
compression="gzip"/>
</target>
</configuration>
</execution>
<execution>
<id>zip-pyspark-files</id>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
@ -821,9 +840,9 @@
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
file="${project.build.directory}/../../.spark-dist/spark-${spark.version}/python/lib/py4j-${py4j.version}-src.zip"/>
file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
basedir="${project.build.directory}/../../.spark-dist/spark-${spark.version}/python"
basedir="${project.build.directory}/spark-dist/${spark.archive}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
</configuration>

View file

@ -45,13 +45,14 @@ ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
SPARK_CACHE=".spark-dist"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
if [[ ! -d "${SPARK_HOME}" ]]; then
mkdir -p "${SPARK_CACHE}"
cd "${SPARK_CACHE}"
if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
echo "Cache does not have ${SPARK_ARCHIVE} downloading ..."
# download archive if not cached
if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
# spark 1.1.x and spark 1.2.x can be downloaded from archive
@ -74,7 +75,9 @@ if [[ ! -d "${SPARK_HOME}" ]]; then
fi
fi
# extract archive, clean-up on failure
# extract archive in un-cached root, clean-up on failure
cp "${SPARK_ARCHIVE}.tgz" ..
cd ..
if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
rm -rf "${SPARK_ARCHIVE}"

View file

@ -43,9 +43,8 @@ set -xe
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
SPARK_CACHE=".spark-dist"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
# create PID dir. test case detect pid file so they can select active spark home dir for test

View file

@ -30,13 +30,12 @@ set -xe
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
SPARK_CACHE=".spark-dist"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
# set create PID dir
export SPARK_PID_DIR=${SPARK_HOME}/run
export SPARK_PID_DIR="${SPARK_HOME}/run"
${SPARK_HOME}/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1
${SPARK_HOME}/sbin/stop-master.sh

View file

@ -369,8 +369,8 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.6</version>
<executions>
<execution>
<id>start-zeppelin</id>

View file

@ -47,10 +47,10 @@
<webXml>dist\WEB-INF\web.xml</webXml>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<version>0.11</version>
<configuration>
<excludes>
<exclude>**/.idea/</exclude>