ZEPPELIN-783: use TravisCI caching for reliable Spark download

This commit is contained in:
Alexander Bezzubov 2016-04-01 12:44:39 +09:00
parent bd1d5e22fb
commit 542a30538d
2 changed files with 28 additions and 18 deletions

View file

@@ -16,6 +16,9 @@
language: java
sudo: false
cache:
directories:
- .spark
matrix:
include:

View file

@@ -43,31 +43,38 @@ set -xe
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
SPARK_CACHE=".spark"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
if [[ ! -d "${SPARK_HOME}" ]]; then
if [ "${SPARK_VER_RANGE}" == "<=1.2" ]; then
# spark 1.1.x and spark 1.2.x can be downloaded from archive
STARTTIME=`date +%s`
timeout -s KILL 300 wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
else
# spark 1.3.x and later can be downloaded from mirror
# get download address from mirror
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
mkdir -p "${SPARK_CACHE}"
cd "${SPARK_CACHE}"
if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
# download archive if not cached
if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
# spark 1.1.x and spark 1.2.x can be downloaded from archive
STARTTIME=`date +%s`
timeout -s KILL 300 wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
else
# spark 1.3.x and later can be downloaded from mirror
# get download address from mirror
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
STARTTIME=`date +%s`
timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
STARTTIME=`date +%s`
timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
fi
fi
# clean-up on failure
# extract archive, clean-up on failure
if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
rm -rf "${SPARK_ARCHIVE}"