mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
ZEPPELIN-783: use TravisCI caching for reliable Spark download
This commit is contained in:
parent
bd1d5e22fb
commit
542a30538d
2 changed files with 28 additions and 18 deletions
|
|
@@ -16,6 +16,9 @@
|
|||
language: java
|
||||
|
||||
sudo: false
|
||||
cache:
|
||||
directories:
|
||||
- .spark
|
||||
|
||||
matrix:
|
||||
include:
|
||||
|
|
|
|||
|
|
@@ -43,31 +43,38 @@ set -xe
|
|||
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
|
||||
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
|
||||
|
||||
SPARK_CACHE=".spark"
|
||||
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
|
||||
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
|
||||
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}"
|
||||
echo "SPARK_HOME is ${SPARK_HOME}"
|
||||
|
||||
if [[ ! -d "${SPARK_HOME}" ]]; then
|
||||
if [ "${SPARK_VER_RANGE}" == "<=1.2" ]; then
|
||||
# spark 1.1.x and spark 1.2.x can be downloaded from archive
|
||||
STARTTIME=`date +%s`
|
||||
timeout -s KILL 300 wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
|
||||
ENDTIME=`date +%s`
|
||||
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
|
||||
else
|
||||
# spark 1.3.x and later can be downloaded from mirror
|
||||
# get download address from mirror
|
||||
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
|
||||
mkdir -p "${SPARK_CACHE}"
|
||||
cd "${SPARK_CACHE}"
|
||||
if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
|
||||
# download archive if not cached
|
||||
if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
|
||||
# spark 1.1.x and spark 1.2.x can be downloaded from archive
|
||||
STARTTIME=`date +%s`
|
||||
timeout -s KILL 300 wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
|
||||
ENDTIME=`date +%s`
|
||||
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
|
||||
else
|
||||
# spark 1.3.x and later can be downloaded from mirror
|
||||
# get download address from mirror
|
||||
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
|
||||
|
||||
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
|
||||
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
|
||||
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
|
||||
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
|
||||
|
||||
STARTTIME=`date +%s`
|
||||
timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
|
||||
ENDTIME=`date +%s`
|
||||
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
|
||||
STARTTIME=`date +%s`
|
||||
timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
|
||||
ENDTIME=`date +%s`
|
||||
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
|
||||
fi
|
||||
fi
|
||||
# clean-up on failure
|
||||
|
||||
# extract archive, clean-up on failure
|
||||
if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
|
||||
echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
|
||||
rm -rf "${SPARK_ARCHIVE}"
|
||||
|
|
|
|||
Loading…
Reference in a new issue