zeppelin/bin/interpreter.sh
Prabhjyot Singh 31f584cfee [ZEPPELIN-1320] Run zeppelin interpreter process as web front end user
Have recreated this from https://github.com/apache/zeppelin/pull/1322
### What is this PR for?

While running a Notebook using shell, spark, python uses same user as which zeppelin server is running. Which means these interprets have same permission on file system as zeppelin server.
IMO users should be able to impersonate themselves as a complete security system.
### What type of PR is it?

[Improvement]
### Todos
- [x] - Update doc
- [x] - FIX NPEs
- [x] - FIX CI
### What is the Jira issue?
- [ZEPPELIN-1320](https://issues.apache.org/jira/browse/ZEPPELIN-1320)
### How should this be tested?
- Enable shiro auth in shiro.ini
- Add ssh key for the same user you want to try and impersonate (say user1).

```
adduser user1
ssh-keygen
ssh user1localhost mkdir -p .ssh
cat ~/.ssh/id_rsa.pub | ssh user1localhost 'cat >> .ssh/authorized_keys'
```
- Start zeppelin server, try and run following in paragraph in a notebook
- Go to interpreter setting page, and enable "User Impersonate" in any of the interpreter (in my example its shell interpreter)

```
%sh
whoami
```

Check that it should run as new user, i.e. "user1"
### Screenshots (if appropriate)

![user impersonate](https://cloud.githubusercontent.com/assets/674497/20213127/f32fdc52-a82c-11e6-8e33-aebd6a943c5f.gif)

### Questions:
- Does the licenses files need update? no
- Is there breaking changes for older versions? no
- Does this needs documentation? yes

Author: Prabhjyot Singh <prabhjyotsingh@gmail.org>

Closes #1554 from prabhjyotsingh/ZEPPELIN-1320-2 and squashes the following commits:

dc69c9d [Prabhjyot Singh] @Leemoonsoo review comment: making ZEPPELIN_SSH_COMMAND configurable
1b26cc0 [Prabhjyot Singh] add doc
5a76839 [Prabhjyot Singh] show User Impersonate only when interpreter setting is "per user" and "isolated"
02c3084 [Prabhjyot Singh] Merge remote-tracking branch 'origin/master' into ZEPPELIN-1320-2
03b2f20 [Prabhjyot Singh] use user instead of ""
0ff80ec [Prabhjyot Singh] Merge remote-tracking branch 'origin/master' into ZEPPELIN-1320-2
dd0731d [Prabhjyot Singh] fix missing test cases
aff1bf0 [Prabhjyot Singh] user should have option to run these interpreters as different user.
2016-11-17 19:07:29 -08:00

222 lines
7.2 KiB
Bash
Executable file

#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
bin=$(dirname "${BASH_SOURCE-$0}")
bin=$(cd "${bin}">/dev/null; pwd)
function usage() {
echo "usage) $0 -p <port> -d <interpreter dir to load> -l <local interpreter repo dir to load>"
}
while getopts "hp:d:l:v:u:" o; do
case ${o} in
h)
usage
exit 0
;;
d)
INTERPRETER_DIR=${OPTARG}
;;
p)
PORT=${OPTARG}
;;
l)
LOCAL_INTERPRETER_REPO=${OPTARG}
;;
v)
. "${bin}/common.sh"
getZeppelinVersion
;;
u)
ZEPPELIN_IMPERSONATE_USER="${OPTARG}"
if [[ -z "$ZEPPELIN_IMPERSONATE_CMD" ]]; then
ZEPPELIN_IMPERSONATE_RUN_CMD=`echo "ssh ${ZEPPELIN_IMPERSONATE_USER}@localhost" `
else
ZEPPELIN_IMPERSONATE_RUN_CMD=$(eval "echo ${ZEPPELIN_IMPERSONATE_CMD} ")
fi
;;
esac
done
if [ -z "${PORT}" ] || [ -z "${INTERPRETER_DIR}" ]; then
usage
exit 1
fi
. "${bin}/common.sh"
ZEPPELIN_INTP_CLASSPATH=""
# construct classpath
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
else
ZEPPELIN_INTERPRETER_JAR="$(ls ${ZEPPELIN_HOME}/lib/zeppelin-interpreter*.jar)"
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}"
fi
# add test classes for unittest
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes"
fi
if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes"
fi
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDirForIntp "${INTERPRETER_DIR}"
HOSTNAME=$(hostname)
ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
INTERPRETER_ID=$(basename "${INTERPRETER_DIR}")
ZEPPELIN_PID="${ZEPPELIN_PID_DIR}/zeppelin-interpreter-${INTERPRETER_ID}-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.pid"
ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-interpreter-${INTERPRETER_ID}-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log"
JAVA_INTP_OPTS+=" -Dzeppelin.log.file=${ZEPPELIN_LOGFILE}"
if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
echo "Log dir doesn't exist, create ${ZEPPELIN_LOG_DIR}"
$(mkdir -p "${ZEPPELIN_LOG_DIR}")
fi
# set spark related env variables
if [[ "${INTERPRETER_ID}" == "spark" ]]; then
if [[ -n "${SPARK_HOME}" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}"
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
else
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
# Apache
addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share"
# CDH
addJarInDirForIntp "${HADOOP_HOME}"
addJarInDirForIntp "${HADOOP_HOME}/lib"
fi
addJarInDirForIntp "${INTERPRETER_DIR}/dep"
pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
py4j=($pattern)
# pick the first match py4j zip - there should only be one
PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"
if [[ -z "${PYTHONPATH}" ]]; then
export PYTHONPATH="${PYSPARKPATH}"
else
export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
fi
unset PYSPARKPATH
# autodetect HADOOP_CONF_HOME by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
if [[ -n "${HBASE_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_CONF_DIR}"
elif [[ -n "${HBASE_HOME}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_HOME}/conf"
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
# autodetect HADOOP_CONF_HOME by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
elif [[ -d "/etc/tez/conf" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
else
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
fi
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
if [[ -n "${SPARK_SUBMIT}" ]]; then
${ZEPPELIN_IMPERSONATE_RUN_CMD} `${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &`
else
${ZEPPELIN_IMPERSONATE_RUN_CMD} ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
fi
pid=$!
if [[ -z "${pid}" ]]; then
return 1;
else
echo ${pid} > ${ZEPPELIN_PID}
fi
trap 'shutdown_hook;' SIGTERM SIGINT SIGQUIT
function shutdown_hook() {
local count
count=0
while [[ "${count}" -lt 10 ]]; do
$(kill ${pid} > /dev/null 2> /dev/null)
if kill -0 ${pid} > /dev/null 2>&1; then
sleep 3
let "count+=1"
else
rm -f "${ZEPPELIN_PID}"
break
fi
if [[ "${count}" == "5" ]]; then
$(kill -9 ${pid} > /dev/null 2> /dev/null)
rm -f "${ZEPPELIN_PID}"
fi
done
}
wait