This commit is contained in:
Jesang Yoon 2016-04-10 02:22:36 +09:00
commit 6d04fa7a4d
149 changed files with 8650 additions and 555 deletions

2
.gitignore vendored
View file

@ -12,12 +12,14 @@
spark/derby.log
spark/metastore_db
spark-1.*-bin-hadoop*
.spark-dist
zeppelin-server/derby.log
lens/lens-cli-hist.log
# conf file
conf/zeppelin-env.sh
conf/zeppelin-env.cmd
conf/zeppelin-site.xml
conf/keystore
conf/truststore

View file

@ -15,19 +15,34 @@
language: java
sudo: false
cache:
directories:
- .spark-dist
addons:
apt:
sources:
- r-packages-precise
packages:
- r-base-dev
- r-cran-evaluate
- r-cran-base64enc
matrix:
include:
# Test all modules
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.4.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
@ -41,11 +56,16 @@ matrix:
- jdk: "oraclejdk7"
env: SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.0
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
env: TEST_SELENIUM="true" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
before_install:
- "ls -la .spark-dist"
- mkdir -p ~/R
- echo 'R_LIBS=~/R' > ~/.Renviron
- R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
- export R_LIBS='~/R'
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@ -56,6 +76,7 @@ before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
- ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
- tail conf/zeppelin-env.sh
script:
- mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
@ -69,6 +90,3 @@ after_failure:
after_script:
- ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER
notifications:
slack:
secure: dtIkPwlf5uTun19p9TtPEAFmrLOMK2COE8TL9m8LXX/N2WzJaKYvAnovMObEV6KEgK2oZ+72Cke7eBI+Hp4FmHZ2B7mQI/PNCfRZthI3cc3zVmMd25yvLH9AlCRa2bC6R885z2copvzaoZtLBkHnPa8bUrUkbmRp40qkDPQpgO4=

14
LICENSE
View file

@ -244,4 +244,16 @@ Apache licenses
The following components are provided under the Apache License. See project link for details.
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
========================================================================
BSD 3-Clause licenses
========================================================================
The following components are provided under the BSD 3-Clause license. See file headers and project links for details.
(BSD 3 Clause) portions of rscala 1.0.6 (https://dahl.byu.edu/software/rscala/) - https://cran.r-project.org/web/packages/rscala/index.html
r/R/rzeppelin/R/{common.R, globals.R,protocol.R,rServer.R,scalaInterpreter.R,zzz.R }
r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/{Package.scala, RClient.scala}
(BSD 3 Clause) portions of Scala (http://www.scala-lang.org/download) - http://www.scala-lang.org/download/#License
r/src/main/scala/scala/Console.scala

View file

@ -129,6 +129,7 @@ Available profiles are
-Pmapr40
-Pmapr41
-Pmapr50
-Pmapr51
```

112
bin/common.cmd Normal file
View file

@ -0,0 +1,112 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Shared environment setup for the Zeppelin Windows launchers.
REM Every setting below is only given a default when the caller, or
REM conf\zeppelin-env.cmd, has not already defined it.
REM
REM Path-derived assignments inside parenthesised blocks use the quoted form
REM   set "NAME=value"
REM so that a closing parenthesis in a path, for example a JAVA_HOME below
REM "C:\Program Files (x86)", cannot terminate the enclosing block early.
REM The stored value is the same as with the unquoted form.

REM Default ZEPPELIN_HOME to the parent of the directory holding this script.
if not defined ZEPPELIN_HOME (
    for %%d in ("%~dp0..") do (
        set ZEPPELIN_HOME=%%~fd
    )
)

if not defined ZEPPELIN_CONF_DIR (
    set "ZEPPELIN_CONF_DIR=%ZEPPELIN_HOME%\conf"
)

if not defined ZEPPELIN_LOG_DIR (
    set "ZEPPELIN_LOG_DIR=%ZEPPELIN_HOME%\logs"
)

if not defined ZEPPELIN_NOTEBOOK_DIR (
    set "ZEPPELIN_NOTEBOOK_DIR=%ZEPPELIN_HOME%\notebook"
)

if not defined ZEPPELIN_PID_DIR (
    set "ZEPPELIN_PID_DIR=%ZEPPELIN_HOME%\run"
)

REM Prefer the exploded web application directory, otherwise take a packaged war.
if not defined ZEPPELIN_WAR (
    if exist "%ZEPPELIN_HOME%\zeppelin-web\dist" (
        set "ZEPPELIN_WAR=%ZEPPELIN_HOME%\zeppelin-web\dist"
    ) else (
        for %%d in ("%ZEPPELIN_HOME%\zeppelin-web*.war") do (
            set ZEPPELIN_WAR=%%d
        )
    )
)

if not defined ZEPPELIN_INTERPRETER_DIR (
    set "ZEPPELIN_INTERPRETER_DIR=%ZEPPELIN_HOME%\interpreter"
)

REM User overrides are loaded here, so everything above can be relied on inside
REM zeppelin-env.cmd and everything below honours what it defines.
if exist "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd" (
    call "%ZEPPELIN_CONF_DIR%\zeppelin-env.cmd"
)

REM The configuration directory is always on the classpath.
if not defined ZEPPELIN_CLASSPATH (
    set ZEPPELIN_CLASSPATH="%ZEPPELIN_CONF_DIR%"
) else (
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_CONF_DIR%"
)

if not defined ZEPPELIN_ENCODING (
    set ZEPPELIN_ENCODING=UTF-8
)

if not defined ZEPPELIN_MEM (
    set ZEPPELIN_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m
)

REM Encoding and memory options are appended to any user supplied JVM options.
if not defined ZEPPELIN_JAVA_OPTS (
    set ZEPPELIN_JAVA_OPTS=-Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM%
) else (
    set ZEPPELIN_JAVA_OPTS=%ZEPPELIN_JAVA_OPTS% -Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM%
)

if not defined JAVA_OPTS (
    set JAVA_OPTS=%ZEPPELIN_JAVA_OPTS%
) else (
    set JAVA_OPTS=%JAVA_OPTS% %ZEPPELIN_JAVA_OPTS%
)

REM Interpreter processes inherit the server JVM options and memory settings
REM unless dedicated values were provided.
if not defined ZEPPELIN_INTP_JAVA_OPTS (
    set ZEPPELIN_INTP_JAVA_OPTS=%ZEPPELIN_JAVA_OPTS%
)

if not defined ZEPPELIN_INTP_MEM (
    set ZEPPELIN_INTP_MEM=%ZEPPELIN_MEM%
)

set JAVA_INTP_OPTS=%ZEPPELIN_INTP_JAVA_OPTS% -Dfile.encoding=%ZEPPELIN_ENCODING%

REM Use the java binary of JAVA_HOME when it is set, otherwise rely on PATH.
if not defined JAVA_HOME (
    set ZEPPELIN_RUNNER=java
) else (
    set "ZEPPELIN_RUNNER=%JAVA_HOME%\bin\java"
)

if not defined ZEPPELIN_IDENT_STRING (
    set "ZEPPELIN_IDENT_STRING=%USERNAME%"
)

if not defined DEBUG (
    set DEBUG=0
)

if not defined ZEPPELIN_INTERPRETER_REMOTE_RUNNER (
    set ZEPPELIN_INTERPRETER_REMOTE_RUNNER=bin\interpreter.cmd
)

exit /b

View file

@ -81,6 +81,18 @@ function addJarInDir(){
fi
}
ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils
# Prints the Zeppelin version by running ZEPPELIN_COMMANDLINE_MAIN with -v,
# then terminates the calling shell with status 0.
# Reads ZEPPELIN_HOME and ZEPPELIN_RUNNER; extends ZEPPELIN_CLASSPATH and CLASSPATH.
function getZeppelinVersion(){
    # When running from a source tree, pick up the freshly compiled server classes.
    if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then
      ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes"
    fi
    addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib"
    CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
    # The classpath is quoted so that it reaches the JVM as a single argument:
    # unquoted, it would undergo word splitting and filename expansion in the shell.
    # ZEPPELIN_RUNNER is left unquoted so a runner that carries its own arguments still works.
    $ZEPPELIN_RUNNER -cp "${CLASSPATH}" "${ZEPPELIN_COMMANDLINE_MAIN}" -v
    exit 0
}
# Text encoding for
# read/write job into files,
# receiving/displaying query/result.

38
bin/functions.cmd Normal file
View file

@ -0,0 +1,38 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Classpath helper routines, invoked as
REM   call functions.cmd ROUTINE directory
REM The first argument selects the label to jump to and the second argument is
REM the directory to process. Each routine prepends entries to
REM ZEPPELIN_CLASSPATH in the environment of the calling script; this file opens
REM no local scope of its own.

REM Dispatch to the requested routine; do nothing when no routine is named.
if not "%1"=="" goto %1
exit /b

REM ADDEACHJARINDIR: prepend every jar found directly in the directory, one
REM quoted entry per jar. The loop reads the variable with delayed expansion
REM syntax and this file does not enable it, so the calling script has to.
:ADDEACHJARINDIR
for %%d in ("%~2\*.jar") do (
set ZEPPELIN_CLASSPATH="%%d";!ZEPPELIN_CLASSPATH!
)
exit /b

REM ADDEACHJARINDIRRECURSIVE: same as ADDEACHJARINDIR, but also walks the
REM subdirectories of the directory.
:ADDEACHJARINDIRRECURSIVE
for /r "%~2" %%d in (*.jar) do (
set ZEPPELIN_CLASSPATH="%%d";!ZEPPELIN_CLASSPATH!
)
exit /b

REM ADDJARINDIR: prepend a single wildcard entry for the directory when the
REM directory exists; a missing directory is silently ignored.
:ADDJARINDIR
if exist "%~2" (
set ZEPPELIN_CLASSPATH="%~2\*";%ZEPPELIN_CLASSPATH%
)
exit /b

136
bin/interpreter.cmd Normal file
View file

@ -0,0 +1,136 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Launches a remote interpreter process.
REM Options: -p port, -d interpreter directory, -l local interpreter repository
REM directory, -h usage. The port and the interpreter directory are mandatory.
REM
REM Variables that are assigned and then read again inside the same
REM parenthesised block are read with delayed expansion, because percent
REM expansion is resolved once, when the whole block is parsed.

setlocal enableextensions enabledelayedexpansion

set bin=%~dp0

:loop
if "%~1"=="" goto cont
if /I "%~1"=="-h" goto usage
if /I "%~1"=="-d" (
    set "INTERPRETER_DIR=%~2"
    set "INTERPRETER_ID=%~n2"
)
if /I "%~1"=="-p" set PORT=%~2
if /I "%~1"=="-l" set LOCAL_INTERPRETER_REPO=%~2
shift
goto loop
:cont

if "%PORT%"=="" goto usage
if "%INTERPRETER_DIR%"=="" goto usage

call "%bin%\common.cmd"

REM Use compiled classes from a source tree when present, else the packaged jar.
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes"
) else (
    for %%d in ("%ZEPPELIN_HOME%\lib\zeppelin-interpreter*.jar") do (
        set ZEPPELIN_INTERPRETER_JAR=%%d
    )
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"!ZEPPELIN_INTERPRETER_JAR!"
)

call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%"

set HOSTNAME=%COMPUTERNAME%
set ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer

set ZEPPELIN_LOGFILE=%ZEPPELIN_LOG_DIR%\zeppelin-interpreter-%INTERPRETER_ID%-%ZEPPELIN_IDENT_STRING%-%HOSTNAME%.log

if not exist "%ZEPPELIN_LOG_DIR%" (
    echo Log dir doesn't exist, create %ZEPPELIN_LOG_DIR%
    mkdir "%ZEPPELIN_LOG_DIR%"
)

if /I "%INTERPRETER_ID%"=="spark" (
    if defined SPARK_HOME (
        REM External Spark: the interpreter is started through spark-submit.
        set "SPARK_SUBMIT=%SPARK_HOME%\bin\spark-submit.cmd"

        for %%d in ("%ZEPPELIN_HOME%\interpreter\spark\zeppelin-spark*.jar") do (
            set SPARK_APP_JAR=%%d
        )
        set ZEPPELIN_CLASSPATH="!SPARK_APP_JAR!"

        for %%d in ("%SPARK_HOME%\python\lib\py4j-*-src.zip") do (
            set py4j=%%d
        )

        if not defined PYTHONPATH (
            set "PYTHONPATH=!py4j!;%SPARK_HOME%\python"
        ) else (
            set "PYTHONPATH=!py4j!;%SPARK_HOME%\python;%PYTHONPATH%"
        )
    ) else (
        REM Embedded Spark: assemble the classpath from Hadoop and bundled jars.
        if defined HADOOP_HOME if exist "%HADOOP_HOME%\bin\hadoop.cmd" (
            for /f "tokens=*" %%d in ('"%HADOOP_HOME%\bin\hadoop.cmd" classpath') do (
                set LOCAL_HADOOP_CLASSPATH=%%d
            )
            set ZEPPELIN_CLASSPATH=!LOCAL_HADOOP_CLASSPATH!;!ZEPPELIN_CLASSPATH!
        )

        call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%\dep"

        for %%d in ("%ZEPPELIN_HOME%\interpreter\spark\pyspark\py4j-*-src.zip") do (
            set py4j=%%d
        )

        set "PYSPARKPATH=%ZEPPELIN_HOME%\interpreter\spark\pyspark\pyspark.zip;!py4j!"

        if not defined PYTHONPATH (
            set PYTHONPATH=!PYSPARKPATH!
        ) else (
            set "PYTHONPATH=%PYTHONPATH%;!PYSPARKPATH!"
        )

        set PYSPARKPATH=

        if defined HADOOP_HOME if not defined HADOOP_CONF_DIR (
            if exist "%HADOOP_HOME%\etc\hadoop" (
                set "HADOOP_CONF_DIR=%HADOOP_HOME%\etc\hadoop"
            )
        )

        REM Delayed expansion is required on both variables below. Percent
        REM expansion would yield the values from before this block started,
        REM dropping the Hadoop classpath and dep jars added above and missing
        REM a HADOOP_CONF_DIR that was just derived from HADOOP_HOME.
        if exist "!HADOOP_CONF_DIR!" (
            set ZEPPELIN_CLASSPATH=!ZEPPELIN_CLASSPATH!;"!HADOOP_CONF_DIR!"
        )
    )
)

call "%bin%\functions.cmd" ADDJARINDIR "%LOCAL_INTERPRETER_REPO%"

if not defined ZEPPELIN_CLASSPATH_OVERRIDES (
    set CLASSPATH=%ZEPPELIN_CLASSPATH%
) else (
    set CLASSPATH=%ZEPPELIN_CLASSPATH_OVERRIDES%;%ZEPPELIN_CLASSPATH%
)

if defined SPARK_SUBMIT (
    set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file='%ZEPPELIN_LOGFILE%'

    "%SPARK_SUBMIT%" --class %ZEPPELIN_SERVER% --jars %CLASSPATH% --driver-java-options "!JAVA_INTP_OPTS!" %SPARK_SUBMIT_OPTIONS% "%SPARK_APP_JAR%" %PORT%
) else (
    set JAVA_INTP_OPTS=%JAVA_INTP_OPTS% -Dzeppelin.log.file="%ZEPPELIN_LOGFILE%"

    "%ZEPPELIN_RUNNER%" !JAVA_INTP_OPTS! %ZEPPELIN_INTP_MEM% -cp %ZEPPELIN_CLASSPATH_OVERRIDES%;%CLASSPATH% %ZEPPELIN_SERVER% %PORT%
)

exit /b

:usage
echo Usage: %~n0 -p ^<port^> -d ^<interpreter dir to load^> -l ^<local interpreter repo dir to load^>

View file

@ -23,7 +23,7 @@ function usage() {
echo "usage) $0 -p <port> -d <interpreter dir to load> -l <local interpreter repo dir to load>"
}
while getopts "hp:d:l:" o; do
while getopts "hp:d:l:v" o; do
case ${o} in
h)
usage
@ -38,6 +38,10 @@ while getopts "hp:d:l:" o; do
l)
LOCAL_INTERPRETER_REPO=${OPTARG}
;;
v)
. "${bin}/common.sh"
getZeppelinVersion
;;
esac
done
@ -81,7 +85,10 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
# Need to add the R Interpreter
RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
@ -126,8 +133,18 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
if [[ -n "${HBASE_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_CONF_DIR}"
elif [[ -n "${HBASE_HOME}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_HOME}/conf"
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
fi
addJarInDir "${LOCAL_INTERPRETER_REPO}"

View file

@ -19,7 +19,9 @@
# description: Start and stop daemon script for.
#
USAGE="Usage: zeppelin-daemon.sh [--config <conf-dir>] {start|stop|upstart|restart|reload|status}"
USAGE="-e Usage: zeppelin-daemon.sh\n\t
[--config <conf-dir>] {start|stop|upstart|restart|reload|status}\n\t
[--version | -v]"
if [[ "$1" == "--config" ]]; then
shift
@ -258,6 +260,9 @@ case "${1}" in
status)
find_zeppelin_process
;;
-v | --version)
getZeppelinVersion
;;
*)
echo ${USAGE}
esac

91
bin/zeppelin.cmd Normal file
View file

@ -0,0 +1,91 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM Starts the Zeppelin server in the foreground.
REM An optional leading "--config conf-dir" selects the configuration directory.

setlocal enableextensions enabledelayedexpansion

set bin=%~dp0

REM The tilde strips surrounding quotes, so a quoted first argument cannot
REM produce a malformed comparison.
if not "%~1"=="--config" goto MAIN

:SET_CONFIG
shift
set conf_dir=%~f1
shift
if not exist "%conf_dir%" (
    echo ERROR: %conf_dir% is not a directory
    echo Usage: %~n0 [--config ^<conf-dir^>]
    exit /b 1
) else (
    set "ZEPPELIN_CONF_DIR=%conf_dir%"
)

:MAIN

call "%bin%\common.cmd"

set HOSTNAME=%COMPUTERNAME%
set ZEPPELIN_LOGFILE=%ZEPPELIN_LOG_DIR%\zeppelin-%ZEPPELIN_IDENT_STRING%-%HOSTNAME%.log
set ZEPPELIN_SERVER=org.apache.zeppelin.server.ZeppelinServer
set JAVA_OPTS=%JAVA_OPTS% -Dzeppelin.log.file="%ZEPPELIN_LOGFILE%"

REM When running from a source tree, put the compiled module classes on the classpath.
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes"
)

if exist "%ZEPPELIN_HOME%\zeppelin-zengine\target\classes" (
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-zengine\target\classes"
)

if exist "%ZEPPELIN_HOME%\zeppelin-server\target\classes" (
    set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-server\target\classes"
)

call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-zengine\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-server\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-web\target\lib"

if not defined CLASSPATH (
    set CLASSPATH=%ZEPPELIN_CLASSPATH%
) else (
    set CLASSPATH=%CLASSPATH%;%ZEPPELIN_CLASSPATH%
)

REM NOTE(review): the assignment below always replaces the CLASSPATH computed
REM just above, so a CLASSPATH inherited from the environment never reaches the
REM JVM. Behaviour is kept as is; confirm which of the two is intended.
if not defined ZEPPELIN_CLASSPATH_OVERRIDES (
    set CLASSPATH=%ZEPPELIN_CLASSPATH%
) else (
    set CLASSPATH=%ZEPPELIN_CLASSPATH_OVERRIDES%;%ZEPPELIN_CLASSPATH%
)

REM Directory tests are quoted so that locations containing spaces are handled,
REM matching the corresponding check in interpreter.cmd.
if not exist "%ZEPPELIN_LOG_DIR%" (
    echo Log dir doesn't exist, create %ZEPPELIN_LOG_DIR%
    mkdir "%ZEPPELIN_LOG_DIR%"
)

if not exist "%ZEPPELIN_PID_DIR%" (
    echo Pid dir doesn't exist, create %ZEPPELIN_PID_DIR%
    mkdir "%ZEPPELIN_PID_DIR%"
)

if not exist "%ZEPPELIN_NOTEBOOK_DIR%" (
    echo Notebook dir doesn't exist, create %ZEPPELIN_NOTEBOOK_DIR%
    mkdir "%ZEPPELIN_NOTEBOOK_DIR%"
)

"%ZEPPELIN_RUNNER%" %JAVA_OPTS% -cp %CLASSPATH% %ZEPPELIN_SERVER% "%*"

View file

@ -39,6 +39,10 @@ bin=$(cd "${bin}">/dev/null; pwd)
. "${bin}/common.sh"
if [ "$1" == "--version" ] || [ "$1" == "-v" ]; then
getZeppelinVersion
fi
HOSTNAME=$(hostname)
ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log"
LOG="${ZEPPELIN_LOG_DIR}/zeppelin-cli-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.out"

View file

@ -29,11 +29,12 @@ user3 = password4, role2
#ldapRealm.userDnTemplate = cn={0},cn=engg,ou=testdomain,dc=testdomain,dc=com
#ldapRealm.contextFactory.url = ldap://ldaphost:389
#ldapRealm.contextFactory.authenticationMechanism = SIMPLE
shiro.loginUrl = /api/login
[urls]
# anon means the access is anonymous.
# authcBasic means Basic Auth Security
# To enforce security, comment the line below and uncomment the next one
/api/version = anon
/** = anon
#/** = authcBasic
#/** = authc

View file

@ -0,0 +1,64 @@
@echo off
REM Licensed to the Apache Software Foundation (ASF) under one or more
REM contributor license agreements. See the NOTICE file distributed with
REM this work for additional information regarding copyright ownership.
REM The ASF licenses this file to You under the Apache License, Version 2.0
REM (the "License"); you may not use this file except in compliance with
REM the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM set JAVA_HOME=
REM set MASTER= REM Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
REM set ZEPPELIN_JAVA_OPTS REM Additional jvm options. for example, set ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
REM set ZEPPELIN_MEM REM Zeppelin jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options. Default = ZEPPELIN_MEM
REM set ZEPPELIN_INTP_JAVA_OPTS REM zeppelin interpreter process jvm options. Default = ZEPPELIN_JAVA_OPTS
REM set ZEPPELIN_LOG_DIR REM Where log files are stored. The logs directory under ZEPPELIN_HOME by default.
REM set ZEPPELIN_PID_DIR REM Where the pid files are stored. The run directory under ZEPPELIN_HOME by default.
REM set ZEPPELIN_WAR_TEMPDIR REM The location of jetty temporary directory.
REM set ZEPPELIN_NOTEBOOK_DIR REM Where notebook saved
REM set ZEPPELIN_NOTEBOOK_HOMESCREEN REM Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
REM set ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE REM hide homescreen notebook from list when this value set to "true". default "false"
REM set ZEPPELIN_NOTEBOOK_S3_BUCKET REM Bucket where notebook saved
REM set ZEPPELIN_NOTEBOOK_S3_USER REM User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zeppelin. The USERNAME environment variable by default.
REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0.
REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading
REM Spark interpreter configuration
REM Use provided spark installation
REM defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
REM
REM set SPARK_HOME REM (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
REM set SPARK_SUBMIT_OPTIONS REM (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
REM set SPARK_APP_NAME REM (optional) The name of spark application.
REM Use embedded spark binaries
REM without SPARK_HOME defined, Zeppelin is still able to run the spark interpreter process using embedded spark binaries.
REM however, it is not encouraged when you can define SPARK_HOME
REM
REM Options read in YARN client mode
REM set HADOOP_CONF_DIR REM yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
REM Pyspark (supported with Spark 1.2.1 and above)
REM To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
REM set PYSPARK_PYTHON REM path to the python command. must be the same path on the driver(Zeppelin) and all workers.
REM set PYTHONPATH
REM Spark interpreter options
REM
REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext if set true. true by default.
REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of SparkSQL result to display. 1000 by default.

View file

@ -29,8 +29,9 @@
# export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved
# export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
# export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default.
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
@ -61,4 +62,11 @@
# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
# export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of SparkSQL result to display. 1000 by default.
# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
#### HBase interpreter configuration ####
## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
# export HBASE_HOME= # (required) Directory under which HBase scripts and configuration are located
# export HBASE_CONF_DIR= # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml

View file

@ -76,6 +76,12 @@
<description>bucket name for notebook storage</description>
</property>
<property>
<name>zeppelin.notebook.s3.endpoint</name>
<value>s3.amazonaws.com</value>
<description>endpoint for s3 bucket</description>
</property>
<property>
<name>zeppelin.notebook.storage</name>
<value>org.apache.zeppelin.notebook.repo.S3NotebookRepo</value>
@ -138,7 +144,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>
@ -219,5 +225,11 @@
<description>Anonymous user allowed by default</description>
</property>
<property>
<name>zeppelin.websocket.max.text.message.size</name>
<value>1024000</value>
<description>Size in characters of the maximum text message to be received by websocket. Defaults to 1024000</description>
</property>
</configuration>

View file

@ -41,6 +41,7 @@
<ul class="dropdown-menu">
<li><a href="{{BASE_PATH}}/manual/interpreters.html">Overview</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
@ -53,10 +54,10 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, hawq</a></li>
<li><a href="{{BASE_PATH}}/interpreter/R.html">R</a></li>
<li><a href="{{BASE_PATH}}/interpreter/scalding.html">Scalding</a></li>
<li><a href="{{BASE_PATH}}/pleasecontribute.html">Shell</a></li>
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/pleasecontribute.html">Tajo</a></li>
<li role="separator" class="divider"></li>
<li><a href="{{BASE_PATH}}/manual/dynamicinterpreterload.html">Dynamic Interpreter Loading</a></li>

View file

@ -45,7 +45,7 @@ If you don't have requirements prepared, please check instructions in [README.md
<a name="zeppelin-configuration"> </a>
## Zeppelin Configuration
You can configure Zeppelin with both **environment variables** in `conf/zeppelin-env.sh` and **java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will be used priorly.
You can configure Zeppelin with both **environment variables** in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows) and **Java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will take priority.
<table class="table-configuration">
<tr>
@ -186,6 +186,12 @@ You can configure Zeppelin with both **environment variables** in `conf/zeppelin
<td>user</td>
<td>A user name of S3 bucket<br />i.e. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_ENDPOINT</td>
<td>zeppelin.notebook.s3.endpoint</td>
<td>s3.amazonaws.com</td>
<td>Endpoint for the bucket</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_CONNECTION_STRING</td>
<td>zeppelin.notebook.azure.connectionString</td>
@ -225,10 +231,16 @@ You can configure Zeppelin with both **environment variables** in `conf/zeppelin
<td>interpreter</td>
<td>Zeppelin interpreter directory</td>
</tr>
<tr>
<td>ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE</td>
<td>zeppelin.websocket.max.text.message.size</td>
<td>1024000</td>
<td>Size in characters of the maximum text message to be received by websocket.</td>
</tr>
</table>
You may need to configure individual interpreters. If so, please check the **Interpreter** section of the Zeppelin documentation.
[Spark Interpreter for Apache Zeppelin](../interpreter/spark.html) will be a good example.
[Spark Interpreter for Apache Zeppelin](../interpreter/spark.html) will be a good example.
## Zeppelin Start / Stop
#### Start Zeppelin
@ -248,9 +260,9 @@ bin/zeppelin-daemon.sh stop
Zeppelin can auto start as a service with an init script, such as services managed by upstart.
The following is an example upstart script to be saved as `/etc/init/zeppelin.conf`
The following is an example upstart script to be saved as `/etc/init/zeppelin.conf`
This example has been tested with Ubuntu Linux.
This also allows the service to be managed with commands such as
This also allows the service to be managed with commands such as
`sudo service zeppelin start`
`sudo service zeppelin stop`
@ -278,4 +290,9 @@ chdir /usr/share/zeppelin
exec bin/zeppelin-daemon.sh upstart
```
#### Running on Windows
```
bin\zeppelin.cmd
```

View file

@ -10,7 +10,18 @@ group: manual
[HBase Shell](http://hbase.apache.org/book.html#shell) is a JRuby IRB client for Apache HBase. This interpreter provides all capabilities of Apache HBase shell within Apache Zeppelin. The interpreter assumes that Apache HBase client software has been installed and that it can connect to the Apache HBase cluster from the machine on which Apache Zeppelin is installed.
To get started with HBase, please see [HBase Quickstart](https://hbase.apache.org/book.html#quickstart)
> Note: currently only HBase 1.0.x releases are supported.
## HBase release supported
By default, Zeppelin is built against HBase 1.0.x releases. To work with HBase 1.1.x releases, use the following build command:
```bash
# HBase 1.1.4
mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.1.4 -Dhbase.hadoop.version=2.6.0
```
To work with HBase 1.2.0+, use the following build command:
```bash
# HBase 1.2.0
mvn clean package -DskipTests -Phadoop-2.6 -Dhadoop.version=2.6.0 -P build-distr -Dhbase.hbase.version=1.2.0 -Dhbase.hadoop.version=2.6.0
```
## Configuration
@ -23,7 +34,7 @@ To get start with HBase, please see [HBase Quickstart](https://hbase.apache.org/
<tr>
<td>hbase.home</td>
<td>/usr/lib/hbase</td>
<td>Installation directory of Hbase</td>
<td>Installation directory of HBase, defaults to HBASE_HOME in environment</td>
</tr>
<tr>
<td>hbase.ruby.sources</td>
@ -31,12 +42,31 @@ To get start with HBase, please see [HBase Quickstart](https://hbase.apache.org/
<td>Path to Ruby scripts relative to 'hbase.home'</td>
</tr>
<tr>
<td>hbase.test.mode</td>
<td>zeppelin.hbase.test.mode</td>
<td>false</td>
<td>Disable checks for unit and manual tests</td>
</tr>
</table>
If you want to connect to HBase running on a cluster, you'll need to follow the next step.
### Export HBASE_HOME
In **conf/zeppelin-env.sh**, export `HBASE_HOME` environment variable with your HBase installation path. This ensures `hbase-site.xml` can be loaded.
for example
```bash
export HBASE_HOME=/usr/lib/hbase
```
or, when running with CDH
```bash
export HBASE_HOME="/opt/cloudera/parcels/CDH/lib/hbase"
```
You can optionally export `HBASE_CONF_DIR` instead of `HBASE_HOME` should you have custom HBase configurations.
## Enabling the HBase Shell Interpreter
In a notebook, to enable the **HBase Shell** interpreter, click the **Gear** icon and select **HBase Shell**.

138
docs/interpreter/r.md Normal file
View file

@ -0,0 +1,138 @@
---
layout: page
title: "R Interpreter"
description: ""
group: manual
---
{% include JB/setup %}
## R Interpreter
This is the Apache (incubating) Zeppelin project, with the addition of support for the R programming language and R-spark integration.
### Requirements
Additional requirements for the R interpreter are:
* R 3.1 or later (earlier versions may work, but have not been tested)
* The `evaluate` R package.
For full R support, you will also need the following R packages:
* `knitr`
* `repr` -- available with `devtools::install_github("IRkernel/repr")`
* `htmltools` -- required for some interactive plotting
* `base64enc` -- required to view R base plots
### Configuration
To run Zeppelin with the R Interpreter, the SPARK_HOME environment variable must be set. The best way to do this is by editing `conf/zeppelin-env.sh`.
If it is not set, the R Interpreter will not be able to interface with Spark.
You should also copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.
### Using the R Interpreter
By default, the R Interpreter appears as two Zeppelin Interpreters, `%r` and `%knitr`.
`%r` will behave like an ordinary REPL. You can execute commands as in the CLI.
[![2+2](screenshots/repl2plus2.png)](screenshots/repl2plus2.png)
R base plotting is fully supported
[![replhist](screenshots/replhist.png)](screenshots/replhist.png)
If you return a data.frame, Zeppelin will attempt to display it using Zeppelin's built-in visualizations.
[![replhist](screenshots/replhead.png)](screenshots/replhead.png)
`%knitr` interfaces directly against `knitr`, with chunk options on the first line:
[![knitgeo](screenshots/knitgeo.png)](screenshots/knitgeo.png)
[![knitstock](screenshots/knitstock.png)](screenshots/knitstock.png)
[![knitmotion](screenshots/knitmotion.png)](screenshots/knitmotion.png)
The two interpreters share the same environment. If you define a variable from `%r`, it will be within-scope if you then make a call using `knitr`.
### Using SparkR & Moving Between Languages
If `SPARK_HOME` is set, the `SparkR` package will be loaded automatically:
[![sparkrfaithful](screenshots/sparkrfaithful.png)](screenshots/sparkrfaithful.png)
The Spark Context and SQL Context are created and injected into the local environment automatically as `sc` and `sql`.
The same contexts are shared with the `%spark`, `%sql` and `%pyspark` interpreters:
[![backtoscala](screenshots/backtoscala.png)](screenshots/backtoscala.png)
You can also make an ordinary R variable accessible in Scala and Python:
[![varr1](screenshots/varr1.png)](screenshots/varr1.png)
And vice versa:
[![varscala](screenshots/varscala.png)](screenshots/varscala.png)
[![varr2](screenshots/varr2.png)](screenshots/varr2.png)
### Caveats & Troubleshooting
* Almost all issues with the R interpreter turned out to be caused by an incorrectly set `SPARK_HOME`. The R interpreter must load a version of the `SparkR` package that matches the running version of Spark, and it does this by searching `SPARK_HOME`. If Zeppelin isn't configured to interface with Spark in `SPARK_HOME`, the R interpreter will not be able to connect to Spark.
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
* Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.
* If you return a data.frame (for instance, from calling `head()`) from the `%spark.r` interpreter, it will be parsed by Zeppelin's built-in data visualization system.
* Why `knitr` Instead of `rmarkdown`? Why no `htmlwidgets`? In order to support `htmlwidgets`, which has indirect dependencies, `rmarkdown` uses `pandoc`, which requires writing to and reading from disc. This makes it many times slower than `knitr`, which can operate entirely in RAM.
* Why no `ggvis` or `shiny`? Supporting `shiny` would require integrating a reverse-proxy into Zeppelin, which is a task.
* Mac OS X & case-insensitive filesystem. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, maven can unintentionally delete the install directory because `r` and `R` become the same subdirectory.
* Error `unable to start device X11` with the repl interpreter. Check your shell login scripts to see if they are adjusting the `DISPLAY` environment variable. This is common on some operating systems as a workaround for ssh issues, but can interfere with R plotting.
* akka Library Version or `TTransport` errors. This can happen if you try to run Zeppelin with a SPARK_HOME that has a version of Spark other than the one specified with `-Pspark-1.x` when Zeppelin was compiled.
## R Interpreter for Apache Zeppelin
[R](https://www.r-project.org) is a free software environment for statistical computing and graphics.
To run R code and visualize plots in Apache Zeppelin, you will need R on your master node (or your dev laptop).
+ For Centos: `yum install R R-devel libcurl-devel openssl-devel`
+ For Ubuntu: `apt-get install r-base`
Validate your installation with a simple R command:
```
R -e "print(1+1)"
```
To enjoy plots, install additional libraries with:

+ devtools with `R -e "install.packages('devtools', repos = 'http://cran.us.r-project.org')"`
+ knitr with `R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org')"`
+ ggplot2 with `R -e "install.packages('ggplot2', repos = 'http://cran.us.r-project.org')"`
+ Other visualization libraries: `R -e "install.packages(c('devtools','mplot', 'googleVis'), repos = 'http://cran.us.r-project.org'); require(devtools); install_github('ramnathv/rCharts')"`
We also recommend installing the following optional R libraries for happy data analytics:
+ glmnet
+ pROC
+ data.table
+ caret
+ sqldf
+ wordcloud

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -126,6 +126,8 @@ export HADOOP_CONF_DIR=/usr/lib/hadoop
export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0"
```
For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. For more details please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems)
### 2. Set master in Interpreter menu
After starting Zeppelin, go to the **Interpreter** menu and edit the **master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.

View file

@ -79,7 +79,7 @@ limitations under the License.
"zeppelin.server.context.path":"/",
"zeppelin.ssl.keystore.type":"JKS",
"zeppelin.ssl.truststore.path":"truststore",
"zeppelin.interpreters":"org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter",
"zeppelin.interpreters":"org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter",
"zeppelin.ssl":"false",
"zeppelin.notebook.autoInterpreterBinding":"true",
"zeppelin.notebook.homescreen":"",

View file

@ -29,7 +29,7 @@
<artifactId>zeppelin-file</artifactId>
<packaging>jar</packaging>
<version>0.6.0-incubating-SNAPSHOT</version>
<name>Zeppelin File System Interpreters</name>
<name>Zeppelin: File System Interpreters</name>
<url>http://www.apache.org</url>
<dependencies>

View file

@ -35,7 +35,7 @@
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<flink.version>0.10.0</flink.version>
<flink.version>1.0.0</flink.version>
<flink.akka.version>2.3.7</flink.akka.version>
<flink.scala.binary.version>2.10</flink.scala.binary.version>
<flink.scala.version>2.10.4</flink.scala.version>
@ -73,25 +73,25 @@
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<artifactId>flink-clients_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime</artifactId>
<artifactId>flink-runtime_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala</artifactId>
<artifactId>flink-scala_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala-shell</artifactId>
<artifactId>flink-scala-shell_${flink.scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>

View file

@ -249,12 +249,34 @@ public class FlinkInterpreter extends Interpreter {
Code r = null;
String incomplete = "";
boolean inComment = false;
for (int l = 0; l < linesToRun.length; l++) {
final String s = linesToRun[l];
// check if next line starts with "." (but not ".." or "./") it is treated as an invocation
if (l + 1 < linesToRun.length) {
String nextLine = linesToRun[l + 1].trim();
if (nextLine.startsWith(".") && !nextLine.startsWith("..") && !nextLine.startsWith("./")) {
boolean continuation = false;
if (nextLine.isEmpty()
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
continuation = true;
} else if (!inComment && nextLine.startsWith("/*")) {
inComment = true;
continuation = true;
} else if (inComment && nextLine.lastIndexOf("*/") >= 0) {
inComment = false;
continuation = true;
} else if (nextLine.length() > 1
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
continuation = true;
} else if (inComment) {
continuation = true;
}
if (continuation) {
incomplete += s + "\n";
continue;
}

View file

@ -49,6 +49,22 @@ public class FlinkInterpreterTest {
flink.destroy();
}
/**
 * A line that starts with "." must be treated as a continuation of the previous
 * line, so the two-line snippet is interpreted as a single expression.
 */
@Test
public void testNextLineInvocation() {
  final InterpreterResult outcome = flink.interpret("\"123\"\n.toInt", context);
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
}
/**
 * A block comment placed between an expression and its ".method" continuation
 * must not break the statement apart.
 */
@Test
public void testNextLineComments() {
  final InterpreterResult outcome =
      flink.interpret("\"123\"\n/*comment here\n*/.toInt", context);
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
}
/**
 * A class followed by a line comment, an empty line and then its companion
 * object must be interpreted successfully as one snippet.
 */
@Test
public void testNextLineCompanionObject() {
  final String classWithCompanion = "class Counter {\nvar value: Long = 0\n}\n // comment\n\n object Counter {\n def apply(x: Long) = new Counter()\n}";
  final InterpreterResult outcome = flink.interpret(classWithCompanion, context);
  assertEquals(InterpreterResult.Code.SUCCESS, outcome.code());
}
@Test
public void testSimpleStatement() {
InterpreterResult result = flink.interpret("val a=1", context);
@ -63,12 +79,6 @@ public class FlinkInterpreterTest {
assertEquals("1", result.message());
}
@Test
public void testNextlineInvoke() {
InterpreterResult result = flink.interpret("\"123\"\n .toInt", context);
assertEquals("res0: Int = 123\n", result.message());
}
@Test
public void testWordCount() {
flink.interpret("val text = env.fromElements(\"To be or not to be\")", context);

View file

@ -37,21 +37,20 @@ import java.util.List;
import java.util.Properties;
/**
* Support for Hbase Shell. All the commands documented here
* Support for HBase Shell. All the commands documented here
* http://hbase.apache.org/book.html#shell is supported.
*
* Requirements:
* Hbase Shell should be installed on the same machine. To be more specific, the following dir.
* HBase Shell should be installed on the same machine. To be more specific, the following dir.
* should be available: https://github.com/apache/hbase/tree/master/hbase-shell/src/main/ruby
* Hbase Shell should be able to connect to the Hbase cluster from terminal. This makes sure
* HBase Shell should be able to connect to the HBase cluster from terminal. This makes sure
* that the client is configured properly.
*
* The interpreter takes 3 config parameters:
* hbase.home: Root dir. where hbase is installed. Default is /usr/lib/hbase/
* hbase.home: Root directory where HBase is installed. Default is /usr/lib/hbase/
* hbase.ruby.sources: Dir where shell ruby code is installed.
* Path is relative to hbase.home. Default: lib/ruby
* hbase.irb.load: (Testing only) Default is true.
* Whether to load irb in the interpreter.
* zeppelin.hbase.test.mode: (Testing only) Disable checks for unit and manual tests. Default: false
*/
public class HbaseInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(HbaseInterpreter.class);
@ -62,11 +61,13 @@ public class HbaseInterpreter extends Interpreter {
static {
Interpreter.register("hbase", "hbase", HbaseInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add("hbase.home", "/usr/lib/hbase/", "Installation dir. of Hbase")
.add("hbase.home",
getSystemDefault("HBASE_HOME", "hbase.home", "/usr/lib/hbase/"),
"Installation directory of HBase")
.add("hbase.ruby.sources", "lib/ruby",
"Path to Ruby scripts relative to 'hbase.home'")
.add("hbase.test.mode", "false", "Disable checks for unit and manual tests")
.build());
.add("zeppelin.hbase.test.mode", "false", "Disable checks for unit and manual tests")
.build());
}
public HbaseInterpreter(Properties property) {
@ -79,7 +80,7 @@ public class HbaseInterpreter extends Interpreter {
this.writer = new StringWriter();
scriptingContainer.setOutput(this.writer);
if (!Boolean.parseBoolean(getProperty("hbase.test.mode"))) {
if (!Boolean.parseBoolean(getProperty("zeppelin.hbase.test.mode"))) {
String hbase_home = getProperty("hbase.home");
String ruby_src = getProperty("hbase.ruby.sources");
Path abs_ruby_src = Paths.get(hbase_home, ruby_src).toAbsolutePath();
@ -89,7 +90,7 @@ public class HbaseInterpreter extends Interpreter {
File f = abs_ruby_src.toFile();
if (!f.exists() || !f.isDirectory()) {
throw new InterpreterException("hbase ruby sources is not available at '" + abs_ruby_src
throw new InterpreterException("HBase ruby sources is not available at '" + abs_ruby_src
+ "'");
}
@ -155,4 +156,24 @@ public class HbaseInterpreter extends Interpreter {
return null;
}
/**
 * Resolves a default value by consulting, in order, an environment variable,
 * a JVM system property, and finally the supplied fallback.
 *
 * @param envName name of the environment variable to try first; skipped when
 *     {@code null} or empty
 * @param propertyName name of the system property to try second; skipped when
 *     {@code null} or empty
 * @param defaultValue value returned when neither source yields a non-null value
 * @return the environment value if set, otherwise the system property value if
 *     set, otherwise {@code defaultValue}
 */
private static String getSystemDefault(
    String envName,
    String propertyName,
    String defaultValue) {
  final boolean hasEnvName = envName != null && !envName.isEmpty();
  final String fromEnvironment = hasEnvName ? System.getenv().get(envName) : null;
  if (fromEnvironment != null) {
    return fromEnvironment;
  }

  // An empty key is never passed on: System.getProperty rejects it.
  final boolean hasPropertyName = propertyName != null && !propertyName.isEmpty();
  final String fromProperty = hasPropertyName ? System.getProperty(propertyName) : null;
  return fromProperty != null ? fromProperty : defaultValue;
}
}

View file

@ -40,7 +40,7 @@ public class HbaseInterpreterTest {
Properties properties = new Properties();
properties.put("hbase.home", "");
properties.put("hbase.ruby.sources", "");
properties.put("hbase.test.mode", "true");
properties.put("zeppelin.hbase.test.mode", "true");
hbaseInterpreter = new HbaseInterpreter(properties);
hbaseInterpreter.open();
@ -72,4 +72,4 @@ public class HbaseInterpreterTest {
assertEquals(InterpreterResult.Code.ERROR, result.code());
assertEquals("(NameError) undefined local variable or method `joke' for main:Object", result.message());
}
}
}

View file

@ -0,0 +1,29 @@
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,11 @@
Copyright (c) 2002-2016 EPFL
Copyright (c) 2011-2016 Lightbend, Inc. (formerly Typesafe, Inc.)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the EPFL nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1036
notebook/r/note.json Normal file

File diff suppressed because one or more lines are too long

40
pom.xml
View file

@ -17,7 +17,7 @@
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with this
@ -241,6 +241,7 @@
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
@ -249,6 +250,7 @@
<target>1.7</target>
</configuration>
</plugin>
<!-- Test coverage plugin -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
@ -270,6 +272,7 @@
</execution>
</executions>
</plugin>
<!-- Checkstyle plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@ -300,7 +303,6 @@
<excludes>org/apache/zeppelin/interpreter/thrift/*</excludes>
</configuration>
</execution>
</executions>
</plugin>
@ -457,6 +459,7 @@
<exclude>.github/*</exclude>
<exclude>.gitignore</exclude>
<exclude>.repository/</exclude>
<exclude>.Rhistory</exclude>
<exclude>**/*.diff</exclude>
<exclude>**/*.patch</exclude>
<exclude>**/*.avsc</exclude>
@ -488,6 +491,7 @@
<exclude>conf/notebook-authorization.json</exclude>
<exclude>conf/zeppelin-env.sh</exclude>
<exclude>spark-*-bin*/**</exclude>
<exclude>.spark-dist/**</exclude>
<!-- bundled from bootstrap -->
<exclude>docs/assets/themes/zeppelin/bootstrap/**</exclude>
@ -509,6 +513,7 @@
<exclude>docs/Rakefile</exclude>
<exclude>docs/rss.xml</exclude>
<exclude>docs/sitemap.txt</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
<!-- bundled from jekyll -->
<exclude>docs/assets/themes/zeppelin/css/syntax.css</exclude>
@ -516,6 +521,23 @@
<!-- docs (website) build target dir -->
<exclude>docs/_site/**</exclude>
<exclude>docs/Gemfile.lock</exclude>
<!-- compiled R packages (binaries) -->
<exclude>R/lib/**</exclude>
<!--R-related files with alternative licenses-->
<exclude>r/R/rzeppelin/R/globals.R</exclude>
<exclude>r/R/rzeppelin/R/common.R</exclude>
<exclude>r/R/rzeppelin/R/protocol.R</exclude>
<exclude>r/R/rzeppelin/R/rServer.R</exclude>
<exclude>r/R/rzeppelin/R/scalaInterpreter.R</exclude>
<exclude>r/R/rzeppelin/R/zzz.R</exclude>
<exclude>r/src/main/scala/scala/Console.scala</exclude>
<exclude>r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/Package.scala</exclude>
<exclude>r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/RClient.scala</exclude>
<!--The following files are mechanical-->
<exclude>r/R/rzeppelin/DESCRIPTION</exclude>
<exclude>r/R/rzeppelin/NAMESPACE</exclude>
</excludes>
</configuration>
@ -640,6 +662,13 @@
</lifecycleMappingMetadata>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
</plugin>
</plugins>
</pluginManagement>
</build>
@ -663,6 +692,13 @@
</modules>
</profile>
<profile>
<id>r</id>
<modules>
<module>r</module>
</modules>
</profile>
<profile>
<id>scalding</id>
<modules>

41
r/R/install-dev.sh Executable file
View file

@ -0,0 +1,41 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script packages the rzeppelin R sources into a package that can be
# loaded into R, and installs that package into $LIB_DIR.
# Abort on the first failing command (including inside pipelines) and echo
# every command while the script runs.
set -o pipefail
set -e
set -x
# Absolute path of the directory containing this script. Everything is quoted
# so that checkouts located under a path with spaces still work, and "&&"
# makes a failed cd yield an error instead of silently using the caller's cwd.
FWDIR="$(cd "$(dirname "$0")" && pwd)"
# Library directory two levels above this script, under R/lib.
LIB_DIR="$FWDIR/../../R/lib"
mkdir -p "$LIB_DIR"
pushd "$FWDIR" > /dev/null
# Generate Rd files if devtools is installed
#Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
# Install the rzeppelin package to $LIB_DIR
R CMD INSTALL --library="$LIB_DIR" "$FWDIR/rzeppelin/"
popd > /dev/null
set +x

28
r/R/rzeppelin/DESCRIPTION Normal file
View file

@ -0,0 +1,28 @@
Package: rzeppelin
Type: Package
Title: Interface from scala to R, based on rscala, for the Apache (Incubation) Zeppelin project
Version: 0.1.0
Date: 2015-12-01
Authors@R: c(person(given="David B.",family="Dahl",role=c("aut","cre"),email="dahl@stat.byu.edu"),
person(family="Scala developers",role="ctb",comment="see http://scala-lang.org/"))
URL: http://dahl.byu.edu/software/rscala/
Imports: utils,
evaluate
Suggests:
googleVis,
htmltools,
knitr,
rCharts,
repr,
SparkR,
base64enc
SystemRequirements: Scala (>= 2.10)
Description:
License: file LICENSE
NeedsCompilation: no
Packaged: 2015-05-15 13:36:01 UTC; dahl
Author: David B. Dahl [aut, cre],
Scala developers [ctb] (see http://scala-lang.org/)
Maintainer: Amos B. Elberg <amos.elberg@gmail.com>
Repository:
Date/Publication: 2015-12-01 21:50:02

14
r/R/rzeppelin/LICENSE Normal file
View file

@ -0,0 +1,14 @@
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

7
r/R/rzeppelin/NAMESPACE Normal file
View file

@ -0,0 +1,7 @@
import(utils)
export("rzeppelinPackage")
export("progress_zeppelin")
export(.z.put)
export(.z.get)
export(.z.input)

14
r/R/rzeppelin/R/common.R Normal file
View file

@ -0,0 +1,14 @@
# Expand every "@{expr}" placeholder in a single string.
#
# The first placeholder is located, the R expression inside it is evaluated in
# `envir`, and its value (rendered with toString) is spliced into the text; the
# function then recurses on the result until no placeholder remains.
#
# snippet: character vector of length one.
# envir:   environment in which placeholder expressions are evaluated
#          (defaults to the caller's frame).
# Returns the string with all placeholders replaced.
strintrplt <- function(snippet,envir=parent.frame()) {
  if ( ! is.character(snippet) ) stop("Character vector expected.")
  if ( length(snippet) != 1 ) stop("Length of vector must be exactly one.")
  hit <- regexpr("@\\{([^\\}]+)\\}",snippet)
  # Nothing left to expand: hand the text back unchanged.
  if ( hit == -1 ) return(snippet)
  hitLength <- attr(hit,"match.length")
  before <- substr(snippet,1,hit-1)
  # Skip the leading "@{" and the trailing "}" of the placeholder.
  exprText <- substr(snippet,hit+2,hit+hitLength-2)
  after <- substr(snippet,hit+hitLength,nchar(snippet))
  rendered <- paste(toString(eval(parse(text=exprText),envir=envir)),collapse=" ",sep="")
  strintrplt(paste(before,rendered,after,sep=""),envir)
}

View file

@ -0,0 +1,3 @@
# The 26 lower-case letters followed by the 26 upper-case letters.
lEtTeRs <- c(letters,LETTERS)
# The letters above plus the digits 0-9 (c() coerces the digits to strings).
alphabet <- c(lEtTeRs,0:9)

View file

@ -0,0 +1,35 @@
# Integer codes used by the socket protocol. The grouping comments below
# follow the numeric ranges; not every code is referenced by the R code in
# this package.

# Element type codes (0-6).
UNSUPPORTED_TYPE <- 0L
INTEGER <- 1L
DOUBLE <- 2L
BOOLEAN <- 3L
STRING <- 4L
DATE <- 5L
DATETIME <- 6L
# Data structure codes (10-20).
UNSUPPORTED_STRUCTURE <- 10L
NULLTYPE <- 11L
REFERENCE <- 12L
ATOMIC <- 13L
VECTOR <- 14L
MATRIX <- 15L
LIST <- 16L
DATAFRAME <- 17L
S3CLASS <- 18L
S4CLASS <- 19L
JOBJ <- 20L
# Command codes (100-112).
EXIT <- 100L
RESET <- 101L
GC <- 102L
DEBUG <- 103L
EVAL <- 104L
SET <- 105L
SET_SINGLE <- 106L
SET_DOUBLE <- 107L
GET <- 108L
GET_REFERENCE <- 109L
DEF <- 110L
INVOKE <- 111L
SCALAP <- 112L
# Status codes sent back in replies (1000-1002).
OK <- 1000L
ERROR <- 1001L
UNDEFINED_IDENTIFIER <- 1002L
# NOTE(review): presumably the Scala binary version this package targets;
# it is not referenced by the code visible here - confirm before relying on it.
CURRENT_SUPPORTED_SCALA_VERSION <- "2.10"

214
r/R/rzeppelin/R/rServer.R Normal file
View file

@ -0,0 +1,214 @@
# Serves requests arriving on `sockets` until an EXIT command is read.
#
# Each pass of the loop reads one integer command code and dispatches on it:
#   EXIT           return from the function
#   DEBUG          read a flag and switch debug tracing on or off
#   EVAL           read a code string, evaluate it in the workspace and reply
#                  with OK/ERROR followed by the captured output or message
#   SET, SET_SINGLE, SET_DOUBLE
#                  read an identifier (preceded by an index for the last two)
#                  and a typed value, then assign it in the workspace
#   GET            reply with the typed value bound to an identifier
#   GET_REFERENCE  store the value in `workspace$.` under a generated name
#                  and reply with that name
#   GC             replace `workspace$.` with a fresh environment
# Any other command, data structure or data type stops with an error.
#
# sockets: list holding 'env', 'workspace' and the connections used by
#          rb/rc/wb/wc (the structure returned by newSockets()).
rServe <- function(sockets) {
  cc(sockets)
  workspace <- sockets[['workspace']]
  debug <- get("debug",envir=sockets[['env']])
  # Reads `count` strings off the wire. seq_len() is used instead of 1:count
  # because 1:0 is c(1,0), which would try to read two strings for an empty
  # vector or matrix; vapply() also guarantees character(0) in that case.
  readStrings <- function(count) vapply(seq_len(count),function(i) rc(sockets),character(1))
  while ( TRUE ) {
    if ( debug ) cat("R DEBUG: Top of the loop waiting for a command.\n")
    cmd <- rb(sockets,integer(0))
    if ( cmd == EXIT ) {
      if ( debug ) cat("R DEBUG: Got EXIT\n")
      return()
    } else if ( cmd == DEBUG ) {
      if ( debug ) cat("R DEBUG: Got DEBUG\n")
      newDebug <- ( rb(sockets,integer(0)) != 0 )
      if ( debug != newDebug ) cat("R DEBUG: Debugging is now ",newDebug,"\n",sep="")
      debug <- newDebug
      assign("debug",debug,envir=sockets[['env']])
    } else if ( cmd == EVAL ) {
      if ( debug ) cat("R DEBUG: Got EVAL\n")
      snippet <- rc(sockets)
      # try() keeps a failing snippet from terminating the server loop.
      output <- capture.output(result <- try(eval(parse(text=snippet),envir=workspace)))
      if ( inherits(result,"try-error") ) {
        wb(sockets,ERROR)
        msg <- paste(c(output,attr(result,"condition")$message),collapse="\n")
        wc(sockets,msg)
      } else {
        wb(sockets,OK)
        output <- paste(output,collapse="\n")
        wc(sockets,output)
      }
      assign(".rzeppelin.last.value",result,envir=workspace)
    } else if ( cmd %in% c(SET,SET_SINGLE,SET_DOUBLE) ) {
      if ( debug ) cat("R DEBUG: Got SET\n")
      # SET_SINGLE / SET_DOUBLE carry an index used for x[i] / x[[i]] assignment.
      if ( cmd != SET ) index <- rc(sockets)
      identifier <- rc(sockets)
      dataStructure <- rb(sockets,integer(0))
      if ( dataStructure == NULLTYPE ) {
        if ( cmd == SET ) assign(identifier,NULL,envir=workspace)
        else subassign(sockets,identifier,index,NULL,cmd==SET_SINGLE)
      } else if ( dataStructure == ATOMIC ) {
        dataType <- rb(sockets,integer(0))
        if ( dataType == INTEGER ) value <- rb(sockets,integer(0))
        else if ( dataType == DOUBLE ) value <- rb(sockets,double(0))
        else if ( dataType == BOOLEAN ) value <- rb(sockets,integer(0)) != 0
        else if ( dataType == STRING ) value <- rc(sockets)
        # else if ( dataType == DATE ) value <- as.Date(rb(sockets,integer(0)), origin=as.Date("1970-01-01"))
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == VECTOR ) {
        dataLength <- rb(sockets,integer(0))
        dataType <- rb(sockets,integer(0))
        if ( dataType == INTEGER ) value <- rb(sockets,integer(0),n=dataLength)
        else if ( dataType == DOUBLE ) value <- rb(sockets,double(0),n=dataLength)
        else if ( dataType == BOOLEAN ) value <- rb(sockets,integer(0),n=dataLength) != 0
        else if ( dataType == STRING ) value <- readStrings(dataLength)
        # else if ( dataType == DATE ) value <- as.Date(rb(sockets,integer(0), n = dataLength), origin=as.Date("1970-01-01"))
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == MATRIX ) {
        dataNRow <- rb(sockets,integer(0))
        dataNCol <- rb(sockets,integer(0))
        dataLength <- dataNRow * dataNCol
        dataType <- rb(sockets,integer(0))
        # Elements arrive row by row, hence byrow=TRUE.
        if ( dataType == INTEGER ) value <- matrix(rb(sockets,integer(0),n=dataLength),nrow=dataNRow,byrow=TRUE)
        else if ( dataType == DOUBLE ) value <- matrix(rb(sockets,double(0),n=dataLength),nrow=dataNRow,byrow=TRUE)
        else if ( dataType == BOOLEAN ) value <- matrix(rb(sockets,integer(0),n=dataLength),nrow=dataNRow,byrow=TRUE) != 0
        else if ( dataType == STRING ) value <- matrix(readStrings(dataLength),nrow=dataNRow,byrow=TRUE)
        # else if ( dataType == DATE ) value <- matrix(as.Date(rb(sockets,integer(0),n=dataLength),
        #   origin = as.Date("1970-01-01")),nrow=dataNRow,byrow=TRUE)
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == REFERENCE ) {
        otherIdentifier <- rc(sockets)
        # References live in the `workspace$.` environment (see GET_REFERENCE).
        if ( exists(otherIdentifier,envir=workspace$.) ) {
          wb(sockets,OK)
          value <- get(otherIdentifier,envir=workspace$.)
          if ( cmd == SET ) assign(identifier,value,envir=workspace)
          else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
        } else {
          wb(sockets,UNDEFINED_IDENTIFIER)
        }
      } else stop(paste("Unknown data structure:",dataStructure))
    } else if ( cmd == GET ) {
      if ( debug ) cat("R DEBUG: Got GET\n")
      identifier <- rc(sockets)
      # A failed lookup is turned into a condition object and reported below.
      value <- tryCatch(get(identifier,envir=workspace),error=function(e) e)
      if ( is.null(value) ) {
        wb(sockets,NULLTYPE)
      } else if ( inherits(value,"error") ) {
        wb(sockets,UNDEFINED_IDENTIFIER)
      } else if ( ! is.atomic(value) ) {
        # This is where code for lists, data.frames, S3, and S4 classes must go
        wb(sockets,UNSUPPORTED_STRUCTURE)
      } else if ( is.vector(value) ) {
        type <- checkType(value)
        # A length-one vector is sent as ATOMIC unless the
        # "length.one.as.vector" flag asks for a VECTOR of length one.
        if ( ( length(value) == 1 ) && ( ! get("length.one.as.vector",envir=sockets[['env']]) ) ) {
          wb(sockets,ATOMIC)
        } else {
          wb(sockets,VECTOR)
          wb(sockets,length(value))
        }
        wb(sockets,type)
        if ( type == STRING ) {
          if ( length(value) > 0 ) for ( i in 1:length(value) ) wc(sockets,value[i])
        } else {
          if ( type == BOOLEAN ) wb(sockets,as.integer(value))
          # else if (type == DATE) wb(sockets,as.integer(value))
          else wb(sockets,value)
        }
      } else if ( is.matrix(value) ) {
        type <- checkType(value)
        wb(sockets,MATRIX)
        wb(sockets,dim(value))
        wb(sockets,type)
        # Rows are written one at a time, mirroring the byrow=TRUE read in SET.
        if ( nrow(value) > 0 ) for ( i in 1:nrow(value) ) {
          if ( type == STRING ) {
            if ( ncol(value) > 0 ) for ( j in 1:ncol(value) ) wc(sockets,value[i,j])
          }
          else if ( type == BOOLEAN ) wb(sockets,as.integer(value[i,]))
          # else if (type == DATE) wb(sockets, as.integer(value[i,]))
          else wb(sockets,value[i,])
        }
      } else {
        wb(sockets,UNSUPPORTED_STRUCTURE)
      }
    } else if ( cmd == GET_REFERENCE ) {
      if ( debug ) cat("R DEBUG: Got GET_REFERENCE\n")
      identifier <- rc(sockets)
      value <- tryCatch(get(identifier,envir=workspace),error=function(e) e)
      if ( inherits(value,"error") ) {
        wb(sockets,UNDEFINED_IDENTIFIER)
      } else {
        wb(sockets,REFERENCE)
        wc(sockets,new.reference(value,workspace$.))
      }
    } else if ( cmd == GC ) {
      if ( debug ) cat("R DEBUG: Got GC\n")
      # Dropping the old environment releases every stored reference at once.
      workspace$. <- new.env(parent=workspace)
    } else stop(paste("Unknown command:",cmd))
    flush(sockets[['socketIn']])
  }
}
# Performs `x[i] <- value` (single=TRUE) or `x[[i]] <- value` (single=FALSE)
# inside the workspace and reports the outcome on the socket.
#
# sockets: list holding 'workspace' and the connection used by wb/wc.
# x:       name of the target object, as text.
# i:       index expression, as text; it is pasted between the brackets.
# value:   value to assign.
# single:  TRUE for single-bracket assignment, FALSE for double-bracket.
# Writes OK, or ERROR followed by the captured output and error message.
# Returns `value` invisibly.
subassign <- function(sockets,x,i,value,single=TRUE) {
  workspace <- sockets[['workspace']]
  # The value is parked under a temporary name so the generated assignment
  # text can refer to it without deparsing it.
  assign(".rzeppelin.set.value",value,envir=workspace)
  brackets <- if ( single ) c("[","]") else c("[[","]]")
  output <- capture.output(result <- try(eval(parse(text=paste0(x,brackets[1],i,brackets[2]," <- .rzeppelin.set.value")),envir=workspace)))
  if ( inherits(result,"try-error") ) {
    wb(sockets,ERROR)
    output <- paste(paste(output,collapse="\n"),paste(attr(result,"condition")$message,collapse="\n"),sep="\n")
    wc(sockets,output)
  } else {
    wb(sockets,OK)
  }
  # Remove the temporary binding created above. The name must match the one
  # passed to assign(); a misspelt name leaves the value behind and makes rm()
  # warn on every call.
  rm(".rzeppelin.set.value",envir=workspace)
  invisible(value)
}
# Stores `value` in `envir` under a freshly generated random name and returns
# that name. Names are one letter followed by seven characters drawn from
# `alphabet`; candidates are redrawn while exists() reports the name as
# already visible from `envir`.
new.reference <- function(value,envir) {
  repeat {
    candidate <- paste0(sample(lEtTeRs,1),paste0(sample(alphabet,7,replace=TRUE),collapse=""))
    if ( ( candidate != "" ) && ( ! exists(candidate,envir=envir) ) ) break
  }
  assign(candidate,value,envir=envir)
  candidate
}
# Waits for the ports file, opens the two socket connections and performs the
# initial handshake.
#
# portsFilename: path of the file from which up to two port numbers are read;
#                the file is removed once the ports have been read.
# debug:         logical; when TRUE the ports are printed, and the value is
#                stored as the initial "debug" flag in the returned `env`.
# timeout:       seconds to wait for the ports file before stopping.
# Returns a list of class "ScalaInterpreter" with the elements socketIn,
# socketOut, env, workspace and functionCache. Stops if the first integer read
# from the peer is missing or is not OK.
newSockets <- function (portsFilename, debug, timeout)
{
# Polls until portsFilename exists and yields at least one token, or until
# more than `timeout` seconds of elapsed time have passed.
getPortNumbers <- function() {
delay <- 0.1
start <- proc.time()[3]
while (TRUE) {
if ((proc.time()[3] - start) > timeout)
stop("Timed out waiting for Scala to start.")
Sys.sleep(delay)
# NOTE(review): multiplying by 1 leaves the delay unchanged, so the file is
# polled every 0.1s with no back-off - confirm that this is intended.
delay <- 1 * delay
if (file.exists(portsFilename)) {
line <- scan(portsFilename, n = 2, what = character(0),
quiet = TRUE)
if (length(line) > 0)
return(as.numeric(line))
}
}
}
ports <- getPortNumbers()
file.remove(portsFilename)
if (debug)
cat("R DEBUG: Trying to connect to port:", paste(ports,
collapse = ","), "\n")
# Despite the names, 'socketIn' is the connection this process writes to
# (see wb/wc) and 'socketOut' the one it reads from (see rb/rc).
# The timeout of 2678400 seconds is 31 days.
socketConnectionIn <- socketConnection(port = ports[1], blocking = TRUE,
open = "ab", timeout = 2678400)
socketConnectionOut <- socketConnection(port = ports[2],
blocking = TRUE, open = "rb", timeout = 2678400)
functionCache <- new.env()
# `env` holds per-connection flags that cc() and rServe() read with get().
env <- new.env()
assign("open", TRUE, envir = env)
assign("debug", debug, envir = env)
assign("length.one.as.vector", FALSE, envir = env)
# `workspace` is where received code is evaluated; its child environment `.`
# holds the values handed out as references.
workspace <- new.env()
workspace$. <- new.env(parent = workspace)
result <- list(socketIn = socketConnectionIn, socketOut = socketConnectionOut,
env = env, workspace = workspace, functionCache = functionCache)
class(result) <- "ScalaInterpreter"
# Handshake: the peer must send OK first; the package version is sent back.
status <- rb(result, integer(0))
if ((length(status) == 0) || (status != OK))
stop("Error instantiating interpreter.")
wc(result, toString(packageVersion("rzeppelin")))
flush(result[["socketIn"]])
result
}

View file

@ -0,0 +1,95 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Environment in which notebook code is evaluated (see .z.valuate).
.zeppenv <- new.env()
# Output handler passed to evaluate(): decides how each visible value is
# turned into output.
#   - data frames and objects of class "html" are returned unchanged;
#   - htmlwidgets go through .z.show.htmlwidget() when both htmltools and
#     knitr can be loaded;
#   - S4 objects are printed with show();
#   - anything else is rendered with repr() when that package is available,
#     and returned unchanged otherwise.
.z.ohandler = evaluate:::new_output_handler(
  value = function(x) {
    if (is.data.frame(x)) return(x)
    if ("html" %in% class(x)) return(x)
    # `&&` is the scalar, short-circuiting operator meant for `if` conditions:
    # knitr is only loaded when htmltools was found.
    if (require("htmltools") && require("knitr")) {
      if ("htmlwidget" %in% class(x)) {
        return(.z.show.htmlwidget(x))
      }
    }
    if (isS4(x)) show(x)
    else {
      if (require("repr")) {
        return(repr:::repr(x))
      } else return(x)
    }
  }
)
# Evaluates `input` with evaluate::evaluate() inside .zeppenv, routing values
# through .z.ohandler. stop_on_error = 0 is passed through unchanged; see the
# evaluate documentation for its meaning.
.z.valuate <- function(input) {
  evaluate:::evaluate(
    input = input,
    envir = .zeppenv,
    output_handler = .z.ohandler,
    stop_on_error = 0,
    debug = FALSE
  )
}
# Renders a table as tab-separated text for display in Zeppelin.
#
# i: object accepted by write.table() (for example a data.frame).
# Returns a character vector with one element per output line: the header
# first, then one line per row; values are unquoted and row names omitted.
# NOTE(review): textConnection() is opened without local=TRUE, so the
# `.zdfout` variable it fills is created outside this function's frame -
# confirm that this is acceptable.
.z.table <- function(i) {
  sink_con <- textConnection(".zdfout", open="w")
  write.table(i,
    file = sink_con, sep="\t", eol="\n", quote = FALSE,
    row.names=FALSE, col.names=TRUE)
  # Closing the connection finalises the contents of `.zdfout`.
  close(sink_con)
  rm(sink_con)
  .zdfout
}
# Returns completion candidates for the line `buf` with the cursor at
# position `cursor`, by driving the completion helpers inside the utils
# package in the order they expect: set the buffer, set the cursor, guess
# the token, complete it, then fetch the results.
.z.completion <- function(buf, cursor) {
  utils:::.assignLinebuffer(buf)
  utils:::.assignEnd(cursor)
  utils:::.guessTokenFromLine()
  utils:::.completeToken()
  candidates <- utils:::.retrieveCompletions()
  candidates
}
# Thin wrappers that call methods on JVM objects through SparkR's callJMethod.
# NOTE(review): `.rContext` and `.zeppelinContext` are not defined in the code
# visible here; presumably they are injected before these are called - confirm.

# Calls "setProgress" with `progress %% 100`.
# NOTE(review): the modulo maps a progress of exactly 100 to 0 - confirm intent.
.z.setProgress <- function(progress) SparkR:::callJMethod(.rContext, "setProgress", progress %% 100)
# Calls "incrementProgress" on .rContext with `increment` (default 1).
.z.incrementProgress <- function(increment = 1) SparkR:::callJMethod(.rContext, "incrementProgress", increment)
# Calls "input" on .zeppelinContext with `name` and returns the result.
.z.input <- function(name) SparkR:::callJMethod(.zeppelinContext, "input", name)
# Fetches the object stored under `name` through the RStatics helper class.
# When RStatics.testRDD(name) is true the result of getZ is wrapped with
# SparkR:::RDD(); otherwise it is returned as received.
.z.get <- function(name) {
  statics <- "org.apache.zeppelin.rinterpreter.RStatics"
  isRDD <- SparkR:::callJStatic(statics, "testRDD", name)
  obj <- SparkR:::callJStatic(statics, "getZ", name)
  if (!isRDD) return(obj)
  SparkR:::RDD(obj)
}
# Stores `object` under `name` through RStatics.putZ. Objects whose class
# vector contains "RDD" are first converted with SparkR:::getJRDD().
.z.put <- function(name, object) {
  payload <- if ("RDD" %in% class(object)) SparkR:::getJRDD(object) else object
  SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics", "putZ", name, payload)
}
# Text representation of `x`: repr::repr() when the repr package can be
# loaded, toString() otherwise.
.z.repr <- function(x) {
  if (!require(repr)) return(toString(x))
  repr:::repr(x)
}
# Builds a progress reporter that forwards to the Zeppelin progress helpers.
# NOTE(review): the init/step/term shape looks like plyr's progress-bar
# interface - confirm against the callers.
#
# Returns a list of three functions:
#   init(x)  resets the progress to 0 (the argument is ignored);
#   step()   advances the progress by the default increment;
#   term()   does nothing.
progress_zeppelin <- function(...) {
  list(init = function(x) .z.setProgress(0),
       # The helper has to be called; a bare `.z.incrementProgress` merely
       # returns the function object and the progress never moves.
       step = function() .z.incrementProgress(),
       term = function() {})
}

View file

@ -0,0 +1,123 @@
# Creates the per-package state environment `E` for a package that depends on
# this one.
#
# The enclosing environment of the caller's frame is used as the host: a new
# environment with `initialized = FALSE` and `pkgname = pkgname` is created as
# its child and bound there under the name "E".
# Returns NULL invisibly.
rzeppelinPackage <- function(pkgname) {
  host <- parent.env(parent.frame())
  state <- new.env(parent=host)
  assign("initialized",FALSE,envir=state)
  assign("pkgname",pkgname,envir=state)
  assign("E",state,envir=host)
  invisible()
}
# Private
# Returns the protocol type code for `x`: INTEGER, DOUBLE, BOOLEAN, STRING or
# DATE, tested in that order. Stops with "Unsupported data type." for
# anything else.
checkType <- function(x) {
  if ( is.integer(x) ) INTEGER
  else if ( is.double(x) ) DOUBLE
  else if ( is.logical(x) ) BOOLEAN
  else if ( is.character(x) ) STRING
  # Base R has no is.date(); calling it made every unsupported input fail with
  # "could not find function" instead of reaching the stop() below. Date
  # objects are detected by class instead.
  # NOTE(review): ordinary Date vectors are stored as doubles and are already
  # caught by the is.double() branch above - confirm whether DATE should be
  # tested earlier.
  else if ( inherits(x,"Date") ) DATE
  else stop("Unsupported data type.")
}
# Returns the Scala type name for `x` as a string: "Int", "Double", "Boolean",
# "String" or "Date", tested in that order. Stops with
# "Unsupported data type." for anything else.
checkType2 <- function(x) {
  if ( is.integer(x) ) "Int"
  else if ( is.double(x) ) "Double"
  else if ( is.logical(x) ) "Boolean"
  else if ( is.character(x) ) "String"
  # Base R has no is.date(); calling it made every unsupported input fail with
  # "could not find function" instead of reaching the stop() below.
  else if ( inherits(x,"Date") ) "Date"
  else stop("Unsupported data type.")
}
# Generates the R source lines that coerce the variable named `x` to the
# Scala type named `t` and then hand it to intpSet().
#
# x: name of the variable, as text.
# t: Scala type name, e.g. "Int", "Array[Double]", "Array[Array[String]]".
#    Any name not in the table below is treated as a reference type, for
#    which no coercion lines are produced.
# Returns a character vector of code lines; the coercion lines, when present,
# are wrapped in a guard that skips them for 'ScalaInterpreterReference'
# objects.
convert <- function(x,t) {
  # Scala type name -> c(shape, storage mode).
  known <- list(
    "Int"                   = c("atomic","integer"),
    "Double"                = c("atomic","double"),
    "Boolean"               = c("atomic","logical"),
    "String"                = c("atomic","character"),
    "Array[Int]"            = c("vector","integer"),
    "Array[Double]"         = c("vector","double"),
    "Array[Boolean]"        = c("vector","logical"),
    "Array[String]"         = c("vector","character"),
    "Array[Array[Int]]"     = c("matrix","integer"),
    "Array[Array[Double]]"  = c("matrix","double"),
    "Array[Array[Boolean]]" = c("matrix","logical"),
    "Array[Array[String]]"  = c("matrix","character")
  )
  entry <- if ( t %in% names(known) ) known[[t]] else c("reference","reference")
  shape <- entry[1]
  storage <- entry[2]
  # Only array-like types keep length-one values as vectors.
  loav <- shape %in% c("vector","matrix")
  coercion <- switch(shape,
    atomic = "%s <- as.vector(%s)[1]",
    vector = "%s <- as.vector(%s)",
    matrix = "%s <- as.matrix(%s)",
    NULL)
  code <- character(0)
  if ( ! is.null(coercion) ) code <- c(code,sprintf(coercion,x,x))
  if ( storage != "reference" ) code <- c(code,sprintf("storage.mode(%s) <- '%s'",x,storage))
  if ( length(code) != 0 ) {
    code <- c(sprintf("if ( ! inherits(%s,'ScalaInterpreterReference') ) {",x),paste(" ",code,sep=""),"}")
  }
  c(code,sprintf("intpSet(interpreter,'.',%s,length.one.as.vector=%s,quiet=TRUE)",x,loav))
}
# Checks that the connection bundle `c` is still open: stops when the "open"
# flag stored in c[['env']] is FALSE, otherwise does nothing.
cc <- function(c) {
  still_open <- get("open",envir=c[['env']])
  if ( ! still_open ) stop("The connection has already been closed.")
}
# Writes the vector `v` in big-endian binary form to c[['socketIn']].
wb <- function(c,v) {
  writeBin(v,c[['socketIn']],endian="big")
}
# Writes the string `v` to c[['socketIn']] as a length-prefixed message: the
# byte count as an integer, followed by the raw bytes of the string.
wc <- function(c,v) {
  payload <- charToRaw(v)
  wb(c,length(payload))
  writeBin(payload,c[['socketIn']],endian="big",useBytes=TRUE)
}
# Sockets should be blocking, but that contract is not fulfilled when other code uses functions from the parallel library. Program around their problem.
#
# Reads `n` big-endian values of the type of `v` from c[['socketOut']].
# A short read is completed by reading the missing values again until `n`
# values have been collected.
rb <- function(c,v,n=1L) {
  r <- readBin(c[['socketOut']],what=v,n=n,endian="big")
  if ( length(r) == n ) r
  else c(r,rb(c,v,n-length(r)))
}
# Reads one length-prefixed string from c[['socketOut']]: an integer byte
# count followed by that many raw bytes. A zero count yields "".
rc <- function(c) {
  expected <- rb(c,integer(0))
  r <- raw(0)
  # After a short read only the bytes still missing are requested. Asking for
  # the full count again could consume bytes that belong to the next message
  # and leave length(r) above `expected`, so the loop would never end.
  while ( length(r) != expected ) {
    r <- c(r,readBin(c[['socketOut']],what="raw",n=expected-length(r),endian="big"))
  }
  rawToChar(r)
}

9
r/R/rzeppelin/R/zzz.R Normal file
View file

@ -0,0 +1,9 @@
# Lookup from a type code to a zero-length prototype vector of that type.
# BOOLEAN maps to integer(0), not logical(0).
typeMap <- list()
typeMap[[INTEGER]] <- integer(0)
typeMap[[DOUBLE]] <- double(0)
typeMap[[BOOLEAN]] <- integer(0)
typeMap[[STRING]] <- character(0)
# Package attach hook; currently a no-op.
.onAttach <- function(libname, pkgname) {
}

282
r/_tools/checkstyle.xml Normal file
View file

@ -0,0 +1,282 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- This is a checkstyle configuration file. For descriptions of what the
following rules do, please see the checkstyle configuration page at http://checkstyle.sourceforge.net/config.html -->
<module name="Checker">
<module name="FileTabCharacter">
<!-- Checks that there are no tab characters in the file. -->
</module>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf" />
</module>
<module name="RegexpSingleline">
<!-- Checks that FIXME is not used in comments. TODO is preferred. -->
<property name="format" value="((//.*)|(\*.*))FIXME" />
<property name="message"
value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="RegexpSingleline">
<!-- Checks that TODOs are named. (Actually, just that they are followed
by an open paren.) -->
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
<property name="message"
value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<!-- <module name="JavadocPackage"> - Checks that each Java package has
a Javadoc file used for commenting. Only allows a package-info.java, not
package.html. </module> -->
<!-- All Java AST specific tests live under TreeWalker module. -->
<module name="TreeWalker">
<!-- IMPORT CHECKS -->
<module name="RedundantImport">
<!-- Checks for redundant import statements. -->
<property name="severity" value="error" />
</module>
<!-- <module name="ImportOrder"> Checks for out of order import statements
<property name="severity" value="warning"/> <property name="groups" value="com.google,android,junit,net,org,java,javax"/>
This ensures that static imports go first <property name="option" value="top"/>
<property name="tokens" value="STATIC_IMPORT, IMPORT"/> </module> -->
<!-- JAVADOC CHECKS -->
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocMethod">
<property name="scope" value="protected" />
<property name="severity" value="warning" />
<property name="allowMissingJavadoc" value="true" />
<property name="allowMissingParamTags" value="true" />
<property name="allowMissingReturnTag" value="true" />
<property name="allowMissingThrowsTags" value="true" />
<property name="allowThrowsTagsForSubclasses" value="true" />
<property name="allowUndeclaredRTE" value="true" />
</module>
<module name="JavadocType">
<property name="scope" value="protected" />
<property name="severity" value="error" />
</module>
<module name="JavadocStyle">
<property name="severity" value="warning" />
</module>
<!-- NAMING CHECKS -->
<!-- Item 38 - Adhere to generally accepted naming conventions -->
<module name="PackageName">
<!-- Validates identifiers for package names against the supplied expression. -->
<!-- Here the default checkstyle rule restricts package name parts to
seven characters, this is not in line with common practice at Google. -->
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$" />
<property name="severity" value="warning" />
</module>
<module name="TypeNameCheck">
<!-- Validates type (class, interface, enum) names against the expression "^[A-Z][a-zA-Z0-9]*$". -->
<metadata name="altname" value="TypeName" />
<property name="severity" value="warning" />
</module>
<module name="ConstantNameCheck">
<!-- Validates non-private, static, final fields against the supplied
expression; the checkstyle default is "^[A-Z][A-Z0-9]*(_[A-Z0-9]+)*$". -->
<metadata name="altname" value="ConstantName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="false" />
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$" />
<message key="name.invalidPattern"
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)." />
<property name="severity" value="warning" />
</module>
<module name="StaticVariableNameCheck">
<!-- Validates static, non-final fields against the supplied expression
"^[a-z][a-zA-Z0-9]*_?$". -->
<metadata name="altname" value="StaticVariableName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="true" />
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$" />
<property name="severity" value="warning" />
</module>
<module name="MemberNameCheck">
<!-- Validates non-static members against the supplied expression. -->
<metadata name="altname" value="MemberName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="true" />
<property name="format" value="^[a-z][a-zA-Z0-9]*$" />
<property name="severity" value="warning" />
</module>
<module name="MethodNameCheck">
<!-- Validates identifiers for method names. -->
<metadata name="altname" value="MethodName" />
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$" />
<property name="severity" value="warning" />
</module>
<module name="ParameterName">
<!-- Validates identifiers for method parameters against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<module name="LocalFinalVariableName">
<!-- Validates identifiers for local final variables against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<module name="LocalVariableName">
<!-- Validates identifiers for local variables against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<!-- LENGTH and CODING CHECKS -->
<module name="LineLength">
<!-- Checks if a line is too long. -->
<property name="max"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}"
default="100" />
<property name="severity" value="error" />
<!-- The default ignore pattern exempts the following elements: - import
statements - long URLs inside comments -->
<property name="ignorePattern"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
default="^(package .*;\s*)|(import .*;\s*)|( *\* *https?://.*)$" />
</module>
<module name="LeftCurly">
<!-- Checks for placement of the left curly brace ('{'). -->
<property name="severity" value="warning" />
</module>
<module name="RightCurly">
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on the same
line. e.g., the following example is fine: <pre> if { ... } else </pre> -->
<!-- This next example is not fine: the closing brace and the 'else' are on separate lines. -->
<property name="option" value="same" />
<property name="severity" value="warning" />
</module>
<!-- Checks for braces around if and else blocks -->
<module name="NeedBraces">
<property name="severity" value="warning" />
<property name="tokens"
value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO" />
</module>
<module name="UpperEll">
<!-- Checks that long constants are defined with an upper ell. -->
<property name="severity" value="error" />
</module>
<module name="FallThrough">
<!-- Warn about falling through to the next case statement. Similar to
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
on the last non-blank line preceding the fallen-into case contains 'fall
through' (or some other variants which we don't publicize, to promote consistency). -->
<property name="reliefPattern"
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on" />
<property name="severity" value="error" />
</module>
<!-- MODIFIERS CHECKS -->
<module name="ModifierOrder">
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
8.4.3. The prescribed order is: public, protected, private, abstract, static,
final, transient, volatile, synchronized, native, strictfp -->
</module>
<!-- WHITESPACE CHECKS -->
<module name="WhitespaceAround">
<!-- Checks that various tokens are surrounded by whitespace. This includes
most binary operators and keywords followed by regular or curly braces. -->
<property name="tokens"
value="ASSIGN, BAND, BAND_ASSIGN, BOR,
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN" />
<property name="severity" value="error" />
</module>
<module name="WhitespaceAfter">
<!-- Checks that commas, semicolons and typecasts are followed by whitespace. -->
<property name="tokens" value="COMMA, SEMI, TYPECAST" />
</module>
<module name="NoWhitespaceAfter">
<!-- Checks that there is no whitespace after various unary operators.
Linebreaks are allowed. -->
<property name="tokens"
value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
UNARY_PLUS" />
<property name="allowLineBreaks" value="true" />
<property name="severity" value="error" />
</module>
<module name="NoWhitespaceBefore">
<!-- Checks that there is no whitespace before various unary operators.
Linebreaks are allowed. -->
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC" />
<property name="allowLineBreaks" value="true" />
<property name="severity" value="error" />
</module>
<module name="ParenPad">
<!-- Checks that there is no whitespace before close parens or after open
parens. -->
<property name="severity" value="warning" />
</module>
<module name="Indentation">
<!-- Checks code indentation -->
<property name="basicOffset" value="2" />
</module>
</module>
</module>

146
r/_tools/scalastyle.xml Normal file
View file

@ -0,0 +1,146 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!-- NOTE: This was taken and adapted from Apache Spark. -->
<!-- If you wish to turn off checking for a section of code, you can put a comment in the source
before and after the section, with the following syntax: -->
<!-- // scalastyle:off -->
<!-- ... -->
<!-- // naughty stuff -->
<!-- ... -->
<!-- // scalastyle:on -->
<scalastyle>
<name>Scalastyle standard configuration</name>
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
<!-- <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxFileLength"><![CDATA[800]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
<parameters>
<parameter name="header"><![CDATA[/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
<parameter name="tabSize"><![CDATA[2]]></parameter>
<parameter name="ignoreImports">true</parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="false"></check>
<!-- <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters>
<parameter name="maxParameters"><![CDATA[10]]></parameter>
</parameters>
</check>
<!-- <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
<!-- <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="regex"><![CDATA[println]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxTypes"><![CDATA[30]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maximum"><![CDATA[10]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
<parameters>
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
<parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
</parameters>
</check>
<!-- <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxLength"><![CDATA[50]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxMethods"><![CDATA[30]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check> -->
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
</scalastyle>

396
r/pom.xml Normal file
View file

@ -0,0 +1,396 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.6.0-incubating-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<artifactId>zeppelin-zrinterpreter</artifactId>
<packaging>jar</packaging>
<name>Zeppelin: R Interpreter</name>
<description>R Interpreter for Zeppelin</description>
<properties>
<!-- These two values are combined by the exec-maven-plugin configuration in this POM
to form the install script path: R${path.separator}install-dev${script.extension} -->
<script.extension>.sh</script.extension>
<!-- NOTE(review): "path.separator" is also the name of a standard Java system property;
confirm that this POM value is the one Maven actually resolves. -->
<path.separator>/</path.separator>
<!-- Versions referenced by the Spark and Scala dependencies declared in this module -->
<spark.version>1.4.1</spark.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
</properties>
<developers>
<developer>
<id>amos</id>
<name>Amos Elberg</name>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-spark-dependencies</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-spark</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>2.2.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<version>1.12.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<!-- jsoup HTML parser library @ http://jsoup.org/ -->
<!-- NOTE(review): the open-ended version range makes the resolved jsoup version depend on
when the build runs; consider pinning a fixed version. -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>[1.8.0,)</version>
<scope>compile</scope>
</dependency>
<!-- spark-core_${scala.binary.version} is already declared earlier in this dependency list
(same version and scope); the second, duplicate declaration that used to follow here
was removed because Maven requires each dependency key to be unique. -->
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<scope>test</scope>
<version>3.2.10</version>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>3.2.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>3.2.9</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>lib/**</exclude>
<exclude>**/r/lib/**</exclude>
<!--The following files are subject to the BSD-license or variants,
as shown in the file headers-->
<exclude>**/R/rzeppelin/R/globals.R</exclude>
<exclude>**/R/rzeppelin/R/common.R</exclude>
<exclude>**/R/rzeppelin/R/protocol.R</exclude>
<exclude>**/R/rzeppelin/R/rServer.R</exclude>
<exclude>**/R/rzeppelin/R/scalaInterpreter.R</exclude>
<exclude>**/R/rzeppelin/R/zzz.R</exclude>
<exclude>**/scala/Console.scala</exclude>
<exclude>**/zeppelin/rinterpreter/rscala/Package.scala</exclude>
<exclude>**/zeppelin/rinterpreter/rscala/RClient.scala</exclude>
<!--End of files subject to BSD-license.-->
<exclude>**/.idea/</exclude>
<!--The following files are mechanical-->
<exclude>**/R/rzeppelin/DESCRIPTION</exclude>
<exclude>**/R/rzeppelin/NAMESPACE</exclude>
<!--End of mechanical R files-->
<exclude>**/*.iml</exclude>
<exclude>.gitignore</exclude>
<exclude>**/.settings/*</exclude>
<exclude>**/.classpath</exclude>
<exclude>**/.project</exclude>
<exclude>**/target/**</exclude>
<exclude>**/derby.log</exclude>
<exclude>**/metastore_db/</exclude>
<exclude>**/README.md</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.17</version>
<configuration>
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
<skipTests>true</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<configuration>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>testoutput.txt</filereports>
<parallel>false</parallel>
<forkMode>once</forkMode>
<systemProperties>
<scala.usejavacp>true</scala.usejavacp>
</systemProperties>
</configuration>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Copy this module's own jar into the interpreter/spark directory -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<!-- Plugin to compile Scala code -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<phase>compile</phase>
</execution>
<execution>
<id>test-compile</id>
<goals>
<goal>testCompile</goal>
</goals>
<phase>test-compile</phase>
</execution>
<execution>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
<configuration>
<executable>R${path.separator}install-dev${script.extension}</executable>
</configuration>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.6.1</version>
<configuration>
<filesets>
<fileset>
<directory>${project.build.directory}/../../R</directory>
<includes>
<include>**/lib/**</include>
</includes>
</fileset>
<fileset>
<directory>${project.build.directory}/../../interpreter/spark</directory>
<includes>
<include>**/zeppelin-zr*.jar</include>
</includes>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.scheduler.Scheduler;
import java.net.URL;
import java.util.List;
import java.util.Properties;
/**
 * Java entry point for the knitr interpreter.
 *
 * <p>Zeppelin loads interpreters through Java classes whose static initializer registers
 * them, so this class registers itself under the name "knitr" in the "spark" group and
 * forwards every interpreter call to a wrapped {@link KnitRInterpreter}.
 */
public class KnitR extends Interpreter implements WrappedInterpreter {
  /** The wrapped interpreter that performs all of the actual work. */
  KnitRInterpreter intp;

  // Registration runs once, when this class is loaded.
  static {
    Interpreter.register(
        "knitr",
        "spark",
        KnitR.class.getName(),
        RInterpreter.getProps());
  }

  // ---- construction -------------------------------------------------------

  /** Creates the wrapper with default (empty) properties. */
  public KnitR() {
    this(new Properties());
  }

  /** Creates the wrapper with the given properties and Spark start-up enabled. */
  public KnitR(Properties property) {
    this(property, true);
  }

  /**
   * Creates the wrapper and the wrapped {@link KnitRInterpreter}.
   *
   * @param property interpreter properties, passed to both this class and the delegate
   * @param startSpark forwarded unchanged to the {@link KnitRInterpreter} constructor
   */
  public KnitR(Properties property, Boolean startSpark) {
    super(property);
    this.intp = new KnitRInterpreter(property, startSpark);
  }

  // ---- WrappedInterpreter -------------------------------------------------

  /** Returns the wrapped {@link KnitRInterpreter}. */
  @Override
  public Interpreter getInnerInterpreter() {
    return this.intp;
  }

  // ---- lifecycle ----------------------------------------------------------

  @Override
  public void open() {
    this.intp.open();
  }

  @Override
  public void close() {
    this.intp.close();
  }

  // ---- execution ----------------------------------------------------------

  @Override
  public InterpreterResult interpret(String s, InterpreterContext interpreterContext) {
    return this.intp.interpret(s, interpreterContext);
  }

  @Override
  public void cancel(InterpreterContext interpreterContext) {
    this.intp.cancel(interpreterContext);
  }

  @Override
  public int getProgress(InterpreterContext interpreterContext) {
    return this.intp.getProgress(interpreterContext);
  }

  @Override
  public List<String> completion(String s, int i) {
    return this.intp.completion(s, i);
  }

  @Override
  public FormType getFormType() {
    return this.intp.getFormType();
  }

  @Override
  public Scheduler getScheduler() {
    return this.intp.getScheduler();
  }

  // ---- configuration ------------------------------------------------------
  // Setters for properties and interpreter group update both this wrapper and the
  // delegate; the matching getters read from the delegate.

  @Override
  public void setProperty(Properties property) {
    super.setProperty(property);
    this.intp.setProperty(property);
  }

  @Override
  public Properties getProperty() {
    return this.intp.getProperty();
  }

  @Override
  public String getProperty(String key) {
    return this.intp.getProperty(key);
  }

  @Override
  public void setInterpreterGroup(InterpreterGroup interpreterGroup) {
    super.setInterpreterGroup(interpreterGroup);
    this.intp.setInterpreterGroup(interpreterGroup);
  }

  @Override
  public InterpreterGroup getInterpreterGroup() {
    return this.intp.getInterpreterGroup();
  }

  /** Stores the classloader URLs on the delegate only. */
  @Override
  public void setClassloaderUrls(URL[] classloaderUrls) {
    this.intp.setClassloaderUrls(classloaderUrls);
  }

  @Override
  public URL[] getClassloaderUrls() {
    return this.intp.getClassloaderUrls();
  }
}

View file

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.scheduler.Scheduler;
import java.net.URL;
import java.util.List;
import java.util.Properties;
/**
 * Java entry point for the R REPL interpreter.
 *
 * <p>Zeppelin loads interpreters through Java classes whose static initializer registers
 * them, so this class registers itself under the name "r" in the "spark" group and
 * forwards every interpreter call to a wrapped {@link RReplInterpreter}.
 */
public class RRepl extends Interpreter implements WrappedInterpreter {
  /** The wrapped interpreter that performs all of the actual work. */
  RReplInterpreter intp;

  // Registration runs once, when this class is loaded.
  static {
    Interpreter.register(
        "r",
        "spark",
        RRepl.class.getName(),
        RInterpreter.getProps());
  }

  // ---- construction -------------------------------------------------------

  /** Creates the wrapper with default (empty) properties. */
  public RRepl() {
    this(new Properties());
  }

  /** Creates the wrapper with the given properties and Spark start-up enabled. */
  public RRepl(Properties property) {
    this(property, true);
  }

  /**
   * Creates the wrapper and the wrapped {@link RReplInterpreter}.
   *
   * @param property interpreter properties, passed to both this class and the delegate
   * @param startSpark forwarded unchanged to the {@link RReplInterpreter} constructor
   */
  public RRepl(Properties property, Boolean startSpark) {
    super(property);
    this.intp = new RReplInterpreter(property, startSpark);
  }

  // ---- WrappedInterpreter -------------------------------------------------

  /** Returns the wrapped {@link RReplInterpreter}. */
  @Override
  public Interpreter getInnerInterpreter() {
    return this.intp;
  }

  // ---- lifecycle ----------------------------------------------------------

  @Override
  public void open() {
    this.intp.open();
  }

  @Override
  public void close() {
    this.intp.close();
  }

  // ---- execution ----------------------------------------------------------

  @Override
  public InterpreterResult interpret(String s, InterpreterContext interpreterContext) {
    return this.intp.interpret(s, interpreterContext);
  }

  @Override
  public void cancel(InterpreterContext interpreterContext) {
    this.intp.cancel(interpreterContext);
  }

  @Override
  public int getProgress(InterpreterContext interpreterContext) {
    return this.intp.getProgress(interpreterContext);
  }

  @Override
  public List<String> completion(String s, int i) {
    return this.intp.completion(s, i);
  }

  @Override
  public FormType getFormType() {
    return this.intp.getFormType();
  }

  @Override
  public Scheduler getScheduler() {
    return this.intp.getScheduler();
  }

  // ---- configuration ------------------------------------------------------
  // Setters for properties and interpreter group update both this wrapper and the
  // delegate; the matching getters read from the delegate.

  @Override
  public void setProperty(Properties property) {
    super.setProperty(property);
    this.intp.setProperty(property);
  }

  @Override
  public Properties getProperty() {
    return this.intp.getProperty();
  }

  @Override
  public String getProperty(String key) {
    return this.intp.getProperty(key);
  }

  @Override
  public void setInterpreterGroup(InterpreterGroup interpreterGroup) {
    super.setInterpreterGroup(interpreterGroup);
    this.intp.setInterpreterGroup(interpreterGroup);
  }

  @Override
  public InterpreterGroup getInterpreterGroup() {
    return this.intp.getInterpreterGroup();
  }

  /** Stores the classloader URLs on the delegate only. */
  @Override
  public void setClassloaderUrls(URL[] classloaderUrls) {
    this.intp.setClassloaderUrls(classloaderUrls);
  }

  @Override
  public URL[] getClassloaderUrls() {
    return this.intp.getClassloaderUrls();
  }
}

View file

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
The purpose of this class is to provide something for R to call through the backend
to bootstrap.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.spark.ZeppelinContext;
/**
 * RStatics provides static class methods that can be accessed through the SparkR bridge.
 *
 * <p>The handles are stored by the interpreter (see the {@code set*} methods) and later read
 * back from R via {@code SparkR:::callJStatic}. Because writer and reader may run on
 * different threads, every field is {@code volatile} so a stored handle is visible to the
 * reader. Accessors that need a handle which has not been stored yet fail with an
 * {@link IllegalStateException} naming the missing handle instead of a bare
 * {@link NullPointerException}.
 */
public class RStatics {
  private static volatile SparkContext sc = null;
  private static volatile ZeppelinContext z = null;
  private static volatile SQLContext sql = null;
  private static volatile RContext rCon = null;

  /**
   * Stores the SparkContext to expose to R.
   *
   * @param newSC the context to store
   * @return the context that was passed in
   */
  public static SparkContext setSC(SparkContext newSC) {
    sc = newSC;
    return newSC;
  }

  /**
   * Stores the ZeppelinContext used by {@link #getZ}, {@link #putZ} and {@link #testRDD}.
   *
   * @param newZ the context to store
   * @return the context that was passed in
   */
  public static ZeppelinContext setZ(ZeppelinContext newZ) {
    z = newZ;
    return newZ;
  }

  /**
   * Stores the SQLContext to expose to R.
   *
   * @param newSQL the context to store
   * @return the context that was passed in
   */
  public static SQLContext setSQL(SQLContext newSQL) {
    sql = newSQL;
    return newSQL;
  }

  /**
   * Wraps the stored SparkContext in a new JavaSparkContext.
   *
   * @return a new JavaSparkContext around the stored SparkContext
   * @throws IllegalStateException if no SparkContext has been stored
   */
  public static JavaSparkContext getJSC() {
    SparkContext current = sc;
    if (current == null) {
      throw new IllegalStateException("No SparkContext has been set on RStatics");
    }
    return new JavaSparkContext(current);
  }

  /** Returns the stored SparkContext, or {@code null} if none has been stored. */
  public static SparkContext getSC() {
    return sc;
  }

  /** Returns the stored SQLContext, or {@code null} if none has been stored. */
  public static SQLContext getSQL() {
    return sql;
  }

  /**
   * Looks up a value in the stored ZeppelinContext.
   *
   * @param name the key to look up
   * @return whatever the ZeppelinContext returns for {@code name}
   * @throws IllegalStateException if no ZeppelinContext has been stored
   */
  public static Object getZ(String name) {
    return requireZ().get(name);
  }

  /**
   * Stores a value in the stored ZeppelinContext.
   *
   * @param name the key to store under
   * @param obj the value to store
   * @throws IllegalStateException if no ZeppelinContext has been stored
   */
  public static void putZ(String name, Object obj) {
    requireZ().put(name, obj);
  }

  /** Returns the stored RContext, or {@code null} if none has been stored. */
  public static RContext getRCon() {
    return rCon;
  }

  /**
   * Stores the RContext to expose to R.
   *
   * @param newrCon the context to store
   * @return the context that was passed in
   */
  public static RContext setrCon(RContext newrCon) {
    rCon = newrCon;
    return newrCon;
  }

  /**
   * Reports whether the ZeppelinContext entry called {@code name} is a JavaRDD.
   *
   * @param name the key to look up
   * @return {@code true} if the stored value is a {@code JavaRDD}, {@code false} otherwise
   * @throws IllegalStateException if no ZeppelinContext has been stored
   */
  public static Boolean testRDD(String name) {
    Object x = requireZ().get(name);
    return (x instanceof org.apache.spark.api.java.JavaRDD);
  }

  /** Returns the stored ZeppelinContext or fails with a message naming what is missing. */
  private static ZeppelinContext requireZ() {
    ZeppelinContext current = z;
    if (current == null) {
      throw new IllegalStateException("No ZeppelinContext has been set on RStatics");
    }
    return current;
  }
}

View file

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
With gratitude to Shivaram for advice on how to get SparkR talking to an existing SparkContext in Java
*/
package org.apache.spark.api.r
/**
 * Thin helper around a SparkR `RBackend`: it records the port returned by `init()` and
 * owns the thread on which the backend runs.
 *
 * The intended sequence is:
 *  1. Before initializing Spark in R, after loading the library, the backend goes up and
 *     starts listening. (Note that it is able to execute arbitrary methods, so it can also
 *     be used for the Zeppelin context.)
 *  2. Tell SparkR to make a connection to the backend, setting the EXISTING port to the
 *     one held by this helper.
 *  3. Track sparkR.init, but where spark/R/pkg/R/sparkR.R calls
 *     org.apache.spark.api.r.RRDD.createSparkContext to get sc (returned as a jobj link),
 *     call RBackendHelper.getSC instead.
 *  3a. The object returned right now is actually of type JavaSparkContext; this still
 *      needs to be understood.
 *  4. For the other contexts SparkR calls related methods
 *     (org.apache.spark.sql.api.r.SQLUtils.createSQLContext), and
 *     org.apache.spark.sql.hive.HiveContext is simply constructed, with the jobj reference
 *     assigned to an object. The same pattern as above should be followed.
 */
class RBackendHelper(val backend : RBackend) {

  /** Port returned by the last `init()` call; 0 until `init()` has been called. */
  var port : Int = 0

  /** Thread whose body runs the backend; created here, started by `start()`. */
  val backendThread : Thread = new Thread("SparkR backend") {
    override def run(): Unit = backend.run()
  }

  /** Initializes the backend, remembers the port it reports, and returns that port. */
  def init() : Int = {
    val assignedPort = backend.init()
    port = assignedPort
    port
  }

  /**
   * Starts the backend thread unless it is already alive and returns it.
   * Throws a RuntimeException when the recorded port is still 0.
   */
  def start() : Thread = {
    if (port == 0) {
      throw new RuntimeException("BackendHelper must be initialized before starting")
    }
    val alreadyRunning = backendThread.isAlive
    if (!alreadyRunning) {
      backendThread.start()
    }
    backendThread
  }

  /** Closes the wrapped backend. */
  def close() : Unit = {
    backend.close()
  }
}
object RBackendHelper {
  /*
  The commented-out function below would create a new SparkContext, without registering it,
  from whatever properties are provided. It is for testing purposes and should never be called.
  */
  // def buildSparkContext( props : Properties) : SparkContext = {
  // val traversableProps : Traversable[(String, String)] = propertiesAsScalaMap(props)
  // val conf = new SparkConf().setAll(traversableProps)
  // conf.setIfMissing("spark.master", "local")
  // conf.setIfMissing("spark.app.name", "ZeppelinRContext")
  // conf.validateSettings()
  // new SparkContext(conf)
  // }

  /** Builds a helper around a newly constructed RBackend. */
  def apply() : RBackendHelper = {
    val freshBackend = new RBackend()
    new RBackendHelper(freshBackend)
  }
}

View file

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
// TODO: Capture the knitr progress bar
import java.util._
import org.apache.zeppelin.interpreter.InterpreterContext
import org.apache.zeppelin.interpreter.InterpreterResult
import org.apache.zeppelin.rinterpreter.rscala.RException
/**
 * Interpreter that renders a paragraph through knitr and returns the result as HTML.
 *
 * The first line of the paragraph text is used as the knitr chunk options; the remaining
 * lines form the chunk body.
 *
 * @param property   interpreter properties, passed on to RInterpreter
 * @param startSpark passed on to RInterpreter
 */
class KnitRInterpreter(property: Properties, startSpark : Boolean = true) extends RInterpreter(property, startSpark) {
  def this(property : Properties) = {
    this(property, true)
  }

  /**
   * Opens the underlying R interpreter, checks that the knitr package can be loaded and
   * sets the knitr options used for every chunk. Runs while holding the rContext lock.
   */
  override def open: Unit = {
    logger.trace("Opening knitr")
    rContext.synchronized {
      super.open
      logger.debug("Knitr open, initial commands")
      rContext.testRPackage("knitr", true, true, "Without knitr, the knitr interpreter cannot run.")
      // NOTE(review): results, comment, echo and tidy look like knitr chunk options rather
      // than package options; confirm whether they should be set via opts_chunk$set instead.
      rContext.eval(
        """opts_knit$set(out.format = 'html',
          |results='asis',
          |progress = FALSE,
          |self.contained = TRUE,
          |verbose = FALSE,
          |comment = NA,
          |echo = FALSE,
          |tidy = FALSE)
          | """.stripMargin)
    }
    logger.info("KnitR: Finished initial commands")
  }

  /**
   * Wraps the paragraph in a knitr chunk, knits it to HTML and returns the processed HTML.
   *
   * @param st      paragraph text; its first line supplies the chunk options
   * @param context Zeppelin interpreter context (not used here)
   * @return a SUCCESS/HTML result, the result built by the RException for R errors, or an
   *         ERROR result carrying the exception message for any other failure
   */
  def interpret(st: String, context: InterpreterContext): InterpreterResult = try {
    logger.trace("interpreting" + st)
    // need to convert st into an array of Strings within R
    val commandSt : Array[String] = st.split("\n")
    val chunkOptions = commandSt.head
    val chunkLine : String = s"```{r $chunkOptions}"
    val chunk : Array[String] = Array(chunkLine) ++: commandSt.tail ++: Array("```")
    // Hold the rContext lock across set/eval/get so the three calls run as one unit.
    val out: String = rContext.synchronized {
      rContext.set(".zeppknitrinput", chunk)
      rContext.eval(".knitout <- knit2html(text=.zeppknitrinput, envir = rzeppelin:::.zeppenv)")
      rContext.getS0(".knitout")
    }
    new InterpreterResult(InterpreterResult.Code.SUCCESS,
      InterpreterResult.Type.HTML,
      RInterpreter.processHTML(out)
    )
  } catch {
    case r: RException => r.getInterpreterResult(st)
    case e: Exception =>
      // Keep the stack trace in the log; the result only carries a one-line message.
      logger.error("Error interpreting knitr paragraph", e)
      // getMessage may be null (e.g. a bare NullPointerException); fall back to toString.
      new InterpreterResult(InterpreterResult.Code.ERROR, Option(e.getMessage()).getOrElse(e.toString))
  }
}

View file

@ -0,0 +1,321 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.io._
import java.nio.file.{Files, Paths}
import java.util.Properties
import org.apache.spark.SparkContext
import org.apache.spark.api.r.RBackendHelper
import org.apache.spark.sql.SQLContext
import org.apache.zeppelin.interpreter._
import org.apache.zeppelin.rinterpreter.rscala.RClient._
import org.apache.zeppelin.rinterpreter.rscala._
import org.apache.zeppelin.scheduler._
import org.apache.zeppelin.spark.{SparkInterpreter, ZeppelinContext}
import org.slf4j._
import scala.collection.JavaConversions._
// TODO: Setup rmr, etc.
// TODO: Stress-test spark. What happens on close? Etc.
private[rinterpreter] class RContext(private val sockets: ScalaSockets,
debug: Boolean) extends RClient(sockets.in, sockets.out, debug) {
private val logger: Logger = RContext.logger
lazy val getScheduler: Scheduler = SchedulerFactory.singleton().createOrGetFIFOScheduler(this.hashCode().toString)
val backend: RBackendHelper = RBackendHelper()
private var sc: Option[SparkContext] = None
private var sql: Option[SQLContext] = None
private var z: Option[ZeppelinContext] = None
val rPkgMatrix = collection.mutable.HashMap[String,Boolean]()
var isOpen: Boolean = false
private var isFresh : Boolean = true
private var property: Properties = null
private[rinterpreter] var sparkRStarted : Boolean = false
override def toString() : String = s"""${super.toString()}
|\t Open: $isOpen Fresh: $isFresh SparkStarted: $sparkRStarted
|\t Progress: $progress
|\t Sockets: ${sockets.toString()}
""".stripMargin
// Progress counter. Both mutators reduce the value modulo 100, so e.g. setProgress(100)
// stores 0.
var progress: Int = 0

/** Returns the current progress value. */
def getProgress: Int = progress

/** Stores `i % 100` as the progress value. */
def setProgress(i: Int) : Unit = {
  val wrapped = i % 100
  progress = wrapped
}

/** Adds `i` to the progress value and stores the sum modulo 100. */
def incrementProgress(i: Int) : Unit = {
  val advanced = progress + i
  progress = advanced % 100
}
// Properties are handled this way so that one mutable object can be shared with the
// R interpreters: the first call adopts the given object, later calls merge into it.
def setProperty(properties: Properties): Unit = synchronized {
  if (property != null) {
    property.putAll(properties)
  } else {
    property = properties
  }
}
/**
 * Opens this context. When it is already open with SparkR started, nothing is done.
 * Otherwise the rzeppelin package is checked (failure throws), SparkR is started if a
 * SparkInterpreter was supplied, and the context is marked open.
 */
def open(startSpark : Option[SparkInterpreter]): Unit = synchronized {
  val reusable = isOpen && sparkRStarted
  if (reusable) {
    logger.trace("Reusing rContext.")
  } else {
    testRPackage(
      "rzeppelin",
      fail = true,
      message = "The rinterpreter cannot run without the rzeppelin package, which was included in your distribution.")
    startSpark match {
      case Some(sparkIntp : SparkInterpreter) => sparkStartup(sparkIntp)
      case _ => logger.error("Could not find a SparkInterpreter")
    }
    isOpen = true
  }
}
/**
 * Connects the R session to the Spark contexts held by the given SparkInterpreter.
 *
 * Returns without starting SparkR (after logging an error) when SPARK_HOME is not set.
 * Any exception raised during start-up is rethrown wrapped in a RuntimeException.
 * On success `sparkRStarted` is set to true.
 */
private def sparkStartup(startSpark : SparkInterpreter): Unit = try {
// Without SPARK_HOME the method gives up early and leaves sparkRStarted untouched.
val sparkHome: String = System.getenv("SPARK_HOME") match {
case null => {
logger.error("SPARK_HOME is not set. The R Interpreter will start without Spark.")
return
}
case y => y
}
// Check that the SparkR package loads, trying the library directory under SPARK_HOME first; throws on failure.
testRPackage("SparkR", fail = true, path = sparkHome)
if (startSpark.getSparkVersion() == null) throw new RuntimeException("No spark version")
if (!startSpark.getSparkVersion().isSparkRSupported) throw new RuntimeException("SparkR requires Spark 1.4 or later")
// Remember the contexts owned by the SparkInterpreter.
sc = Some(startSpark.getSparkContext())
sql = Some(startSpark.getSQLContext())
z = Some(startSpark.getZeppelinContext())
logger.trace("Registered Spark Contexts")
// init() must run before start(): start() refuses to run while the helper's port is still 0.
backend.init()
backend.start()
if (!backend.backendThread.isAlive) throw new RuntimeException("SparkR could not startup because the Backend Thread is not alive")
logger.trace("Started Spark Backend")
// Point the R side at the backend port recorded by init().
eval( s"""SparkR:::connectBackend("localhost", ${backend.port})""")
logger.trace("SparkR backend connected")
initializeSparkR(sc.get, sql.get, z.get)
logger.info("Initialized SparkR")
sparkRStarted = true
} catch {
// Every failure above, including the version checks, ends up wrapped here.
case e: Exception => throw new RuntimeException("""
Could not connect R to Spark. If the stack trace is not clear,
check whether SPARK_HOME is set properly.""", e)
}
/**
 * Publishes the given contexts through RStatics and assigns them to R variables by
 * evaluating R code that calls back into RStatics via SparkR:::callJStatic.
 *
 * Each RStatics setter is called before the R snippet that reads the matching getter,
 * so the statement order matters.
 */
private def initializeSparkR(sc : SparkContext, sql : SQLContext, z : ZeppelinContext) : Unit = synchronized {
logger.trace("Getting a handle to the JavaSparkContext")
eval("assign(\".scStartTime\", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)")
// Expose the SparkContext, then let R fetch it (wrapped by RStatics.getJSC) into .sparkRjsc.
RStatics.setSC(sc)
eval(
"""
|assign(
|".sparkRjsc",
|SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getJSC"),
| envir = SparkR:::.sparkREnv)""".stripMargin)
// Make the same handle available as `sc` in R's global environment.
eval("assign(\"sc\", get(\".sparkRjsc\", envir = SparkR:::.sparkREnv), envir=.GlobalEnv)")
logger.trace("Established SparkR Context")
// Pick the R variable name by context type; HiveContext is tested before SQLContext.
val sqlEnvName = sql match {
case null => throw new RuntimeException("Tried to initialize SparkR without setting a SQLContext")
case x : org.apache.spark.sql.hive.HiveContext => ".sparkRHivesc"
case x : SQLContext => ".sparkRSQLsc"
}
RStatics.setSQL(sql)
eval(
s"""
|assign(
|"${sqlEnvName}",
|SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getSQL"),
| envir = SparkR:::.sparkREnv)""".stripMargin)
// Make the SQL context available as `sqlContext` in R's global environment.
eval(
s"""
|assign("sqlContext",
|get("$sqlEnvName",
|envir = SparkR:::.sparkREnv),
|envir = .GlobalEnv)
""".stripMargin)
logger.trace("Proving spark")
// Log the names now present in SparkR's environment.
val proof = evalS1("names(SparkR:::.sparkREnv)")
logger.info("Proof of spark is : " + proof.mkString)
// Expose the ZeppelinContext and this RContext, then bind the latter to .rContext in R.
RStatics.setZ(z)
RStatics.setrCon(this)
eval(
s"""
|assign(".rContext",
| SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getRCon"),
| envir = .GlobalEnv)
""".stripMargin
)
}
/**
 * Shuts the context down on a best-effort basis: each step has its own try block, so a
 * failure in one step is logged (or ignored) and the later steps still run.
 * `isOpen` is always false afterwards; `sparkRStarted` is not reset here.
 */
def close(): Unit = synchronized {
if (isOpen) {
if (sparkRStarted) {
try {
eval("SparkR:::sparkR.stop()")
} catch {
// An RException while stopping SparkR is deliberately ignored.
case e: RException => {}
case e: Exception => logger.error("Error closing SparkR", e)
}
}
try {
backend.close
// NOTE(review): Thread.stop() is deprecated in Java; also, if backend.close throws,
// this line is skipped and the backend thread is not stopped.
backend.backendThread.stop()
} catch {
case e: Exception => logger.error("Error closing RContext ", e)
}
try {
// presumably inherited from RClient and ends the R session; verify against RClient
exit()
} catch {
case e: Exception => logger.error("Shutdown error", e)
}
}
isOpen = false
}
/**
 * Checks whether an R package can be loaded, caching the answer per package name.
 *
 * The package is first required from "$path/R/lib/" and, if that fails, from R's default
 * library locations. A cached answer is returned without contacting R again.
 *
 * @param pack    name of the R package
 * @param fail    when true, a package that cannot be loaded raises an RException
 * @param license when true, the failure message explains that the package cannot be
 *                installed automatically because of its GPL3 license
 * @param message extra text appended to the failure message
 * @param path    directory whose R/lib/ subdirectory is tried first
 * @return true when the package could be loaded, false otherwise
 */
private[rinterpreter] def testRPackage(pack: String,
                                       fail: Boolean = false,
                                       license: Boolean = false,
                                       message: String = "",
                                       path : String = ""): Boolean = synchronized {
  rPkgMatrix.get(pack) match {
    case Some(cached) => cached
    case None =>
      // The second require only runs when the first one reports failure.
      val loaded =
        evalB0( s"""require('$pack',quietly=TRUE, lib.loc="$path/R/lib/")""") ||
          evalB0(s"require('$pack', quietly=TRUE)")
      rPkgMatrix.put(pack, loaded)
      if (!loaded) {
        val licenseNote =
          if (license) "We cannot install it for you because it is published under the GPL3 license."
          else ""
        val failMessage = s"""The $pack package could not be loaded. """ + licenseNote + message
        logger.error(failMessage)
        if (fail) throw new RException(failMessage)
      }
      loaded
  }
}
logger.info("RContext Finished Starting")
}
object RContext {
val logger: Logger = LoggerFactory.getLogger(getClass)
logger.trace("Inside the RContext Object")
private val contextMap : collection.mutable.HashMap[String, RContext] = collection.mutable.HashMap[String,RContext]()
// This function is here to work around inconsistencies in the SparkInterpreter startup sequence
// that caused testing issues.
//
// Closes every registered RContext and removes it from contextMap. Throws a
// RuntimeException if a context still reports isOpen after close(); contexts handled
// before that point have already been removed. Returns true otherwise.
private[rinterpreter] def resetRcon() : Boolean = synchronized {
  // Iterate over an immutable snapshot: removing entries from a mutable HashMap while
  // traversing that same map is not safe.
  val registered = contextMap.toList
  registered.foreach { case (id, con) =>
    con.close()
    if (con.isOpen) throw new RuntimeException("Failed to close an existing RContext")
    contextMap.remove(id)
  }
  true
}
def apply( property: Properties, id : String): RContext = synchronized {
contextMap.get(id) match {
case Some(x : RContext) if x.isFresh || x.isOpen => return(x)
case Some(x : RContext) => resetRcon()
case _ => {}
}
val debug: Boolean = property.getProperty("rscala.debug", "false").toBoolean
val timeout: Int = property.getProperty("rscala.timeout", "60").toInt
import scala.sys.process._
logger.trace("Creating processIO")
var cmd: PrintWriter = null
val command = RClient.defaultRCmd +: RClient.defaultArguments
val processCmd = Process(command)
val processIO = new ProcessIO(
o => {
cmd = new PrintWriter(o)
},
reader("STDOUT DEBUG: "),
reader("STDERR DEBUG: "),
true
)
val portsFile = File.createTempFile("rscala-", "")
val processInstance = processCmd.run(processIO)
// Find rzeppelin
val libpath : String = if (Files.exists(Paths.get("R/lib"))) "R/lib"
else if (Files.exists(Paths.get("../R/lib"))) "../R/lib"
else throw new RuntimeException("Could not find rzeppelin - it must be in either R/lib or ../R/lib")
val snippet =
s"""
library(lib.loc="$libpath", rzeppelin)
rzeppelin:::rServe(rzeppelin:::newSockets('${portsFile.getAbsolutePath.replaceAll(File.separator, "/")}',debug=${if (debug) "TRUE" else "FALSE"},timeout=${timeout}))
q(save='no')"""
while (cmd == null) Thread.sleep(100)
cmd.println(snippet)
cmd.flush()
val sockets = RClient.makeSockets(portsFile.getAbsolutePath)
sockets.out.writeInt(RClient.Protocol.OK)
sockets.out.flush()
val packVersion = RClient.readString(sockets.in)
if (packVersion != org.apache.zeppelin.rinterpreter.rscala.Version) {
logger.warn("Connection to R started but versions don't match " + packVersion + " " + org.apache.zeppelin.rinterpreter.rscala.Version)
} else {
logger.trace("Connected to a new R Session")
}
val context = new RContext(sockets, debug)
context.setProperty(property)
contextMap.put(id, context)
context
}
}

View file

@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.nio.file.{Files, Paths}
import java.util._
import org.apache.commons.codec.binary.{Base64, StringUtils}
import org.apache.zeppelin.interpreter.Interpreter.FormType
import org.apache.zeppelin.interpreter.remote.RemoteInterpreter
import org.apache.zeppelin.interpreter.{InterpreterContext, _}
import org.apache.zeppelin.scheduler.Scheduler
import org.apache.zeppelin.spark.SparkInterpreter
import org.jsoup.Jsoup
import org.jsoup.nodes._
import org.jsoup.select.Elements
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConversions._
import scala.io.Source
/**
 * Shared base of the R interpreters: owns the lazily created [[RContext]] and implements the
 * parts of the Interpreter contract that do not depend on how a paragraph is evaluated.
 *
 * @param properties interpreter properties, passed to the Interpreter super class
 * @param startSpark not read anywhere in this class
 */
abstract class RInterpreter(properties : Properties, startSpark : Boolean = true) extends Interpreter (properties) {

  protected val logger: Logger = RInterpreter.logger
  logger.trace("Initialising an RInterpreter of class " + this.getClass.getName)

  def getrContext: RContext = rContext

  // Created on first use, keyed by the id of the interpreter group.
  protected lazy val rContext : RContext = synchronized{ RContext(property, this.getInterpreterGroup().getId()) }

  /** Opens the R context and checks which optional R packages are available. */
  def open: Unit = rContext.synchronized {
    logger.trace("RInterpreter opening")
    // We leave this as an Option[] because the pattern of nesting SparkInterpreter inside of wrapper interpreters
    // has changed several times, and this allows us to fail more gracefully and handle those changes in one place.
    val intp : Option[SparkInterpreter] = getSparkInterpreter()
    rContext.open(intp)
    // Every message is passed through stripMargin so that the '|' continuation markers do
    // not end up in the text shown to the user.
    rContext.testRPackage("htmltools", message =
      """You can continue
        | without it, but some interactive visualizations will fail.
        | You can install it from cran.""".stripMargin)
    rContext.testRPackage("repr", license = true, message =
      """You can continue
        | without it, but some forms of output from the REPL may not appear properly.""".stripMargin)
    rContext.testRPackage("base64enc", license = true, message =
      """You can continue
        | without it, but the REPL may not show images properly.""".stripMargin)
    rContext.testRPackage("evaluate", license = false, message =
      """
        |The REPL needs this to run. It can be installed from CRAN
        | Thanks to Hadley Wickham and Yihui Xie for graciously making evaluate available under an Apache-compatible
        | license so it can be used with this project.""".stripMargin)
  }

  /** Looks up the SparkInterpreter living in the same session, if there is one. */
  def getSparkInterpreter() : Option[SparkInterpreter] =
    getSparkInterpreter(getInterpreterInTheSameSessionByClassName(classOf[SparkInterpreter].getName))

  /**
   * Unwraps `p1` until a SparkInterpreter is reached. A lazily opened wrapper is opened on
   * the way down.
   *
   * @return the SparkInterpreter, or None when `p1` neither is nor wraps one
   */
  def getSparkInterpreter(p1 : Interpreter) : Option[SparkInterpreter] = p1 match {
    case s : SparkInterpreter => Some[SparkInterpreter](s)
    case lzy : LazyOpenInterpreter =>
      val inner = lzy.getInnerInterpreter
      lzy.open()
      getSparkInterpreter(inner)
    case w : WrappedInterpreter => getSparkInterpreter(w.getInnerInterpreter)
    case _ => None
  }

  def close: Unit = {
    rContext.close
  }

  def getProgress(context :InterpreterContext): Int = rContext.getProgress

  def cancel(context:InterpreterContext) : Unit = {}

  def getFormType: FormType = FormType.NONE

  override def getScheduler : Scheduler = rContext.getScheduler

  // TODO: completion is disabled because it could not be tested with current Zeppelin code
  def completion(buf :String,cursor : Int) : List[String] = Array[String]("").toList

  // NOTE(review): buf is spliced into the R snippet unescaped; a buffer containing a double
  // quote would break the generated call. Confirm before enabling completion.
  private[rinterpreter] def hiddenCompletion(buf :String,cursor : Int) : List[String] =
    rContext.evalS1(
      s"""
         |rzeppelin:::.z.completion("$buf", $cursor)
         |""".stripMargin).toList
}
/**
 * Companion of [[RInterpreter]]: the extra interpreter properties and the helpers that
 * post-process HTML produced by R.
 */
object RInterpreter {

  private val logger: Logger = LoggerFactory.getLogger(getClass)
  logger.trace("logging inside the RInterpreter singleton")

  // These are the additional properties we need on top of the ones provided by the spark interpreters
  // NOTE(review): the key "rhadooop.streamingjar" (three o's) looks like a typo, but it is a
  // user-visible property name, so it is left untouched here; confirm against the code that reads it.
  lazy val props: Map[String, InterpreterProperty] = new InterpreterPropertyBuilder()
    .add("rhadoop.cmd", SparkInterpreter.getSystemDefault("rhadoop.cmd", "HADOOP_CMD", ""), "Usually /usr/bin/hadoop")
    // The default is looked up under its own name, not under the hadoop command's name.
    .add("rhadooop.streamingjar", SparkInterpreter.getSystemDefault("rhadoop.streamingjar", "HADOOP_STREAMING", ""), "Usually /usr/lib/hadoop/contrib/streaming/hadoop-streaming-<version>.jar")
    .add("rscala.debug", SparkInterpreter.getSystemDefault("rscala.debug","RSCALA_DEBUG", "false"), "Whether to turn on rScala debugging") // TEST: Implemented but not tested
    .add("rscala.timeout", SparkInterpreter.getSystemDefault("rscala.timeout","RSCALA_TIMEOUT", "60"), "Timeout for rScala") // TEST: Implemented but not tested
    .build

  def getProps() = {
    props
  }

  // Some R interactive visualization packages insist on producing HTML that refers to javascript
  // or css by file path. These functions are intended to load those files and embed them into the
  // HTML as Base64 encoded DataURIs.

  //FIXME These don't error but may not yet properly be converting script links
  /**
   * Rewrites attribute `testAttr` on every `tag` element below `doc` whose value is
   * non-empty and starts with "/".
   */
  def scriptToBase(doc : Element, testAttr : String, tag : String, mime : String): Unit = {
    val elems : Elements = doc.getElementsByTag(tag)
    elems.filter( (e : Element) => {
      e.attributes().hasKey(testAttr) && e.attr(testAttr) != "" && e.attr(testAttr).startsWith("/")
    }).foreach(scriptToBase(_, testAttr, mime))
  }

  /**
   * Rewrites a single attribute: a value naming an existing file becomes a data URI, a
   * protocol-relative "//" value gets an "http:" prefix, anything else is left alone.
   */
  def scriptToBase(node : Element, field : String, mime : String) : Unit = node.attr(field) match {
    case x if Files.exists(Paths.get(x)) => node.attr(field, dataURI(x, mime))
    case x if x.slice(0,4) == "http" => {}
    case x if x.contains("ajax") => {}
    case x if x.contains("googleapis") => {}
    case x if x.slice(0,2) == "//" => node.attr(field, "http:" + x)
    case _ => {}
  }

  /**
   * Encodes the content of `file` as a base64 data URI with the given mime type.
   *
   * The raw bytes of the file are encoded, so nothing depends on the platform charset and no
   * file handle is left open.
   */
  def dataURI(file : String, mime : String) : String = {
    val raw: Array[Byte] = Files.readAllBytes(Paths.get(file))
    s"""data:${mime};base64,""" + StringUtils.newStringUtf8(Base64.encodeBase64(raw, false))
  }

  // The purpose here is to deal with knitr producing HTML with script and css tags outside the <body>
  def processHTML(input: Array[String]): String = processHTML(input.mkString("\n"))

  def processHTML(input: String) : String = {
    val doc : Document = Jsoup.parse(input)
    processHTML(doc)
  }

  /** Moves script and link elements from the head into the body and returns the body HTML. */
  private def processHTML(doc : Document) : String = {
    val bod : Element = doc.body()
    val head : Element = doc.head()
    // Try to ignore the knitr script that breaks zeppelin display
    head.getElementsByTag("script").reverseIterator.foreach(bod.prependChild(_))
    // Only get css from head if it links to a file
    head.getElementsByTag("link").foreach(bod.prependChild(_))
    scriptToBase(bod, "href", "link", "text/css")
    scriptToBase(bod, "src", "script", "text/javascript")
    bod.html()
  }
}

View file

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
// TODO: Option for setting size of output images
import java.util._
import org.apache.zeppelin.interpreter.InterpreterContext
import org.apache.zeppelin.interpreter.InterpreterResult
import org.apache.zeppelin.rinterpreter.rscala.RException
/**
 * R interpreter that evaluates a paragraph through `rzeppelin:::.z.valuate` and turns each
 * element of the result into Zeppelin output.
 */
class RReplInterpreter(property: Properties, startSpark : Boolean = true) extends RInterpreter(property, startSpark) {

  def this(property : Properties) = {
    this(property, true)
  }

  // True until the first paragraph has been interpreted; package startup messages are
  // hidden while it is set.
  private var firstCell : Boolean = true

  /**
   * Evaluates `st` in R and converts the outcome into an InterpreterResult.
   *
   * A plot, an html value or a data.frame ends the paragraph immediately with an IMG, HTML or
   * TABLE result. All other elements are rendered as text and joined by blank lines; the
   * result code is ERROR when any element was a simpleError.
   */
  def interpret(st: String, context: InterpreterContext): InterpreterResult = {
    rContext.synchronized {
      try {
        import scala.collection.immutable._
        logger.info("intrpreting " + st)
        rContext.set(".zreplin", st.split("\n"))
        rContext.eval(".zreplout <- rzeppelin:::.z.valuate(.zreplin)")

        val reslength: Int = rContext.evalI0("length(.zreplout)")
        logger.debug("Length of evaluate result is " + reslength)
        var gotError: Boolean = false
        // R indices are 1-based. The `return`s inside the function literal leave interpret().
        val result: String = List.range(1, reslength + 1).map((i: Int) => {
          rContext.evalS1(s"class(.zreplout[[${i}]])") match {
            case x: Array[String] if x contains ("recordedplot") => {
              if (!rContext.testRPackage("repr", fail = false)) return new InterpreterResult(InterpreterResult.Code.ERROR,
                InterpreterResult.Type.TEXT,
                "Displaying images through the R REPL requires the repr package, which is not installed.")
              val image: String = rContext.evalS0(s"base64enc:::base64encode(repr:::repr_jpg(.zreplout[[${i}]]))")
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.IMG, image)
            }
            //TODO: If the html contains a link to a file, transform it to a DataURI. This is necessary for htmlwidgets
            case x: Array[String] if x contains ("html") => {
              val html: String = RInterpreter.processHTML(rContext.evalS0(s"rzeppelin:::.z.repr(.zreplout[[${i}]])"))
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.HTML, html)
            }
            case x: Array[String] if x contains "data.frame" => {
              val table: Array[String] = rContext.evalS1( s"""rzeppelin:::.z.table(.zreplout[[${i}]])""")
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.TABLE,
                table.mkString(sep = "\n"))
            }
            case x: Array[String] if x contains "source" => rContext.evalS0(s".zreplout[[${i}]]" + "$src")
            case x: Array[String] if x contains "character" => rContext.evalS0(s".zreplout[[${i}]]")
            case x: Array[String] if x contains "packageStartupMessage" =>
              if (firstCell) ""
              else "Package Startup Message: " + rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
            case x: Array[String] if x contains "simpleError" => {
              gotError = true
              val error = rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
              logger.error(error)
              error
            }
            case _ => rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
          }
        }).mkString("\n\n")

        new InterpreterResult({
          if (!gotError) InterpreterResult.Code.SUCCESS
          else InterpreterResult.Code.ERROR
        }, result)
      } catch {
        case re: RException => re.getInterpreterResult(st)
        case e: Exception => {
          logger.error("Error interpreting " + st, e)
          // Render the frames one per line; concatenating the array itself only yields its
          // identity string.
          new InterpreterResult(InterpreterResult.Code.ERROR,
            e.getMessage() + "\n" + e.getStackTrace.mkString("\n"))
        }
      } finally {
        // Previously the flag was never cleared, which made the startup-message branch
        // unreachable.
        firstCell = false
      }
    }
  }
}

View file

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin
// TODO: Keeping interpreter out of spark interpreter group for now, until the context sharing code is developed
// TEST: rmr2
// TODO: Link getProgress to plyr (and knitr progress) if possible
// TODO: Forms?
// TODO: Completion? Currently commented-out
// TODO: It would be nice if the RReplInterpreter output svg instead of jpg, or intelligently selected, at a minimum
// TODO: Some kind of proxy may be necessary for shiny and widgets see http://blog.dominodatalab.com/interactive-dashboards-with-knitr-and-html-widgets/
/** Package object of the R interpreter package; it declares no members. */
package object rinterpreter {}

View file

@ -0,0 +1,39 @@
package org.apache.zeppelin.rinterpreter
/*
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** Version information of the embedded rscala bridge. */
package object rscala {

  /** Version string compared against the one reported by the R side on connection. */
  val Version: String = "0.1.0"

  /** Release date belonging to [[Version]]. */
  val Date: String = "2015-05-15"
}

View file

@ -0,0 +1,527 @@
/*
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.apache.zeppelin.rinterpreter.rscala
// TODO: Add libdir to constructor
import java.io._
import java.net.{InetAddress, ServerSocket}
import org.slf4j.{Logger, LoggerFactory}
import scala.language.dynamics
/**
 * Scala end of the rscala wire protocol: requests are written to `out`, replies are read
 * from `in`.
 *
 * @param in    stream carrying replies from R
 * @param out   stream carrying requests to R
 * @param debug debug flag the client starts with
 */
class RClient (private val in: DataInputStream,
               private val out: DataOutputStream,
               val debug: Boolean = true) extends Dynamic {

  /** Set once a SocketException was seen in eval; eval and set then refuse to run. */
  var damagedState : Boolean = false
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  /** Handle to an object kept on the R side. */
  case class RObjectRef(val reference : String) {
    override def toString() = ".$"+reference
  }

  /** __For rscala developers only__: Sets whether debugging output should be displayed. */
  def debug_=(v: Boolean) = {
    if ( v != debug ) {
      if ( debug ) logger.debug("Sending DEBUG request.")
      out.writeInt(RClient.Protocol.DEBUG)
      out.writeInt(if ( v ) 1 else 0)
      out.flush()
    }
  }

  /** Sends the EXIT command to R. */
  def exit() = {
    logger.debug("Sending EXIT request.")
    out.writeInt(RClient.Protocol.EXIT)
    out.flush()
  }

  /**
   * Evaluates `snippet` in R.
   *
   * Any text R sends back as output is treated as an error. A SocketException is not
   * rethrown: it is logged and the client is flagged as damaged.
   *
   * @param evalOnly when false the value of `.rzeppelin.last.value` is fetched and returned
   * @return null when `evalOnly` is true, otherwise the fetched value
   * @throws RException if the client is damaged, R produced output, or the status is not OK
   */
  def eval(snippet: String, evalOnly: Boolean = true): Any = try {
    if (damagedState) throw new RException("Connection to R already damaged")
    logger.debug("Sending EVAL request.")
    out.writeInt(RClient.Protocol.EVAL)
    RClient.writeString(out,snippet)
    out.flush()
    val status = in.readInt()
    val output = RClient.readString(in)
    if ( output != "" ) {
      logger.error("R Error " + snippet + " " + output)
      throw new RException(snippet, output)
    }
    if ( status != RClient.Protocol.OK ) throw new RException(snippet, output, "Error in R evaluation.")
    if ( evalOnly ) null else get(".rzeppelin.last.value")._1
  } catch {
    case e : java.net.SocketException => {
      logger.error("Connection to R appears to have shut down" + e)
      damagedState = true
    }
  }

  // Evaluate a snippet, then read `.rzeppelin.last.value` back converted to the requested type.
  def evalI0(snippet: String) = { eval(snippet,true); getI0(".rzeppelin.last.value") }
  def evalB0(snippet: String) = { eval(snippet,true); getB0(".rzeppelin.last.value") }
  def evalS0(snippet: String) = { eval(snippet,true); getS0(".rzeppelin.last.value") }
  def evalI1(snippet: String) = { eval(snippet,true); getI1(".rzeppelin.last.value") }
  def evalB1(snippet: String) = { eval(snippet,true); getB1(".rzeppelin.last.value") }
  def evalS1(snippet: String) = { eval(snippet,true); getS1(".rzeppelin.last.value") }
  def evalR( snippet: String) = { eval(snippet,true); getR( ".rzeppelin.last.value") }

  def set(identifier: String, value: Any): Unit = set(identifier,value,"",true)

  /**
   * Assigns `value` to `identifier` in R.
   *
   * Supported values are null/Unit, Int, Boolean, String and arrays of those three types.
   *
   * @param index          when non-empty the assignment targets `identifier[index]` or
   *                       `identifier[[index]]`, and the status R sends back is checked
   * @param singleBrackets selects single instead of double brackets for an indexed assignment
   * @throws RException if the client is damaged, the value type is unsupported, or R reports
   *                    a failure for an indexed assignment
   */
  def set(identifier: String, value: Any, index: String = "", singleBrackets: Boolean = true): Unit = {
    if (damagedState) throw new RException("Connection to R already damaged")
    if ( index == "" ) out.writeInt(RClient.Protocol.SET)
    else {
      out.writeInt(if ( singleBrackets ) RClient.Protocol.SET_SINGLE else RClient.Protocol.SET_DOUBLE)
      RClient.writeString(out,index)
    }
    RClient.writeString(out,identifier)
    if ( value == null || value.isInstanceOf[Unit] ) {
      logger.debug("... which is null")
      out.writeInt(RClient.Protocol.NULLTYPE)
    } else {
      writeValue(value)
    }
    out.flush()
    // R only acknowledges indexed assignments.
    if ( index != "" ) checkIndexedSetStatus(identifier, value)
  }

  /** Writes the structure tag, the element type tag and the payload of a non-null value. */
  private def writeValue(value: Any): Unit = {
    val c = value.getClass
    logger.debug("... whose class is: "+c)
    logger.debug("... and whose value is: "+value)
    value match {
      case vv: Array[Int] =>
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.INTEGER)
        vv.foreach(out.writeInt(_))
      case vv: Array[Boolean] =>
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.BOOLEAN)
        vv.foreach(b => out.writeInt(if ( b ) 1 else 0))
      case vv: Array[String] =>
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.STRING)
        vv.foreach(RClient.writeString(out, _))
      case _ if c.isArray =>
        throw new RException("Unsupported array type: "+c.getName)
      case i: Int =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.INTEGER)
        out.writeInt(i)
      case b: Boolean =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.BOOLEAN)
        out.writeInt(if ( b ) 1 else 0)
      case s: String =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.STRING)
        RClient.writeString(out,s)
      case _ =>
        throw new RException("Unsupported non-array type: "+c.getName)
    }
  }

  /** Reads the status of an indexed assignment and raises an RException unless it is OK. */
  private def checkIndexedSetStatus(identifier: String, value: Any): Unit = {
    val status = in.readInt()
    if ( status != RClient.Protocol.OK ) {
      val output = RClient.readString(in)
      // Interpolation renders null as "null"; calling toString() on it would throw.
      val shown = s"$value"
      if ( output != "" ) {
        logger.error("R error setting " + output)
        throw new RException(identifier + shown, output, "Error setting")
      }
      throw new RException("Error in R evaluation. Set " + identifier + " to " + shown)
    }
  }

  /**
   * Fetches `identifier` from R.
   *
   * @param asReference when true a reference to the R object is requested instead of its value
   * @return the value paired with a tag naming its Scala type ("Null", "Int", "Array[Int]",
   *         "Array[Array[Int]]", "RObject", ...)
   * @throws RException for an undefined identifier, an unsupported structure, or an
   *                    unexpected protocol code
   */
  def get(identifier: String, asReference: Boolean = false): (Any,String) = {
    logger.debug("Getting: "+identifier)
    out.writeInt(if ( asReference ) RClient.Protocol.GET_REFERENCE else RClient.Protocol.GET)
    RClient.writeString(out,identifier)
    out.flush()
    if ( asReference ) {
      in.readInt() match {
        case RClient.Protocol.REFERENCE => (RObjectRef(RClient.readString(in)),"RObject")
        case RClient.Protocol.UNDEFINED_IDENTIFIER =>
          throw new RException("Undefined identifier")
        // Any other code used to escape as a MatchError.
        case _ => throw new RException("Protocol error")
      }
    } else in.readInt match {
      case RClient.Protocol.NULLTYPE =>
        logger.debug("Getting null.")
        (null,"Null")
      case RClient.Protocol.ATOMIC =>
        logger.debug("Getting atomic.")
        in.readInt() match {
          case RClient.Protocol.INTEGER => (in.readInt(),"Int")
          case RClient.Protocol.DOUBLE => (in.readDouble(),"Double")
          case RClient.Protocol.BOOLEAN => (( in.readInt() != 0 ),"Boolean")
          case RClient.Protocol.STRING => (RClient.readString(in),"String")
          case _ => throw new RException("Protocol error")
        }
      case RClient.Protocol.VECTOR =>
        logger.debug("Getting vector...")
        val length = in.readInt()
        logger.debug("... of length: "+length)
        in.readInt() match {
          case RClient.Protocol.INTEGER => (Array.fill(length) { in.readInt() },"Array[Int]")
          case RClient.Protocol.DOUBLE => (Array.fill(length) { in.readDouble() },"Array[Double]")
          case RClient.Protocol.BOOLEAN => (Array.fill(length) { ( in.readInt() != 0 ) },"Array[Boolean]")
          case RClient.Protocol.STRING => (Array.fill(length) { RClient.readString(in) },"Array[String]")
          case _ => throw new RException("Protocol error")
        }
      case RClient.Protocol.MATRIX =>
        logger.debug("Getting matrix...")
        val nrow = in.readInt()
        val ncol = in.readInt()
        logger.debug("... of dimensions: "+nrow+","+ncol)
        in.readInt() match {
          case RClient.Protocol.INTEGER => (Array.fill(nrow) { Array.fill(ncol) { in.readInt() } },"Array[Array[Int]]")
          case RClient.Protocol.DOUBLE => (Array.fill(nrow) { Array.fill(ncol) { in.readDouble() } },"Array[Array[Double]]")
          case RClient.Protocol.BOOLEAN => (Array.fill(nrow) { Array.fill(ncol) { ( in.readInt() != 0 ) } },"Array[Array[Boolean]]")
          case RClient.Protocol.STRING => (Array.fill(nrow) { Array.fill(ncol) { RClient.readString(in) } },"Array[Array[String]]")
          case _ => throw new RException("Protocol error")
        }
      case RClient.Protocol.UNDEFINED_IDENTIFIER => throw new RException("Undefined identifier")
      case RClient.Protocol.UNSUPPORTED_STRUCTURE => throw new RException("Unsupported data type")
      case _ => throw new RException("Protocol error")
    }
  }

  // The getters below convert whatever get() returned into the requested type. The "0"
  // variants take the first element of a vector, the "1" variants wrap a scalar in an array.
  // Matrices and null cannot be converted and raise an RException.

  def getI0(identifier: String): Int = get(identifier) match {
    case (a: Int,"Int") => a
    case (a: Double,"Double") => a.toInt
    case (a: Boolean,"Boolean") => if (a) 1 else 0
    case (a: String,"String") => a.toInt
    case (a: Array[Int],"Array[Int]") => a(0)
    case (a: Array[Double],"Array[Double]") => a(0).toInt
    case (a: Array[Boolean],"Array[Boolean]") => if ( a(0) ) 1 else 0
    case (a: Array[String],"Array[String]") => a(0).toInt
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Int")
  }

  def getD0(identifier: String): Double = get(identifier) match {
    case (a: Int,"Int") => a.toDouble
    case (a: Double,"Double") => a
    case (a: Boolean,"Boolean") => if (a) 1.0 else 0.0
    case (a: String,"String") => a.toDouble
    case (a: Array[Int],"Array[Int]") => a(0).toDouble
    case (a: Array[Double],"Array[Double]") => a(0)
    case (a: Array[Boolean],"Array[Boolean]") => if ( a(0) ) 1.0 else 0.0
    case (a: Array[String],"Array[String]") => a(0).toDouble
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Double")
  }

  // Double results are accepted by the remaining getters as well, matching getI0 and getD0.

  def getB0(identifier: String): Boolean = get(identifier) match {
    case (a: Int,"Int") => a != 0
    case (a: Double,"Double") => a != 0
    case (a: Boolean,"Boolean") => a
    case (a: String,"String") => a.toLowerCase != "false"
    case (a: Array[Int],"Array[Int]") => a(0) != 0
    case (a: Array[Double],"Array[Double]") => a(0) != 0
    case (a: Array[Boolean],"Array[Boolean]") => a(0)
    case (a: Array[String],"Array[String]") => a(0).toLowerCase != "false"
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Boolean")
  }

  def getS0(identifier: String): String = get(identifier) match {
    case (a: Int,"Int") => a.toString
    case (a: Double,"Double") => a.toString
    case (a: Boolean,"Boolean") => a.toString
    case (a: String,"String") => a
    case (a: Array[Int],"Array[Int]") => a(0).toString
    case (a: Array[Double],"Array[Double]") => a(0).toString
    case (a: Array[Boolean],"Array[Boolean]") => a(0).toString
    case (a: Array[String],"Array[String]") => a(0)
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to String")
  }

  def getI1(identifier: String): Array[Int] = get(identifier) match {
    case (a: Int,"Int") => Array(a)
    case (a: Double,"Double") => Array(a.toInt)
    case (a: Boolean,"Boolean") => Array(if (a) 1 else 0)
    case (a: String,"String") => Array(a.toInt)
    case (a: Array[Int],"Array[Int]") => a
    case (a: Array[Double],"Array[Double]") => a.map(_.toInt)
    case (a: Array[Boolean],"Array[Boolean]") => a.map(x => if (x) 1 else 0)
    case (a: Array[String],"Array[String]") => a.map(_.toInt)
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[Int]")
  }

  def getB1(identifier: String): Array[Boolean] = get(identifier) match {
    case (a: Int,"Int") => Array(a != 0)
    case (a: Double,"Double") => Array(a != 0)
    case (a: Boolean,"Boolean") => Array(a)
    case (a: String,"String") => Array(a.toLowerCase != "false")
    case (a: Array[Int],"Array[Int]") => a.map(_ != 0)
    case (a: Array[Double],"Array[Double]") => a.map(_ != 0)
    case (a: Array[Boolean],"Array[Boolean]") => a
    case (a: Array[String],"Array[String]") => a.map(_.toLowerCase != "false")
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[Boolean]")
  }

  def getS1(identifier: String): Array[String] = get(identifier) match {
    case (a: Int,"Int") => Array(a.toString)
    case (a: Double,"Double") => Array(a.toString)
    case (a: Boolean,"Boolean") => Array(a.toString)
    case (a: String,"String") => Array(a)
    case (a: Array[Int],"Array[Int]") => a.map(_.toString)
    case (a: Array[Double],"Array[Double]") => a.map(_.toString)
    case (a: Array[Boolean],"Array[Boolean]") => a.map(_.toString)
    case (a: Array[String],"Array[String]") => a
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[String]")
  }

  def getR(identifier: String): RObjectRef = get(identifier,true) match {
    case (a: RObjectRef,"RObject") => a
    case (_,tp) => throw new RException(s"Unable to cast ${tp} to RObject")
  }

  /** Sends the GC command to R. */
  def gc(): Unit = {
    logger.debug("Sending GC request.")
    out.writeInt(RClient.Protocol.GC)
    out.flush()
  }
}
object RClient {
/** Integer codes exchanged between the Scala client and the R server. */
object Protocol {

  // Data Types
  val UNSUPPORTED_TYPE: Int = 0
  val INTEGER: Int = 1
  val DOUBLE: Int = 2
  val BOOLEAN: Int = 3
  val STRING: Int = 4
  val DATE: Int = 5
  val DATETIME: Int = 6

  // Data Structures
  val UNSUPPORTED_STRUCTURE: Int = 10
  val NULLTYPE: Int = 11
  val REFERENCE: Int = 12
  val ATOMIC: Int = 13
  val VECTOR: Int = 14
  val MATRIX: Int = 15
  val LIST: Int = 16
  val DATAFRAME: Int = 17
  val S3CLASS: Int = 18
  val S4CLASS: Int = 19
  val JOBJ: Int = 20

  // Commands
  val EXIT: Int = 100
  val RESET: Int = 101
  val GC: Int = 102
  val DEBUG: Int = 103
  val EVAL: Int = 104
  val SET: Int = 105
  val SET_SINGLE: Int = 106
  val SET_DOUBLE: Int = 107
  val GET: Int = 108
  val GET_REFERENCE: Int = 109
  val DEF: Int = 110
  val INVOKE: Int = 111
  val SCALAP: Int = 112

  // Result
  val OK: Int = 1000
  val ERROR: Int = 1001
  val UNDEFINED_IDENTIFIER: Int = 1002

  // Misc.
  val CURRENT_SUPPORTED_SCALA_VERSION: String = "2.10"
}
/** Writes `string` as a 4-byte length (counted in bytes) followed by its UTF-8 encoding. */
def writeString(out: DataOutputStream, string: String): Unit = {
  val encoded = string.getBytes("UTF-8")
  out.writeInt(encoded.length)
  out.write(encoded)
}
/** Reads a 4-byte length and then that many bytes, decoded as UTF-8. */
def readString(in: DataInputStream): String = {
  val payload = new Array[Byte](in.readInt())
  in.readFully(payload)
  new String(payload, "UTF-8")
}
/** Tells whether every row of `x` has the same length; an empty array counts as a matrix. */
def isMatrix[T](x: Array[Array[T]]): Boolean =
  x.length == 0 || x.forall(row => row.length == x(0).length)
import scala.sys.process._
private val logger: Logger = LoggerFactory.getLogger(getClass)
// Operating system family derived from the "os.name" system property; names starting with
// "unix" are reported as "linux". Any other name raises an RException.
val OS = {
  val osName = sys.props("os.name").toLowerCase
  if (osName.startsWith("""windows""")) "windows"
  else if (osName.startsWith("""linux""") || osName.startsWith("""unix""")) "linux"
  else if (osName.startsWith("""mac""")) "macintosh"
  else throw new RException("Unrecognized OS")
}
// Command-line flags handed to R; only the last flag differs between Windows and the rest.
val defaultArguments = OS match {
  case "windows" =>
    Array[String]("--vanilla","--silent","--slave","--ess")
  case "linux" | "unix" | "macintosh" =>
    Array[String]("--vanilla","--silent","--slave","--interactive")
}
// Command used to start R: looked up through findROnWindows on Windows, plain "R" elsewhere.
// Lazy, so the Windows lookup only runs when the command is first needed.
lazy val defaultRCmd = OS match {
  case "windows" => findROnWindows
  case "linux" | "unix" | "macintosh" => """R"""
}
/**
 * Locates R.exe by querying the `InstallPath` value under `Software\R-core\R` with `reg query`.
 *
 * Both HKEY_LOCAL_MACHINE and HKEY_CURRENT_USER are tried, in that order; when both yield a
 * path, the later one wins. A root that cannot be queried or parsed is skipped.
 *
 * @return the path of `bin\R.exe` below the install path that was found
 * @throws RException if neither root yields an install path
 */
def findROnWindows: String = {
  import scala.util.control.NonFatal
  val NEWLINE = sys.props("line.separator")
  var result : String = null
  for ( root <- List("HKEY_LOCAL_MACHINE","HKEY_CURRENT_USER") ) {
    val out = new StringBuilder()
    // Collect stdout line by line and discard stderr.
    val sink = ProcessLogger((o: String) => { out.append(o); out.append(NEWLINE) },(e: String) => {})
    try {
      ("reg query \"" + root + "\\Software\\R-core\\R\" /v \"InstallPath\"") ! sink
      val a = out.toString.split(NEWLINE).filter(_.matches("""^\s*InstallPath\s*.*"""))(0)
      result = a.split("REG_SZ")(1).trim() + """\bin\R.exe"""
    } catch {
      // Best effort per root, but fatal errors are no longer swallowed.
      case NonFatal(_) =>
    }
  }
  if ( result == null ) throw new RException("Cannot locate R using Windows registry.")
  else result
}
/** Drains `input` line by line, logging each line at debug level.
  *
  * Used as the stdout/stderr handler of the R process. The stream is closed
  * when it is exhausted, and also if reading fails part-way.
  *
  * @param label prefix prepended to every logged line
  * @param input stream to consume until end of input
  */
def reader(label: String)(input: InputStream) = {
  val in = new BufferedReader(new InputStreamReader(input))
  try {
    var line = in.readLine()
    while ( line != null ) {
      logger.debug(label+line)
      line = in.readLine()
    }
  } finally {
    // Close even when readLine throws so the pipe is not leaked.
    in.close()
  }
}
/** Server side of the two socket connections used to talk to the peer process.
  *
  * Construction opens two server sockets on the loopback address with
  * OS-assigned ports, writes both port numbers (space separated, "in" first)
  * to `portsFilename`, and then blocks until a client has connected to each
  * socket, first `serverIn`, then `serverOut`.
  *
  * @param portsFilename file that receives the two port numbers
  * @throws java.io.IOException if the port numbers cannot be written
  */
class ScalaSockets(portsFilename: String) {
  private val logger: Logger = LoggerFactory.getLogger(getClass)
  // Port 0 = any free port; getByName(null) resolves to the loopback address.
  val serverIn = new ServerSocket(0,0,InetAddress.getByName(null))
  val serverOut = new ServerSocket(0,0,InetAddress.getByName(null))
  locally {
    logger.info("Trying to open ports filename: "+portsFilename)
    val portNumberFile = new File(portsFilename)
    val p = new PrintWriter(portNumberFile)
    // PrintWriter never throws on write; checkError() flushes and reports failure.
    val writeFailed =
      try {
        p.println(serverIn.getLocalPort+" "+serverOut.getLocalPort)
        p.checkError()
      } finally {
        p.close()
      }
    if ( writeFailed ) {
      // Without the ports file nobody can connect, so accept() below would block forever.
      serverIn.close()
      serverOut.close()
      throw new java.io.IOException("Could not write port numbers to " + portsFilename)
    }
    logger.info("Servers are running on port "+serverIn.getLocalPort+" "+serverOut.getLocalPort)
  }
  val socketIn = serverIn.accept
  logger.info("serverinaccept done")
  val in = new DataInputStream(new BufferedInputStream(socketIn.getInputStream))
  logger.info("in has been created")
  val socketOut = serverOut.accept
  logger.info("serverouacceptdone")
  val out = new DataOutputStream(new BufferedOutputStream(socketOut.getOutputStream))
  logger.info("out is done")
}
/** Creates a [[ScalaSockets]] that publishes its ports through `portsFilename`. */
def makeSockets(portsFilename : String) = {
  new ScalaSockets(portsFilename)
}
/** Starts R using the platform default command and returns a connected [[RClient]]. */
def apply(): RClient = {
  apply(defaultRCmd)
}
/** Launches an R process and returns an [[RClient]] connected to it.
  *
  * Steps: start `rCmd` with [[defaultArguments]]; create a temp file for the
  * port numbers; send R a snippet on stdin that calls
  * `rscala:::rServe(rscala:::newSockets(<ports file>, debug, timeout))`; open
  * the Scala-side sockets; send `Protocol.OK`; and check that the version
  * string sent back by R equals the Scala-side `Version`.
  *
  * @param rCmd    R executable to run
  * @param libdir  not used by this method
  * @param debug   passed to `newSockets` as `debug=TRUE`/`FALSE`
  * @param timeout passed to `newSockets` as `timeout=`
  * @return a client wrapping the connected streams
  * @throws RException if the version cannot be read or does not match
  */
def apply(rCmd: String, libdir : String = "",debug: Boolean = false, timeout: Int = 60): RClient = {
  logger.debug("Creating processIO")
  // Assigned on the process I/O thread; @volatile so the polling loop below sees the write.
  @volatile var cmd: PrintWriter = null
  val command = rCmd +: defaultArguments
  val processCmd = Process(command)
  val processIO = new ProcessIO(
    o => { cmd = new PrintWriter(o) },
    reader("STDOUT DEBUG: "),
    reader("STDERR DEBUG: "),
    true
  )
  val portsFile = File.createTempFile("rscala-","")
  // The file is only needed for the handshake; make sure it does not outlive the JVM.
  portsFile.deleteOnExit()
  processCmd.run(processIO)
  // Literal replace, not replaceAll: File.separator is "\" on Windows, which is
  // not a valid regular expression and made replaceAll throw.
  val portsPath = portsFile.getAbsolutePath.replace(File.separator, "/")
  val snippet = s"""
rscala:::rServe(rscala:::newSockets('${portsPath}',debug=${if ( debug ) "TRUE" else "FALSE"},timeout=${timeout}))
q(save='no')
"""
  // Wait until the I/O thread has handed over R's stdin.
  while ( cmd == null ) Thread.sleep(100)
  logger.info("sending snippet " + snippet)
  cmd.println(snippet)
  cmd.flush()
  val sockets = makeSockets(portsFile.getAbsolutePath)
  sockets.out.writeInt(Protocol.OK)
  sockets.out.flush()
  // Explicit comparison, not assert(): assertions can be elided at compile time,
  // and a read failure should not be reported as a version mismatch.
  val rVersion =
    try {
      readString(sockets.in)
    } catch {
      case scala.util.control.NonFatal(e) =>
        throw new RException("Could not read the rscala package version from R: " + e)
    }
  if ( rVersion != org.apache.zeppelin.rinterpreter.rscala.Version ) {
    throw new RException("The scala and R versions of the package don't match")
  }
  apply(sockets.in,sockets.out)
}
/** __For rscala developers only__: wraps an already-connected pair of streams
  * in a new instance of the [[RClient]] class.
  *
  * @param in  stream carrying data received from R
  * @param out stream carrying data sent to R
  */
def apply(in: DataInputStream, out: DataOutputStream): RClient = {
  new RClient(in,out)
}
}

View file

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter.rscala
import org.apache.zeppelin.interpreter.InterpreterResult
/**
 * Exception describing a failure reported while working with R.
 *
 * @param snippet the R code (or, for the one-argument constructor, the
 *                free-form description) associated with the failure
 * @param error   the error text
 * @param message optional additional message, empty by default
 */
class RException(val snippet : String, val error : String, val message : String = "") extends Exception {
  /** Creates an exception carrying only `snippet`; `error` and `message` are empty. */
  def this(snippet : String) = this(snippet, "")

  /**
   * Returns the non-empty parts of `message`, `snippet` and `error` joined by
   * newlines, so that logs and stack traces show why the exception was raised.
   * Falls back to the default (null) when all parts are empty.
   */
  override def getMessage(): String = {
    val parts = Seq(message, snippet, error).filter(part => part != null && part.nonEmpty)
    if (parts.isEmpty) super.getMessage() else parts.mkString("\n")
  }

  /** Builds an ERROR interpreter result from message, snippet and error, one per line. */
  def getInterpreterResult : InterpreterResult =
    new InterpreterResult(InterpreterResult.Code.ERROR, message + "\n" + snippet + "\n" + error)

  /** Same as `getInterpreterResult`, but reports `st` in place of the stored snippet. */
  def getInterpreterResult(st : String) : InterpreterResult =
    new InterpreterResult(InterpreterResult.Code.ERROR, message + "\n" + st + "\n" + error)
}

View file

@ -0,0 +1,491 @@
/* __ *\
Copyright (c) 2002-2016 EPFL
Copyright (c) 2011-2016 Lightbend, Inc. (formerly Typesafe, Inc.)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the EPFL nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package scala
import java.io.{BufferedReader, InputStream, InputStreamReader,
IOException, OutputStream, PrintStream, Reader}
import java.text.MessageFormat
import scala.util.DynamicVariable
/** Implements functionality for
* printing Scala values on the terminal as well as reading specific values.
* Also defines constants for marking up text on ANSI terminals.
*
* @author Matthias Zenger
* @version 1.0, 03/09/2003
*/
object Console {
/** Foreground color for ANSI black */
final val BLACK = "\033[30m"
/** Foreground color for ANSI red */
final val RED = "\033[31m"
/** Foreground color for ANSI green */
final val GREEN = "\033[32m"
/** Foreground color for ANSI yellow */
final val YELLOW = "\033[33m"
/** Foreground color for ANSI blue */
final val BLUE = "\033[34m"
/** Foreground color for ANSI magenta */
final val MAGENTA = "\033[35m"
/** Foreground color for ANSI cyan */
final val CYAN = "\033[36m"
/** Foreground color for ANSI white */
final val WHITE = "\033[37m"
/** Background color for ANSI black */
final val BLACK_B = "\033[40m"
/** Background color for ANSI red */
final val RED_B = "\033[41m"
/** Background color for ANSI green */
final val GREEN_B = "\033[42m"
/** Background color for ANSI yellow */
final val YELLOW_B = "\033[43m"
/** Background color for ANSI blue */
final val BLUE_B = "\033[44m"
/** Background color for ANSI magenta */
final val MAGENTA_B = "\033[45m"
/** Background color for ANSI cyan */
final val CYAN_B = "\033[46m"
/** Background color for ANSI white */
final val WHITE_B = "\033[47m"
/** Reset ANSI styles */
final val RESET = "\033[0m"
/** ANSI bold */
final val BOLD = "\033[1m"
/** ANSI underlines */
final val UNDERLINED = "\033[4m"
/** ANSI blink */
final val BLINK = "\033[5m"
/** ANSI reversed */
final val REVERSED = "\033[7m"
/** ANSI invisible */
final val INVISIBLE = "\033[8m"
// From Scala 2.10.5
// Start of rscala patch which only takes effect if RSCALA_TUNNELING environment variable is TRUE.
// In-memory buffers and print streams that capture output when tunnelling is on.
val baosOut = new java.io.ByteArrayOutputStream()
val baosErr = new java.io.ByteArrayOutputStream()
val psOut = new java.io.PrintStream(baosOut,true)
val psErr = new java.io.PrintStream(baosErr,true)
// The process-wide streams as they were before any redirection.
val originalOut = java.lang.System.out
val originalErr = java.lang.System.err
try {
  // Option-based lookup: an unset variable is the normal case and should not
  // cost a thrown-and-swallowed NoSuchElementException on every start-up.
  if ( sys.env.get("RSCALA_TUNNELING") == Some("TRUE") ) {
    java.lang.System.setOut(psOut)
    java.lang.System.setErr(psErr)
  }
} catch {
  // Best effort: e.g. a security manager may forbid reading the environment or
  // replacing the streams. Fatal errors are no longer swallowed.
  case scala.util.control.NonFatal(_) =>
}
// End of rscala patch.
private val outVar = new DynamicVariable[PrintStream](java.lang.System.out)
private val errVar = new DynamicVariable[PrintStream](java.lang.System.err)
private val inVar = new DynamicVariable[BufferedReader](
new BufferedReader(new InputStreamReader(java.lang.System.in)))
/** The default output, can be overridden by `setOut` */
def out = outVar.value
/** The default error, can be overridden by `setErr` */
def err = errVar.value
/** The default input, can be overridden by `setIn` */
def in = inVar.value
/** Sets the default output stream.
*
* @param out the new output stream.
*/
def setOut(out: PrintStream) { outVar.value = out }
/** Sets the default output stream for the duration
* of execution of one thunk.
*
* @example {{{
* withOut(Console.err) { println("This goes to default _error_") }
* }}}
*
* @param out the new output stream.
* @param thunk the code to execute with
* the new output stream active
* @return the results of `thunk`
* @see `withOut[T](out:OutputStream)(thunk: => T)`
*/
def withOut[T](out: PrintStream)(thunk: =>T): T =
outVar.withValue(out)(thunk)
/** Sets the default output stream.
*
* @param out the new output stream.
*/
def setOut(out: OutputStream): Unit =
setOut(new PrintStream(out))
/** Sets the default output stream for the duration
* of execution of one thunk.
*
* @param out the new output stream.
* @param thunk the code to execute with
* the new output stream active
* @return the results of `thunk`
* @see `withOut[T](out:PrintStream)(thunk: => T)`
*/
def withOut[T](out: OutputStream)(thunk: =>T): T =
withOut(new PrintStream(out))(thunk)
/** Sets the default error stream.
*
* @param err the new error stream.
*/
def setErr(err: PrintStream) { errVar.value = err }
/** Set the default error stream for the duration
* of execution of one thunk.
* @example {{{
* withErr(Console.out) { println("This goes to default _out_") }
* }}}
*
* @param err the new error stream.
* @param thunk the code to execute with
* the new error stream active
* @return the results of `thunk`
* @see `withErr[T](err:OutputStream)(thunk: =>T)`
*/
def withErr[T](err: PrintStream)(thunk: =>T): T =
errVar.withValue(err)(thunk)
/** Sets the default error stream.
*
* @param err the new error stream.
*/
def setErr(err: OutputStream): Unit =
setErr(new PrintStream(err))
/** Sets the default error stream for the duration
* of execution of one thunk.
*
* @param err the new error stream.
* @param thunk the code to execute with
* the new error stream active
* @return the results of `thunk`
* @see `withErr[T](err:PrintStream)(thunk: =>T)`
*/
def withErr[T](err: OutputStream)(thunk: =>T): T =
withErr(new PrintStream(err))(thunk)
/** Sets the default input stream.
*
* @param reader specifies the new input stream.
*/
def setIn(reader: Reader) {
inVar.value = new BufferedReader(reader)
}
/** Sets the default input stream for the duration
* of execution of one thunk.
*
* @example {{{
* val someFile:Reader = openFile("file.txt")
* withIn(someFile) {
* // Reads a line from file.txt instead of default input
* println(readLine)
* }
* }}}
*
* @param thunk the code to execute with
* the new input stream active
*
* @return the results of `thunk`
* @see `withIn[T](in:InputStream)(thunk: =>T)`
*/
def withIn[T](reader: Reader)(thunk: =>T): T =
inVar.withValue(new BufferedReader(reader))(thunk)
/** Sets the default input stream.
*
* @param in the new input stream.
*/
def setIn(in: InputStream) {
setIn(new InputStreamReader(in))
}
/** Sets the default input stream for the duration
* of execution of one thunk.
*
* @param in the new input stream.
* @param thunk the code to execute with
* the new input stream active
* @return the results of `thunk`
* @see `withIn[T](reader:Reader)(thunk: =>T)`
*/
def withIn[T](in: InputStream)(thunk: =>T): T =
withIn(new InputStreamReader(in))(thunk)
/** Prints an object to `out` using its `toString` method.
*
* @param obj the object to print; may be null.
*/
def print(obj: Any) {
out.print(if (null == obj) "null" else obj.toString())
}
/** Flushes the output stream. This function is required when partial
* output (i.e. output not terminated by a newline character) has
* to be made visible on the terminal.
*/
def flush() { out.flush() }
/** Prints a newline character on the default output.
*/
def println() { out.println() }
/** Prints out an object to the default output, followed by a newline character.
*
* @param x the object to print.
*/
def println(x: Any) { out.println(x) }
/** Prints its arguments as a formatted string to the default output,
* based on a string pattern (in a fashion similar to printf in C).
*
* The interpretation of the formatting patterns is described in
* <a href="" target="contentFrame" class="java/util/Formatter">
* `java.util.Formatter`</a>.
*
* @param text the pattern for formatting the arguments.
* @param args the arguments used to instantiating the pattern.
* @throws java.lang.IllegalArgumentException if there was a problem with the format string or arguments
*/
def printf(text: String, args: Any*) { out.print(text format (args : _*)) }
/** Read a full line from the default input. Returns `null` if the end of the
* input stream has been reached.
*
* @return the string read from the terminal or null if the end of stream was reached.
*/
def readLine(): String = in.readLine()
/** Print formatted text to the default output and read a full line from the default input.
* Returns `null` if the end of the input stream has been reached.
*
* @param text the format of the text to print out, as in `printf`.
* @param args the parameters used to instantiate the format, as in `printf`.
* @return the string read from the default input
*/
def readLine(text: String, args: Any*): String = {
printf(text, args: _*)
readLine()
}
/** Reads one line from the default input and interprets it as a boolean.
 *
 * The comparison ignores case: "true", "t", "yes" and "y" yield `true`;
 * any other line yields `false`.
 *
 * @return the boolean value read, or false if it couldn't be converted to a boolean
 * @throws java.io.EOFException if the end of the input stream has been reached.
 */
def readBoolean(): Boolean = {
  val line = readLine()
  if (line == null)
    throw new java.io.EOFException("Console has reached end of input")
  line.toLowerCase() match {
    case "true" | "t" | "yes" | "y" => true
    case _ => false
  }
}
/** Reads a byte value from an entire line of the default input.
*
* @return the Byte that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Byte
*/
def readByte(): Byte = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toByte
}
/** Reads a short value from an entire line of the default input.
*
* @return the short that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Short
*/
def readShort(): Short = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toShort
}
/** Reads a char value from an entire line of the default input.
*
* @return the Char that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.StringIndexOutOfBoundsException if the line read from default input was empty
*/
def readChar(): Char = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s charAt 0
}
/** Reads an int value from an entire line of the default input.
*
* @return the Int that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to an Int
*/
def readInt(): Int = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toInt
}
/** Reads a long value from an entire line of the default input.
*
* @return the Long that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Long
*/
def readLong(): Long = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toLong
}
/** Reads a float value from an entire line of the default input.
* @return the Float that was read.
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Float
*
*/
def readFloat(): Float = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toFloat
}
/** Reads a double value from an entire line of the default input.
*
* @return the Double that was read.
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Double
*/
def readDouble(): Double = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
s.toDouble
}
/** Reads in some structured input (from the default input), specified by
* a format specifier. See class `java.text.MessageFormat` for details of
* the format specification.
*
* @param format the format of the input.
* @return a list of all extracted values.
* @throws java.io.EOFException if the end of the input stream has been
* reached.
*/
def readf(format: String): List[Any] = {
val s = readLine()
if (s == null)
throw new java.io.EOFException("Console has reached end of input")
else
textComponents(new MessageFormat(format).parse(s))
}
/** Reads in some structured input (from the default input), specified by
* a format specifier, returning only the first value extracted, according
* to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return The first value that was extracted from the input
*/
def readf1(format: String): Any = readf(format).head
/** Reads in some structured input (from the default input), specified
* by a format specifier, returning only the first two values extracted,
* according to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return A [[scala.Tuple2]] containing the first two values extracted
*/
def readf2(format: String): (Any, Any) = {
val res = readf(format)
(res.head, res.tail.head)
}
/** Reads in some structured input (from the default input), specified
* by a format specifier, returning only the first three values extracted,
* according to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return A [[scala.Tuple3]] containing the first three values extracted
*/
def readf3(format: String): (Any, Any, Any) = {
val res = readf(format)
(res.head, res.tail.head, res.tail.tail.head)
}
/** Converts the objects produced by `MessageFormat.parse` into a list,
 * mapping each boxed Java primitive to the corresponding Scala value and
 * passing every other element through unchanged. Element order is kept.
 */
private def textComponents(a: Array[AnyRef]): List[Any] =
  a.foldRight(List.empty[Any]) { (component, converted) =>
    val value: Any = component match {
      case b: java.lang.Boolean   => b.booleanValue()
      case b: java.lang.Byte      => b.byteValue()
      case s: java.lang.Short     => s.shortValue()
      case c: java.lang.Character => c.charValue()
      case i: java.lang.Integer   => i.intValue()
      case l: java.lang.Long      => l.longValue()
      case f: java.lang.Float     => f.floatValue()
      case d: java.lang.Double    => d.doubleValue()
      case other                  => other
    }
    value :: converted
  }
}

View file

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.api.r
import org.scalatest.FlatSpec
import org.scalatest.Matchers._
/**
 * Smoke tests for [[RBackendHelper]]: construction, `init()`, `start()` and `close`.
 *
 * Both helper instances are created when the suite is constructed and are
 * shared between tests: `backend2` is initialised and started in one test and
 * closed in the following one, so the tests are meant to run in the order
 * they are declared.
 */
class RBackendHelperTest extends FlatSpec {
// Used by the construction and init tests.
val backend : RBackendHelper = RBackendHelper()
// Used by the start and close tests.
val backend2 : RBackendHelper = RBackendHelper()
"RBackendHelper" should "create a SparkR backend" in {
// Touching the field is enough; construction already happened above.
val rbackend = backend
assert(true) // only looking for exceptions here
}
it should "initialize properly, returning a port > 0" in {
val port = backend.init()
assert(port > 0)
}
it should "start a thread" in {
// Local alias shadows the suite-level `backend` with the second instance.
val backend = backend2
backend.init()
val thread = backend.start()
thread shouldBe a [Thread]
}
it should "close without error" in {
backend2.close
assert(true) // only looking for exceptions
}
}

View file

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.io.{File, PrintWriter}
import java.nio.file.{Files, Paths}
import org.apache.zeppelin.rinterpreter.rscala.RClient
import org.apache.zeppelin.rinterpreter.rscala.RClient._
import org.scalatest.Matchers._
import org.scalatest._
/**
 * Walks through the R start-up handshake one step per test: build the process,
 * wire its I/O, create the ports file, launch R, send the bootstrap snippet,
 * open the sockets, compare versions, and finally create and close an RContext.
 *
 * The steps share mutable fields, so each test depends on the ones before it.
 */
class RContextInitTest extends FlatSpec {
  import scala.sys.process._

  // Set on the process I/O thread once R's stdin is available; volatile so the
  // polling test below observes the write.
  @volatile var cmd: PrintWriter = null
  val command = RClient.defaultRCmd +: RClient.defaultArguments
  var processCmd : ProcessBuilder = null

  "Process command" should "create a process builder" in {
    processCmd = Process(command)
    processCmd shouldBe a[ProcessBuilder]
  }
  it should "be persistent for testing purposes" in {
    processCmd shouldBe a [ProcessBuilder]
  }

  var processIO : ProcessIO = null

  "Creating Process IO" should "not throw an exception" in {
    processIO = new ProcessIO(
      o => {
        cmd = new PrintWriter(o)
      },
      reader("STDOUT DEBUG: "),
      reader("STDERR DEBUG: "),
      true
    )
    processIO shouldBe a [ProcessIO]
  }

  var portsFile : File = null

  "A temp file " should "be created" in {
    portsFile = File.createTempFile("rscala-", "")
    assertResult(true) {portsFile.exists()}
  }

  var processInstance : Process = null

  "Process instance" should "launch" in {
    processInstance = processCmd.run(processIO)
    assert(true)
  }

  var libpath : String = null

  "RZeppelin R Package" should "be found" in {
    libpath = if (Files.exists(Paths.get("R/lib"))) "R/lib"
    else if (Files.exists(Paths.get("../R/lib"))) "../R/lib"
    else throw new RuntimeException("Could not find rzeppelin - it must be in either R/lib or ../R/lib")
    assert(Files.exists(Paths.get(libpath + "/rzeppelin")))
  }

  var snippet : String = null

  "Creating the snippit" should "be impossible to fail" in {
    // Literal replace, not replaceAll: File.separator is "\" on Windows, which is
    // not a valid regular expression and made replaceAll throw.
    val portsPath = portsFile.getAbsolutePath.replace(File.separator, "/")
    snippet = s"""
library(lib.loc="$libpath", rzeppelin)
rzeppelin:::rServe(rzeppelin:::newSockets('${portsPath}',debug=FALSE,timeout=60))
q(save='no')"""
    assert(true)
  }

  "Cmd" should "stop being null" in {
    // Bounded wait: if R never started, fail this test and do not hang the build.
    val deadline = System.currentTimeMillis() + 60000L
    while (cmd == null && System.currentTimeMillis() < deadline) Thread.sleep(100)
    assert(cmd != null)
  }
  it should "accept the snippet" in {
    cmd.println(snippet)
    cmd.flush()
    assert(true)
  }

  var sockets : ScalaSockets = null

  "Scala Sockets" should "be created and signal OK" in {
    sockets = new ScalaSockets(portsFile.getAbsolutePath)
    sockets.out.writeInt(RClient.Protocol.OK)
    sockets.out.flush()
    assert(true)
  }

  "The R and Scala versions" should "match" in {
    assert(RClient.readString(sockets.in) == org.apache.zeppelin.rinterpreter.rscala.Version)
  }

  var rcon : RContext = null

  "Creating an RContext" should "not fail" in {
    rcon = new RContext(sockets, false)
  }

  "An open RContext" should "destroy safely" in {
    rcon.close()
    assertResult(false) {
      rcon.isOpen
    }
  }
}

View file

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util.Properties
import org.apache.zeppelin.RTest
import org.apache.zeppelin.rinterpreter.rscala.RException
import org.apache.zeppelin.spark.SparkInterpreter
import org.scalatest.Matchers._
import org.scalatest._
/**
 * Exercises an [[RContext]] created without Spark: opening it, probing for R
 * packages, evaluating simple expressions and closing it again.
 *
 * NOTE(review): every test body below is written as `{ () => ... }`. If `in`
 * takes a by-name block here (as plain FlatSpec appears to), this only builds a
 * function value and the assertions inside never run. Please confirm, and drop
 * the `() =>` once the suite is known to pass with R installed.
 */
class RContextTest extends FlatSpec {
  RContext.resetRcon()
  val rcon = RContext(new Properties(), "test")

  "The RContext Singleton" should "create an RContext without Spark" in { () =>
    rcon shouldBe a[RContext]
  }

  "The RContext" should "be openable without spark" in { () =>
    rcon.open(None)
    assert(rcon.isOpen)
  }
  it should "be able to confirm that stats is available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("stats")
    }
  }
  it should "be able to confirm that a bogus package is not available" taggedAs(RTest) in { () =>
    assertResult(false) {
      rcon.testRPackage("thisisagarbagepackagename")
    }
  }
  it should "be able to add 2 + 2" taggedAs(RTest) in { () =>
    assertResult(4) {
      rcon.evalI0("2 + 2")
    }
  }
  it should "be able to return a vector" taggedAs(RTest) in { () =>
    assertResult(10) {
      rcon.evalI1("1:10").length
    }
  }
  it should "be able to return a string" taggedAs(RTest) in { () =>
    assertResult("hello world") {
      rcon.evalS0("'hello world'")
    }
  }
  it should "be able to return a vector of strings" taggedAs(RTest) in { () =>
    assertResult(26) {
      rcon.evalS1("LETTERS").length
    }
  }
  it should "throw an RException if told to evaluate garbage code" taggedAs(RTest) in { () =>
    intercept[RException] {
      rcon.eval("funkyfunction()")
    }
  }
  // it should "Throw an exception if we try to initialize SparkR without a SQLContext" in {() =>
  //
  // intercept[RuntimeException] {
  // rcon.initializeSparkRTest()
  // }
  // }
  it should "have rzeppelin available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("rzeppelin")
    }
  }
  it should "have evaluate available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("evaluate")
    }
  }
  it should "have repr available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("repr")
    }
  }
  it should "also close politely" taggedAs(RTest) in { () =>
    rcon.close()
    // Was assertResult(2): isOpen is used as a Boolean elsewhere in this suite,
    // so comparing it with 2 could never succeed. After close() it must be false.
    assertResult(false) {rcon.isOpen}
  }
}

View file

@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util.Properties
import org.apache.zeppelin.RTest
import org.apache.zeppelin.interpreter.{Interpreter, InterpreterContext, InterpreterResult, InterpreterGroup}
import org.scalatest.Matchers._
import org.scalatest._
import java.util.ArrayList
/**
 * Tests for [[RInterpreter]] using a minimal concrete subclass: group
 * assignment, opening, basic evaluation, completion, progress, properties, and
 * RContext sharing between interpreters of the same or different groups.
 *
 * The interpreters are shared mutable fixtures, so the tests depend on running
 * in declaration order.
 */
class RInterpreterTest extends FlatSpec {
  RContext.resetRcon()

  /** Minimal interpreter: evaluates `s` as a string vector and joins the lines. */
  class RIntTester extends RInterpreter(new Properties(), startSpark = false) {
    def interpret(s: String, interpreterContext: InterpreterContext): InterpreterResult = {
      val result : Array[String] = rContext.evalS1(s)
      new InterpreterResult(InterpreterResult.Code.SUCCESS, result.mkString("\n"))
    }
  }

  val rint = new RIntTester()

  "An RInterpreter" should "exist" in {
    assert(rint != null)
  }
  it should "not complain when we assign it a group" in {
    val grp : InterpreterGroup = new InterpreterGroup("test")
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint)
    grp.put(rint.getClassName(), lst)
    rint.setInterpreterGroup(grp)
  }
  it should "create a fresh rContext when we ask for one" in {
    assert(! rint.getrContext.isOpen)
  }
  it should "open" taggedAs(RTest) in {
    rint.open()
    assert(rint.getrContext.isOpen)
  }
  it should "have rzeppelin available" taggedAs(RTest) in {
    assume(rint.getrContext.isOpen)
    assert(rint.getrContext.testRPackage("rzeppelin"))
  }
  it should "have an rContext able to do simple addition" taggedAs(RTest) in {
    assume(rint.getrContext.isOpen)
    assert(rint.getrContext.evalI0("2 + 2") == 4)
  }
  it should "have a functional completion function" taggedAs(RTest) in {
    val result = rint.hiddenCompletion("hi", 3)
    result should (contain ("hist"))
  }
  it should "have a working progress meter" in {
    rint.getrContext.setProgress(50)
    assertResult(50) {
      rint.getrContext.getProgress
    }
  }
  it should "have persistent properties" in {
    val props = new Properties()
    props.setProperty("hello", "world")
    rint.setProperty(props)
    assertResult("world") {
      rint.getProperty("hello")
    }
  }

  var rint2 : RIntTester = null

  it should "Share RContexts if they share the same InterpreterGroup" in {
    rint2 = new RIntTester()
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint2)
    val grp = rint.getInterpreterGroup()
    grp.put(rint2.getClassName(), lst)
    rint2.setInterpreterGroup(grp)
    rint2.open()
    rint.getrContext should be theSameInstanceAs rint2.getrContext
  }

  "Opening the second RInterpreter" should "not have closed the first RContext" in {
    assert(rint.getrContext.isOpen)
  }

  var rint3 : RIntTester = null

  "An RInterpreter in a different InterpreterGroup" should "have a different R Context" in {
    rint3 = new RIntTester()
    val grp : InterpreterGroup = new InterpreterGroup("othertest")
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint3)
    grp.put(rint3.getClassName(), lst)
    rint3.setInterpreterGroup(grp)
    rint3.open()
    rint3.getrContext shouldNot be theSameInstanceAs rint2.getrContext
  }

  "The first RInterpreter" should "close politely" in {
    rint.close()
    assert(!rint.getrContext.isOpen)
  }
  "and so" should "the other one" in {
    rint2.close()
    assert(!rint2.getrContext.isOpen)
  }
  "and " should "the third one" in {
    rint3.close()
    // Was checking rint2 (copy-paste slip), which the previous test had already
    // closed; the context under test here is rint3's.
    assert(!rint3.getrContext.isOpen)
  }
  // fixture.sparky.close()
}

View file

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util
import java.util.Properties
import org.apache.zeppelin.interpreter.{Interpreter, InterpreterGroup, InterpreterResult}
import org.scalatest.FlatSpec
import java.util.List
import org.scalatest.Matchers._
/**
 * Exercises the RRepl and KnitR wrapper interpreters registered in one shared InterpreterGroup.
 *
 * The cases operate on the same `repl`, `knitr` and `group` values and open/close the R context
 * as they go, so they presumably rely on running in declaration order.
 */
class WrapperTest extends FlatSpec {
// start from a clean slate before any interpreter is constructed
RContext.resetRcon()
val repl: RRepl = new RRepl(new Properties(), false)
val group : InterpreterGroup = new InterpreterGroup()
// register the REPL wrapper in the group under its class name
var lst = new util.LinkedList[Interpreter]()
lst.add(repl)
group.put(repl.getClassName(), lst)
repl.setInterpreterGroup(group)
"The R REPL" should "exist and be of the right class" in {
repl shouldBe a[RRepl]
}
it should "Have a RRepl Interpreter inside" in {
repl.getInnerInterpreter shouldBe a[RReplInterpreter]
}
// inner interpreter of the REPL wrapper; its rContext is compared against KnitR's below
val repi = repl.getInnerInterpreter.asInstanceOf[RReplInterpreter]
it should "have a fresh rContext" in {
assert(!repi.getrContext.isOpen)
}
// second wrapper, registered in the same group as the REPL
val knitr: KnitR = new KnitR(new Properties(), false)
lst = new util.LinkedList[Interpreter]()
lst.add(knitr)
group.put(knitr.getClassName(), lst)
knitr.setInterpreterGroup(group)
"The KnitR wrapper" should "exist and be of the right class" in {
knitr shouldBe a[KnitR]
}
it should "have a KnitRInterpreter inside" in {
knitr.getInnerInterpreter shouldBe a [KnitRInterpreter]
}
// both wrappers sit in the same group, so they must resolve to one RContext instance
it should "share the RContext" in {
knitr.getInnerInterpreter.asInstanceOf[KnitRInterpreter].getrContext should be theSameInstanceAs repi.getrContext
}
it should "open without error" in {
knitr.open()
assert(knitr.getInnerInterpreter.asInstanceOf[KnitRInterpreter].getrContext.isOpen)
}
// feeds a one-chunk knitr document and expects a successful HTML result
it should "produce HTML in response to a simple query" in {
val result = knitr.interpret(
"""
|```{r}
|2 + 2
|```
""".stripMargin, null)
// surface the interpreter message when one of the property checks fails
withClue(result.message()) {
result should have (
'code (InterpreterResult.Code.SUCCESS),
'type (InterpreterResult.Type.HTML)
)
}
}
it should "close properly" in {
repi.getrContext.close()
assertResult(false) {
repi.getrContext.isOpen
}
}
// defensive: closes the context reached through the KnitR wrapper as well
"Just in case there are two rContexts, the other one" should "close properly also" in {
val rcon = knitr.getInnerInterpreter.asInstanceOf[KnitRInterpreter].getrContext
rcon.close()
assertResult(false) {
rcon.isOpen
}
}
}

View file

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin
import org.scalatest.Tag
// ScalaTest tag named "RTest". NOTE(review): presumably marks tests that need a working R installation — confirm.
object RTest extends Tag("RTest")
// ScalaTest tag named "SparkTest". NOTE(review): presumably marks tests that need Spark — confirm.
object SparkTest extends Tag("SparkTest")

View file

@ -209,13 +209,34 @@ public class ScaldingInterpreter extends Interpreter {
out.reset();
Code r = null;
String incomplete = "";
boolean inComment = false;
for (int l = 0; l < linesToRun.length; l++) {
String s = linesToRun[l];
// check if next line starts with "." (but not ".." or "./") it is treated as an invocation
if (l + 1 < linesToRun.length) {
String nextLine = linesToRun[l + 1].trim();
if (nextLine.startsWith(".") && !nextLine.startsWith("..") && !nextLine.startsWith("./")) {
boolean continuation = false;
if (nextLine.isEmpty()
|| nextLine.startsWith("//") // skip empty line or comment
|| nextLine.startsWith("}")
|| nextLine.startsWith("object")) { // include "} object" for Scala companion object
continuation = true;
} else if (!inComment && nextLine.startsWith("/*")) {
inComment = true;
continuation = true;
} else if (inComment && nextLine.lastIndexOf("*/") >= 0) {
inComment = false;
continuation = true;
} else if (nextLine.length() > 1
&& nextLine.charAt(0) == '.'
&& nextLine.charAt(1) != '.' // ".."
&& nextLine.charAt(1) != '/') { // "./"
continuation = true;
} else if (inComment) {
continuation = true;
}
if (continuation) {
incomplete += s + "\n";
continue;
}

View file

@ -88,6 +88,17 @@ public class ScaldingInterpreterTest {
}
}
@Test
public void testNextLineComments() {
  // A block comment sits between the expression and the ".toInt" invocation on the next line;
  // the interpreter must still treat the whole snippet as one statement.
  String code = "\"123\"\n/*comment here\n*/.toInt";
  InterpreterResult result = repl.interpret(code, context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
@Test
public void testNextLineCompanionObject() {
  // A class followed by a line comment, a blank line and then its companion object.
  InterpreterResult result = repl.interpret(
      "class Counter {\nvar value: Long = 0\n}\n // comment\n\n object Counter {\n def apply(x: Long) = new Counter()\n}",
      context);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
}
@Test
public void testBasicIntp() {
assertEquals(InterpreterResult.Code.SUCCESS,

View file

@ -23,3 +23,4 @@
- nodejs
- maven
- python-addons
- r

View file

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default variables for the r role
---
# Base URL of the CRAN mirror; also reused below as the package repository.
# NOTE(review): the value ends with "/" and the repository url below appends "/bin/...",
# so the rendered apt URL contains "//" — confirm the mirror tolerates it.
r_cran_mirror: http://cran.rstudio.com/
# apt source entries; each item supplies the "type" and "url" fields of one repository line
r_repository:
- type: deb
url: "{{ r_cran_mirror }}/bin/linux/ubuntu {{ ansible_distribution_release }}/"
# repository URL interpolated into the R install.packages(repos=...) calls
r_packages_repos: "{{ r_cran_mirror }}"

View file

@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Install R binaries and CRAN packages
---
# The Ubuntu archives on CRAN are signed with a key with ID E084DAB9. Add this key to the system.
- name: repository | add public key
  apt_key:
    id: E084DAB9
    keyserver: keyserver.ubuntu.com
    state: present

# In order to get the latest version of R, add the CRAN repository to the list of sources.
# The loop variable is written as a full Jinja2 expression: bare variable names in
# with_items are deprecated and no longer resolved by newer Ansible releases.
- name: repository | add cran-r
  apt_repository:
    repo: "{{item.type}} {{item.url}}"
    state: present
    update_cache: true
  with_items: "{{ r_repository }}"

- name: Install R. This may take a while.
  apt: pkg=r-base state=present

- name: openssl and libcurl required for R devtools package
  apt: pkg={{item}} state=present
  with_items:
    - libssl-dev
    - libcurl4-openssl-dev

# knitr will also pull in the `evaluate` R package as a dependency
# Each Rscript call installs the package only when it is not already in installed.packages().
- name: Install R packages required for the R interpreter. This may take a while.
  shell: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('{{item}}' %in% installed.packages()[,'Package'])) install.packages(pkgs=c('{{item}}'), repos=c('{{r_packages_repos}}'))"
  with_items:
    - knitr
    - devtools

# The repository is given in the "owner/repo" form; passing the owner as a separate
# second argument is the deprecated install_github(repo, username) signature.
- name: Install rCharts (requires devtools first).
  shell: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('rCharts' %in% installed.packages()[,'Package'])) devtools::install_github('ramnathv/rCharts')"

- name: Install R repr package recommended for the R interpreter display system (requires devtools first).
  shell: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('repr' %in% installed.packages()[,'Package'])) devtools::install_github('IRkernel/repr')"

- name: Install R packages recommended for the R interpreter.
  shell: /usr/bin/Rscript --slave --no-save --no-restore-history -e "if (! ('{{item}}' %in% installed.packages()[,'Package'])) install.packages(pkgs=c('{{item}}'), repos=c('{{r_packages_repos}}'))"
  with_items:
    - ggplot2
    - googleVis
    - mplot
    - htmltools
    - base64enc
    - data.table

View file

@ -32,9 +32,9 @@ echo
echo 'cd /vagrant/incubator-zeppelin'
echo 'mvn clean package -DskipTests'
echo
echo '# or for a specific build'
echo '# or for a specific Spark/Hadoop build with additional options such as python and R support'
echo
echo 'mvn clean package -Pspark-1.5 -Ppyspark -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests'
echo 'mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests'
echo './bin/zeppelin-daemon.sh start'
echo
echo 'On your host machine browse to http://localhost:8080/'

View file

@ -19,17 +19,14 @@ package org.apache.zeppelin.shell;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.*;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.Executor;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
@ -50,6 +47,10 @@ public class ShellInterpreter extends Interpreter {
public static final String SHELL_COMMAND_TIMEOUT = "shell.command.timeout.millisecs";
public static final String DEFAULT_COMMAND_TIMEOUT = "600000";
int commandTimeOut;
private static final boolean isWindows = System
.getProperty("os.name")
.startsWith("Windows");
final String shell = isWindows ? "cmd /c" : "bash -c";
static {
Interpreter.register(
@ -83,11 +84,15 @@ public class ShellInterpreter extends Interpreter {
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
logger.debug("Run shell command '" + cmd + "'");
CommandLine cmdLine = CommandLine.parse("bash");
cmdLine.addArgument("-c", false);
CommandLine cmdLine = CommandLine.parse(shell);
// the Windows CMD shell doesn't handle multiline statements,
// they need to be delimited by '&&' instead
if (isWindows) {
String[] lines = StringUtils.split(cmd, "\n");
cmd = StringUtils.join(lines, " && ");
}
cmdLine.addArgument(cmd, false);
DefaultExecutor executor = new DefaultExecutor();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
executor.setStreamHandler(new PumpStreamHandler(contextInterpreter.out, errorStream));
executor.setWatchdog(new ExecuteWatchdog(commandTimeOut));

View file

@ -50,7 +50,12 @@
<akka.group>org.spark-project.akka</akka.group>
<akka.version>2.3.4-spark</akka.version>
<spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
<spark.archive>spark-${spark.version}</spark.archive>
<spark.download.url>
http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
</spark.download.url>
<spark.bin.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}-bin-without-hadoop.tgz</spark.bin.download.url>
<spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache>
<py4j.version>0.8.2.1</py4j.version>
</properties>
@ -505,7 +510,7 @@
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.0</spark.version>
<spark.version>1.6.1</spark.version>
<py4j.version>0.9</py4j.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
@ -714,6 +719,45 @@
</repositories>
</profile>
<profile>
<id>mapr51</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.7.0-mapr-1602</hadoop.version>
<yarn.version>2.7.0-mapr-1602</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots><enabled>false</enabled></snapshots>
<releases><enabled>true</enabled></releases>
</repository>
</repositories>
</profile>
<profile>
<id>yarn</id>
<dependencies>
@ -748,8 +792,87 @@
</goals>
<configuration>
<url>${spark.download.url}</url>
<outputDirectory>${spark.dist.cache}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>${project.build.directory}/spark-dist</directory>
</fileset>
<fileset>
<directory>${basedir}/../python/build</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>unzip-pyspark-files</id>
<phase>validate</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<untar src="${spark.dist.cache}/${spark.archive}.tgz"
dest="${project.build.directory}/spark-dist"
compression="gzip"/>
</target>
</configuration>
</execution>
<execution>
<id>zip-pyspark-files</id>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
basedir="${project.build.directory}/spark-dist/${spark.archive}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>sparkr</id>
<build>
<plugins>
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<id>download-sparkr-files</id>
<phase>validate</phase>
<goals>
<goal>wget</goal>
</goals>
<configuration>
<url>${spark.bin.download.url}</url>
<unpack>true</unpack>
<outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
<outputDirectory>${project.build.directory}/spark-bin-dist</outputDirectory>
</configuration>
</execution>
</executions>
@ -759,34 +882,28 @@
<configuration>
<filesets>
<fileset>
<directory>${basedir}/../python/build</directory>
</fileset>
<fileset>
<directory>${project.build.directory}/spark-dist</directory>
<directory>${project.build.directory}/spark-bin-dist</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
<artifactId>maven-resources-plugin</artifactId>
<version>2.7</version>
<executions>
<execution>
<id>download-and-zip-pyspark-files</id>
<id>copy-sparkr-files</id>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
<goal>copy-resources</goal>
</goals>
<configuration>
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
<outputDirectory>${project.build.directory}/../../interpreter/spark/R/lib</outputDirectory>
<resources>
<resource>
<directory>${project.build.directory}/spark-bin-dist/spark-${spark.version}-bin-without-hadoop/R/lib</directory>
</resource>
</resources>
</configuration>
</execution>
</executions>
@ -794,6 +911,7 @@
</plugins>
</build>
</profile>
</profiles>
<build>

View file

@ -35,12 +35,14 @@
<url>http://zeppelin.incubator.apache.org</url>
<properties>
<jsoup.version>1.8.2</jsoup.version>
<mockito.version>1.10.19</mockito.version>
<powermock.version>1.6.4</powermock.version>
<spark.version>1.4.1</spark.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
</properties>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
@ -231,6 +233,11 @@
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!--TEST-->
<dependency>
@ -267,6 +274,25 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
@ -286,7 +312,7 @@
<exclude>**/derby.log</exclude>
<exclude>**/metastore_db/</exclude>
<exclude>**/README.md</exclude>
<exclude>dependency-reduced-pom.xml</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
</excludes>
</configuration>
</plugin>
@ -404,4 +430,56 @@
</plugins>
</build>
<profiles>
<!-- to deactivate 'exclude-sparkr' automatically when 'sparkr' is activated -->
<profile>
<id>sparkr</id>
</profile>
<profile>
<id>exclude-sparkr</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreter.java</exclude>
</excludes>
<testExcludes>
<testExclude>**/SparkRInterpreterTest.java</testExclude>
<testExclude>**/ZeppelinRTest.java</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ZeppelinR.scala</exclude>
<exclude>**/SparkRBackend.scala</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View file

@ -0,0 +1,226 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import static org.apache.zeppelin.spark.ZeppelinRDisplay.render;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.SparkRBackend;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
 * R and SparkR interpreter with visualization support.
 *
 * <p>A paragraph may start with an inline JSON object on its first line, e.g.
 * {@code {"imageWidth": "50%"}}, to override the configured image width for that paragraph.
 * With {@code zeppelin.R.knitr} enabled the code is rendered through knitr and returned as a
 * display result; otherwise the R output is streamed to the paragraph output.
 */
public class SparkRInterpreter extends Interpreter {
  private static final Logger logger = LoggerFactory.getLogger(SparkRInterpreter.class);

  /** Shared parser for the optional first-line JSON configuration. */
  private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

  /**
   * knitr chunk options read from {@code zeppelin.R.render.options} in {@link #open()}.
   * Kept per instance so interpreters configured differently do not overwrite each other.
   */
  private String renderOptions;
  private ZeppelinR zeppelinR;

  static {
    Interpreter.register(
        "r",
        "spark",
        SparkRInterpreter.class.getName(),
        new InterpreterPropertyBuilder()
            .add("zeppelin.R.cmd",
                SparkInterpreter.getSystemDefault("ZEPPELIN_R_CMD", "zeppelin.R.cmd", "R"),
                "R repl path")
            .add("zeppelin.R.knitr",
                SparkInterpreter.getSystemDefault("ZEPPELIN_R_KNITR", "zeppelin.R.knitr", "true"),
                "whether use knitr or not")
            .add("zeppelin.R.image.width",
                SparkInterpreter.getSystemDefault("ZEPPELIN_R_IMAGE_WIDTH",
                    "zeppelin.R.image.width", "100%"),
                "")
            .add("zeppelin.R.render.options",
                SparkInterpreter.getSystemDefault("ZEPPELIN_R_RENDER_OPTIONS",
                    "zeppelin.R.render.options",
                    "out.format = 'html', comment = NA, "
                        + "echo = FALSE, results = 'asis', message = F, warning = F"),
                "")
            .build());
  }

  public SparkRInterpreter(Properties property) {
    super(property);
  }

  /**
   * Starts the SparkR backend if necessary, publishes the Spark contexts of the sibling
   * SparkInterpreter to {@code ZeppelinRContext} and launches the R process.
   *
   * @throws InterpreterException if the R process cannot be started
   */
  @Override
  public void open() {
    String rCmdPath = getProperty("zeppelin.R.cmd");
    String sparkRLibPath;

    if (System.getenv("SPARK_HOME") != null) {
      sparkRLibPath = System.getenv("SPARK_HOME") + "/R/lib";
    } else {
      sparkRLibPath = System.getenv("ZEPPELIN_HOME") + "/interpreter/spark/R/lib";
      // workaround to make sparkr work without SPARK_HOME
      System.setProperty("spark.test.home", System.getenv("ZEPPELIN_HOME") + "/interpreter/spark");
    }

    // start the backend only once, whichever interpreter instance gets here first
    synchronized (SparkRBackend.backend()) {
      if (!SparkRBackend.isStarted()) {
        SparkRBackend.init();
        SparkRBackend.start();
      }
    }

    int port = SparkRBackend.port();

    SparkInterpreter sparkInterpreter = getSparkInterpreter();
    ZeppelinRContext.setSparkContext(sparkInterpreter.getSparkContext());
    ZeppelinRContext.setSqlContext(sparkInterpreter.getSQLContext());
    ZeppelinRContext.setZepplinContext(sparkInterpreter.getZeppelinContext());

    zeppelinR = new ZeppelinR(rCmdPath, sparkRLibPath, port);
    try {
      zeppelinR.open();
    } catch (IOException e) {
      logger.error("Exception while opening SparkRInterpreter", e);
      throw new InterpreterException(e);
    }

    if (useKnitr()) {
      zeppelinR.eval("library('knitr')");
    }
    renderOptions = getProperty("zeppelin.R.render.options");
  }

  /**
   * Evaluates a paragraph of R code.
   *
   * @param lines paragraph text, optionally starting with an inline JSON configuration
   * @param interpreterContext context whose output stream is used when knitr is disabled
   * @return the rendered display result (knitr), an empty success result (streaming), or an
   *         error result carrying the exception message
   */
  @Override
  public InterpreterResult interpret(String lines, InterpreterContext interpreterContext) {
    String imageWidth = getProperty("zeppelin.R.image.width");

    String[] sl = lines.split("\n");
    // split() returns an empty array for a paragraph made only of newlines
    if (sl.length > 0) {
      String firstLine = sl[0];
      int start = firstLine.indexOf("{");
      int end = firstLine.indexOf("}");
      // require "{" before "}" so substring() cannot be handed a reversed range
      if (start >= 0 && end > start) {
        String jsonConfig = firstLine.substring(start, end + 1);
        try {
          JsonNode rootNode = JSON_MAPPER.readTree(jsonConfig);
          JsonNode imageWidthNode = rootNode.path("imageWidth");
          // a missing or non-string value keeps the configured default instead of nulling it
          if (imageWidthNode.isTextual()) {
            imageWidth = imageWidthNode.textValue();
          }
          // Remove the span only when it really was a (non-empty) JSON object. R code such as
          // "function(x) { x }" or "function() {}" on the first line must reach R untouched.
          // sl[0] is a prefix of lines, so the indices are valid for lines as well; cutting by
          // index also avoids deleting identical text further down the paragraph.
          if (rootNode.size() > 0) {
            lines = lines.substring(0, start) + lines.substring(end + 1);
          }
        } catch (Exception e) {
          logger.warn("Can not parse json config: " + jsonConfig, e);
        }
      }
    }

    try {
      // render output with knitr
      if (useKnitr()) {
        zeppelinR.setInterpreterOutput(null);
        zeppelinR.set(".zcmd", "\n```{r " + renderOptions + "}\n" + lines + "\n```");
        zeppelinR.eval(".zres <- knit2html(text=.zcmd)");
        String html = zeppelinR.getS0(".zres");

        RDisplay rDisplay = render(html, imageWidth);

        return new InterpreterResult(
            rDisplay.code(),
            rDisplay.type(),
            rDisplay.content()
        );
      } else {
        // alternatively, stream the output (without knitr)
        zeppelinR.setInterpreterOutput(interpreterContext.out);
        zeppelinR.eval(lines);
        return new InterpreterResult(InterpreterResult.Code.SUCCESS, "");
      }
    } catch (Exception e) {
      logger.error("Exception while connecting to R", e);
      return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());
    }
  }

  /** Stops the R process; a no-op when {@link #open()} never created one. */
  @Override
  public void close() {
    if (zeppelinR != null) {
      zeppelinR.close();
    }
  }

  /** Cancellation is not implemented for this interpreter. */
  @Override
  public void cancel(InterpreterContext context) {}

  @Override
  public FormType getFormType() {
    return FormType.NONE;
  }

  /** Progress is not tracked; always reports 0. */
  @Override
  public int getProgress(InterpreterContext context) {
    return 0;
  }

  /** Returns a FIFO scheduler named after this class and this instance's hash code. */
  @Override
  public Scheduler getScheduler() {
    return SchedulerFactory.singleton().createOrGetFIFOScheduler(
        SparkRInterpreter.class.getName() + this.hashCode());
  }

  /** Code completion is not implemented; always returns an empty list. */
  @Override
  public List<String> completion(String buf, int cursor) {
    return new ArrayList<String>();
  }

  /**
   * Looks up the SparkInterpreter of the same session, unwrapping any wrapper interpreters,
   * and opens it through its LazyOpenInterpreter wrapper when one is present.
   */
  private SparkInterpreter getSparkInterpreter() {
    LazyOpenInterpreter lazy = null;
    SparkInterpreter spark = null;
    Interpreter p = getInterpreterInTheSameSessionByClassName(SparkInterpreter.class.getName());

    while (p instanceof WrappedInterpreter) {
      if (p instanceof LazyOpenInterpreter) {
        lazy = (LazyOpenInterpreter) p;
      }
      p = ((WrappedInterpreter) p).getInnerInterpreter();
    }
    spark = (SparkInterpreter) p;

    if (lazy != null) {
      lazy.open();
    }
    return spark;
  }

  /** Reads {@code zeppelin.R.knitr}; any failure while reading it counts as "disabled". */
  private boolean useKnitr() {
    try {
      return Boolean.parseBoolean(getProperty("zeppelin.R.knitr"));
    } catch (Exception e) {
      return false;
    }
  }
}

View file

@ -72,7 +72,6 @@ public class SparkVersion {
return olderThan(MIN_SUPPORTED_VERSION) || newerThanEquals(UNSUPPORTED_FUTURE_VERSION);
}
/**
 * Static factory that wraps the given version string in a new SparkVersion.
 *
 * @param versionString version text handed to the SparkVersion constructor
 * @return a new SparkVersion for that string
 */
public static SparkVersion fromVersionString(String versionString) {
return new SparkVersion(versionString);
}
@ -81,6 +80,10 @@ public class SparkVersion {
return this.newerThanEquals(SPARK_1_2_0);
}
/**
 * @return true when this version is SPARK_1_4_0 or newer, the threshold used here for SparkR
 */
public boolean isSparkRSupported() {
return this.newerThanEquals(SPARK_1_4_0);
}
/**
 * @return true when this version is SPARK_1_4_0 or newer, the threshold used here for DataFrame
 */
public boolean hasDataFrame() {
return this.newerThanEquals(SPARK_1_4_0);
}

View file

@ -0,0 +1,404 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.commons.exec.*;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import parquet.org.slf4j.Logger;
import parquet.org.slf4j.LoggerFactory;
import java.io.*;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* R repl interaction
*/
public class ZeppelinR implements ExecuteResultHandler {
Logger logger = LoggerFactory.getLogger(ZeppelinR.class);
// command used to launch R, as handed to the constructor
private final String rCmdPath;
private DefaultExecutor executor;
// receives both stdout and stderr of the R process (see open())
private SparkOutputStream outputStream;
// write end of the pipe that open() connects to the process' stdin
private PipedOutputStream input;
// location of the R bootstrap script written by createRScript()
private final String scriptPath;
private final String libPath;
// Registry of opened instances keyed by hashCode(); open() stores this instance here and
// passes the same key to the R script on its command line.
static Map<Integer, ZeppelinR> zeppelinR = Collections.synchronizedMap(
new HashMap<Integer, ZeppelinR>());
// sink for process output produced before a paragraph output is attached
private InterpreterOutput initialOutput;
// SparkR backend port passed to the R script
private final int port;
// set to true by open() once the process has been handed to the executor
private boolean rScriptRunning;
/**
* To be notified R repl initialization
*/
boolean rScriptInitialized = false;
// monitor object for rScriptInitialized; "new Integer" yields a distinct object per field
Integer rScriptInitializeNotifier = new Integer(0);
/**
* Request to R repl
*/
Request rRequestObject = null;
// monitor object used to wake getRequest() when a request has been published
Integer rRequestNotifier = new Integer(0);
/**
* Request object handed to the R script through getRequest().
*
* type : "eval", "set", "get" or "getS" (the values passed by eval(), set(), get() and getS0())
* stmt : statement to evaluate when type is "eval",
* key when type is "set", "get" or "getS"
* value : value object when type is "set", null otherwise
*/
public static class Request {
String type;
String stmt;
Object value;
public Request(String type, String stmt, Object value) {
this.type = type;
this.stmt = stmt;
this.value = value;
}
/** @return the request type string */
public String getType() {
return type;
}
/** @return the statement to evaluate, or the variable key for set/get requests */
public String getStmt() {
return stmt;
}
/** @return the value to assign for a "set" request */
public Object getValue() {
return value;
}
}
/**
* Response from R repl
*/
Object rResponseValue = null;
boolean rResponseError = false;
Integer rResponseNotifier = new Integer(0);
/**
 * Create ZeppelinR instance.
 *
 * <p>The bootstrap script location is a freshly created, uniquely named temporary file.
 * A fixed {@code <tmpdir>/zeppelin_sparkr.R} path would be shared by every instance, process
 * and user on the machine, so concurrent interpreters could overwrite each other's script
 * and a file owned by another user could not be rewritten.
 *
 * @param rCmdPath R repl commandline path
 * @param libPath sparkr library path
 * @param sparkRBackendPort port of the SparkR backend, passed on to the R script
 * @throws InterpreterException if the temporary script file cannot be created
 */
public ZeppelinR(String rCmdPath, String libPath, int sparkRBackendPort) {
  this.rCmdPath = rCmdPath;
  this.libPath = libPath;
  this.port = sparkRBackendPort;
  try {
    File scriptFile = File.createTempFile("zeppelin_sparkr-", ".R");
    // best-effort cleanup so the per-instance files do not pile up in the temp directory
    scriptFile.deleteOnExit();
    scriptPath = scriptFile.getAbsolutePath();
  } catch (IOException e) {
    throw new InterpreterException(e);
  }
}
/**
* Start R repl: writes the bootstrap script, registers this instance in the static registry,
* launches R asynchronously and finally issues a no-op eval.
* @throws IOException if the pipe cannot be set up or the process cannot be launched
*/
public void open() throws IOException {
createRScript();
// the R script receives hashCode() as its first argument, the key of this registry entry
zeppelinR.put(hashCode(), this);
CommandLine cmd = CommandLine.parse(rCmdPath);
cmd.addArgument("--no-save");
cmd.addArgument("--no-restore");
cmd.addArgument("-f");
cmd.addArgument(scriptPath);
// everything after --args is handed to the script: registry key, backend port, library path
cmd.addArgument("--args");
cmd.addArgument(Integer.toString(hashCode()));
cmd.addArgument(Integer.toString(port));
cmd.addArgument(libPath);
executor = new DefaultExecutor();
outputStream = new SparkOutputStream();
// stdin of the process is the read end of a pipe whose write end is kept in "input"
input = new PipedOutputStream();
PipedInputStream in = new PipedInputStream(input);
// stdout and stderr both go to the same SparkOutputStream
PumpStreamHandler streamHandler = new PumpStreamHandler(outputStream, outputStream, in);
executor.setWatchdog(new ExecuteWatchdog(ExecuteWatchdog.INFINITE_TIMEOUT));
executor.setStreamHandler(streamHandler);
Map env = EnvironmentUtils.getProcEnvironment();
// until a paragraph output is attached, process output is only written to the debug log
initialOutput = new InterpreterOutput(new InterpreterOutputListener() {
@Override
public void onAppend(InterpreterOutput out, byte[] line) {
logger.debug(new String(line));
}
@Override
public void onUpdate(InterpreterOutput out, byte[] output) {
}
});
outputStream.setInterpreterOutput(initialOutput);
// asynchronous launch: this instance is passed as the ExecuteResultHandler
executor.execute(cmd, env, this);
// NOTE(review): the flag is raised after the asynchronous launch; if a result-handler
// callback clears it and runs before this line, the flag would end up true — confirm.
rScriptRunning = true;
// flush output
eval("cat('')");
}
/**
 * Evaluate an R expression in the repl.
 *
 * @param expr R source to evaluate
 * @return the value the R side reports back for this request
 */
public Object eval(String expr) {
  final Request evalRequest = new Request("eval", expr, null);
  synchronized (this) {
    rRequestObject = evalRequest;
    return request();
  }
}
/**
 * Assign a value to a variable on the R side.
 *
 * @param key variable name
 * @param value value to assign
 */
public void set(String key, Object value) {
  final Request setRequest = new Request("set", key, value);
  synchronized (this) {
    rRequestObject = setRequest;
    request();
  }
}
/**
 * Fetch the value of a variable from the R side.
 *
 * @param key variable name
 * @return the value the R side reports back for this request
 */
public Object get(String key) {
  final Request getRequest = new Request("get", key, null);
  synchronized (this) {
    rRequestObject = getRequest;
    return request();
  }
}
/**
 * Fetch the value of a variable from the R side, as a string ("getS" request).
 *
 * @param key variable name
 * @return the response cast to String
 */
public String getS0(String key) {
  final Request getStringRequest = new Request("getS", key, null);
  synchronized (this) {
    rRequestObject = getStringRequest;
    final Object response = request();
    return (String) response;
  }
}
/**
 * Send the request stored in {@code rRequestObject} to the r repl and return its response.
 *
 * <p>Blocks until {@code setResponse} delivers a non-null value or the repl is flagged as no
 * longer running. An interrupt does not abort the wait (the reply to this request would
 * otherwise be picked up by the next one); the interrupt status is restored before returning.
 *
 * @return responseValue
 * @throws RuntimeException if the repl is not running (before or during the wait), or with the
 *         response text as message when the R side flagged the response as an error
 */
private Object request() throws RuntimeException {
  if (!rScriptRunning) {
    throw new RuntimeException("r repl is not running");
  }

  // wait for rscript initialized
  if (!rScriptInitialized) {
    waitForRScriptInitialized();
  }

  rResponseValue = null;

  // wake up getRequest(), which is waiting on this monitor
  synchronized (rRequestNotifier) {
    rRequestNotifier.notify();
  }

  Object respValue;
  boolean respError;
  boolean interrupted = false;
  synchronized (rResponseNotifier) {
    while (rResponseValue == null && rScriptRunning) {
      try {
        rResponseNotifier.wait(1000);
      } catch (InterruptedException e) {
        logger.error(e.getMessage(), e);
        interrupted = true;
      }
    }
    // read value and error flag together, under the lock setResponse() writes them with
    respValue = rResponseValue;
    respError = rResponseError;
    rResponseValue = null;
  }
  if (interrupted) {
    Thread.currentThread().interrupt();
  }

  if (respValue == null) {
    // The loop ends without a value only when the repl stopped running. Report that instead
    // of returning null or dereferencing null when a stale error flag is still set.
    throw new RuntimeException("r repl is not running");
  }
  if (respError) {
    throw new RuntimeException(respValue.toString());
  }
  return respValue;
}
/**
 * Wait until src/main/resources/R/zeppelin_sparkr.R is initialized
 * and call onScriptInitialized().
 *
 * <p>Gives up after ten seconds or as soon as the repl is flagged as not running. The output
 * captured from the process so far is appended to the exception message.
 *
 * @throws InterpreterException if the script did not signal initialization
 */
private void waitForRScriptInitialized() throws InterpreterException {
  boolean interrupted = false;
  synchronized (rScriptInitializeNotifier) {
    long startTime = System.nanoTime();
    while (!rScriptInitialized
        && rScriptRunning
        && System.nanoTime() - startTime < 10L * 1000 * 1000000) {
      try {
        rScriptInitializeNotifier.wait(1000);
      } catch (InterruptedException e) {
        logger.error(e.getMessage(), e);
        interrupted = true;
      }
    }
  }
  // keep waiting semantics unchanged, but do not lose the interrupt for the caller
  if (interrupted) {
    Thread.currentThread().interrupt();
  }

  String errorMessage = "";
  try {
    initialOutput.flush();
    errorMessage = new String(initialOutput.toByteArray());
  } catch (IOException e) {
    // route the failure through the logger rather than printing to stderr
    logger.error("Failed to read the initial output of the r repl", e);
  }

  if (!rScriptInitialized) {
    throw new InterpreterException("sparkr is not responding " + errorMessage);
  }
}
/**
 * Blocks until a request is pending, then takes it.
 *
 * Invoked by src/main/resources/R/zeppelin_sparkr.R. The pending request is
 * cleared before it is returned, so each request is handed out once.
 *
 * @return the pending request, never {@code null}
 */
public Request getRequest() {
  synchronized (rRequestNotifier) {
    while (true) {
      if (rRequestObject != null) {
        Request pending = rRequestObject;
        rRequestObject = null;
        return pending;
      }
      try {
        rRequestNotifier.wait(1000);
      } catch (InterruptedException e) {
        logger.error(e.getMessage(), e);
      }
    }
  }
}
/**
 * Delivers the outcome of the current request and wakes the waiting requester.
 *
 * Invoked by src/main/resources/R/zeppelin_sparkr.R.
 *
 * @param value response payload (an error description when {@code error} is true)
 * @param error whether the R side failed while handling the request
 */
public void setResponse(Object value, boolean error) {
  synchronized (rResponseNotifier) {
    // Both fields are published under the same lock, so their order is irrelevant
    // to a reader that holds rResponseNotifier.
    rResponseError = error;
    rResponseValue = value;
    rResponseNotifier.notify();
  }
}
/**
 * Marks the R script as initialized and releases every thread waiting for it.
 *
 * Invoked by src/main/resources/R/zeppelin_sparkr.R once its setup is done.
 */
public void onScriptInitialized() {
  final Object initMonitor = rScriptInitializeNotifier;
  synchronized (initMonitor) {
    rScriptInitialized = true;
    initMonitor.notifyAll();
  }
}
/**
 * Copies the bundled R/zeppelin_sparkr.R classpath resource to {@code scriptPath}.
 *
 * Both streams are closed on every path, including a failed copy.
 *
 * @throws InterpreterException if {@code scriptPath} is a directory, the
 *     resource cannot be found, or writing the file fails
 */
private void createRScript() {
  File out = new File(scriptPath);

  if (out.exists() && out.isDirectory()) {
    throw new InterpreterException("Can't create r script " + out.getAbsolutePath());
  }

  // Fully qualified so this block does not depend on an extra import.
  java.io.InputStream scriptSource =
      getClass().getClassLoader().getResourceAsStream("R/zeppelin_sparkr.R");
  if (scriptSource == null) {
    // getResourceAsStream signals "not found" with null; fail with a clear
    // message instead of a NullPointerException inside the copy.
    throw new InterpreterException("Can't find R/zeppelin_sparkr.R on the classpath");
  }

  FileOutputStream outStream = null;
  try {
    outStream = new FileOutputStream(out);
    IOUtils.copy(scriptSource, outStream);
    // Explicit close on the success path so a failure to finish writing is reported.
    outStream.close();
  } catch (IOException e) {
    throw new InterpreterException(e);
  } finally {
    // Safe on already-closed and null streams; guarantees nothing leaks on failure.
    IOUtils.closeQuietly(outStream);
    IOUtils.closeQuietly(scriptSource);
  }

  logger.info("File {} created", scriptPath);
}
/**
 * Terminates this R repl: destroys the R process, deletes the script file at
 * {@code scriptPath} and removes this instance from the instance registry.
 */
public void close() {
  executor.getWatchdog().destroyProcess();

  File script = new File(scriptPath);
  script.delete();

  zeppelinR.remove(hashCode());
}
/**
 * Looks up a registered instance by its hash code.
 * This method is invoked from zeppelin_sparkr.R, which receives the hash code
 * as a command line argument.
 *
 * @param hashcode key under which the instance is stored in the registry
 * @return whatever the registry holds for {@code hashcode}
 */
public static ZeppelinR getZeppelinR(int hashcode) {
return zeppelinR.get(hashcode);
}
/**
 * Sets the InterpreterOutput that captures the repl output, by forwarding it
 * to this instance's output stream.
 *
 * @param out destination for subsequent repl output
 */
public void setInterpreterOutput(InterpreterOutput out) {
outputStream.setInterpreterOutput(out);
}
/**
 * Process-completion callback: logs the value it is given and marks the R
 * script as no longer running, which ends any wait loop that checks
 * {@code rScriptRunning}.
 *
 * @param i value reported on completion (presumably the process exit code - confirm)
 */
@Override
public void onProcessComplete(int i) {
logger.info("process complete {}", i);
rScriptRunning = false;
}
/**
 * Process-failure callback: logs the failure and marks the R script as no
 * longer running, which ends any wait loop that checks {@code rScriptRunning}.
 *
 * @param e the failure reported for the process
 */
@Override
public void onProcessFailed(ExecuteException e) {
logger.error(e.getMessage(), e);
rScriptRunning = false;
}
}

View file

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
/**
 * Contains the Spark and Zeppelin Contexts made available to SparkR.
 *
 * The contexts are held in static fields; the R script obtains them through
 * the static getters.
 */
public class ZeppelinRContext {
  private static SparkContext sparkContext;
  private static SQLContext sqlContext;
  private static ZeppelinContext zeppelinContext;

  /**
   * Stores the SparkContext handed out by {@link #getSparkContext()}.
   *
   * @param sparkContext context to expose
   */
  public static void setSparkContext(SparkContext sparkContext) {
    ZeppelinRContext.sparkContext = sparkContext;
  }

  /**
   * Stores the ZeppelinContext handed out by {@link #getZeppelinContext()}.
   *
   * @param zeppelinContext context to expose
   */
  public static void setZeppelinContext(ZeppelinContext zeppelinContext) {
    ZeppelinRContext.zeppelinContext = zeppelinContext;
  }

  /**
   * Misspelled predecessor of {@link #setZeppelinContext(ZeppelinContext)},
   * kept so existing callers continue to compile.
   *
   * @param zeppelinContext context to expose
   * @deprecated use {@link #setZeppelinContext(ZeppelinContext)}
   */
  @Deprecated
  public static void setZepplinContext(ZeppelinContext zeppelinContext) {
    setZeppelinContext(zeppelinContext);
  }

  /**
   * Stores the SQLContext handed out by {@link #getSqlContext()}.
   *
   * @param sqlContext context to expose
   */
  public static void setSqlContext(SQLContext sqlContext) {
    ZeppelinRContext.sqlContext = sqlContext;
  }

  /**
   * @return the stored SparkContext, or {@code null} if none has been set
   */
  public static SparkContext getSparkContext() {
    return sparkContext;
  }

  /**
   * @return the stored SQLContext, or {@code null} if none has been set
   */
  public static SQLContext getSqlContext() {
    return sqlContext;
  }

  /**
   * @return the stored ZeppelinContext, or {@code null} if none has been set
   */
  public static ZeppelinContext getZeppelinContext() {
    return zeppelinContext;
  }
}

View file

@ -0,0 +1,99 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Bootstrap: read the launch arguments, connect to the JVM backend and publish
# the Spark/Zeppelin contexts to the R session.
#
# Expected trailing command line arguments, in order:
#   1. hash code used to look up the owning ZeppelinR instance on the JVM side
#   2. port of the backend to connect to
#   3. library path that is prepended to .libPaths() before SparkR is loaded
args <- commandArgs(trailingOnly = TRUE)
hashCode <- as.integer(args[1])
port <- as.integer(args[2])
libPath <- args[3]
# args is not needed any more; remove it so it does not linger in the session
rm(args)
print(paste("Port ", toString(port)))
print(paste("LibPath ", libPath))
# put libPath first so library(SparkR) below searches it before the default locations
.libPaths(c(file.path(libPath), .libPaths()))
library(SparkR)
SparkR:::connectBackend("localhost", port)
# scStartTime is needed by R/pkg/R/sparkR.R
assign(".scStartTime", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)
# getZeppelinR
# handle to the JVM-side ZeppelinR object; the request loop talks to it
.zeppelinR = SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinR", "getZeppelinR", hashCode)
# setup spark env
# each context is fetched from ZeppelinRContext, stored in SparkR's internal
# environment and (for sc / sqlContext) also exposed in the global environment
assign(".sc", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getSparkContext"), envir = SparkR:::.sparkREnv)
assign("sc", get(".sc", envir = SparkR:::.sparkREnv), envir=.GlobalEnv)
assign(".sqlc", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getSqlContext"), envir = SparkR:::.sparkREnv)
assign("sqlContext", get(".sqlc", envir = SparkR:::.sparkREnv), envir = .GlobalEnv)
# the ZeppelinContext lives only in the global environment; the z.* helpers use it
assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getZeppelinContext"), envir = .GlobalEnv)
# Calls the "put" method of the JVM-side ZeppelinContext with a name and an object.
#   name   - first argument forwarded to "put"
#   object - second argument forwarded to "put"
# Returns whatever the JVM call returns.
z.put <- function(name, object) {
SparkR:::callJMethod(.zeppelinContext, "put", name, object)
}
# Calls the "get" method of the JVM-side ZeppelinContext.
#   name - argument forwarded to "get"
# Returns whatever the JVM call returns.
z.get <- function(name) {
SparkR:::callJMethod(.zeppelinContext, "get", name)
}
# Calls the "input" method of the JVM-side ZeppelinContext.
#   name  - first argument forwarded to "input"
#   value - second argument forwarded to "input"
# Returns whatever the JVM call returns.
z.input <- function(name, value) {
SparkR:::callJMethod(.zeppelinContext, "input", name, value)
}
# notify script is initialized
SparkR:::callJMethod(.zeppelinR, "onScriptInitialized")
# Request loop: runs until the process is terminated. Each iteration takes one
# request from the JVM side, handles it according to its type and reports the
# outcome with setResponse(value, error).
# NOTE(review): eval()/assign() below use their default environments; since they
# are written at top level this is presumably the global environment - confirm
# before moving any of this code into a function.
while (TRUE) {
# getRequest blocks on the JVM side until a request is pending
req <- SparkR:::callJMethod(.zeppelinR, "getRequest")
type <- SparkR:::callJMethod(req, "getType")
stmt <- SparkR:::callJMethod(req, "getStmt")
value <- SparkR:::callJMethod(req, "getValue")
if (type == "eval") {
# evaluate stmt; the result is not sent back, success is reported as ""
tryCatch({
ret <- eval(parse(text=stmt))
SparkR:::callJMethod(.zeppelinR, "setResponse", "", FALSE)
}, error = function(e) {
# report the failure text with the error flag set
SparkR:::callJMethod(.zeppelinR, "setResponse", toString(e), TRUE)
})
} else if (type == "set") {
# bind value to the variable named by stmt; success is reported as ""
tryCatch({
ret <- assign(stmt, value)
SparkR:::callJMethod(.zeppelinR, "setResponse", "", FALSE)
}, error = function(e) {
SparkR:::callJMethod(.zeppelinR, "setResponse", toString(e), TRUE)
})
} else if (type == "get") {
# evaluate stmt and send the resulting object back as the response
tryCatch({
ret <- eval(parse(text=stmt))
SparkR:::callJMethod(.zeppelinR, "setResponse", ret, FALSE)
}, error = function(e) {
SparkR:::callJMethod(.zeppelinR, "setResponse", toString(e), TRUE)
})
} else if (type == "getS") {
# same as "get", but the result is converted with toString() first
tryCatch({
ret <- eval(parse(text=stmt))
SparkR:::callJMethod(.zeppelinR, "setResponse", toString(ret), FALSE)
}, error = function(e) {
SparkR:::callJMethod(.zeppelinR, "setResponse", toString(e), TRUE)
})
} else {
# unsupported type
SparkR:::callJMethod(.zeppelinR, "setResponse", paste("Unsupported type ", type), TRUE)
}
}

View file

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark
import org.apache.spark.api.r.RBackend
/**
 * Singleton wrapper around one [[org.apache.spark.api.r.RBackend]] and the
 * thread that runs it.
 *
 * Intended call order, as far as this object shows: `init()` to obtain the
 * port, `start()` to run the backend on its own thread, `close()` to shut it
 * down and wait for that thread.
 */
object SparkRBackend {
  val backend : RBackend = new RBackend()
  private var started = false
  private var portNumber = 0

  /** Thread named "SparkRBackend" whose body is `backend.run()`. */
  val backendThread : Thread = new Thread("SparkRBackend") {
    override def run(): Unit = backend.run()
  }

  /** Initializes the backend, remembers the value it returns as the port and returns it. */
  def init() : Int = {
    portNumber = backend.init()
    portNumber
  }

  /** Starts the backend thread and records that it has been started. */
  def start() : Unit = {
    backendThread.start()
    started = true
  }

  /** Closes the backend, then waits for the backend thread to finish. */
  def close() : Unit = {
    backend.close()
    backendThread.join()
  }

  /** Whether `start()` has been called. */
  def isStarted() : Boolean = started

  /** The port recorded by the last `init()`, or 0 if `init()` has not been called. */
  def port(): Int = portNumber
}

View file

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark
import org.apache.zeppelin.interpreter.InterpreterResult.Code
import org.apache.zeppelin.interpreter.InterpreterResult.Code.{SUCCESS, ERROR}
import org.apache.zeppelin.interpreter.InterpreterResult.Type
import org.apache.zeppelin.interpreter.InterpreterResult.Type.{TEXT, HTML, TABLE, IMG}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.nodes.Document
import scala.collection.JavaConversions._
import scala.util.matching.Regex
/**
 * Result of rendering R output for display.
 *
 * @param content the text or markup to show
 * @param type how `content` is to be interpreted (an InterpreterResult.Type value)
 * @param code outcome code (an InterpreterResult.Code value); ZeppelinRDisplay
 *             in this file always produces SUCCESS
 */
case class RDisplay(content: String, `type`: Type, code: Code)
/**
 * Turns the HTML produced for an R evaluation into an [[RDisplay]], choosing
 * between text, table, image and HTML output based on markers found in the body.
 */
object ZeppelinRDisplay {

  /**
   * Matches a leading index marker such as `[1] ` (optional spaces, a bracketed
   * number, one space) at the very start of a string.
   */
  val pattern = new Regex("""^ *\[\d*\] """)

  /**
   * Renders the given HTML.
   *
   * @param html       HTML document text to interpret
   * @param imageWidth value written into the `width` attribute of every `img`
   *                   element when the result is HTML
   * @return the display chosen for the content
   */
  def render(html: String, imageWidth: String): RDisplay = {

    val document = Jsoup.parse(html)
    document.outputSettings().prettyPrint(false)

    val body = document.body()

    // no paragraph at all: pass the body through unchanged as HTML
    if (body.getElementsByTag("p").isEmpty) return RDisplay(body.html(), HTML, SUCCESS)

    val bodyHtml = body.html()

    // nothing that needs special handling: show the first paragraph as plain text
    if (! bodyHtml.contains("<img")
      && ! bodyHtml.contains("<script")
      && ! bodyHtml.contains("%html ")
      && ! bodyHtml.contains("%table ")
      && ! bodyHtml.contains("%img ")
    ) {
      return textDisplay(body)
    }

    if (bodyHtml.contains("%table")) {
      return tableDisplay(body)
    }

    if (bodyHtml.contains("%img")) {
      return imgDisplay(body)
    }

    return htmlDisplay(body, imageWidth)
  }

  /** Content of the first paragraph, as text. */
  private def textDisplay(body: Element): RDisplay = {
    RDisplay(body.getElementsByTag("p").get(0).html(), TEXT, SUCCESS)
  }

  /**
   * First paragraph as a table: the `%table` marker, the closing quote and the
   * leading index marker are removed, and escaped tabs/newlines are unescaped.
   */
  private def tableDisplay(body: Element): RDisplay = {
    val p = body.getElementsByTag("p").get(0).html.replace("“%table " , "").replace("”", "")
    // strip only the leading marker; identical text inside the table is data
    val table = pattern.replaceFirstIn(p, "").replace("\\t", "\t").replace("\\n", "\n")
    RDisplay(table, TABLE, SUCCESS)
  }

  /**
   * First paragraph as an image: the `%img` marker, the closing quote and the
   * leading index marker are removed.
   */
  private def imgDisplay(body: Element): RDisplay = {
    val p = body.getElementsByTag("p").get(0).html.replace("“%img " , "").replace("”", "")
    // strip only the leading marker; identical text inside the payload is data
    val img = pattern.replaceFirstIn(p, "")
    RDisplay(img, IMG, SUCCESS)
  }

  /**
   * Whole body as HTML: per child element the `%html` marker, the closing quote
   * and the element's leading index marker are removed; protocol-relative
   * `src`/`href` URLs get an explicit `http:` scheme; every image gets the
   * requested width.
   */
  private def htmlDisplay(body: Element, imageWidth: String): RDisplay = {
    val div = new StringBuilder
    for (element <- body.children) {
      // the marker is detected on the inner HTML (where it can be at the start)
      // and then removed from the outer HTML that is kept
      val marker = (pattern findFirstIn element.html()).getOrElse("")
      val outerHtml = element.outerHtml().replace("“%html " , "").replace("”", "")
      div.append(removeFirst(outerHtml, marker))
    }

    val content = div.toString
      .replaceAll("src=\"//", "src=\"http://")
      .replaceAll("href=\"//", "href=\"http://")

    body.html(content)

    for (image <- body.getElementsByTag("img")) {
      image.attr("width", imageWidth)
    }

    RDisplay(body.html, HTML, SUCCESS)
  }

  /** Removes the first occurrence of `target` from `text`; no-op when `target` is empty or absent. */
  private def removeFirst(text: String, target: String): String = {
    val at = if (target.isEmpty) -1 else text.indexOf(target)
    if (at < 0) text else text.substring(0, at) + text.substring(at + target.length)
  }
}

View file

@ -17,7 +17,7 @@
#
if [ $# -ne 2 ]; then
if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@ -26,10 +26,10 @@ fi
SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"
echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null
if [ $? -eq 0 ]; then
echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null
if [[ "$?" -eq 0 ]]; then
echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null
if [ $? -eq 0 ]; then
if [[ "$?" -eq 0 ]]; then
SPARK_VER_RANGE="<=1.2"
else
SPARK_VER_RANGE="<=1.3"
@ -40,31 +40,73 @@ fi
set -xe
FWDIR=$(dirname "${BASH_SOURCE-$0}")
MAX_DOWNLOAD_TIME_SEC=590
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
echo "SPARK_HOME is ${SPARK_HOME}"
if [ ! -d "${SPARK_HOME}" ]; then
if [ "${SPARK_VER_RANGE}" == "<=1.2" ]; then
# spark 1.1.x and spark 1.2.x can be downloaded from archive
STARTTIME=`date +%s`
timeout -s KILL 300 wget -q http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
ENDTIME=`date +%s`
DOWNLOADTIME=$((ENDTIME-STARTTIME))
else
# spark 1.3.x and later can be downloaded from mirror
# get download address from mirror
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz?asjson=1")
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
STARTTIME=`date +%s`
timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
ENDTIME=`date +%s`
DOWNLOADTIME=$((ENDTIME-STARTTIME))
#######################################
# Downloads a file from the given URL.
# Tries 3 times with 1s delay, 20s read and 15s connection timeouts.
# Globals:
#   None
# Arguments:
#   url - source URL
# Returns:
#   0 if the download succeeded, 1 if all attempts failed
#######################################
download_with_retry() {
    local url="$1"
    # wget runs as an "if" condition so that a failure reaches the message below
    # even under "set -e", and so that the failure is passed on to the caller.
    # --timeout comes before --read-timeout so the general value does not
    # replace the more specific 20s read timeout.
    if ! wget --retry-connrefused --waitretry=1 --timeout=15 --read-timeout=20 -t 3 "${url}"; then
        echo "3 download attempts for ${url} failed" >&2
        return 1
    fi
}
SPARK_CACHE=".spark-dist"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
if [[ ! -d "${SPARK_HOME}" ]]; then
mkdir -p "${SPARK_CACHE}"
cd "${SPARK_CACHE}"
if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
pwd
ls -la .
echo "${SPARK_CACHE} does not have ${SPARK_ARCHIVE} downloading ..."
# download archive if not cached
if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
# spark 1.1.x and spark 1.2.x can be downloaded from archive
STARTTIME=`date +%s`
#timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
download_with_retry "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
else
# spark 1.3.x and later can be downloaded from mirror
# get download address from mirror
MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
STARTTIME=`date +%s`
#timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget -q "${PREFFERED}${PATHINFO}"
download_with_retry "${PREFFERED}${PATHINFO}"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
fi
fi
# extract archive in un-cached root, clean-up on failure
cp "${SPARK_ARCHIVE}.tgz" ..
cd ..
if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
rm -rf "${SPARK_ARCHIVE}"
rm -f "${SPARK_ARCHIVE}.tgz"
fi
tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
fi
set +xe

View file

@ -17,7 +17,7 @@
#
if [ $# -ne 2 ]; then
if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@ -26,10 +26,10 @@ fi
SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"
echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null
if [ $? -eq 0 ]; then
echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null
if [[ "$?" -eq 0 ]]; then
echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null
if [ $? -eq 0 ]; then
if [[ "$?" -eq 0 ]]; then
SPARK_VER_RANGE="<=1.2"
else
SPARK_VER_RANGE="<=1.3"
@ -38,17 +38,18 @@ else
SPARK_VER_RANGE=">1.3"
fi
set -xe
FWDIR=$(dirname "${BASH_SOURCE-$0}")
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
# create PID dir. test case detect pid file so they can select active spark home dir for test
mkdir -p ${SPARK_HOME}/run
export SPARK_PID_DIR=${SPARK_HOME}/run
export SPARK_PID_DIR="${SPARK_HOME}/run"
mkdir -p "${SPARK_PID_DIR}"
# start
export SPARK_MASTER_PORT=7071

View file

@ -16,7 +16,7 @@
# limitations under the License.
#
if [ $# -ne 2 ]; then
if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@ -27,12 +27,15 @@ HADOOP_VERSION="${2}"
set -xe
FWDIR=$(dirname "${BASH_SOURCE-$0}")
FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
# set create PID dir
export SPARK_PID_DIR=${SPARK_HOME}/run
export SPARK_PID_DIR="${SPARK_HOME}/run"
${SPARK_HOME}/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1
${SPARK_HOME}/sbin/stop-master.sh

View file

@ -69,6 +69,12 @@
</fileSet>
<fileSet>
<directory>../conf</directory>
<excludes>
<exclude>interpreter.json</exclude>
<exclude>zeppelin-env.cmd</exclude>
<exclude>zeppelin-env.sh</exclude>
<exclude>zeppelin-site.xml</exclude>
</excludes>
</fileSet>
<fileSet>
<directory>../interpreter</directory>

View file

@ -1,6 +1,6 @@
(Apache 2.0) nvd3.js v1.7.1 (http://nvd3.org/) - https://github.com/novus/nvd3/blob/v1.7.1/LICENSE.md
(Apache 2.0) gson v2.2 (com.google.code.gson:gson:jar:2.2 - https://github.com/google/gson) - https://github.com/google/gson/blob/gson-2.2/LICENSE
(Apache 2.0) Amazon Web Services SDK for Java v1.10.1 (https://aws.amazon.com/sdk-for-java/) - https://raw.githubusercontent.com/aws/aws-sdk-java/1.10.1/LICENSE.txt
(Apache 2.0) Amazon Web Services SDK for Java v1.10.62 (https://aws.amazon.com/sdk-for-java/) - https://raw.githubusercontent.com/aws/aws-sdk-java/1.10.62/LICENSE.txt
(Apache 2.0) JavaEWAH v0.7.9 (https://github.com/lemire/javaewah) - https://github.com/lemire/javaewah/blob/master/LICENSE-2.0.txt

View file

@ -35,6 +35,11 @@ import org.junit.Before;
import org.junit.Test;
public class RemoteAngularObjectTest implements AngularObjectRegistryListener {
private static final String INTERPRETER_SCRIPT =
System.getProperty("os.name").startsWith("Windows") ?
"../bin/interpreter.cmd" :
"../bin/interpreter.sh";
private InterpreterGroup intpGroup;
private HashMap<String, String> env;
private RemoteInterpreter intp;
@ -63,7 +68,7 @@ public class RemoteAngularObjectTest implements AngularObjectRegistryListener {
p,
"note",
MockInterpreterAngular.class.getName(),
new File("../bin/interpreter.sh").getAbsolutePath(),
new File(INTERPRETER_SCRIPT).getAbsolutePath(),
"fake",
"fakeRepo",
env,

View file

@ -38,6 +38,10 @@ import static org.junit.Assert.assertEquals;
* Test for remote interpreter output stream
*/
public class RemoteInterpreterOutputTestStream implements RemoteInterpreterProcessListener {
private static final String INTERPRETER_SCRIPT =
System.getProperty("os.name").startsWith("Windows") ?
"../bin/interpreter.cmd" :
"../bin/interpreter.sh";
private InterpreterGroup intpGroup;
private HashMap<String, String> env;
@ -61,7 +65,7 @@ public class RemoteInterpreterOutputTestStream implements RemoteInterpreterProce
new Properties(),
"note",
MockInterpreterOutputStream.class.getName(),
new File("../bin/interpreter.sh").getAbsolutePath(),
new File(INTERPRETER_SCRIPT).getAbsolutePath(),
"fake",
"fakeRepo",
env,

View file

@ -28,12 +28,16 @@ import org.apache.zeppelin.interpreter.thrift.RemoteInterpreterService.Client;
import org.junit.Test;
public class RemoteInterpreterProcessTest {
private static final String INTERPRETER_SCRIPT =
System.getProperty("os.name").startsWith("Windows") ?
"../bin/interpreter.cmd" :
"../bin/interpreter.sh";
@Test
public void testStartStop() {
InterpreterGroup intpGroup = new InterpreterGroup();
RemoteInterpreterProcess rip = new RemoteInterpreterProcess(
"../bin/interpreter.sh", "nonexists", "fakeRepo", new HashMap<String, String>(),
INTERPRETER_SCRIPT, "nonexists", "fakeRepo", new HashMap<String, String>(),
10 * 1000, null);
assertFalse(rip.isRunning());
assertEquals(0, rip.referenceCount());
@ -50,7 +54,7 @@ public class RemoteInterpreterProcessTest {
public void testClientFactory() throws Exception {
InterpreterGroup intpGroup = new InterpreterGroup();
RemoteInterpreterProcess rip = new RemoteInterpreterProcess(
"../bin/interpreter.sh", "nonexists", "fakeRepo", new HashMap<String, String>(),
INTERPRETER_SCRIPT, "nonexists", "fakeRepo", new HashMap<String, String>(),
mock(RemoteInterpreterEventPoller.class), 10 * 1000);
rip.reference(intpGroup);
assertEquals(0, rip.getNumActiveClient());

Some files were not shown because too many files have changed in this diff Show more