R Interpreter

Working on CI

CI

CI

CI

CI permissions

CI

Should be good

Triggering CI

squashme - force push

squashme CI

Removing unused dependency

squashme

squashme

squashme

squashme

squashme

squashme

License changes requested by @bzz

squashme
This commit is contained in:
Amos B. Elberg 2016-02-17 02:04:45 -05:00 committed by Amos Elb
parent 67e0fd554f
commit a08ec5b83d
60 changed files with 4554 additions and 189 deletions

View file

@ -16,23 +16,24 @@
language: java
sudo: false
cache:
directories:
- .spark-dist
matrix:
include:
# Test all modules
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Pscalding" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.4.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
@ -46,12 +47,24 @@ matrix:
- jdk: "oraclejdk7"
env: SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.0
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
env: TEST_SELENIUM="true" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
addons:
apt:
sources:
- r-packages-precise
packages:
- r-base-dev
- r-cran-evaluate
- r-cran-base64enc
before_install:
- "ls -la .spark-dist"
- mkdir -p ~/R
- R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
- export R_LIBS='~/R'
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"

14
LICENSE
View file

@ -244,4 +244,16 @@ Apache licenses
The following components are provided under the Apache License. See project link for details.
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
========================================================================
BSD 3-Clause licenses
========================================================================
The following components are provided under the BSD 3-Clause license. See file headers and project links for details.
(BSD 3 Clause) portions of rscala 1.0.6 (https://dahl.byu.edu/software/rscala/) - https://cran.r-project.org/web/packages/rscala/index.html
r/R/rzeppelin/R/{common.R, globals.R,protocol.R,rServer.R,scalaInterpreter.R,zzz.R }
r/src/main/scala/org/apache/zeppelin/rinterpreter/rscala/{Package.scala, RClient.scala}
(BSD 3 Clause) portions of Scala (http://www.scala-lang.org/download) - http://www.scala-lang.org/download/#License
r/src/main/scala/scala/Console.scala

View file

@ -85,7 +85,10 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
ZEPPELIN_CLASSPATH=${SPARK_APP_JAR}
# Need to add the R Interpreter
RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
@ -130,6 +133,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
RZEPPELINPATH="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-zr*.jar)"
ZEPPELIN_CLASSPATH="${ZEPPELIN_CLASSPATH}:${RZEPPELINPATH}"
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then

View file

@ -144,7 +144,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.hive.HiveInterpreter,org.apache.zeppelin.tajo.TajoInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.phoenix.PhoenixInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.rinterpreter.RRepl</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>

100
docs/interpreter/r.md Normal file
View file

@ -0,0 +1,100 @@
---
layout: page
title: "R Interpreter"
description: ""
group: manual
---
{% include JB/setup %}
## R Interpreter
This is the Apache (incubating) Zeppelin project, with the addition of support for the R programming language and R-Spark integration.
### Requirements
Additional requirements for the R interpreter are:
* R 3.1 or later (earlier versions may work, but have not been tested)
* The `evaluate` R package.
For full R support, you will also need the following R packages:
* `knitr`
* `repr` -- available with `devtools::install_github("IRkernel/repr")`
* `htmltools` -- required for some interactive plotting
* `base64enc` -- required to view R base plots
### Configuration
To run Zeppelin with the R Interpreter, the SPARK_HOME environment variable must be set. The best way to do this is by editing `conf/zeppelin-env.sh`.
If it is not set, the R Interpreter will not be able to interface with Spark.
You should also copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.
### Using the R Interpreter
By default, the R Interpreter appears as two Zeppelin Interpreters, `%r` and `%knitr`.
`%r` will behave like an ordinary REPL. You can execute commands as in the CLI.
[![2+2](screenshots/repl2plus2.png)](screenshots/repl2plus2.png)
R base plotting is fully supported
[![replhist](screenshots/replhist.png)](screenshots/replhist.png)
If you return a data.frame, Zeppelin will attempt to display it using Zeppelin's built-in visualizations.
[![replhead](screenshots/replhead.png)](screenshots/replhead.png)
`%knitr` interfaces directly against `knitr`, with chunk options on the first line:
[![knitgeo](screenshots/knitgeo.png)](screenshots/knitgeo.png)
[![knitstock](screenshots/knitstock.png)](screenshots/knitstock.png)
[![knitmotion](screenshots/knitmotion.png)](screenshots/knitmotion.png)
The two interpreters share the same environment. If you define a variable from `%r`, it will be within-scope if you then make a call using `knitr`.
### Using SparkR & Moving Between Languages
If `SPARK_HOME` is set, the `SparkR` package will be loaded automatically:
[![sparkrfaithful](screenshots/sparkrfaithful.png)](screenshots/sparkrfaithful.png)
The Spark Context and SQL Context are created and injected into the local environment automatically as `sc` and `sql`.
The same contexts are shared with the `%spark`, `%sql` and `%pyspark` interpreters:
[![backtoscala](screenshots/backtoscala.png)](screenshots/backtoscala.png)
You can also make an ordinary R variable accessible in scala and Python:
[![varr1](screenshots/varr1.png)](screenshots/varr1.png)
And vice versa:
[![varscala](screenshots/varscala.png)](screenshots/varscala.png)
[![varr2](screenshots/varr2.png)](screenshots/varr2.png)
### Caveats & Troubleshooting
* Almost all issues with the R interpreter turned out to be caused by an incorrectly set `SPARK_HOME`. The R interpreter must load a version of the `SparkR` package that matches the running version of Spark, and it does this by searching `SPARK_HOME`. If Zeppelin isn't configured to interface with Spark in `SPARK_HOME`, the R interpreter will not be able to connect to Spark.
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
* Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.
* If you return a data.frame (for instance, from calling `head()`) from the `%spark.r` interpreter, it will be parsed by Zeppelin's built-in data visualization system.
* Why `knitr` Instead of `rmarkdown`? Why no `htmlwidgets`? In order to support `htmlwidgets`, which has indirect dependencies, `rmarkdown` uses `pandoc`, which requires writing to and reading from disc. This makes it many times slower than `knitr`, which can operate entirely in RAM.
* Why no `ggvis` or `shiny`? Supporting `shiny` would require integrating a reverse-proxy into Zeppelin, which is a substantial task.
* Mac OS X & case-insensitive filesystem. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, maven can unintentionally delete the install directory because `r` and `R` become the same subdirectory.
* Error `unable to start device X11` with the repl interpreter. Check your shell login scripts to see if they are adjusting the `DISPLAY` environment variable. This is common on some operating systems as a workaround for ssh issues, but can interfere with R plotting.
* akka Library Version or `TTransport` errors. This can happen if you try to run Zeppelin with a SPARK_HOME that has a version of Spark other than the one specified with `-Pspark-1.x` when Zeppelin was compiled.

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -0,0 +1,29 @@
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,11 @@
Copyright (c) 2002-2016 EPFL
Copyright (c) 2011-2016 Lightbend, Inc. (formerly Typesafe, Inc.)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the EPFL nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

12
pom.xml
View file

@ -303,7 +303,6 @@
<excludes>org/apache/zeppelin/interpreter/thrift/*</excludes>
</configuration>
</execution>
</executions>
</plugin>
@ -460,6 +459,7 @@
<exclude>.github/*</exclude>
<exclude>.gitignore</exclude>
<exclude>.repository/</exclude>
<exclude>.Rhistory</exclude>
<exclude>**/*.diff</exclude>
<exclude>**/*.patch</exclude>
<exclude>**/*.avsc</exclude>
@ -520,6 +520,9 @@
<!-- docs (website) build target dir -->
<exclude>docs/_site/**</exclude>
<exclude>docs/Gemfile.lock</exclude>
<!-- compiled R packages (binaries) -->
<exclude>R/lib/**</exclude>
</excludes>
</configuration>
@ -674,6 +677,13 @@
</modules>
</profile>
<profile>
<id>r</id>
<modules>
<module>r</module>
</modules>
</profile>
<profile>
<id>scalding</id>
<modules>

41
r/R/install-dev.sh Executable file
View file

@ -0,0 +1,41 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script packages the R files into a package that can be loaded into R,
# and installs that package into the project's local R library directory.
set -o pipefail
set -e
set -x
# Directory containing this script. The `&&` makes the substitution fail
# (instead of silently returning the current directory) if the cd fails, and
# the quoting keeps paths containing spaces intact.
FWDIR="$(cd "$(dirname "$0")" && pwd)"
LIB_DIR="$FWDIR/../../R/lib"
mkdir -p "$LIB_DIR"
pushd "$FWDIR" > /dev/null
# Generate Rd files if devtools is installed
#Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
# Install the rzeppelin package to $LIB_DIR
R CMD INSTALL --library="$LIB_DIR" "$FWDIR/rzeppelin/"
popd > /dev/null
set +x

28
r/R/rzeppelin/DESCRIPTION Normal file
View file

@ -0,0 +1,28 @@
Package: rzeppelin
Type: Package
Title: Interface from scala to R, based on rscala, for the Apache (Incubation) Zeppelin project
Version: 0.1.0
Date: 2015-12-01
Authors@R: c(person(given="David B.",family="Dahl",role=c("aut","cre"),email="dahl@stat.byu.edu"),
person(family="Scala developers",role="ctb",comment="see http://scala-lang.org/"))
URL: http://dahl.byu.edu/software/rscala/
Imports: utils,
evaluate
Suggests:
googleVis,
htmltools,
knitr,
rCharts,
repr,
SparkR,
base64enc
SystemRequirements: Scala (>= 2.10)
Description:
License: file LICENSE
NeedsCompilation: no
Packaged: 2015-05-15 13:36:01 UTC; dahl
Author: David B. Dahl [aut, cre],
Scala developers [ctb] (see http://scala-lang.org/)
Maintainer: Amos B. Elberg <amos.elberg@gmail.com>
Repository:
Date/Publication: 2015-12-01 21:50:02

14
r/R/rzeppelin/LICENSE Normal file
View file

@ -0,0 +1,14 @@
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

7
r/R/rzeppelin/NAMESPACE Normal file
View file

@ -0,0 +1,7 @@
import(utils)
export("rzeppelinPackage")
export("progress_zeppelin")
export(.z.put)
export(.z.get)
export(.z.input)

14
r/R/rzeppelin/R/common.R Normal file
View file

@ -0,0 +1,14 @@
# Interpolate `@{expr}` placeholders in a single string.
#
# snippet: a character vector of length one.
# envir:   environment in which each placeholder expression is evaluated
#          (defaults to the caller's frame).
#
# The first `@{...}` occurrence is replaced by the toString() of the evaluated
# expression, and the function then recurses on the resulting string until no
# placeholder is left. Stops with an error if `snippet` is not a length-one
# character vector.
strintrplt <- function(snippet,envir=parent.frame()) {
  if ( ! is.character(snippet) ) stop("Character vector expected.")
  if ( length(snippet) != 1 ) stop("Length of vector must be exactly one.")
  hit <- regexpr("@\\{([^\\}]+)\\}",snippet)
  # No placeholder left: the string is returned unchanged.
  if ( hit == -1 ) return(snippet)
  hitLength <- attr(hit,"match.length")
  # Text before "@{", the expression between the braces, and the text after "}".
  prefix <- substr(snippet,1,hit-1)
  expression <- substr(snippet,hit+2,hit+hitLength-2)
  suffix <- substr(snippet,hit+hitLength,nchar(snippet))
  rendered <- paste(toString(eval(parse(text=expression),envir=envir)),collapse=" ",sep="")
  strintrplt(paste(prefix,rendered,suffix,sep=""),envir)
}

View file

@ -0,0 +1,3 @@
# All 52 ASCII letters, lower case first and then upper case.
lEtTeRs <- c(letters,LETTERS)
# The letters followed by the digits 0-9; c() coerces the digits to character,
# so this is a character vector of 62 single-character strings.
alphabet <- c(lEtTeRs,0:9)

View file

@ -0,0 +1,35 @@
# Integer codes exchanged over the sockets between R and the Scala side.
# The grouping comments below follow how rServe (rServer.R) compares against
# these values; codes that rServe does not reference are presumably used by
# the Scala side or by other files -- TODO confirm.

# Element data types (compared against `dataType` in rServe).
UNSUPPORTED_TYPE <- 0L
INTEGER <- 1L
DOUBLE <- 2L
BOOLEAN <- 3L
STRING <- 4L
DATE <- 5L
DATETIME <- 6L
# Data structures (compared against `dataStructure` in rServe, and written
# back by its GET / GET_REFERENCE handling).
UNSUPPORTED_STRUCTURE <- 10L
NULLTYPE <- 11L
REFERENCE <- 12L
ATOMIC <- 13L
VECTOR <- 14L
MATRIX <- 15L
LIST <- 16L
DATAFRAME <- 17L
S3CLASS <- 18L
S4CLASS <- 19L
JOBJ <- 20L
# Commands (compared against `cmd` in the rServe loop).
EXIT <- 100L
RESET <- 101L
GC <- 102L
DEBUG <- 103L
EVAL <- 104L
SET <- 105L
SET_SINGLE <- 106L
SET_DOUBLE <- 107L
GET <- 108L
GET_REFERENCE <- 109L
DEF <- 110L
INVOKE <- 111L
SCALAP <- 112L
# Status codes written back to the peer.
OK <- 1000L
ERROR <- 1001L
UNDEFINED_IDENTIFIER <- 1002L
# NOTE(review): presumably the Scala major version this protocol targets;
# not referenced in the code visible here.
CURRENT_SUPPORTED_SCALA_VERSION <- "2.10"

214
r/R/rzeppelin/R/rServer.R Normal file
View file

@ -0,0 +1,214 @@
# Serve protocol commands from the peer until an EXIT command is received.
#
# sockets: the list created by newSockets(). This function reads its
#          'workspace', 'env' and 'socketIn' entries and hands the whole list
#          to the helpers cc/rb/rc/wb/wc, which are defined elsewhere.
#          NOTE(review): from their use here, rb/wb appear to read/write
#          binary values and rc/wc character strings -- confirm against their
#          definitions.
#
# Each loop iteration reads one integer command code and dispatches on it:
#   EXIT           return from the function
#   DEBUG          read a flag and store it as "debug" in sockets[['env']]
#   EVAL           evaluate a snippet in the workspace; reply OK/ERROR + output
#   SET*           read a value (NULL, atomic, vector, matrix or reference)
#                  and assign it, or sub-assign it via subassign()
#   GET            send back an atomic vector or matrix, or a status code
#   GET_REFERENCE  store the value under a fresh name in workspace$. and send
#                  that name
#   GC             replace workspace$. with a fresh environment
# Any other command, data structure or data type code stops with an error.
#
# Fix over the previous version: string vectors and matrices are read with
# vapply(seq_len(n), ...) instead of sapply(1:n, ...). With n == 0 the old
# form iterated over c(1, 0) and tried to read two strings that were never
# sent. vapply also guarantees a character result (character(0) when empty).
rServe <- function(sockets) {
  cc(sockets)
  workspace <- sockets[['workspace']]
  debug <- get("debug",envir=sockets[['env']])
  while ( TRUE ) {
    if ( debug ) cat("R DEBUG: Top of the loop waiting for a command.\n")
    cmd <- rb(sockets,integer(0))
    if ( cmd == EXIT ) {
      if ( debug ) cat("R DEBUG: Got EXIT\n")
      return()
    } else if ( cmd == DEBUG ) {
      if ( debug ) cat("R DEBUG: Got DEBUG\n")
      newDebug <- ( rb(sockets,integer(0)) != 0 )
      if ( debug != newDebug ) cat("R DEBUG: Debugging is now ",newDebug,"\n",sep="")
      debug <- newDebug
      assign("debug",debug,envir=sockets[['env']])
    } else if ( cmd == EVAL ) {
      if ( debug ) cat("R DEBUG: Got EVAL\n")
      snippet <- rc(sockets)
      # Printed output is captured; errors are trapped by try() so the loop survives.
      output <- capture.output(result <- try(eval(parse(text=snippet),envir=workspace)))
      if ( inherits(result,"try-error") ) {
        wb(sockets,ERROR)
        msg <- paste(c(output,attr(result,"condition")$message),collapse="\n")
        wc(sockets,msg)
      } else {
        wb(sockets,OK)
        output <- paste(output,collapse="\n")
        wc(sockets,output)
      }
      assign(".rzeppelin.last.value",result,envir=workspace)
    } else if ( cmd %in% c(SET,SET_SINGLE,SET_DOUBLE) ) {
      if ( debug ) cat("R DEBUG: Got SET\n")
      # SET_SINGLE / SET_DOUBLE carry an index expression before the identifier.
      if ( cmd != SET ) index <- rc(sockets)
      identifier <- rc(sockets)
      dataStructure <- rb(sockets,integer(0))
      if ( dataStructure == NULLTYPE ) {
        if ( cmd == SET ) assign(identifier,NULL,envir=workspace)
        else subassign(sockets,identifier,index,NULL,cmd==SET_SINGLE)
      } else if ( dataStructure == ATOMIC ) {
        dataType <- rb(sockets,integer(0))
        if ( dataType == INTEGER ) value <- rb(sockets,integer(0))
        else if ( dataType == DOUBLE ) value <- rb(sockets,double(0))
        else if ( dataType == BOOLEAN ) value <- rb(sockets,integer(0)) != 0
        else if ( dataType == STRING ) value <- rc(sockets)
        # else if (dataType == DATE) value <- as.Date(rb(sockets,integer(0)), origin=as.Date("1970-01-01"))
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == VECTOR ) {
        dataLength <- rb(sockets,integer(0))
        dataType <- rb(sockets,integer(0))
        if ( dataType == INTEGER ) value <- rb(sockets,integer(0),n=dataLength)
        else if ( dataType == DOUBLE ) value <- rb(sockets,double(0),n=dataLength)
        else if ( dataType == BOOLEAN ) value <- rb(sockets,integer(0),n=dataLength) != 0
        # seq_len() yields an empty sequence for length 0 (1:0 would not).
        else if ( dataType == STRING ) value <- vapply(seq_len(dataLength),function(i) rc(sockets),character(1))
        # else if ( dataType == DATE ) value <- as.Date(rb(sockets,integer(0), n = dataLength), origin=as.Date("1970-01-01"))
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == MATRIX ) {
        dataNRow <- rb(sockets,integer(0))
        dataNCol <- rb(sockets,integer(0))
        dataLength <- dataNRow * dataNCol
        dataType <- rb(sockets,integer(0))
        # Elements are filled row by row (byrow=TRUE).
        if ( dataType == INTEGER ) value <- matrix(rb(sockets,integer(0),n=dataLength),nrow=dataNRow,byrow=TRUE)
        else if ( dataType == DOUBLE ) value <- matrix(rb(sockets,double(0),n=dataLength),nrow=dataNRow,byrow=TRUE)
        else if ( dataType == BOOLEAN ) value <- matrix(rb(sockets,integer(0),n=dataLength),nrow=dataNRow,byrow=TRUE) != 0
        else if ( dataType == STRING ) value <- matrix(vapply(seq_len(dataLength),function(i) rc(sockets),character(1)),nrow=dataNRow,byrow=TRUE)
        # else if ( dataType == DATE) value <- matrix(as.Date(rb(sockets,integer(0),n=dataLength),
        # origin = as.Date("1970-01-01")),nrow=dataNRow,byrow=TRUE)
        else stop(paste("Unknown data type:",dataType))
        if ( cmd == SET ) assign(identifier,value,envir=workspace)
        else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
      } else if ( dataStructure == REFERENCE ) {
        # References are looked up in the workspace$. environment.
        otherIdentifier <- rc(sockets)
        if ( exists(otherIdentifier,envir=workspace$.) ) {
          wb(sockets,OK)
          value <- get(otherIdentifier,envir=workspace$.)
          if ( cmd == SET ) assign(identifier,value,envir=workspace)
          else subassign(sockets,identifier,index,value,cmd==SET_SINGLE)
        } else {
          wb(sockets,UNDEFINED_IDENTIFIER)
        }
      } else stop(paste("Unknown data structure:",dataStructure))
    } else if ( cmd == GET ) {
      if ( debug ) cat("R DEBUG: Got GET\n")
      identifier <- rc(sockets)
      # A failed lookup yields the condition object, detected below via inherits().
      value <- tryCatch(get(identifier,envir=workspace),error=function(e) e)
      if ( is.null(value) ) {
        wb(sockets,NULLTYPE)
      } else if ( inherits(value,"error") ) {
        wb(sockets,UNDEFINED_IDENTIFIER)
      } else if ( ! is.atomic(value) ) {
        # This is where code for lists, data.frames, S3, and S4 classes must go
        wb(sockets,UNSUPPORTED_STRUCTURE)
      } else if ( is.vector(value) ) {
        type <- checkType(value)
        # A length-one vector is sent as ATOMIC unless "length.one.as.vector" is set.
        if ( ( length(value) == 1 ) && ( ! get("length.one.as.vector",envir=sockets[['env']]) ) ) {
          wb(sockets,ATOMIC)
        } else {
          wb(sockets,VECTOR)
          wb(sockets,length(value))
        }
        wb(sockets,type)
        if ( type == STRING ) {
          if ( length(value) > 0 ) for ( i in 1:length(value) ) wc(sockets,value[i])
        } else {
          if ( type == BOOLEAN ) wb(sockets,as.integer(value))
          # else if (type == DATE) wb(sockets,as.integer(value))
          else wb(sockets,value)
        }
      } else if ( is.matrix(value) ) {
        type <- checkType(value)
        wb(sockets,MATRIX)
        wb(sockets,dim(value))
        wb(sockets,type)
        # Rows are written one at a time, matching the byrow=TRUE read in SET.
        if ( nrow(value) > 0 ) for ( i in 1:nrow(value) ) {
          if ( type == STRING ) {
            if ( ncol(value) > 0 ) for ( j in 1:ncol(value) ) wc(sockets,value[i,j])
          }
          else if ( type == BOOLEAN ) wb(sockets,as.integer(value[i,]))
          # else if (type == DATE) wb(sockets, as.integer(value[i,]))
          else wb(sockets,value[i,])
        }
      } else {
        wb(sockets,UNSUPPORTED_STRUCTURE)
      }
    } else if ( cmd == GET_REFERENCE ) {
      if ( debug ) cat("R DEBUG: Got GET_REFERENCE\n")
      identifier <- rc(sockets)
      value <- tryCatch(get(identifier,envir=workspace),error=function(e) e)
      if ( inherits(value,"error") ) {
        wb(sockets,UNDEFINED_IDENTIFIER)
      } else {
        wb(sockets,REFERENCE)
        wc(sockets,new.reference(value,workspace$.))
      }
    } else if ( cmd == GC ) {
      if ( debug ) cat("R DEBUG: Got GC\n")
      # Drop every stored reference by replacing the reference environment.
      workspace$. <- new.env(parent=workspace)
    } else stop(paste("Unknown command:",cmd))
    flush(sockets[['socketIn']])
  }
}
# Assign `value` into element `i` of the workspace object named `x`.
#
# sockets: list whose 'workspace' entry is the environment holding `x`; it is
#          also passed to wb/wc (defined elsewhere) to report the outcome.
# x:       name of the target object, as a string.
# i:       index expression, as text, placed between the brackets.
# value:   value to store.
# single:  TRUE to use `[ ]`, FALSE to use `[[ ]]`.
#
# The value is parked in the workspace under the temporary name
# ".rzeppelin.set.value" and the assignment `x[i] <- .rzeppelin.set.value` is
# built as text and evaluated there. On failure ERROR plus the captured output
# and error message are written; on success OK is written.
# Returns `value` invisibly.
#
# Fix over the previous version: the cleanup removed ".reppelin.set.value"
# (misspelled), so the temporary was never deleted from the workspace and
# every call raised an "object not found" warning.
subassign <- function(sockets,x,i,value,single=TRUE) {
  workspace <- sockets[['workspace']]
  assign(".rzeppelin.set.value",value,envir=workspace)
  brackets <- if ( single ) c("[","]") else c("[[","]]")
  output <- capture.output(result <- try(eval(parse(text=paste0(x,brackets[1],i,brackets[2]," <- .rzeppelin.set.value")),envir=workspace)))
  if ( inherits(result,"try-error") ) {
    wb(sockets,ERROR)
    output <- paste(paste(output,collapse="\n"),paste(attr(result,"condition")$message,collapse="\n"),sep="\n")
    wc(sockets,output)
  } else {
    wb(sockets,OK)
  }
  # Remove the temporary so it does not linger in the user's workspace.
  rm(".rzeppelin.set.value",envir=workspace)
  invisible(value)
}
# Store `value` in `envir` under a freshly generated random name.
#
# The name is one random letter followed by seven random characters drawn
# (with replacement) from `alphabet`; candidates are redrawn until one is
# found that exists() does not already resolve from `envir`.
# Returns the chosen name.
new.reference <- function(value,envir) {
  repeat {
    first <- sample(lEtTeRs,1)
    rest <- paste0(sample(alphabet,7,replace=TRUE),collapse="")
    candidate <- paste0(first,rest)
    if ( ! exists(candidate,envir=envir) ) break
  }
  assign(candidate,value,envir=envir)
  candidate
}
# Connect to the two ports advertised in `portsFilename` and build the
# connection bundle used by the rest of the package.
#
# portsFilename: path of a file from which two port numbers are read.
# debug:         logical; stored as "debug" in the bundle's env and enables
#                the diagnostic message below.
# timeout:       seconds to wait for the ports file before giving up.
#
# Returns a list of class "ScalaInterpreter" with entries socketIn, socketOut,
# env, workspace and functionCache. Stops with an error if the ports file does
# not appear in time or if the first status value read from the peer is not OK.
# rb/wc are defined elsewhere.
# NOTE(review): from their use here rb appears to read an integer from the
# peer and wc to write a string -- confirm.
#
# Fix over the previous version: the ports file is only accepted once BOTH
# port numbers have been read. Previously any non-empty read was returned, so
# a partially written file produced an NA second port and a failed connection.
# The no-op `delay <- 1 * delay` was dropped; the polling interval stays 0.1s.
newSockets <- function (portsFilename, debug, timeout)
{
  getPortNumbers <- function() {
    delay <- 0.1
    start <- proc.time()[3]
    while (TRUE) {
      if ((proc.time()[3] - start) > timeout)
        stop("Timed out waiting for Scala to start.")
      Sys.sleep(delay)
      if (file.exists(portsFilename)) {
        line <- scan(portsFilename, n = 2, what = character(0),
                     quiet = TRUE)
        # Both ports are required below (ports[1] and ports[2]).
        if (length(line) == 2)
          return(as.numeric(line))
      }
    }
  }
  ports <- getPortNumbers()
  file.remove(portsFilename)
  if (debug)
    cat("R DEBUG: Trying to connect to port:", paste(ports,
                                                     collapse = ","), "\n")
  # Blocking connections with a 2678400-second (31-day) timeout.
  socketConnectionIn <- socketConnection(port = ports[1], blocking = TRUE,
                                         open = "ab", timeout = 2678400)
  socketConnectionOut <- socketConnection(port = ports[2],
                                          blocking = TRUE, open = "rb", timeout = 2678400)
  functionCache <- new.env()
  env <- new.env()
  assign("open", TRUE, envir = env)
  assign("debug", debug, envir = env)
  assign("length.one.as.vector", FALSE, envir = env)
  workspace <- new.env()
  # workspace$. holds values stored by reference (see new.reference usage in rServe).
  workspace$. <- new.env(parent = workspace)
  result <- list(socketIn = socketConnectionIn, socketOut = socketConnectionOut,
                 env = env, workspace = workspace, functionCache = functionCache)
  class(result) <- "ScalaInterpreter"
  # Handshake: expect OK from the peer, then send this package's version.
  status <- rb(result, integer(0))
  if ((length(status) == 0) || (status != OK))
    stop("Error instantiating interpreter.")
  wc(result, toString(packageVersion("rzeppelin")))
  flush(result[["socketIn"]])
  result
}

View file

@ -0,0 +1,95 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Environment that .z.valuate passes to evaluate() as `envir`.
.zeppenv <- new.env()
# Output handler for evaluate(): its `value` function decides what is returned for each
# evaluated value.
.z.ohandler = evaluate:::new_output_handler(
  value = function(x) {
    classes <- class(x)
    # Data frames and objects carrying the "html" class are returned unchanged.
    if (is.data.frame(x) || "html" %in% classes) return(x)
    # `&` (not `&&`) so that both packages are always attempted.
    widgetSupport <- require("htmltools") & require("knitr")
    if (widgetSupport && "htmlwidget" %in% classes) return(.z.show.htmlwidget(x))
    # S4 objects go through show(); everything else through repr when it is available.
    if (isS4(x)) show(x)
    else if (require("repr")) repr:::repr(x)
    else x
  }
)
# Wrapper for evaluate(): evaluates `input` in .zeppenv using .z.ohandler as the output
# handler, with debug off and stop_on_error = 0.
.z.valuate <- function(input) {
  evaluate:::evaluate(
    input = input,
    stop_on_error = 0,
    debug = FALSE,
    output_handler = .z.ohandler,
    envir = .zeppenv
  )
}
# Converts a table to the tab-separated text needed for display in Zeppelin.
#
# `i` is written with write.table(): a header row, no row names, tab separators and no
# quoting. Returns a character vector with one element per output line.
#
# Note: textConnection() is called with its default `local = FALSE`, so the capture
# variable `.zdfout` is created in the global environment, exactly as before.
.z.table <- function(i) {
  .zdfoutcon <- textConnection(".zdfout", open="w")
  # Close the connection even when write.table() fails; previously an error left the
  # connection open.
  tryCatch(
    write.table(i,
                col.names=TRUE, row.names=FALSE, sep="\t",
                eol="\n", quote = FALSE, file = .zdfoutcon),
    finally = close(.zdfoutcon)
  )
  .zdfout
}
# Produces completions for the text `buf` with the cursor at position `cursor`.
# Drives the unexported completion helpers in the utils package; the calls share state
# inside utils, so their order matters.
.z.completion <- function(buf, cursor) {
# Hand the line and the cursor position to the completion machinery.
utils:::.assignLinebuffer(buf)
utils:::.assignEnd(cursor)
# Work out the token under the cursor and complete it.
utils:::.guessTokenFromLine()
utils:::.completeToken()
# The value of this last call is what the function returns.
utils:::.retrieveCompletions()
}
# Calls setProgress on .rContext with `progress` limited to the range [0, 100].
.z.setProgress <- function(progress) {
  # Clamp rather than `progress %% 100`: the modulo turned a finished value of 100
  # back into 0 (and wrapped negative values to the top of the range).
  bounded <- max(0, min(100, progress))
  SparkR:::callJMethod(.rContext, "setProgress", bounded)
}
# Calls incrementProgress on .rContext with `increment` (1 by default).
.z.incrementProgress <- function(increment = 1) {
  SparkR:::callJMethod(.rContext, "incrementProgress", increment)
}
# Calls the "input" method of .zeppelinContext with `name` and returns its result.
.z.input <- function(name) {
  SparkR:::callJMethod(.zeppelinContext, "input", name)
}
# Fetches the object stored under `name` through RStatics.getZ. When RStatics.testRDD
# reports TRUE for that name, the object is wrapped with SparkR's RDD constructor.
.z.get <- function(name) {
  statics <- "org.apache.zeppelin.rinterpreter.RStatics"
  isRDD <- SparkR:::callJStatic(statics, "testRDD", name)
  obj <- SparkR:::callJStatic(statics, "getZ", name)
  if (!isRDD) return(obj)
  SparkR:::RDD(obj)
}
# Stores `object` under `name` through RStatics.putZ. An object whose class vector
# contains "RDD" is replaced by the result of SparkR's getJRDD first.
.z.put <- function(name, object) {
  isRDD <- "RDD" %in% class(object)
  payload <- if (isRDD) SparkR:::getJRDD(object) else object
  SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics", "putZ", name, payload)
}
# Text representation of `x`: repr's output when the repr package can be loaded,
# otherwise toString(x).
.z.repr <- function(x) {
  if (!require(repr)) return(toString(x))
  repr:::repr(x)
}
# Builds a progress reporter as a list of three callbacks:
#   init(x)  resets progress to 0,
#   step()   advances progress by the default increment,
#   term()   does nothing.
# NOTE(review): the init/step/term shape looks like a plyr progress bar; confirm usage.
progress_zeppelin <- function(...) {
  list(
    init = function(x) .z.setProgress(0),
    # Call the function: the previous code returned `.z.incrementProgress` itself,
    # so stepping never advanced the progress.
    step = function() .z.incrementProgress(),
    term = function() {}
  )
}

View file

@ -0,0 +1,123 @@
# Creates a state environment `E` in the enclosing environment of the caller's frame.
# `E` is a child of that environment and starts with `initialized = FALSE` and
# `pkgname` set to the argument. Returns NULL invisibly.
rzeppelinPackage <- function(pkgname) {
  host <- parent.env(parent.frame())
  state <- new.env(parent=host)
  assign("initialized",FALSE,envir=state)
  assign("pkgname",pkgname,envir=state)
  assign("E",state,envir=host)
  invisible()
}
# Private
# Returns the type constant (INTEGER, DOUBLE, BOOLEAN, STRING or DATE) matching `x`;
# stops with "Unsupported data type." when no test matches.
# NOTE(review): R stores Date objects as doubles, so they match is.double() before the
# is.date() test is reached. is.date() is not a base R function; presumably it is
# defined elsewhere in the package. Confirm both.
checkType <- function(x) {
  if ( is.integer(x) ) return(INTEGER)
  if ( is.double(x) ) return(DOUBLE)
  if ( is.logical(x) ) return(BOOLEAN)
  if ( is.character(x) ) return(STRING)
  if ( is.date(x) ) return(DATE)
  stop("Unsupported data type.")
}
# Returns the type name ("Int", "Double", "Boolean", "String" or "Date") matching `x`;
# stops with "Unsupported data type." when no test matches.
# NOTE(review): R stores Date objects as doubles, so they match is.double() before the
# is.date() test is reached. is.date() is not a base R function; presumably it is
# defined elsewhere in the package. Confirm both.
checkType2 <- function(x) {
  if ( is.integer(x) ) return("Int")
  if ( is.double(x) ) return("Double")
  if ( is.logical(x) ) return("Boolean")
  if ( is.character(x) ) return("String")
  if ( is.date(x) ) return("Date")
  stop("Unsupported data type.")
}
# Generates the R statements (as a character vector) that coerce the variable named `x`
# to the type named `t` and then pass it to intpSet().
#
# `t` is compared against the known scalar, Array[...] and Array[Array[...]] names; any
# other value is treated as a reference and gets no coercion statements. For known
# types the coercion is wrapped in a guard that skips ScalaInterpreterReference objects.
convert <- function(x,t) {
  spec <- function(shape,mode,loav) list(shape=shape,mode=mode,loav=loav)
  kind <- if ( t == "Int" ) {
    spec("atomic","integer",FALSE)
  } else if ( t == "Double" ) {
    spec("atomic","double",FALSE)
  } else if ( t == "Boolean" ) {
    spec("atomic","logical",FALSE)
  } else if ( t == "String" ) {
    spec("atomic","character",FALSE)
  } else if ( t == "Array[Int]" ) {
    spec("vector","integer",TRUE)
  } else if ( t == "Array[Double]" ) {
    spec("vector","double",TRUE)
  } else if ( t == "Array[Boolean]" ) {
    spec("vector","logical",TRUE)
  } else if ( t == "Array[String]" ) {
    spec("vector","character",TRUE)
  } else if ( t == "Array[Array[Int]]" ) {
    spec("matrix","integer",TRUE)
  } else if ( t == "Array[Array[Double]]" ) {
    spec("matrix","double",TRUE)
  } else if ( t == "Array[Array[Boolean]]" ) {
    spec("matrix","logical",TRUE)
  } else if ( t == "Array[Array[String]]" ) {
    spec("matrix","character",TRUE)
  } else {
    spec("reference","reference",FALSE)
  }
  # One coercion template per shape; "reference" has none.
  templates <- c(atomic="%s <- as.vector(%s)[1]",
                 vector="%s <- as.vector(%s)",
                 matrix="%s <- as.matrix(%s)")
  body <- character(0)
  if ( kind$shape %in% names(templates) ) {
    body <- c(body,sprintf(templates[[kind$shape]],x,x))
  }
  if ( kind$mode != "reference" ) {
    body <- c(body,sprintf("storage.mode(%s) <- '%s'",x,kind$mode))
  }
  if ( length(body) != 0 ) {
    guard <- sprintf("if ( ! inherits(%s,'ScalaInterpreterReference') ) {",x)
    body <- c(guard,paste0(" ",body),"}")
  }
  c(body,sprintf("intpSet(interpreter,'.',%s,length.one.as.vector=%s,quiet=TRUE)",x,kind$loav))
}
# Checks the "open" flag stored in the interpreter's `env` and stops when the
# connection has been closed.
cc <- function(c) {
  isOpen <- get("open",envir=c[['env']])
  if ( isOpen ) return(invisible(NULL))
  stop("The connection has already been closed.")
}
# Writes `v` in binary, big-endian form to the interpreter's `socketIn` connection.
wb <- function(c,v) {
  sink <- c[['socketIn']]
  writeBin(v,sink,endian="big")
}
# Writes the string `v` to `socketIn` as a length-prefixed message: first the byte
# count (through wb), then the raw bytes themselves.
wc <- function(c,v) {
  payload <- charToRaw(v)
  size <- length(payload)
  wb(c,size)
  writeBin(payload,c[['socketIn']],endian="big",useBytes=TRUE)
}
# Reads `n` big-endian values of the type given by the prototype `v` from `socketOut`.
# Sockets should be blocking, but that contract is not fulfilled when other code uses
# functions from the parallel library, so a short read is completed by reading the
# missing values recursively.
rb <- function(c,v,n=1L) {
  got <- readBin(c[['socketOut']],what=v,n=n,endian="big")
  if ( length(got) != n ) {
    return(c(got,rb(c,v,n-length(got))))
  }
  got
}
# Reads one length-prefixed string from `socketOut`: an integer byte count (through rb)
# followed by that many raw bytes, which are returned as a character string.
# Sockets should be blocking, but that contract is not fulfilled when other code uses
# functions from the parallel library, so short reads are retried until complete.
rc <- function(c) {
  nbytes <- rb(c,integer(0))
  payload <- raw(0)
  while ( length(payload) != nbytes ) {
    # Ask only for the bytes still missing. Requesting the full count again after a
    # short read could consume the start of the next message and overshoot, so the
    # loop would never terminate.
    remaining <- nbytes - length(payload)
    payload <- c(payload,readBin(c[['socketOut']],what="raw",n=remaining,endian="big"))
  }
  rawToChar(payload)
}

9
r/R/rzeppelin/R/zzz.R Normal file
View file

@ -0,0 +1,9 @@
# Maps each type constant to a zero-length prototype vector.
# NOTE(review): presumably these prototypes are passed as `what` when reading values
# of that type from the socket; confirm against the callers.
typeMap <- list()
typeMap[[INTEGER]] <- integer(0)
typeMap[[DOUBLE]] <- double(0)
# BOOLEAN uses an integer prototype, not a logical one.
# NOTE(review): presumably booleans travel as integers on the wire; confirm.
typeMap[[BOOLEAN]] <- integer(0)
typeMap[[STRING]] <- character(0)
# Package attach hook; the body is empty, so attaching the package does nothing here.
.onAttach <- function(libname, pkgname) {
}

282
r/_tools/checkstyle.xml Normal file
View file

@ -0,0 +1,282 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- This is a checkstyle configuration file. For descriptions of what the
following rules do, please see the checkstyle configuration page at http://checkstyle.sourceforge.net/config.html -->
<module name="Checker">
<module name="FileTabCharacter">
<!-- Checks that there are no tab characters in the file. -->
</module>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf" />
</module>
<module name="RegexpSingleline">
<!-- Checks that FIXME is not used in comments. TODO is preferred. -->
<property name="format" value="((//.*)|(\*.*))FIXME" />
<property name="message"
value='TODO is preferred to FIXME. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<module name="RegexpSingleline">
<!-- Checks that TODOs are named. (Actually, just that they are followed
by an open paren.) -->
<property name="format" value="((//.*)|(\*.*))TODO[^(]" />
<property name="message"
value='All TODOs should be named. e.g. "TODO(johndoe): Refactor when v2 is released."' />
</module>
<!-- <module name="JavadocPackage"> - Checks that each Java package has
a Javadoc file used for commenting. Only allows a package-info.java, not
package.html. </module> -->
<!-- All Java AST specific tests live under TreeWalker module. -->
<module name="TreeWalker">
<!-- IMPORT CHECKS -->
<module name="RedundantImport">
<!-- Checks for redundant import statements. -->
<property name="severity" value="error" />
</module>
<!-- <module name="ImportOrder"> Checks for out of order import statements
<property name="severity" value="warning"/> <property name="groups" value="com.google,android,junit,net,org,java,javax"/>
This ensures that static imports go first <property name="option" value="top"/>
<property name="tokens" value="STATIC_IMPORT, IMPORT"/> </module> -->
<!-- JAVADOC CHECKS -->
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocMethod">
<property name="scope" value="protected" />
<property name="severity" value="warning" />
<property name="allowMissingJavadoc" value="true" />
<property name="allowMissingParamTags" value="true" />
<property name="allowMissingReturnTag" value="true" />
<property name="allowMissingThrowsTags" value="true" />
<property name="allowThrowsTagsForSubclasses" value="true" />
<property name="allowUndeclaredRTE" value="true" />
</module>
<module name="JavadocType">
<property name="scope" value="protected" />
<property name="severity" value="error" />
</module>
<module name="JavadocStyle">
<property name="severity" value="warning" />
</module>
<!-- NAMING CHECKS -->
<!-- Item 38 - Adhere to generally accepted naming conventions -->
<module name="PackageName">
<!-- Validates identifiers for package names against the supplied expression. -->
<!-- Here the default checkstyle rule restricts package name parts to
seven characters, this is not in line with common practice at Google. -->
<property name="format" value="^[a-z]+(\.[a-z][a-z0-9]{1,})*$" />
<property name="severity" value="warning" />
</module>
<module name="TypeNameCheck">
<!-- Validates type (class and interface) names against the expression "^[A-Z][a-zA-Z0-9]*$". -->
<metadata name="altname" value="TypeName" />
<property name="severity" value="warning" />
</module>
<module name="ConstantNameCheck">
<!-- Validates non-private, static, final fields against the supplied
expression "^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$". -->
<metadata name="altname" value="ConstantName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="false" />
<property name="format" value="^([A-Z][A-Z0-9]*(_[A-Z0-9]+)*|FLAG_.*)$" />
<message key="name.invalidPattern"
value="Variable ''{0}'' should be in ALL_CAPS (if it is a constant) or be private (otherwise)." />
<property name="severity" value="warning" />
</module>
<module name="StaticVariableNameCheck">
<!-- Validates static, non-final fields against the supplied expression
"^[a-z][a-zA-Z0-9]*_?$". -->
<metadata name="altname" value="StaticVariableName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="true" />
<property name="format" value="^[a-z][a-zA-Z0-9]*_?$" />
<property name="severity" value="warning" />
</module>
<module name="MemberNameCheck">
<!-- Validates non-static members against the supplied expression. -->
<metadata name="altname" value="MemberName" />
<property name="applyToPublic" value="true" />
<property name="applyToProtected" value="true" />
<property name="applyToPackage" value="true" />
<property name="applyToPrivate" value="true" />
<property name="format" value="^[a-z][a-zA-Z0-9]*$" />
<property name="severity" value="warning" />
</module>
<module name="MethodNameCheck">
<!-- Validates identifiers for method names. -->
<metadata name="altname" value="MethodName" />
<property name="format" value="^[a-z][a-zA-Z0-9]*(_[a-zA-Z0-9]+)*$" />
<property name="severity" value="warning" />
</module>
<module name="ParameterName">
<!-- Validates identifiers for method parameters against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<module name="LocalFinalVariableName">
<!-- Validates identifiers for local final variables against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<module name="LocalVariableName">
<!-- Validates identifiers for local variables against the expression
"^[a-z][a-zA-Z0-9]*$". -->
<property name="severity" value="warning" />
</module>
<!-- LENGTH and CODING CHECKS -->
<module name="LineLength">
<!-- Checks if a line is too long. -->
<property name="max"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.max}"
default="100" />
<property name="severity" value="error" />
<!-- The default ignore pattern exempts the following elements: - import
statements - long URLs inside comments -->
<property name="ignorePattern"
value="${com.puppycrawl.tools.checkstyle.checks.sizes.LineLength.ignorePattern}"
default="^(package .*;\s*)|(import .*;\s*)|( *\* *https?://.*)$" />
</module>
<module name="LeftCurly">
<!-- Checks for placement of the left curly brace ('{'). -->
<property name="severity" value="warning" />
</module>
<module name="RightCurly">
<!-- Checks right curlies on CATCH, ELSE, and TRY blocks are on the same
line. e.g., the following example is fine:
<pre>
  if {
    ...
  } else
</pre>
-->
<!-- This next example is not fine:
<pre>
  if {
    ...
  }
  else
</pre>
-->
<property name="option" value="same" />
<property name="severity" value="warning" />
</module>
<!-- Checks for braces around if and else blocks -->
<module name="NeedBraces">
<property name="severity" value="warning" />
<property name="tokens"
value="LITERAL_IF, LITERAL_ELSE, LITERAL_FOR, LITERAL_WHILE, LITERAL_DO" />
</module>
<module name="UpperEll">
<!-- Checks that long constants are defined with an upper ell. -->
<property name="severity" value="error" />
</module>
<module name="FallThrough">
<!-- Warn about falling through to the next case statement. Similar to
javac -Xlint:fallthrough, but the check is suppressed if a single-line comment
on the last non-blank line preceding the fallen-into case contains 'fall
through' (or some other variants which we don't publicize to promote consistency). -->
<property name="reliefPattern"
value="fall through|Fall through|fallthru|Fallthru|falls through|Falls through|fallthrough|Fallthrough|No break|NO break|no break|continue on" />
<property name="severity" value="error" />
</module>
<!-- MODIFIERS CHECKS -->
<module name="ModifierOrder">
<!-- Warn if modifier order is inconsistent with JLS3 8.1.1, 8.3.1, and
8.4.3. The prescribed order is: public, protected, private, abstract, static,
final, transient, volatile, synchronized, native, strictfp -->
</module>
<!-- WHITESPACE CHECKS -->
<module name="WhitespaceAround">
<!-- Checks that various tokens are surrounded by whitespace. This includes
most binary operators and keywords followed by regular or curly braces. -->
<property name="tokens"
value="ASSIGN, BAND, BAND_ASSIGN, BOR,
BOR_ASSIGN, BSR, BSR_ASSIGN, BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN,
EQUAL, GE, GT, LAND, LE, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, LOR, LT, MINUS,
MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL, PLUS, PLUS_ASSIGN, QUESTION,
SL, SL_ASSIGN, SR_ASSIGN, STAR, STAR_ASSIGN" />
<property name="severity" value="error" />
</module>
<module name="WhitespaceAfter">
<!-- Checks that commas, semicolons and typecasts are followed by whitespace. -->
<property name="tokens" value="COMMA, SEMI, TYPECAST" />
</module>
<module name="NoWhitespaceAfter">
<!-- Checks that there is no whitespace after various unary operators.
Linebreaks are allowed. -->
<property name="tokens"
value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS,
UNARY_PLUS" />
<property name="allowLineBreaks" value="true" />
<property name="severity" value="error" />
</module>
<module name="NoWhitespaceBefore">
<!-- Checks that there is no whitespace before semicolons, dots and postfix
increment/decrement operators. Linebreaks are allowed. -->
<property name="tokens" value="SEMI, DOT, POST_DEC, POST_INC" />
<property name="allowLineBreaks" value="true" />
<property name="severity" value="error" />
</module>
<module name="ParenPad">
<!-- Checks that there is no whitespace before close parens or after open
parens. -->
<property name="severity" value="warning" />
</module>
<module name="Indentation">
<!-- Checks code indentation -->
<property name="basicOffset" value="2" />
</module>
</module>
</module>

146
r/_tools/scalastyle.xml Normal file
View file

@ -0,0 +1,146 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!-- NOTE: This was taken and adapted from Apache Spark. -->
<!-- If you wish to turn off checking for a section of code, you can put a comment in the source
before and after the section, with the following syntax: -->
<!-- // scalastyle:off -->
<!-- ... -->
<!-- // naughty stuff -->
<!-- ... -->
<!-- // scalastyle:on -->
<scalastyle>
<name>Scalastyle standard configuration</name>
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
<!-- <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxFileLength"><![CDATA[800]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
<parameters>
<parameter name="header"><![CDATA[/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
<parameter name="tabSize"><![CDATA[2]]></parameter>
<parameter name="ignoreImports">true</parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="false"></check>
<!-- <check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters>
<parameter name="maxParameters"><![CDATA[10]]></parameter>
</parameters>
</check>
<!-- <check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
<!-- <check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check> -->
<!-- <check level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="regex"><![CDATA[println]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxTypes"><![CDATA[30]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maximum"><![CDATA[10]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
<parameters>
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
<parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
</parameters>
</check>
<!-- <check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxLength"><![CDATA[50]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true"> -->
<!-- <parameters> -->
<!-- <parameter name="maxMethods"><![CDATA[30]]></parameter> -->
<!-- </parameters> -->
<!-- </check> -->
<!-- <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check> -->
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
</scalastyle>

396
r/pom.xml Normal file
View file

@ -0,0 +1,396 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.6.0-incubating-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<artifactId>zeppelin-zrinterpreter</artifactId>
<packaging>jar</packaging>
<name>Zeppelin: R Interpreter</name>
<description>R Interpreter for Zeppelin</description>
<properties>
<script.extension>.sh</script.extension>
<path.separator>/</path.separator>
<spark.version>1.4.1</spark.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
</properties>
<developers>
<developer>
<id>amos</id>
<name>Amos Elberg</name>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-spark-dependencies</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-spark</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>2.2.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<version>1.12.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<!-- jsoup HTML parser library @ http://jsoup.org/ -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>[1.8.0,)</version>
<scope>compile</scope>
</dependency>
<!-- spark-core_${scala.binary.version} is already declared earlier in this
dependencies section with the same version and scope; the duplicate declaration
that stood here was removed because Maven requires dependency declarations to be unique. -->
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<scope>test</scope>
<version>3.2.10</version>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>3.2.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>3.2.9</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>lib/**</exclude>
<exclude>**/r/lib/**</exclude>
<!--The following files are subject to the BSD-license or variants,
as shown in the file headers-->
<exclude>**/R/rzeppelin/R/globals.R</exclude>
<exclude>**/R/rzeppelin/R/common.R</exclude>
<exclude>**/R/rzeppelin/R/protocol.R</exclude>
<exclude>**/R/rzeppelin/R/rServer.R</exclude>
<exclude>**/R/rzeppelin/R/scalaInterpreter.R</exclude>
<exclude>**/R/rzeppelin/R/zzz.R</exclude>
<exclude>**/scala/Console.scala</exclude>
<exclude>**/zeppelin/rinterpreter/rscala/Package.scala</exclude>
<exclude>**/zeppelin/rinterpreter/rscala/RClient.scala</exclude>
<!--End of files subject to BSD-license.-->
<exclude>**/.idea/</exclude>
<!--The following files are mechanical-->
<exclude>**/R/rzeppelin/DESCRIPTION</exclude>
<exclude>**/R/rzeppelin/NAMESPACE</exclude>
<!--End of mechanical R files-->
<exclude>**/*.iml</exclude>
<exclude>.gitignore</exclude>
<exclude>**/.settings/*</exclude>
<exclude>**/.classpath</exclude>
<exclude>**/.project</exclude>
<exclude>**/target/**</exclude>
<exclude>**/derby.log</exclude>
<exclude>**/metastore_db/</exclude>
<exclude>**/README.md</exclude>
<exclude>**/dependency-reduced-pom.xml</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.17</version>
<configuration>
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
<skipTests>true</skipTests>
</configuration>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>1.0</version>
<configuration>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>testoutput.txt</filereports>
<parallel>false</parallel>
<forkMode>once</forkMode>
<systemProperties>
<scala.usejavacp>true</scala.usejavacp>
</systemProperties>
</configuration>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
</transformers>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Deploy datanucleus jars to the interpreter/spark directory -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/spark</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<!-- Plugin to compile Scala code -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<phase>compile</phase>
</execution>
<execution>
<id>test-compile</id>
<goals>
<goal>testCompile</goal>
</goals>
<phase>test-compile</phase>
</execution>
<execution>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
<configuration>
<executable>R${path.separator}install-dev${script.extension}</executable>
</configuration>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.6.1</version>
<configuration>
<filesets>
<fileset>
<directory>${project.build.directory}/../../R</directory>
<includes>
<include>**/lib/**</include>
</includes>
</fileset>
<fileset>
<directory>${project.build.directory}/../../interpreter/spark</directory>
<includes>
<include>**/zeppelin-zr*.jar</include>
</includes>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.scheduler.Scheduler;
import java.net.URL;
import java.util.List;
import java.util.Properties;
/**
* KnitR is a simple wrapper around KnitRInterpreter to handle that Zeppelin prefers
* to load interpreters through classes defined in Java with static methods that run
* when the class is loaded.
*
* Every Interpreter method overridden here forwards to the wrapped KnitRInterpreter held
* in {@code intp}. setProperty and setInterpreterGroup additionally update this wrapper
* through the superclass; all getters return the wrapped interpreter's values.
*/
public class KnitR extends Interpreter implements WrappedInterpreter {
// The wrapped interpreter that does the actual work; assigned once in the constructor.
KnitRInterpreter intp;
// Runs once when this class is loaded: registers the class with Zeppelin, passing the
// strings "knitr" and "spark" together with the properties from RInterpreter.getProps().
static {
Interpreter.register("knitr", "spark", KnitR.class.getName(),
RInterpreter.getProps()
);
}
/**
* Creates the wrapper and the wrapped KnitRInterpreter.
*
* @param property interpreter properties, passed to both the superclass and the wrapped interpreter
* @param startSpark forwarded unchanged to the KnitRInterpreter constructor
*/
public KnitR(Properties property, Boolean startSpark) {
super(property);
intp = new KnitRInterpreter(property, startSpark);
}
/** Same as {@code KnitR(property, true)}. */
public KnitR(Properties property) {
this(property, true);
}
/** Same as {@code KnitR(new Properties())}. */
public KnitR() {
this(new Properties());
}
@Override
public void open() {
intp.open();
}
@Override
public void close() {
intp.close();
}
@Override
public InterpreterResult interpret(String s, InterpreterContext interpreterContext) {
return intp.interpret(s, interpreterContext);
}
@Override
public void cancel(InterpreterContext interpreterContext) {
intp.cancel(interpreterContext);
}
@Override
public FormType getFormType() {
return intp.getFormType();
}
@Override
public int getProgress(InterpreterContext interpreterContext) {
return intp.getProgress(interpreterContext);
}
@Override
public List<String> completion(String s, int i) {
return intp.completion(s, i);
}
/** Returns the wrapped KnitRInterpreter (the WrappedInterpreter contract). */
@Override
public Interpreter getInnerInterpreter() {
return intp;
}
@Override
public Scheduler getScheduler() {
return intp.getScheduler();
}
// Properties are stored on both the wrapper and the wrapped interpreter.
@Override
public void setProperty(Properties property) {
super.setProperty(property);
intp.setProperty(property);
}
// Reads come from the wrapped interpreter, not from this wrapper's own copy.
@Override
public Properties getProperty() {
return intp.getProperty();
}
@Override
public String getProperty(String key) {
return intp.getProperty(key);
}
// The interpreter group is stored on both the wrapper and the wrapped interpreter.
@Override
public void setInterpreterGroup(InterpreterGroup interpreterGroup) {
super.setInterpreterGroup(interpreterGroup);
intp.setInterpreterGroup(interpreterGroup);
}
@Override
public InterpreterGroup getInterpreterGroup() {
return intp.getInterpreterGroup();
}
// Unlike setProperty/setInterpreterGroup, this only forwards; super is not called.
@Override
public void setClassloaderUrls(URL[] classloaderUrls) {
intp.setClassloaderUrls(classloaderUrls);
}
@Override
public URL[] getClassloaderUrls() {
return intp.getClassloaderUrls();
}
}

View file

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.scheduler.Scheduler;
import java.net.URL;
import java.util.List;
import java.util.Properties;
/**
* RRepl is a simple wrapper around RReplInterpreter to handle that Zeppelin prefers
* to load interpreters through classes defined in Java with static methods that run
* when the class is loaded.
*
* Every Interpreter method overridden here forwards to the wrapped RReplInterpreter held
* in {@code intp}. setProperty and setInterpreterGroup additionally update this wrapper
* through the superclass; all getters return the wrapped interpreter's values.
*/
public class RRepl extends Interpreter implements WrappedInterpreter {
// The wrapped interpreter that does the actual work; assigned once in the constructor.
RReplInterpreter intp;
// Runs once when this class is loaded: registers the class with Zeppelin, passing the
// strings "r" and "spark" together with the properties from RInterpreter.getProps().
static {
Interpreter.register("r", "spark", RRepl.class.getName(),
RInterpreter.getProps()
);
}
/**
* Creates the wrapper and the wrapped RReplInterpreter.
*
* @param property interpreter properties, passed to both the superclass and the wrapped interpreter
* @param startSpark forwarded unchanged to the RReplInterpreter constructor
*/
public RRepl(Properties property, Boolean startSpark) {
super(property);
intp = new RReplInterpreter(property, startSpark);
}
/** Same as {@code RRepl(property, true)}. */
public RRepl(Properties property) {
this(property, true);
}
/** Same as {@code RRepl(new Properties())}. */
public RRepl() {
this(new Properties());
}
@Override
public void open() {
intp.open();
}
@Override
public void close() {
intp.close();
}
@Override
public InterpreterResult interpret(String s, InterpreterContext interpreterContext) {
return intp.interpret(s, interpreterContext);
}
@Override
public void cancel(InterpreterContext interpreterContext) {
intp.cancel(interpreterContext);
}
@Override
public FormType getFormType() {
return intp.getFormType();
}
@Override
public int getProgress(InterpreterContext interpreterContext) {
return intp.getProgress(interpreterContext);
}
@Override
public List<String> completion(String s, int i) {
return intp.completion(s, i);
}
/** Returns the wrapped RReplInterpreter (the WrappedInterpreter contract). */
@Override
public Interpreter getInnerInterpreter() {
return intp;
}
@Override
public Scheduler getScheduler() {
return intp.getScheduler();
}
// Properties are stored on both the wrapper and the wrapped interpreter.
@Override
public void setProperty(Properties property) {
super.setProperty(property);
intp.setProperty(property);
}
// Reads come from the wrapped interpreter, not from this wrapper's own copy.
@Override
public Properties getProperty() {
return intp.getProperty();
}
@Override
public String getProperty(String key) {
return intp.getProperty(key);
}
// The interpreter group is stored on both the wrapper and the wrapped interpreter.
@Override
public void setInterpreterGroup(InterpreterGroup interpreterGroup) {
super.setInterpreterGroup(interpreterGroup);
intp.setInterpreterGroup(interpreterGroup);
}
@Override
public InterpreterGroup getInterpreterGroup() {
return intp.getInterpreterGroup();
}
// Unlike setProperty/setInterpreterGroup, this only forwards; super is not called.
@Override
public void setClassloaderUrls(URL[] classloaderUrls) {
intp.setClassloaderUrls(classloaderUrls);
}
@Override
public URL[] getClassloaderUrls() {
return intp.getClassloaderUrls();
}
}

View file

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
The purpose of this class is to provide something for R to call through the backend
to bootstrap.
*/
package org.apache.zeppelin.rinterpreter;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.spark.ZeppelinContext;
/**
 * RStatics provides static class methods that can be accessed through the SparkR bridge.
 *
 * It is a process-wide holder: the setters store a SparkContext, SQLContext, ZeppelinContext
 * and RContext in static fields, and the getters hand them back. All holders start out null.
 * Accessors that must dereference a holder ({@link #getJSC()}, {@link #getZ(String)},
 * {@link #putZ(String, Object)}, {@link #testRDD(String)}) throw an
 * {@link IllegalStateException} naming the missing setter when it has not been called yet,
 * rather than failing with a bare NullPointerException.
 */
public class RStatics {
  private static SparkContext sc = null;
  private static ZeppelinContext z = null;
  private static SQLContext sql = null;
  private static RContext rCon = null;

  /** Stores the SparkContext to hand out and returns the stored value. */
  public static SparkContext setSC(SparkContext newSC) {
    sc = newSC;
    return sc;
  }

  /** Stores the ZeppelinContext to hand out and returns the stored value. */
  public static ZeppelinContext setZ(ZeppelinContext newZ) {
    z = newZ;
    return z;
  }

  /** Stores the SQLContext to hand out and returns the stored value. */
  public static SQLContext setSQL(SQLContext newSQL) {
    sql = newSQL;
    return sql;
  }

  /**
   * Wraps the stored SparkContext in a new JavaSparkContext on every call.
   *
   * @throws IllegalStateException if {@link #setSC(SparkContext)} has not been called
   */
  public static JavaSparkContext getJSC() {
    if (sc == null) {
      throw new IllegalStateException(
          "RStatics.getJSC() was called before a SparkContext was registered with setSC()");
    }
    return new JavaSparkContext(sc);
  }

  /** Returns the stored SparkContext, or null if none has been set. */
  public static SparkContext getSC() {
    return sc;
  }

  /** Returns the stored SQLContext, or null if none has been set. */
  public static SQLContext getSQL() {
    return sql;
  }

  /**
   * Looks up {@code name} in the stored ZeppelinContext.
   *
   * @throws IllegalStateException if {@link #setZ(ZeppelinContext)} has not been called
   */
  public static Object getZ(String name) {
    return requireZ().get(name);
  }

  /**
   * Stores {@code obj} under {@code name} in the stored ZeppelinContext.
   *
   * @throws IllegalStateException if {@link #setZ(ZeppelinContext)} has not been called
   */
  public static void putZ(String name, Object obj) {
    requireZ().put(name, obj);
  }

  /** Returns the stored RContext, or null if none has been set. */
  public static RContext getRCon() {
    return rCon;
  }

  /** Stores the RContext to hand out and returns the stored value. */
  public static RContext setrCon(RContext newrCon) {
    rCon = newrCon;
    return rCon;
  }

  /**
   * Reports whether the object stored under {@code name} in the ZeppelinContext is a JavaRDD.
   *
   * @throws IllegalStateException if {@link #setZ(ZeppelinContext)} has not been called
   */
  public static Boolean testRDD(String name) {
    Object x = requireZ().get(name);
    return (x instanceof org.apache.spark.api.java.JavaRDD);
  }

  /** Returns the stored ZeppelinContext or fails with a message that names the missing step. */
  private static ZeppelinContext requireZ() {
    if (z == null) {
      throw new IllegalStateException(
          "RStatics was used before a ZeppelinContext was registered with setZ()");
    }
    return z;
  }
}

View file

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
With gratitude to Shivaram for advice regarding how to get SparkR talking to an existing SparkContext in Java
*/
package org.apache.spark.api.r
/**
 * Drives an RBackend: remembers the port returned by init() and runs the backend on a
 * dedicated thread named "SparkR backend".
 *
 * The sequence is:
 *  1. Before initializing spark in R, after loading library, Backend goes up and starts
 *     listening. (Note that it is able to execute arbitrary methods, so it can also be used
 *     for the zeppelin context.)
 *  2. Tell SparkR to make a connection to the backend, setting the EXISTING port to the one
 *     in this helper.
 *  3. Track sparkR.init, but where spark/R/pkg/R/sparkR.R calls
 *     org.apache.spark.api.r.RRDD.createSparkContext to get sc, which is then returned as a
 *     jobj link, instead call RBackendHelper.getSC.
 *  3a. Actually the object returned right now is of type JavaSparkContext; this still needs
 *     to be understood.
 *  4. SparkR for the other contexts calls related methods,
 *     org.apache.spark.sql.api.r.SQLUtils.createSQLContext, and
 *     org.apache.spark.sql.hive.HiveContext is just made new, with the jobj reference assigned
 *     to an object. We should track the same pattern as above.
 */
class RBackendHelper(val backend : RBackend) {

  // Port reported by the backend; 0 means init() has not been called yet.
  var port : Int = 0

  // Thread whose body is backend.run(); created here but only started by start().
  val backendThread : Thread = new Thread("SparkR backend") {
    override def run(): Unit = backend.run()
  }

  /** Initializes the backend, records the port it reports, and returns that port. */
  def init() : Int = {
    val assigned = backend.init()
    port = assigned
    port
  }

  /**
   * Starts the backend thread unless it is already alive, and returns it.
   * Throws a RuntimeException when no port has been recorded by init().
   */
  def start() : Thread = {
    if (port == 0) {
      throw new RuntimeException("BackendHelper must be initialized before starting")
    }
    val running = backendThread.isAlive
    if (!running) {
      backendThread.start()
    }
    backendThread
  }

  /** Closes the underlying backend. */
  def close() : Unit = {
    backend.close()
  }
}
// Companion of the RBackendHelper class; its only live member is the apply() factory.
object RBackendHelper {
/*
This function creates a new SparkContext, but does not register it, based on whatever properties are provided.
It is for testing purposes and should never be called.
It is currently commented out, so it is not part of the compiled object.
*/
// def buildSparkContext( props : Properties) : SparkContext = {
// val traversableProps : Traversable[(String, String)] = propertiesAsScalaMap(props)
// val conf = new SparkConf().setAll(traversableProps)
// conf.setIfMissing("spark.master", "local")
// conf.setIfMissing("spark.app.name", "ZeppelinRContext")
// conf.validateSettings()
// new SparkContext(conf)
// }
// Builds a helper around a newly constructed RBackend. The backend is neither initialized
// nor started here; callers do that through init() and start() on the returned helper.
def apply() : RBackendHelper = new RBackendHelper(new RBackend())
}

View file

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
// TODO: Capture the knitr progress bar
import java.util._
import org.apache.zeppelin.interpreter.InterpreterContext
import org.apache.zeppelin.interpreter.InterpreterResult
import org.apache.zeppelin.rinterpreter.rscala.RException
/**
 * Interpreter that runs a paragraph through knitr and returns the generated HTML.
 *
 * The first line of the paragraph text is used as the knitr chunk options; the remaining
 * lines are the chunk body.
 *
 * @param property   interpreter properties, passed on to RInterpreter
 * @param startSpark passed on to RInterpreter
 */
class KnitRInterpreter(property: Properties, startSpark : Boolean = true) extends RInterpreter(property, startSpark) {

  /** Single-argument constructor, equivalent to `new KnitRInterpreter(property, true)`. */
  def this(property : Properties) = {
    this(property, true)
  }

  /**
   * Opens the shared R context, requires the knitr package, and sets the knitr options
   * used for every chunk. Runs while holding the rContext monitor.
   */
  override def open: Unit = {
    logger.trace("Opening knitr")
    rContext.synchronized {
      super.open
      logger.debug("Knitr open, initial commands")
      // fail = true: testRPackage throws when knitr cannot be loaded.
      rContext.testRPackage("knitr", true, true, "Without knitr, the knitr interpreter cannot run.")
      rContext.eval(
        """opts_knit$set(out.format = 'html',
          |results='asis',
          |progress = FALSE,
          |self.contained = TRUE,
          |verbose = FALSE,
          |comment = NA,
          |echo = FALSE,
          |tidy = FALSE)
          | """.stripMargin)
    }
    logger.info("KnitR: Finished initial commands")
  }

  /**
   * Wraps the paragraph in a knitr chunk fence, knits it to HTML inside R and returns the
   * post-processed HTML.
   *
   * @param st      paragraph text; line 1 is the chunk options, the rest is R code
   * @param context interpreter context (not read here)
   * @return SUCCESS with HTML output, or the error result for an RException / other Exception
   */
  def interpret(st: String, context: InterpreterContext): InterpreterResult = try {
    logger.trace("interpreting" + st)
    // need to convert st into an array of Strings within R
    val commandSt : Array[String] = st.split("\n")
    // split() drops trailing empty strings, so a paragraph made only of newlines yields an
    // EMPTY array. head/tail would throw on it; fall back to "no options, empty body".
    val chunkOptions : String = commandSt.headOption.getOrElse("")
    val chunkLine : String = s"```{r $chunkOptions}"
    val chunk : Array[String] = Array(chunkLine) ++: commandSt.drop(1) ++: Array("```")
    // Set, evaluate and read back under one lock so another paragraph cannot overwrite
    // the temporary R variables in between.
    val out: String = rContext.synchronized {
      rContext.set(".zeppknitrinput", chunk)
      rContext.eval(".knitout <- knit2html(text=.zeppknitrinput, envir = rzeppelin:::.zeppenv)")
      rContext.getS0(".knitout")
    }
    new InterpreterResult(InterpreterResult.Code.SUCCESS,
      InterpreterResult.Type.HTML,
      RInterpreter.processHTML(out)
    )
  } catch {
    case r: RException => r.getInterpreterResult(st)
    case e: Exception => new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage())
  }
}

View file

@ -0,0 +1,321 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.io._
import java.nio.file.{Files, Paths}
import java.util.Properties
import org.apache.spark.SparkContext
import org.apache.spark.api.r.RBackendHelper
import org.apache.spark.sql.SQLContext
import org.apache.zeppelin.interpreter._
import org.apache.zeppelin.rinterpreter.rscala.RClient._
import org.apache.zeppelin.rinterpreter.rscala._
import org.apache.zeppelin.scheduler._
import org.apache.zeppelin.spark.{SparkInterpreter, ZeppelinContext}
import org.slf4j._
import scala.collection.JavaConversions._
// TODO: Setup rmr, etc.
// TODO: Stress-test spark. What happens on close? Etc.
/**
 * Client side of one R session: an RClient over the given sockets, plus the state needed to
 * attach that R session to Spark through an RBackendHelper.
 *
 * Instances are created by the companion object's apply(). Most state-changing methods are
 * synchronized on this instance.
 */
private[rinterpreter] class RContext(private val sockets: ScalaSockets,
debug: Boolean) extends RClient(sockets.in, sockets.out, debug) {
private val logger: Logger = RContext.logger
// FIFO scheduler obtained from the SchedulerFactory singleton, keyed by this object's hash code.
lazy val getScheduler: Scheduler = SchedulerFactory.singleton().createOrGetFIFOScheduler(this.hashCode().toString)
// One backend helper per context, constructed eagerly; initialized and started in sparkStartup.
val backend: RBackendHelper = RBackendHelper()
// Spark handles; populated in sparkStartup from the SparkInterpreter.
private var sc: Option[SparkContext] = None
private var sql: Option[SQLContext] = None
private var z: Option[ZeppelinContext] = None
// Cache of testRPackage results, keyed by package name.
val rPkgMatrix = collection.mutable.HashMap[String,Boolean]()
var isOpen: Boolean = false
// NOTE(review): isFresh is never reassigned in this class or in the companion object, so it
// stays true. The companion's apply() tests `isFresh || isOpen`, which therefore always
// succeeds for an existing context. Confirm whether open() was meant to clear it.
private var isFresh : Boolean = true
private var property: Properties = null
// Set to true at the end of a successful sparkStartup; not reset by close().
private[rinterpreter] var sparkRStarted : Boolean = false
override def toString() : String = s"""${super.toString()}
|\t Open: $isOpen Fresh: $isFresh SparkStarted: $sparkRStarted
|\t Progress: $progress
|\t Sockets: ${sockets.toString()}
""".stripMargin
var progress: Int = 0
def getProgress: Int = {
return progress
}
// NOTE(review): the modulo maps 100 to 0 (and keeps the sign of negative inputs); confirm
// that wrapping, rather than clamping to 0..100, is intended.
def setProgress(i: Int) : Unit = {
progress = i % 100
}
def incrementProgress(i: Int) : Unit = {
progress = (progress + i) % 100
}
// handle properties this way so it can be a mutable object shared with the R Interpreters
// The first call adopts the given Properties object itself; later calls merge into it.
def setProperty(properties: Properties): Unit = synchronized {
if (property == null) property = properties
else property.putAll(properties)
}
// Opens the context. Returns immediately only when the context is already open AND SparkR
// has started; otherwise it requires the rzeppelin package (throwing if it is missing),
// starts Spark integration when a SparkInterpreter is supplied, and marks the context open.
// When no SparkInterpreter is supplied, an error is logged and the context still opens.
def open(startSpark : Option[SparkInterpreter]): Unit = synchronized {
if (isOpen && sparkRStarted) {
logger.trace("Reusing rContext.")
return
}
testRPackage("rzeppelin", fail = true, message =
"The rinterpreter cannot run without the rzeppelin package, which was included in your distribution.")
startSpark match {
case Some(x : SparkInterpreter) => {
sparkStartup(x)
}
case _ => logger.error("Could not find a SparkInterpreter")
}
isOpen = true
}
// Connects the R session to the Spark contexts owned by the given SparkInterpreter.
// If SPARK_HOME is unset, logs an error and returns without starting SparkR
// (sparkRStarted stays false). Any Exception raised inside, including the explicit
// RuntimeExceptions below, is rethrown wrapped in a RuntimeException with a generic message.
private def sparkStartup(startSpark : SparkInterpreter): Unit = try {
val sparkHome: String = System.getenv("SPARK_HOME") match {
case null => {
logger.error("SPARK_HOME is not set. The R Interpreter will start without Spark.")
return
}
case y => y
}
// testRPackage looks for SparkR under "$sparkHome/R/lib/" first, then on the default library path.
testRPackage("SparkR", fail = true, path = sparkHome)
if (startSpark.getSparkVersion() == null) throw new RuntimeException("No spark version")
if (!startSpark.getSparkVersion().isSparkRSupported) throw new RuntimeException("SparkR requires Spark 1.4 or later")
sc = Some(startSpark.getSparkContext())
sql = Some(startSpark.getSQLContext())
z = Some(startSpark.getZeppelinContext())
logger.trace("Registered Spark Contexts")
// init() records the backend port that is sent to R below; start() runs the backend thread.
backend.init()
backend.start()
if (!backend.backendThread.isAlive) throw new RuntimeException("SparkR could not startup because the Backend Thread is not alive")
logger.trace("Started Spark Backend")
eval( s"""SparkR:::connectBackend("localhost", ${backend.port})""")
logger.trace("SparkR backend connected")
initializeSparkR(sc.get, sql.get, z.get)
logger.info("Initialized SparkR")
sparkRStarted = true
} catch {
case e: Exception => throw new RuntimeException("""
Could not connect R to Spark. If the stack trace is not clear,
check whether SPARK_HOME is set properly.""", e)
}
// Publishes each context through RStatics on the JVM side, then has R fetch it with
// SparkR:::callJStatic and assign it to the names used below (.sparkRjsc / sc,
// .sparkRHivesc or .sparkRSQLsc / sqlContext, .rContext).
private def initializeSparkR(sc : SparkContext, sql : SQLContext, z : ZeppelinContext) : Unit = synchronized {
logger.trace("Getting a handle to the JavaSparkContext")
eval("assign(\".scStartTime\", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)")
RStatics.setSC(sc)
eval(
"""
|assign(
|".sparkRjsc",
|SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getJSC"),
| envir = SparkR:::.sparkREnv)""".stripMargin)
eval("assign(\"sc\", get(\".sparkRjsc\", envir = SparkR:::.sparkREnv), envir=.GlobalEnv)")
logger.trace("Established SparkR Context")
// The HiveContext case is tested before the plain SQLContext case.
// NOTE(review): presumably because a HiveContext would also match SQLContext; confirm.
val sqlEnvName = sql match {
case null => throw new RuntimeException("Tried to initialize SparkR without setting a SQLContext")
case x : org.apache.spark.sql.hive.HiveContext => ".sparkRHivesc"
case x : SQLContext => ".sparkRSQLsc"
}
RStatics.setSQL(sql)
eval(
s"""
|assign(
|"${sqlEnvName}",
|SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getSQL"),
| envir = SparkR:::.sparkREnv)""".stripMargin)
eval(
s"""
|assign("sqlContext",
|get("$sqlEnvName",
|envir = SparkR:::.sparkREnv),
|envir = .GlobalEnv)
""".stripMargin)
logger.trace("Proving spark")
val proof = evalS1("names(SparkR:::.sparkREnv)")
logger.info("Proof of spark is : " + proof.mkString)
RStatics.setZ(z)
RStatics.setrCon(this)
eval(
s"""
|assign(".rContext",
| SparkR:::callJStatic("org.apache.zeppelin.rinterpreter.RStatics",
| "getRCon"),
| envir = .GlobalEnv)
""".stripMargin
)
}
// Best-effort shutdown in three independent steps (stop SparkR, stop the backend, exit the
// R client); each step logs rather than propagates Exceptions. An RException from
// sparkR.stop() is ignored silently. isOpen is cleared even when the context was not open.
def close(): Unit = synchronized {
if (isOpen) {
if (sparkRStarted) {
try {
eval("SparkR:::sparkR.stop()")
} catch {
case e: RException => {}
case e: Exception => logger.error("Error closing SparkR", e)
}
}
try {
backend.close
// NOTE(review): Thread.stop() is deprecated in the JDK; confirm that backend.close alone
// is not enough to end the backend thread.
backend.backendThread.stop()
} catch {
case e: Exception => logger.error("Error closing RContext ", e)
}
try {
exit()
} catch {
case e: Exception => logger.error("Shutdown error", e)
}
}
isOpen = false
}
// Checks whether an R package can be loaded, caching the answer in rPkgMatrix.
//   pack    - package name
//   fail    - when true, throw an RException if the package cannot be loaded
//   license - when true, the failure message says the package cannot be installed
//             automatically because of its GPL3 license
//   message - extra text appended to the failure message
//   path    - prefix of the first library location tried ("$path/R/lib/")
// Tries require() with lib.loc first, then require() on the default library path.
// A cached result is returned as-is, so `fail` has no effect on a cached negative answer.
private[rinterpreter] def testRPackage(pack: String,
fail: Boolean = false,
license: Boolean = false,
message: String = "",
path : String = ""): Boolean = synchronized {
rPkgMatrix.get(pack) match {
case Some(x: Boolean) => return x
case None => {}
}
evalB0( s"""require('$pack',quietly=TRUE, lib.loc="$path/R/lib/")""") match {
case true => {
rPkgMatrix.put(pack, true)
return (true)
}
case false => {
evalB0(s"require('$pack', quietly=TRUE)") match {
case true => {
rPkgMatrix.put(pack, true)
return true
}
case false => {
rPkgMatrix.put(pack, false)
val failMessage =
s"""The $pack package could not be loaded. """ + {
if (license) "We cannot install it for you because it is published under the GPL3 license."
else ""
} + message
logger.error(failMessage)
if (fail) throw new RException(failMessage)
return (false)
}
}
}
}
}
// Part of the constructor body: logged once every field above has been initialized.
logger.info("RContext Finished Starting")
}
/**
 * Factory and registry for RContext instances, keyed by a caller-supplied id.
 *
 * apply() either returns the registered context for an id or launches a new R process,
 * connects to it over sockets, and registers the resulting context.
 */
object RContext {
  val logger: Logger = LoggerFactory.getLogger(getClass)
  logger.trace("Inside the RContext Object")

  // Registered contexts by id. Accessed only from methods synchronized on this object.
  private val contextMap : collection.mutable.HashMap[String, RContext] = collection.mutable.HashMap[String,RContext]()

  // This function is here to work around inconsistencies in the SparkInterpreter startup sequence
  // that caused testing issues
  /**
   * Closes and unregisters EVERY registered context (not just one id).
   *
   * @return always true
   */
  private[rinterpreter] def resetRcon() : Boolean = synchronized {
    // Iterate over an immutable snapshot: removing entries from a mutable HashMap while
    // traversing that same map is not a supported operation.
    contextMap.toList.foreach { case (id, con) =>
      con.close()
      if (con.isOpen) throw new RuntimeException("Failed to close an existing RContext")
      contextMap.remove(id)
    }
    true
  }

  /**
   * Returns the context registered under `id`, creating one if needed.
   *
   * An existing context is reused when it is fresh or open; an existing context that is
   * neither causes all registered contexts to be reset before a new one is created.
   *
   * Reads "rscala.debug" (default "false") and "rscala.timeout" (default "60") from
   * `property`.
   *
   * @throws RuntimeException if neither R/lib nor ../R/lib exists relative to the working directory
   */
  def apply( property: Properties, id : String): RContext = synchronized {
    contextMap.get(id) match {
      case Some(x : RContext) if x.isFresh || x.isOpen => return(x)
      case Some(x : RContext) => resetRcon()
      case _ => {}
    }
    val debug: Boolean = property.getProperty("rscala.debug", "false").toBoolean
    val timeout: Int = property.getProperty("rscala.timeout", "60").toInt
    import scala.sys.process._
    logger.trace("Creating processIO")
    // The stdin writer is handed over by the process I/O callback, which may run on a
    // different thread than this method. An AtomicReference makes that hand-over visible to
    // the polling loop below; a plain captured var gives no such guarantee.
    val cmdRef = new java.util.concurrent.atomic.AtomicReference[PrintWriter]()
    val command = RClient.defaultRCmd +: RClient.defaultArguments
    val processCmd = Process(command)
    val processIO = new ProcessIO(
      o => {
        cmdRef.set(new PrintWriter(o))
      },
      reader("STDOUT DEBUG: "),
      reader("STDERR DEBUG: "),
      true
    )
    val portsFile = File.createTempFile("rscala-", "")
    val processInstance = processCmd.run(processIO)
    // Find rzeppelin
    val libpath : String = if (Files.exists(Paths.get("R/lib"))) "R/lib"
    else if (Files.exists(Paths.get("../R/lib"))) "../R/lib"
    else throw new RuntimeException("Could not find rzeppelin - it must be in either R/lib or ../R/lib")
    // Literal replacement: String.replaceAll would interpret File.separator as a regular
    // expression, and a lone backslash (the Windows separator) is not a valid pattern.
    val portsPath : String = portsFile.getAbsolutePath.replace(File.separator, "/")
    val rDebug : String = if (debug) "TRUE" else "FALSE"
    val snippet =
      s"""
         |library(lib.loc="$libpath", rzeppelin)
         |rzeppelin:::rServe(rzeppelin:::newSockets('$portsPath',debug=$rDebug,timeout=${timeout}))
         |q(save='no')""".stripMargin
    // Wait until the process I/O callback has supplied the stdin writer.
    while (cmdRef.get() == null) Thread.sleep(100)
    val cmd: PrintWriter = cmdRef.get()
    cmd.println(snippet)
    cmd.flush()
    val sockets = RClient.makeSockets(portsFile.getAbsolutePath)
    sockets.out.writeInt(RClient.Protocol.OK)
    sockets.out.flush()
    val packVersion = RClient.readString(sockets.in)
    // A version mismatch between the R package and this client is only logged, not fatal.
    if (packVersion != org.apache.zeppelin.rinterpreter.rscala.Version) {
      logger.warn("Connection to R started but versions don't match " + packVersion + " " + org.apache.zeppelin.rinterpreter.rscala.Version)
    } else {
      logger.trace("Connected to a new R Session")
    }
    val context = new RContext(sockets, debug)
    context.setProperty(property)
    contextMap.put(id, context)
    context
  }
}

View file

@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.nio.file.{Files, Paths}
import java.util._
import org.apache.commons.codec.binary.{Base64, StringUtils}
import org.apache.zeppelin.interpreter.Interpreter.FormType
import org.apache.zeppelin.interpreter.remote.RemoteInterpreter
import org.apache.zeppelin.interpreter.{InterpreterContext, _}
import org.apache.zeppelin.scheduler.Scheduler
import org.apache.zeppelin.spark.SparkInterpreter
import org.jsoup.Jsoup
import org.jsoup.nodes._
import org.jsoup.select.Elements
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConversions._
import scala.io.Source
/**
 * Common base of the R interpreters. Owns the lazily created RContext shared per interpreter
 * group and implements everything except interpret().
 *
 * @param properties interpreter properties, passed to Interpreter
 * @param startSpark accepted for the subclasses' constructors; it is not read anywhere in
 *                   this class (open() always looks for a SparkInterpreter)
 */
abstract class RInterpreter(properties : Properties, startSpark : Boolean = true) extends Interpreter (properties) {

  protected val logger: Logger = RInterpreter.logger
  logger.trace("Initialising an RInterpreter of class " + this.getClass.getName)

  def getrContext: RContext = rContext

  // Created on first use, keyed by the interpreter group id, so the group must be set
  // before anything touches rContext.
  protected lazy val rContext : RContext = synchronized{ RContext(property, this.getInterpreterGroup().getId()) }

  /**
   * Opens the R context (attaching it to Spark when a SparkInterpreter can be found) and
   * probes the optional R packages, logging what will not work when one is missing.
   * None of the probes below uses fail = true, so a missing package here never throws.
   */
  def open: Unit = rContext.synchronized {
    logger.trace("RInterpreter opening")
    // We leave this as an Option[] because the pattern of nesting SparkInterpreter inside of wrapper interpreters
    // has changed several times, and this allows us to fail more gracefully and handle those changes in one place.
    val intp : Option[SparkInterpreter] = getSparkInterpreter()
    rContext.open(intp)
    // Each message is a margin-delimited multi-line string; stripMargin removes the '|'
    // markers so they do not show up in the logged text.
    rContext.testRPackage("htmltools", message =
      """You can continue
        | without it, but some interactive visualizations will fail.
        | You can install it from cran.""".stripMargin)
    rContext.testRPackage("repr", license = true, message =
      """You can continue
        | without it, but some forms of output from the REPL may not appear properly.""".stripMargin)
    rContext.testRPackage("base64enc", license = true, message =
      """You can continue
        | without it, but the REPL may not show images properly.""".stripMargin)
    rContext.testRPackage("evaluate", license = false, message =
      """
        |The REPL needs this to run. It can be installed from CRAN
        | Thanks to Hadley Wickham and Yihui Xie for graciously making evaluate available under an Apache-compatible
        | license so it can be used with this project.""".stripMargin)
  }

  /** Looks up the SparkInterpreter of the same session, unwrapping wrapper interpreters. */
  def getSparkInterpreter() : Option[SparkInterpreter] =
    getSparkInterpreter(getInterpreterInTheSameSessionByClassName(classOf[SparkInterpreter].getName))

  /**
   * Unwraps `p1` until a SparkInterpreter is found. A LazyOpenInterpreter is opened before
   * its inner interpreter is examined. Anything else (including null) yields None.
   */
  def getSparkInterpreter(p1 : Interpreter) : Option[SparkInterpreter] = p1 match {
    case s : SparkInterpreter => Some[SparkInterpreter](s)
    case lzy : LazyOpenInterpreter => {
      val p = lzy.getInnerInterpreter
      lzy.open()
      return getSparkInterpreter(p)
    }
    case w : WrappedInterpreter => return getSparkInterpreter(w.getInnerInterpreter)
    case _ => None
  }

  def close: Unit = {
    rContext.close
  }

  def getProgress(context :InterpreterContext): Int = rContext.getProgress

  // Cancellation is not implemented; this is a no-op.
  def cancel(context:InterpreterContext) : Unit = {}

  def getFormType: FormType = {
    return FormType.NONE
  }

  override def getScheduler : Scheduler = rContext.getScheduler

  // TODO: completion is disabled because it could not be tested with current Zeppelin code
  def completion(buf :String,cursor : Int) : List[String] = Array[String]("").toList

  // The real completion call, kept reachable for tests within the package.
  // NOTE(review): buf is spliced into R source unescaped; a buffer containing a double quote
  // would break the generated call. Escape it before enabling completion.
  private[rinterpreter] def hiddenCompletion(buf :String,cursor : Int) : List[String] =
    rContext.evalS1(s"""
       |rzeppelin:::.z.completion("$buf", $cursor)
       """.stripMargin).toList
}
/**
 * Shared helpers for the R interpreters: the extra interpreter properties and the HTML
 * post-processing applied to knitr / htmlwidgets output.
 */
object RInterpreter {

  private val logger: Logger = LoggerFactory.getLogger(getClass)
  logger.trace("logging inside the RInterpreter singleton")

  // These are the additional properties we need on top of the ones provided by the spark interpreters
  // NOTE(review): the second property is registered as "rhadooop.streamingjar" (three o's) and
  // its default is looked up with "rhadoop.cmd", the same first argument as the entry above.
  // Both look like copy/paste slips, but the key is externally visible, so they are left
  // unchanged here; confirm against any consumer before renaming.
  lazy val props: Map[String, InterpreterProperty] = new InterpreterPropertyBuilder()
    .add("rhadoop.cmd", SparkInterpreter.getSystemDefault("rhadoop.cmd", "HADOOP_CMD", ""), "Usually /usr/bin/hadoop")
    .add("rhadooop.streamingjar", SparkInterpreter.getSystemDefault("rhadoop.cmd", "HADOOP_STREAMING", ""), "Usually /usr/lib/hadoop/contrib/streaming/hadoop-streaming-<version>.jar")
    .add("rscala.debug", SparkInterpreter.getSystemDefault("rscala.debug","RSCALA_DEBUG", "false"), "Whether to turn on rScala debugging") // TEST: Implemented but not tested
    .add("rscala.timeout", SparkInterpreter.getSystemDefault("rscala.timeout","RSCALA_TIMEOUT", "60"), "Timeout for rScala") // TEST: Implemented but not tested
    .build

  def getProps() = {
    props
  }

  // Some R interactive visualization packages insist on producing HTML that refers to javascript
  // or css by file path. These functions are intended to load those files and embed them into the
  // HTML as Base64 encoded DataURIs.

  //FIXME These don't error but may not yet properly be converting script links
  /**
   * Rewrites, for every `tag` element under `doc`, the attribute `testAttr` when it is
   * present, non-empty and starts with "/".
   */
  def scriptToBase(doc : Element, testAttr : String, tag : String, mime : String): Unit = {
    val elems : Elements = doc.getElementsByTag(tag)
    elems.filter( (e : Element) => {
      e.attributes().hasKey(testAttr) && e.attr(testAttr) != "" && e.attr(testAttr).slice(0,1) == "/"
    }).foreach(scriptToBase(_, testAttr, mime))
  }

  /**
   * Rewrites one attribute of one element: a path to an existing file becomes a data URI,
   * a protocol-relative "//..." reference gets an explicit "http:" scheme, and everything
   * else is left untouched. The cases are tried in the order written.
   */
  def scriptToBase(node : Element, field : String, mime : String) : Unit = node.attr(field) match {
    case x if Files.exists(Paths.get(x)) => node.attr(field, dataURI(x, mime))
    case x if x.slice(0,4) == "http" => {}
    case x if x.contains("ajax") => {}
    case x if x.contains("googleapis") => {}
    case x if x.slice(0,2) == "//" => node.attr(field, "http:" + x)
    case _ => {}
  }

  /**
   * Builds a `data:<mime>;base64,...` URI from the contents of `file`.
   *
   * The file is read as raw bytes in one call, so no reader is left open and the encoded
   * payload is exactly the file's content (no charset round-trip, no line-ending changes).
   */
  def dataURI(file : String, mime : String) : String = {
    val bytes: Array[Byte] = Files.readAllBytes(Paths.get(file))
    s"""data:${mime};base64,""" + StringUtils.newStringUtf8(Base64.encodeBase64(bytes, false))
  }

  // The purpose here is to deal with knitr producing HTML with script and css tags outside the <body>
  def processHTML(input: Array[String]): String = processHTML(input.mkString("\n"))

  def processHTML(input: String) : String = {
    val doc : Document = Jsoup.parse(input)
    processHTML(doc)
  }

  /**
   * Moves the head's script and link elements to the front of the body, inlines local
   * css/js references as data URIs, and returns the body's inner HTML.
   */
  private def processHTML(doc : Document) : String = {
    val bod : Element = doc.body()
    val head : Element = doc.head()
    // Try to ignore the knitr script that breaks zeppelin display
    // Prepending in reverse order keeps the scripts in their original relative order.
    head.getElementsByTag("script").reverseIterator.foreach(bod.prependChild(_))
    // Only get css from head if it links to a file
    head.getElementsByTag("link").foreach(bod.prependChild(_))
    scriptToBase(bod, "href", "link", "text/css")
    scriptToBase(bod, "src", "script", "text/javascript")
    bod.html()
  }
}

View file

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
// TODO: Option for setting size of output images
import java.util._
import org.apache.zeppelin.interpreter.InterpreterContext
import org.apache.zeppelin.interpreter.InterpreterResult
import org.apache.zeppelin.rinterpreter.rscala.RException
/**
 * Interpreter that feeds a paragraph to R line by line and turns the evaluation results
 * into a Zeppelin [[InterpreterResult]].
 *
 * @param property   interpreter properties, forwarded to [[RInterpreter]]
 * @param startSpark forwarded to [[RInterpreter]]
 */
class RReplInterpreter(property: Properties, startSpark : Boolean = true) extends RInterpreter(property, startSpark) {

  // protected val rContext : RContext = RContext(property)

  def this(property : Properties) = {
    this(property, true)
  }

  // NOTE(review): this flag starts as true and nothing visible here ever sets it to false, so
  // the "packageStartupMessage" branch below always yields an empty string. Confirm the intent
  // before changing it.
  private var firstCell : Boolean = true

  /**
   * Evaluates `st` in R and converts the outcome.
   *
   * The text is split into lines, stored in the R variable `.zreplin`, and evaluated through
   * `rzeppelin:::.z.valuate`. Each element of the result list is then rendered according to
   * its R class:
   *  - the first `recordedplot`, `html` or `data.frame` element ends processing immediately
   *    and is returned as an IMG, HTML or TABLE result (text gathered so far is discarded);
   *  - all other elements are rendered as text and joined with blank lines;
   *  - a `simpleError` element switches the final result code to ERROR.
   *
   * @param st      the paragraph text
   * @param context the Zeppelin interpreter context (not used by this method)
   * @return the interpreter result; failures are reported as ERROR results, not thrown
   */
  def interpret(st: String, context: InterpreterContext): InterpreterResult = {
    rContext.synchronized {
      try {
        // Local import so that `List` below is the Scala list, not java.util.List.
        import scala.collection.immutable._
        logger.info("intrpreting " + st)
        rContext.set(".zreplin", st.split("\n"))
        rContext.eval(".zreplout <- rzeppelin:::.z.valuate(.zreplin)")
        val reslength: Int = rContext.evalI0("length(.zreplout)")
        logger.debug("Length of evaluate result is " + reslength)
        var gotError: Boolean = false
        // R lists are 1-based, hence the range 1..reslength.
        val result: String = List.range(1, reslength + 1).map((i: Int) => {
          rContext.evalS1(s"class(.zreplout[[${i}]])") match {
            case x: Array[String] if x contains ("recordedplot") => {
              if (!rContext.testRPackage("repr", fail = false)) return new InterpreterResult(InterpreterResult.Code.ERROR,
                InterpreterResult.Type.TEXT,
                "Displaying images through the R REPL requires the repr package, which is not installed.")
              val image: String = rContext.evalS0(s"base64enc:::base64encode(repr:::repr_jpg(.zreplout[[${i}]]))")
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.IMG, image)
            }
            //TODO: If the html contains a link to a file, transform it to a DataURI. This is necessary for htmlwidgets
            case x: Array[String] if x contains ("html") => {
              val html: String = RInterpreter.processHTML(rContext.evalS0(s"rzeppelin:::.z.repr(.zreplout[[${i}]])"))
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.HTML, html)
            }
            case x: Array[String] if x contains "data.frame" => {
              val table: Array[String] = rContext.evalS1( s"""rzeppelin:::.z.table(.zreplout[[${i}]])""")
              return new InterpreterResult(InterpreterResult.Code.SUCCESS,
                InterpreterResult.Type.TABLE,
                table.mkString(sep = "\n"))
            }
            case x: Array[String] if x contains "source" => rContext.evalS0(s".zreplout[[${i}]]" + "$src")
            case x: Array[String] if x contains "character" => rContext.evalS0(s".zreplout[[${i}]]")
            case x: Array[String] if x contains "packageStartupMessage" => if (firstCell) {""} else {
              firstCell = true
              "Package Startup Message: " + rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
            }
            case x: Array[String] if x contains "simpleError" => {
              gotError = true
              val error = rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
              logger.error(error)
              error
            }
            case _ => rContext.evalS1(s"rzeppelin:::.z.repr(.zreplout[[${i}]])").mkString("\n")
          }
        }).mkString("\n\n")
        return new InterpreterResult({
          if (!gotError) InterpreterResult.Code.SUCCESS
          else InterpreterResult.Code.ERROR
        }, result)
      } catch {
        case re: RException => return re.getInterpreterResult(st)
        case e: Exception => {
          logger.error("Error interpreting " + st, e)
          // Render the stack trace frame by frame. Concatenating the array itself would only
          // print its identity ("[Ljava.lang.StackTraceElement;@..."). Fall back to toString
          // when the exception carries no message.
          val description = Option(e.getMessage).getOrElse(e.toString)
          return new InterpreterResult(InterpreterResult.Code.ERROR,
            description + "\n" + e.getStackTrace.mkString("\n"))
        }
      }
    }
  }
}

View file

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin
// TODO: Keeping interpreter out of spark interpreter group for now, until the context sharing code is developed
// TEST: rmr2
// TODO: Link getProgress to plyr (and knitr progress) if possible
// TODO: Forms?
// TODO: Completion? Currently commented-out
// TODO: It would be nice if the RReplInterpreter output svg instead of jpg, or intelligently selected, at a minimum
// TODO: Some kind of proxy may be necessary for shiny and widgets see http://blog.dominodatalab.com/interactive-dashboards-with-knitr-and-html-widgets/
/** Package object for `org.apache.zeppelin.rinterpreter`; it currently declares no members. */
package object rinterpreter {
}

View file

@ -0,0 +1,39 @@
package org.apache.zeppelin.rinterpreter
/*
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** Version metadata for the bundled rscala bridge code. */
package object rscala {

  /** Version string of the Scala side of the bridge. */
  val Version: String = "0.1.0"

  /** Date string associated with [[Version]]. */
  val Date: String = "2015-05-15"
}

View file

@ -0,0 +1,527 @@
/*
Copyright (c) 2013-2015, David B. Dahl, Brigham Young University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.apache.zeppelin.rinterpreter.rscala
// TODO: Add libdir to constructor
import java.io._
import java.net.{InetAddress, ServerSocket}
import org.slf4j.{Logger, LoggerFactory}
import scala.language.dynamics
class RClient (private val in: DataInputStream,
private val out: DataOutputStream,
val debug: Boolean = true) extends Dynamic {
var damagedState : Boolean = false
private val logger: Logger = LoggerFactory.getLogger(getClass)
/** Wraps the reference string that identifies an object kept on the R side. */
case class RObjectRef(reference: String) {
  /** Renders the reference with a ".$" prefix. */
  override def toString(): String = {
    val prefix = ".$"
    prefix + reference
  }
}
/**
 * __For rscala developers only__: Asks the R side to switch its debugging output on or off.
 *
 * A DEBUG request is only sent when `v` differs from the `debug` constructor value.
 * NOTE(review): `debug` is a constructor `val`, so this setter never updates it; every call
 * is compared against the value given at construction time.
 *
 * @param v the requested debug state
 */
def debug_=(v: Boolean): Unit = {
  val stateChanges = v != debug
  if (stateChanges) {
    if (debug) {
      logger.debug("Sending DEBUG request.")
    }
    val flag = if (v) 1 else 0
    out.writeInt(RClient.Protocol.DEBUG)
    out.writeInt(flag)
    out.flush()
  }
}
/** Sends the EXIT opcode to the R side and flushes the output stream. */
def exit(): Unit = {
  logger.debug("Sending EXIT request.")
  val opcode = RClient.Protocol.EXIT
  out.writeInt(opcode)
  out.flush()
}
/**
 * Sends `snippet` to R for evaluation and checks the reply.
 *
 * @param snippet  R code to evaluate
 * @param evalOnly when true the result is `null`; otherwise the R variable
 *                 `.rzeppelin.last.value` is fetched and its value returned
 * @return `null`, or the fetched value when `evalOnly` is false
 * @throws RException if the connection is already marked damaged, if R reports output or a
 *                    non-OK status, or if the stream to R fails during the exchange
 */
def eval(snippet: String, evalOnly: Boolean = true): Any = try {
  if (damagedState) throw new RException("Connection to R already damaged")
  logger.debug("Sending EVAL request.")
  out.writeInt(RClient.Protocol.EVAL)
  RClient.writeString(out, snippet)
  out.flush()
  val status = in.readInt()
  val output = RClient.readString(in)
  // Any non-empty output string is treated as an R-side error, whatever the status code.
  if (output != "") {
    logger.error("R Error " + snippet + " " + output)
    throw new RException(snippet, output)
  }
  if (status != RClient.Protocol.OK) throw new RException(snippet, output, "Error in R evaluation.")
  if (evalOnly) null else get(".rzeppelin.last.value")._1
} catch {
  // SocketException is an IOException; EOF from a terminated R process is covered as well.
  case e: java.io.IOException => {
    logger.error("Connection to R appears to have shut down", e)
    damagedState = true
    // Fail loudly. Returning normally here would let callers go on to read from a dead
    // connection.
    throw new RException(snippet, e.toString, "Connection to R appears to have shut down")
  }
}
// Each helper evaluates the snippet and then reads the R variable `.rzeppelin.last.value`
// back through the matching typed getter.

/** Evaluates `snippet` and reads the result as a single Int. */
def evalI0(snippet: String): Int = {
  eval(snippet, evalOnly = true)
  getI0(".rzeppelin.last.value")
}

/** Evaluates `snippet` and reads the result as a single Boolean. */
def evalB0(snippet: String): Boolean = {
  eval(snippet, evalOnly = true)
  getB0(".rzeppelin.last.value")
}

/** Evaluates `snippet` and reads the result as a single String. */
def evalS0(snippet: String): String = {
  eval(snippet, evalOnly = true)
  getS0(".rzeppelin.last.value")
}

/** Evaluates `snippet` and reads the result as an Int array. */
def evalI1(snippet: String): Array[Int] = {
  eval(snippet, evalOnly = true)
  getI1(".rzeppelin.last.value")
}

/** Evaluates `snippet` and reads the result as a Boolean array. */
def evalB1(snippet: String): Array[Boolean] = {
  eval(snippet, evalOnly = true)
  getB1(".rzeppelin.last.value")
}

/** Evaluates `snippet` and reads the result as a String array. */
def evalS1(snippet: String): Array[String] = {
  eval(snippet, evalOnly = true)
  getS1(".rzeppelin.last.value")
}

/** Evaluates `snippet` and fetches the result as an R object reference. */
def evalR(snippet: String): RObjectRef = {
  eval(snippet, evalOnly = true)
  getR(".rzeppelin.last.value")
}
/** Assigns `value` to the R variable `identifier` without any indexing. */
def set(identifier: String, value: Any): Unit = set(identifier,value,"",true)

/**
 * Assigns `value` to the R variable `identifier`, optionally at an index.
 *
 * Supported values are `null`/Unit (sent as R NULL), Int, Boolean and String, and arrays of
 * Int, Boolean or String. A status reply is only read back when `index` is non-empty.
 *
 * @param identifier     name of the R variable
 * @param value          the value to send
 * @param index          index expression; empty means a plain assignment
 * @param singleBrackets selects the SET_SINGLE request when true and the SET_DOUBLE request
 *                       when false; only consulted when `index` is non-empty
 * @throws RException if the connection is marked damaged, the value type is unsupported, or
 *                    R replies with a non-OK status
 */
def set(identifier: String, value: Any, index: String = "", singleBrackets: Boolean = true): Unit = {
  if (damagedState) throw new RException("Connection to R already damaged")
  val v = value
  // Null-safe rendering for error messages. Calling toString() on a null value would raise
  // a NullPointerException on the very path that reports a failed NULL assignment.
  val valueText = s"$value"
  if ( index == "" ) out.writeInt(RClient.Protocol.SET)
  else if ( singleBrackets ) {
    out.writeInt(RClient.Protocol.SET_SINGLE)
    RClient.writeString(out,index)
  } else {
    out.writeInt(RClient.Protocol.SET_DOUBLE)
    RClient.writeString(out,index)
  }
  RClient.writeString(out,identifier)
  if ( v == null || v.isInstanceOf[Unit] ) {
    logger.debug("... which is null")
    out.writeInt(RClient.Protocol.NULLTYPE)
    out.flush()
    if ( index != "" ) {
      val status = in.readInt()
      if ( status != RClient.Protocol.OK ) {
        val output = RClient.readString(in)
        if ( output != "" ) {
          logger.error("R error setting " + output)
          throw new RException(identifier + valueText, output, "Error setting")
        }
        throw new RException("Error in R evaluation. Set " + identifier + " to " + valueText)
      }
    }
    return
  }
  val c = v.getClass
  logger.debug("... whose class is: "+c)
  logger.debug("... and whose value is: "+v)
  if ( c.isArray ) {
    // Arrays are sent as: VECTOR, length, element type, then the elements.
    c.getName match {
      case "[I" =>
        val vv = v.asInstanceOf[Array[Int]]
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.INTEGER)
        vv.foreach(x => out.writeInt(x))
      case "[Z" =>
        val vv = v.asInstanceOf[Array[Boolean]]
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.BOOLEAN)
        vv.foreach(x => out.writeInt(if ( x ) 1 else 0))
      case "[Ljava.lang.String;" =>
        val vv = v.asInstanceOf[Array[String]]
        out.writeInt(RClient.Protocol.VECTOR)
        out.writeInt(vv.length)
        out.writeInt(RClient.Protocol.STRING)
        vv.foreach(x => RClient.writeString(out,x))
      case _ =>
        throw new RException("Unsupported array type: "+c.getName)
    }
  } else {
    // Scalars are sent as: ATOMIC, element type, then the value.
    c.getName match {
      case "java.lang.Integer" =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.INTEGER)
        out.writeInt(v.asInstanceOf[Int])
      case "java.lang.Boolean" =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.BOOLEAN)
        out.writeInt(if (v.asInstanceOf[Boolean]) 1 else 0)
      case "java.lang.String" =>
        out.writeInt(RClient.Protocol.ATOMIC)
        out.writeInt(RClient.Protocol.STRING)
        RClient.writeString(out,v.asInstanceOf[String])
      case _ =>
        throw new RException("Unsupported non-array type: "+c.getName)
    }
  }
  out.flush()
  if ( index != "" ) {
    val status = in.readInt()
    if ( status != RClient.Protocol.OK ) {
      val output = RClient.readString(in)
      if ( output != "" ) throw new RException(identifier + valueText, output, "Error setting")
      throw new RException("Error in R evaluation.")
    }
  }
}
/**
 * Fetches the R variable `identifier`.
 *
 * @param identifier  name of the R variable
 * @param asReference when true a GET_REFERENCE request is sent and an [[RObjectRef]] is
 *                    returned instead of the value
 * @return a pair of the decoded value and a tag naming its Scala type: "Null", "Int",
 *         "Double", "Boolean", "String", "Array[...]", "Array[Array[...]]" or "RObject"
 * @throws RException if the connection is marked damaged, the identifier is undefined, the
 *                    data type is unsupported, or the reply does not follow the protocol
 */
def get(identifier: String, asReference: Boolean = false): (Any,String) = {
  // Same guard as eval and set: do not touch a connection known to be broken.
  if (damagedState) throw new RException("Connection to R already damaged")
  logger.debug("Getting: "+identifier)
  out.writeInt(if ( asReference ) RClient.Protocol.GET_REFERENCE else RClient.Protocol.GET)
  RClient.writeString(out,identifier)
  out.flush()
  if ( asReference ) {
    val r = in.readInt() match {
      case RClient.Protocol.REFERENCE => (RObjectRef(RClient.readString(in)),"RObject")
      case RClient.Protocol.UNDEFINED_IDENTIFIER =>
        throw new RException("Undefined identifier")
      // Any other reply is a protocol violation; report it like the matches below instead
      // of letting a MatchError escape.
      case _ => throw new RException("Protocol error")
    }
    return r
  }
  in.readInt match {
    case RClient.Protocol.NULLTYPE =>
      logger.debug("Getting null.")
      (null,"Null")
    case RClient.Protocol.ATOMIC =>
      logger.debug("Getting atomic.")
      in.readInt() match {
        case RClient.Protocol.INTEGER => (in.readInt(),"Int")
        case RClient.Protocol.DOUBLE => (in.readDouble(),"Double")
        case RClient.Protocol.BOOLEAN => (( in.readInt() != 0 ),"Boolean")
        case RClient.Protocol.STRING => (RClient.readString(in),"String")
        case _ => throw new RException("Protocol error")
      }
    case RClient.Protocol.VECTOR =>
      logger.debug("Getting vector...")
      val length = in.readInt()
      logger.debug("... of length: "+length)
      in.readInt() match {
        case RClient.Protocol.INTEGER => (Array.fill(length) { in.readInt() },"Array[Int]")
        case RClient.Protocol.DOUBLE => (Array.fill(length) { in.readDouble() },"Array[Double]")
        case RClient.Protocol.BOOLEAN => (Array.fill(length) { ( in.readInt() != 0 ) },"Array[Boolean]")
        case RClient.Protocol.STRING => (Array.fill(length) { RClient.readString(in) },"Array[String]")
        case _ => throw new RException("Protocol error")
      }
    case RClient.Protocol.MATRIX =>
      logger.debug("Getting matrix...")
      val nrow = in.readInt()
      val ncol = in.readInt()
      logger.debug("... of dimensions: "+nrow+","+ncol)
      // Elements are read row by row.
      in.readInt() match {
        case RClient.Protocol.INTEGER => (Array.fill(nrow) { Array.fill(ncol) { in.readInt() } },"Array[Array[Int]]")
        case RClient.Protocol.DOUBLE => (Array.fill(nrow) { Array.fill(ncol) { in.readDouble() } },"Array[Array[Double]]")
        case RClient.Protocol.BOOLEAN => (Array.fill(nrow) { Array.fill(ncol) { ( in.readInt() != 0 ) } },"Array[Array[Boolean]]")
        case RClient.Protocol.STRING => (Array.fill(nrow) { Array.fill(ncol) { RClient.readString(in) } },"Array[Array[String]]")
        case _ => throw new RException("Protocol error")
      }
    case RClient.Protocol.UNDEFINED_IDENTIFIER => throw new RException("Undefined identifier")
    case RClient.Protocol.UNSUPPORTED_STRUCTURE => throw new RException("Unsupported data type")
    case _ => throw new RException("Protocol error")
  }
}
/**
 * Fetches `identifier` and converts it to a single Int.
 * Scalars are converted directly; for arrays the first element is used.
 *
 * @throws RException if the fetched value has a type with no Int conversion
 */
def getI0(identifier: String): Int = {
  val (value, tag) = get(identifier)
  tag match {
    case "Int" => value.asInstanceOf[Int]
    case "Double" => value.asInstanceOf[Double].toInt
    case "Boolean" => if (value.asInstanceOf[Boolean]) 1 else 0
    case "String" => value.asInstanceOf[String].toInt
    case "Array[Int]" => value.asInstanceOf[Array[Int]](0)
    case "Array[Double]" => value.asInstanceOf[Array[Double]](0).toInt
    case "Array[Boolean]" => if ( value.asInstanceOf[Array[Boolean]](0) ) 1 else 0
    case "Array[String]" => value.asInstanceOf[Array[String]](0).toInt
    case unsupported => throw new RException(s"Unable to cast ${unsupported} to Int")
  }
}
/**
 * Fetches `identifier` and converts it to a single Double.
 * Scalars are converted directly; for arrays the first element is used.
 *
 * @throws RException if the fetched value has a type with no Double conversion
 */
def getD0(identifier: String): Double = {
  val (value, tag) = get(identifier)
  tag match {
    case "Int" => value.asInstanceOf[Int].toDouble
    case "Double" => value.asInstanceOf[Double]
    case "Boolean" => if (value.asInstanceOf[Boolean]) 1.0 else 0.0
    case "String" => value.asInstanceOf[String].toDouble
    case "Array[Int]" => value.asInstanceOf[Array[Int]](0).toDouble
    case "Array[Double]" => value.asInstanceOf[Array[Double]](0)
    case "Array[Boolean]" => if ( value.asInstanceOf[Array[Boolean]](0) ) 1.0 else 0.0
    case "Array[String]" => value.asInstanceOf[Array[String]](0).toDouble
    case unsupported => throw new RException(s"Unable to cast ${unsupported} to Double")
  }
}
/**
 * Fetches `identifier` and converts it to a single Boolean.
 * Numbers are true when non-zero; strings are true unless they equal "false" ignoring case.
 * For arrays the first element is used. Double values are accepted, as in getI0 and getD0.
 *
 * @throws RException if the fetched value has a type with no Boolean conversion
 */
def getB0(identifier: String): Boolean = get(identifier) match {
  case (a,"Int") => a.asInstanceOf[Int] != 0
  case (a,"Double") => a.asInstanceOf[Double] != 0.0
  case (a,"Boolean") => a.asInstanceOf[Boolean]
  case (a,"String") => a.asInstanceOf[String].toLowerCase != "false"
  case (a,"Array[Int]") => a.asInstanceOf[Array[Int]](0) != 0
  case (a,"Array[Double]") => a.asInstanceOf[Array[Double]](0) != 0.0
  case (a,"Array[Boolean]") => a.asInstanceOf[Array[Boolean]](0)
  case (a,"Array[String]") => a.asInstanceOf[Array[String]](0).toLowerCase != "false"
  case (_,tp) => throw new RException(s"Unable to cast ${tp} to Boolean")
}
/**
 * Fetches `identifier` and converts it to a single String.
 * Scalars are rendered with toString; for arrays the first element is used.
 * Double values are accepted, as in getI0 and getD0.
 *
 * @throws RException if the fetched value has a type with no String conversion
 */
def getS0(identifier: String): String = get(identifier) match {
  case (a,"Int") => a.asInstanceOf[Int].toString
  case (a,"Double") => a.asInstanceOf[Double].toString
  case (a,"Boolean") => a.asInstanceOf[Boolean].toString
  case (a,"String") => a.asInstanceOf[String]
  case (a,"Array[Int]") => a.asInstanceOf[Array[Int]](0).toString
  case (a,"Array[Double]") => a.asInstanceOf[Array[Double]](0).toString
  case (a,"Array[Boolean]") => a.asInstanceOf[Array[Boolean]](0).toString
  case (a,"Array[String]") => a.asInstanceOf[Array[String]](0)
  case (_,tp) => throw new RException(s"Unable to cast ${tp} to String")
}
/**
 * Fetches `identifier` and converts it to an Int array.
 * Scalars become one-element arrays. Double values are accepted and truncated with toInt,
 * the same conversion getI0 uses.
 *
 * @throws RException if the fetched value has a type with no Array[Int] conversion
 */
def getI1(identifier: String): Array[Int] = get(identifier) match {
  case (a,"Int") => Array(a.asInstanceOf[Int])
  case (a,"Double") => Array(a.asInstanceOf[Double].toInt)
  case (a,"Boolean") => Array(if (a.asInstanceOf[Boolean]) 1 else 0)
  case (a,"String") => Array(a.asInstanceOf[String].toInt)
  case (a,"Array[Int]") => a.asInstanceOf[Array[Int]]
  case (a,"Array[Double]") => a.asInstanceOf[Array[Double]].map(_.toInt)
  case (a,"Array[Boolean]") => a.asInstanceOf[Array[Boolean]].map(x => if (x) 1 else 0)
  case (a,"Array[String]") => a.asInstanceOf[Array[String]].map(_.toInt)
  case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[Int]")
}
/**
 * Fetches `identifier` and converts it to a Boolean array.
 * Scalars become one-element arrays. Numbers are true when non-zero; strings are true unless
 * they equal "false" ignoring case. Double values are accepted, as in getI0 and getD0.
 *
 * @throws RException if the fetched value has a type with no Array[Boolean] conversion
 */
def getB1(identifier: String): Array[Boolean] = get(identifier) match {
  case (a,"Int") => Array(a.asInstanceOf[Int] != 0)
  case (a,"Double") => Array(a.asInstanceOf[Double] != 0.0)
  case (a,"Boolean") => Array(a.asInstanceOf[Boolean])
  case (a,"String") => Array(a.asInstanceOf[String].toLowerCase != "false")
  case (a,"Array[Int]") => a.asInstanceOf[Array[Int]].map(_ != 0)
  case (a,"Array[Double]") => a.asInstanceOf[Array[Double]].map(_ != 0.0)
  case (a,"Array[Boolean]") => a.asInstanceOf[Array[Boolean]]
  case (a,"Array[String]") => a.asInstanceOf[Array[String]].map(_.toLowerCase != "false")
  case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[Boolean]")
}
/**
 * Fetches `identifier` and converts it to a String array.
 * Scalars become one-element arrays; elements are rendered with toString.
 * Double values are accepted, as in getI0 and getD0.
 *
 * @throws RException if the fetched value has a type with no Array[String] conversion
 */
def getS1(identifier: String): Array[String] = get(identifier) match {
  case (a,"Int") => Array(a.asInstanceOf[Int].toString)
  case (a,"Double") => Array(a.asInstanceOf[Double].toString)
  case (a,"Boolean") => Array(a.asInstanceOf[Boolean].toString)
  case (a,"String") => Array(a.asInstanceOf[String])
  case (a,"Array[Int]") => a.asInstanceOf[Array[Int]].map(_.toString)
  case (a,"Array[Double]") => a.asInstanceOf[Array[Double]].map(_.toString)
  case (a,"Array[Boolean]") => a.asInstanceOf[Array[Boolean]].map(_.toString)
  case (a,"Array[String]") => a.asInstanceOf[Array[String]]
  case (_,tp) => throw new RException(s"Unable to cast ${tp} to Array[String]")
}
/**
 * Fetches `identifier` as a reference to the R object instead of its value.
 *
 * @throws RException if the reply is not tagged "RObject"
 */
def getR(identifier: String): RObjectRef = {
  val (value, tag) = get(identifier, asReference = true)
  if (tag == "RObject") value.asInstanceOf[RObjectRef]
  else throw new RException(s"Unable to cast ${tag} to RObject")
}
/** Sends the GC opcode to the R side and flushes the output stream. */
def gc(): Unit = {
  logger.debug("Sending GC request.")
  val opcode = RClient.Protocol.GC
  out.writeInt(opcode)
  out.flush()
}
}
object RClient {
/** Integer codes exchanged with the R side, grouped by role. */
object Protocol {

  // ---- Data types ----
  val UNSUPPORTED_TYPE: Int = 0
  val INTEGER: Int = 1
  val DOUBLE: Int = 2
  val BOOLEAN: Int = 3
  val STRING: Int = 4
  val DATE: Int = 5
  val DATETIME: Int = 6

  // ---- Data structures ----
  val UNSUPPORTED_STRUCTURE: Int = 10
  val NULLTYPE: Int = 11
  val REFERENCE: Int = 12
  val ATOMIC: Int = 13
  val VECTOR: Int = 14
  val MATRIX: Int = 15
  val LIST: Int = 16
  val DATAFRAME: Int = 17
  val S3CLASS: Int = 18
  val S4CLASS: Int = 19
  val JOBJ: Int = 20

  // ---- Commands ----
  val EXIT: Int = 100
  val RESET: Int = 101
  val GC: Int = 102
  val DEBUG: Int = 103
  val EVAL: Int = 104
  val SET: Int = 105
  val SET_SINGLE: Int = 106
  val SET_DOUBLE: Int = 107
  val GET: Int = 108
  val GET_REFERENCE: Int = 109
  val DEF: Int = 110
  val INVOKE: Int = 111
  val SCALAP: Int = 112

  // ---- Results ----
  val OK: Int = 1000
  val ERROR: Int = 1001
  val UNDEFINED_IDENTIFIER: Int = 1002

  // ---- Miscellaneous ----
  val CURRENT_SUPPORTED_SCALA_VERSION: String = "2.10"
}
/**
 * Writes `string` as a 4-byte length followed by its UTF-8 bytes.
 * The length counts encoded bytes, not characters. The stream is not flushed.
 */
def writeString(out: DataOutputStream, string: String): Unit = {
  val encoded: Array[Byte] = string.getBytes("UTF-8")
  out.writeInt(encoded.length)
  out.write(encoded, 0, encoded.length)
}
/**
 * Reads a string written as a 4-byte length followed by that many UTF-8 bytes.
 * Blocks until the full payload has been read.
 */
def readString(in: DataInputStream): String = {
  val payload = new Array[Byte](in.readInt())
  in.readFully(payload)
  new String(payload, "UTF-8")
}
/**
 * Tells whether every row of `x` has the same length as the first row.
 * An array with no rows counts as a matrix.
 */
def isMatrix[T](x: Array[Array[T]]): Boolean = {
  // For an empty array the predicate is never invoked, so x(0) is never touched.
  x.forall(row => row.length == x(0).length)
}
import scala.sys.process._
private val logger: Logger = LoggerFactory.getLogger(getClass)
/**
 * Normalized operating-system family derived from the `os.name` system property:
 * "windows", "linux" (also used for names starting with "unix") or "macintosh".
 * Any other name makes initialization fail with an RException.
 */
val OS = {
  val osName = sys.props("os.name").toLowerCase
  if (osName.startsWith("""windows""")) "windows"
  else if (osName.startsWith("""linux""")) "linux"
  else if (osName.startsWith("""unix""")) "linux"
  else if (osName.startsWith("""mac""")) "macintosh"
  else throw new RException("Unrecognized OS")
}
/**
 * Command-line flags passed to the R executable.
 * Windows gets "--ess" as the last flag; the other families get "--interactive".
 */
val defaultArguments = {
  val shared = Array[String]("--vanilla","--silent","--slave")
  OS match {
    case "windows" => shared :+ "--ess"
    case "linux" | "unix" | "macintosh" => shared :+ "--interactive"
  }
}
/**
 * Default command used to start R. On Windows it is looked up through findROnWindows;
 * elsewhere the plain command name "R" is used. Evaluated lazily on first access.
 */
lazy val defaultRCmd = OS match {
  case "windows" => findROnWindows
  case "linux" | "unix" | "macintosh" => """R"""
}
/**
 * Locates R.exe by querying the `InstallPath` value under `Software\R-core\R` with
 * `reg query`, first in HKEY_LOCAL_MACHINE and then in HKEY_CURRENT_USER.
 * A hit in the later root replaces an earlier one; a failed lookup for a root is ignored.
 *
 * @return the install path with `\bin\R.exe` appended
 * @throws RException if neither root yields an install path
 */
def findROnWindows: String = {
  val NEWLINE = sys.props("line.separator")
  val roots = List("HKEY_LOCAL_MACHINE","HKEY_CURRENT_USER")
  val located = roots.foldLeft(Option.empty[String]) { (found, root) =>
    val captured = new StringBuilder()
    // stdout lines are collected; stderr lines are discarded.
    val sink = ProcessLogger((o: String) => { captured.append(o); captured.append(NEWLINE) },(e: String) => {})
    try {
      ("reg query \"" + root + "\\Software\\R-core\\R\" /v \"InstallPath\"") ! sink
      val installLine = captured.toString.split(NEWLINE).filter(_.matches("""^\s*InstallPath\s*.*"""))(0)
      Some(installLine.split("REG_SZ")(1).trim() + """\bin\R.exe""")
    } catch {
      // Best effort: keep whatever an earlier root produced.
      case _ : Throwable => found
    }
  }
  located match {
    case Some(path) => path
    case None => throw new RException("Cannot locate R using Windows registry.")
  }
}
/**
 * Drains `input` line by line, logging each line at debug level prefixed with `label`.
 * The reader (and with it `input`) is closed when the stream ends or reading fails.
 *
 * @param label prefix put in front of every logged line
 * @param input the stream to drain
 */
def reader(label: String)(input: InputStream) = {
  val in = new BufferedReader(new InputStreamReader(input))
  try {
    var line = in.readLine()
    while ( line != null ) {
      logger.debug(label+line)
      line = in.readLine()
    }
  } finally {
    // Close on the failure path too, so a read error does not leak the stream.
    in.close()
  }
}
/**
 * Opens two server sockets on system-chosen ports, writes both port numbers to
 * `portsFilename`, and then blocks until a peer has connected to each of them.
 *
 * @param portsFilename path of the file that receives "inPort outPort" on one line
 */
class ScalaSockets(portsFilename: String) {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  // Port 0 lets the system pick a free port; a null host name resolves to the loopback address.
  val serverIn = new ServerSocket(0,0,InetAddress.getByName(null))
  val serverOut = new ServerSocket(0,0,InetAddress.getByName(null))

  // Publish the chosen ports through the file before waiting for connections.
  locally {
    logger.info("Trying to open ports filename: "+portsFilename)
    val ports = serverIn.getLocalPort+" "+serverOut.getLocalPort
    val writer = new PrintWriter(new File(portsFilename))
    writer.println(ports)
    writer.close()
    logger.info("Servers are running on port "+ports)
  }

  // Blocks until the peer connects to the first socket.
  val socketIn = serverIn.accept
  logger.info("serverinaccept done")
  val in = new DataInputStream(new BufferedInputStream(socketIn.getInputStream))
  logger.info("in has been created")

  // Blocks until the peer connects to the second socket.
  val socketOut = serverOut.accept
  logger.info("serverouacceptdone")
  val out = new DataOutputStream(new BufferedOutputStream(socketOut.getOutputStream))
  logger.info("out is done")
}
/** Creates the socket pair for `portsFilename`; blocks until both connections are accepted. */
def makeSockets(portsFilename : String) = {
  val sockets = new ScalaSockets(portsFilename)
  sockets
}
/** Starts R with the default command for this platform and returns a connected client. */
def apply(): RClient = {
  val rCommand = defaultRCmd
  apply(rCommand)
}
/**
 * Starts an R process, has it connect back over two local sockets, verifies that both sides
 * report the same package version, and returns a client bound to that connection.
 *
 * @param rCmd    command used to launch R
 * @param libdir  currently unused
 * @param debug   debug flag passed to the R side and recorded in the returned client
 * @param timeout timeout value passed to the R side when it opens its sockets
 * @throws RException if the version cannot be read from R or does not match
 */
def apply(rCmd: String, libdir : String = "",debug: Boolean = false, timeout: Int = 60): RClient = {
  logger.debug("Creating processIO")
  // Assigned on the process I/O thread and polled below, hence volatile.
  @volatile var cmd: PrintWriter = null
  val command = rCmd +: defaultArguments
  val processCmd = Process(command)
  val processIO = new ProcessIO(
    o => { cmd = new PrintWriter(o) },
    reader("STDOUT DEBUG: "),
    reader("STDERR DEBUG: "),
    true
  )
  val portsFile = File.createTempFile("rscala-","")
  // The file only carries the port numbers for the handshake; do not leave it behind.
  portsFile.deleteOnExit()
  processCmd.run(processIO)
  // Literal character replacement. replaceAll would treat the separator as a regex, and a
  // lone backslash (the Windows separator) is not a valid pattern.
  val portsPath = portsFile.getAbsolutePath.replace(File.separatorChar, '/')
  val snippet = s"""
rscala:::rServe(rscala:::newSockets('${portsPath}',debug=${if ( debug ) "TRUE" else "FALSE"},timeout=${timeout}))
q(save='no')
"""
  // Wait until the process I/O thread has handed over R's stdin.
  while ( cmd == null ) Thread.sleep(100)
  logger.info("sending snippet " + snippet)
  cmd.println(snippet)
  cmd.flush()
  val sockets = makeSockets(portsFile.getAbsolutePath)
  sockets.out.writeInt(Protocol.OK)
  sockets.out.flush()
  // Explicit comparison rather than assert: assertions can be elided at compile time, and a
  // read failure should not be reported as a version mismatch.
  val rVersion =
    try readString(sockets.in)
    catch {
      case e: java.io.IOException =>
        throw new RException(snippet, e.toString, "Unable to read the package version reported by R")
    }
  val scalaVersion = org.apache.zeppelin.rinterpreter.rscala.Version
  if ( rVersion != scalaVersion ) {
    throw new RException("The scala and R versions of the package don't match",
      "scala: " + scalaVersion + ", R: " + rVersion)
  }
  // Record the same debug state on the Scala side that was requested from R.
  new RClient(sockets.in, sockets.out, debug)
}
/**
 * __For rscala developers only__: Returns an instance of the [[RClient]] class bound to
 * the given streams.
 */
def apply(in: DataInputStream, out: DataOutputStream): RClient = {
  val client = new RClient(in,out)
  client
}
}

View file

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter.rscala
import org.apache.zeppelin.interpreter.InterpreterResult
/**
 * Failure raised while talking to R.
 *
 * @param snippet the R code (or, with the one-argument constructor, the description) involved
 * @param error   error text reported by R, if any
 * @param message additional description of the failure
 */
class RException(val snippet : String, val error : String, val message : String = "") extends Exception {

  /** Creates an exception that carries only `snippet`, with an empty error text. */
  def this(snippet : String) = this(snippet, "")

  /**
   * Combines the non-empty parts so logs and wrapping exceptions show something useful.
   * Without this override the message would be null, because no message is passed to
   * the Exception constructor.
   */
  override def getMessage() : String =
    Seq(message, snippet, error).filter(part => part != null && part.nonEmpty).mkString("\n")

  /** Builds an ERROR result from the message, the stored snippet and the error text. */
  def getInterpreterResult : InterpreterResult = new
    InterpreterResult(InterpreterResult.Code.ERROR, message + "\n" + snippet + "\n" + error)

  /** Builds an ERROR result like the no-argument variant, but shows `st` instead of the stored snippet. */
  def getInterpreterResult(st : String) : InterpreterResult = new
    InterpreterResult(InterpreterResult.Code.ERROR, message + "\n" + st + "\n" + error)
}

View file

@ -0,0 +1,491 @@
/* __ *\
Copyright (c) 2002-2016 EPFL
Copyright (c) 2011-2016 Lightbend, Inc. (formerly Typesafe, Inc.)
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the EPFL nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF M MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package scala
import java.io.{BufferedReader, InputStream, InputStreamReader,
IOException, OutputStream, PrintStream, Reader}
import java.text.MessageFormat
import scala.util.DynamicVariable
/** Implements functionality for
* printing Scala values on the terminal as well as reading specific values.
* Also defines constants for marking up text on ANSI terminals.
*
* @author Matthias Zenger
* @version 1.0, 03/09/2003
*/
object Console {
/** Foreground color for ANSI black */
final val BLACK = "\033[30m"
/** Foreground color for ANSI red */
final val RED = "\033[31m"
/** Foreground color for ANSI green */
final val GREEN = "\033[32m"
/** Foreground color for ANSI yellow */
final val YELLOW = "\033[33m"
/** Foreground color for ANSI blue */
final val BLUE = "\033[34m"
/** Foreground color for ANSI magenta */
final val MAGENTA = "\033[35m"
/** Foreground color for ANSI cyan */
final val CYAN = "\033[36m"
/** Foreground color for ANSI white */
final val WHITE = "\033[37m"
/** Background color for ANSI black */
final val BLACK_B = "\033[40m"
/** Background color for ANSI red */
final val RED_B = "\033[41m"
/** Background color for ANSI green */
final val GREEN_B = "\033[42m"
/** Background color for ANSI yellow */
final val YELLOW_B = "\033[43m"
/** Background color for ANSI blue */
final val BLUE_B = "\033[44m"
/** Background color for ANSI magenta */
final val MAGENTA_B = "\033[45m"
/** Background color for ANSI cyan */
final val CYAN_B = "\033[46m"
/** Background color for ANSI white */
final val WHITE_B = "\033[47m"
/** Reset ANSI styles */
final val RESET = "\033[0m"
/** ANSI bold */
final val BOLD = "\033[1m"
/** ANSI underlines */
final val UNDERLINED = "\033[4m"
/** ANSI blink */
final val BLINK = "\033[5m"
/** ANSI reversed */
final val REVERSED = "\033[7m"
/** ANSI invisible */
final val INVISIBLE = "\033[8m"
// From Scala 2.10.5
// Start of rscala patch which only takes effect if RSCALA_TUNNELING environment variable is TRUE.
val baosOut = new java.io.ByteArrayOutputStream()
val baosErr = new java.io.ByteArrayOutputStream()
val psOut = new java.io.PrintStream(baosOut,true)
val psErr = new java.io.PrintStream(baosErr,true)
val originalOut = java.lang.System.out
val originalErr = java.lang.System.err
try {
if ( sys.env("RSCALA_TUNNELING") == "TRUE" ) {
java.lang.System.setOut(psOut)
java.lang.System.setErr(psErr)
}
} catch {
case _: Throwable =>
}
// End of rscala patch.
private val outVar = new DynamicVariable[PrintStream](java.lang.System.out)
private val errVar = new DynamicVariable[PrintStream](java.lang.System.err)
private val inVar = new DynamicVariable[BufferedReader](
new BufferedReader(new InputStreamReader(java.lang.System.in)))
/** The default output, can be overridden by `setOut` */
def out = outVar.value
/** The default error, can be overridden by `setErr` */
def err = errVar.value
/** The default input, can be overridden by `setIn` */
def in = inVar.value
/** Sets the default output stream.
*
* @param out the new output stream.
*/
def setOut(out: PrintStream) { outVar.value = out }
/** Sets the default output stream for the duration
* of execution of one thunk.
*
* @example {{{
* withOut(Console.err) { println("This goes to default _error_") }
* }}}
*
* @param out the new output stream.
* @param thunk the code to execute with
* the new output stream active
* @return the results of `thunk`
* @see `withOut[T](out:OutputStream)(thunk: => T)`
*/
def withOut[T](out: PrintStream)(thunk: =>T): T =
outVar.withValue(out)(thunk)
/** Sets the default output stream.
*
* @param out the new output stream.
*/
def setOut(out: OutputStream): Unit =
setOut(new PrintStream(out))
/** Sets the default output stream for the duration
* of execution of one thunk.
*
* @param out the new output stream.
* @param thunk the code to execute with
* the new output stream active
* @return the results of `thunk`
* @see `withOut[T](out:PrintStream)(thunk: => T)`
*/
def withOut[T](out: OutputStream)(thunk: =>T): T =
withOut(new PrintStream(out))(thunk)
/** Sets the default error stream.
*
* @param err the new error stream.
*/
def setErr(err: PrintStream) { errVar.value = err }
/** Set the default error stream for the duration
* of execution of one thunk.
* @example {{{
* withErr(Console.out) { println("This goes to default _out_") }
* }}}
*
* @param err the new error stream.
* @param thunk the code to execute with
* the new error stream active
* @return the results of `thunk`
* @see `withErr[T](err:OutputStream)(thunk: =>T)`
*/
def withErr[T](err: PrintStream)(thunk: =>T): T =
errVar.withValue(err)(thunk)
/** Sets the default error stream.
*
* @param err the new error stream.
*/
def setErr(err: OutputStream): Unit =
setErr(new PrintStream(err))
/** Sets the default error stream for the duration
* of execution of one thunk.
*
* @param err the new error stream.
* @param thunk the code to execute with
* the new error stream active
* @return the results of `thunk`
* @see `withErr[T](err:PrintStream)(thunk: =>T)`
*/
def withErr[T](err: OutputStream)(thunk: =>T): T = {
  // Wrap the raw stream, then delegate to the PrintStream overload.
  val printer = new PrintStream(err)
  withErr(printer) { thunk }
}
/** Sets the default input stream.
*
* @param reader specifies the new input stream.
*/
def setIn(reader: Reader): Unit = {
  // inVar always holds a BufferedReader, so wrap whatever reader was supplied.
  val buffered = new BufferedReader(reader)
  inVar.value = buffered
}
/** Sets the default input stream for the duration
* of execution of one thunk.
*
* @example {{{
* val someFile:Reader = openFile("file.txt")
* withIn(someFile) {
* // Reads a line from file.txt instead of default input
* println(readLine)
* }
* }}}
*
* @param reader the new input stream.
* @param thunk the code to execute with
* the new input stream active
*
* @return the results of `thunk`
* @see `withIn[T](in:InputStream)(thunk: =>T)`
*/
def withIn[T](reader: Reader)(thunk: =>T): T = {
  // Wrap the reader before installing it, mirroring setIn(Reader).
  val buffered = new BufferedReader(reader)
  inVar.withValue(buffered) { thunk }
}
/** Sets the default input stream.
*
* @param in the new input stream.
*/
def setIn(in: InputStream): Unit = {
  // No charset is passed, so InputStreamReader decodes with the platform default.
  val decoded = new InputStreamReader(in)
  setIn(decoded)
}
/** Sets the default input stream for the duration
* of execution of one thunk.
*
* @param in the new input stream.
* @param thunk the code to execute with
* the new input stream active
* @return the results of `thunk`
* @see `withIn[T](reader:Reader)(thunk: =>T)`
*/
def withIn[T](in: InputStream)(thunk: =>T): T = {
  // No charset is passed, so InputStreamReader decodes with the platform default.
  val decoded = new InputStreamReader(in)
  withIn(decoded) { thunk }
}
/** Prints an object to `out` using its `toString` method.
*
* @param obj the object to print; may be null.
*/
def print(obj: Any): Unit = {
  // A null reference is rendered as the literal text "null" instead of calling toString on it.
  val rendered = if (null == obj) "null" else obj.toString()
  out.print(rendered)
}
/** Flushes the output stream. This function is required when partial
* output (i.e. output not terminated by a newline character) has
* to be made visible on the terminal.
*/
def flush(): Unit = out.flush()
/** Prints a newline character on the default output.
*/
def println(): Unit = out.println()
/** Prints out an object to the default output, followed by a newline character.
*
* @param x the object to print.
*/
def println(x: Any): Unit = out.println(x)
/** Prints its arguments as a formatted string to the default output,
* based on a string pattern (in a fashion similar to printf in C).
*
* The interpretation of the formatting patterns is described in
* <a href="" target="contentFrame" class="java/util/Formatter">
* `java.util.Formatter`</a>.
*
* @param text the pattern for formatting the arguments.
* @param args the arguments used to instantiate the pattern.
* @throws java.lang.IllegalArgumentException if there was a problem with the format string or arguments
*/
def printf(text: String, args: Any*): Unit = {
  // Build the whole string first, then emit it with a single print call.
  val formatted = text.format(args: _*)
  out.print(formatted)
}
/** Read a full line from the default input. Returns `null` if the end of the
* input stream has been reached.
*
* @return the string read from the terminal or null if the end of stream was reached.
*/
// Delegates directly to `in`; the result of in.readLine() (including null) is returned unchanged.
def readLine(): String = in.readLine()
/** Print formatted text to the default output and read a full line from the default input.
* Returns `null` if the end of the input stream has been reached.
*
* @param text the format of the text to print out, as in `printf`.
* @param args the parameters used to instantiate the format, as in `printf`.
* @return the string read from the default input
*/
def readLine(text: String, args: Any*): String = {
  // Emit the formatted prompt before reading the reply.
  printf(text, args: _*)
  val reply = readLine()
  reply
}
/** Reads a boolean value from an entire line of the default input.
* Has a fairly liberal interpretation of the input.
*
* @return the boolean value read, or false if it couldn't be converted to a boolean
* @throws java.io.EOFException if the end of the input stream has been reached.
*/
def readBoolean(): Boolean = readLine() match {
  // A null line means the input is exhausted.
  case null =>
    throw new java.io.EOFException("Console has reached end of input")
  case line =>
    // Only these four spellings (case-insensitive) count as true; anything else is false.
    line.toLowerCase() match {
      case "true" | "t" | "yes" | "y" => true
      case _ => false
    }
}
/** Reads a byte value from an entire line of the default input.
*
* @return the Byte that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Byte
*/
def readByte(): Byte = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toByte
}
/** Reads a short value from an entire line of the default input.
*
* @return the short that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Short
*/
def readShort(): Short = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toShort
}
/** Reads a char value from an entire line of the default input.
*
* @return the Char that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.StringIndexOutOfBoundsException if the line read from default input was empty
*/
def readChar(): Char = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  // Only the first character is used; the rest of the line is dropped.
  case line => line.charAt(0)
}
/** Reads an int value from an entire line of the default input.
*
* @return the Int that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to an Int
*/
def readInt(): Int = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toInt
}
/** Reads a long value from an entire line of the default input.
*
* @return the Long that was read
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Long
*/
def readLong(): Long = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toLong
}
/** Reads a float value from an entire line of the default input.
* @return the Float that was read.
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Float
*
*/
def readFloat(): Float = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toFloat
}
/** Reads a double value from an entire line of the default input.
*
* @return the Double that was read.
* @throws java.io.EOFException if the end of the
* input stream has been reached.
* @throws java.lang.NumberFormatException if the value couldn't be converted to a Double
*/
def readDouble(): Double = readLine() match {
  // A null line means the input is exhausted.
  case null => throw new java.io.EOFException("Console has reached end of input")
  case line => line.toDouble
}
/** Reads in some structured input (from the default input), specified by
* a format specifier. See class `java.text.MessageFormat` for details of
* the format specification.
*
* @param format the format of the input.
* @return a list of all extracted values.
* @throws java.io.EOFException if the end of the input stream has been
* reached.
*/
def readf(format: String): List[Any] = readLine() match {
  // A null line means the input is exhausted.
  case null =>
    throw new java.io.EOFException("Console has reached end of input")
  case line =>
    // Parse the line with the given MessageFormat pattern, then convert the parsed components.
    val parsed = new MessageFormat(format).parse(line)
    textComponents(parsed)
}
/** Reads in some structured input (from the default input), specified by
* a format specifier, returning only the first value extracted, according
* to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return The first value that was extracted from the input
*/
def readf1(format: String): Any = {
  val values = readf(format)
  values.head
}
/** Reads in some structured input (from the default input), specified
* by a format specifier, returning only the first two values extracted,
* according to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return A [[scala.Tuple2]] containing the first two values extracted
*/
def readf2(format: String): (Any, Any) = {
  val values = readf(format)
  // Walk the list with head/tail so a short result fails the same way as before.
  val first = values.head
  val second = values.tail.head
  (first, second)
}
/** Reads in some structured input (from the default input), specified
* by a format specifier, returning only the first three values extracted,
* according to the format specification.
*
* @param format format string, as accepted by `readf`.
* @return A [[scala.Tuple3]] containing the first three values extracted
*/
def readf3(format: String): (Any, Any, Any) = {
  val values = readf(format)
  // Walk the list with head/tail so a short result fails the same way as before.
  val first = values.head
  val afterFirst = values.tail
  val second = afterFirst.head
  val third = afterFirst.tail.head
  (first, second, third)
}
// Converts an array of parsed components into a List, turning java.lang wrapper
// objects into the corresponding Scala primitive values and leaving anything
// else untouched. Element order is preserved.
private def textComponents(a: Array[AnyRef]): List[Any] = {
  def unwrap(component: AnyRef): Any = component match {
    case b: java.lang.Boolean   => b.booleanValue()
    case b: java.lang.Byte      => b.byteValue()
    case s: java.lang.Short     => s.shortValue()
    case c: java.lang.Character => c.charValue()
    case i: java.lang.Integer   => i.intValue()
    case l: java.lang.Long      => l.longValue()
    case f: java.lang.Float     => f.floatValue()
    case d: java.lang.Double    => d.doubleValue()
    case other                  => other
  }
  a.iterator.map(unwrap).toList
}
}

View file

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.api.r
import org.scalatest.FlatSpec
import org.scalatest.Matchers._
/** Smoke tests for RBackendHelper: construction, init, start and close.
  * Two helpers are created up front; the second one is the only one that is
  * started and later closed.
  */
class RBackendHelperTest extends FlatSpec {

  val backend : RBackendHelper = RBackendHelper()
  val backend2 : RBackendHelper = RBackendHelper()

  "RBackendHelper" should "create a SparkR backend" in {
    // Touching the already-constructed helper is all this test does; it only
    // exists to surface exceptions thrown during construction.
    val created = backend
    assert(true) // only looking for exceptions here
  }

  it should "initialize properly, returning a port > 0" in {
    val assignedPort = backend.init()
    assert(assignedPort > 0)
  }

  it should "start a thread" in {
    val helper = backend2
    helper.init()
    val started = helper.start()
    started shouldBe a [Thread]
  }

  it should "close without error" in {
    backend2.close
    assert(true) // only looking for exceptions
  }
}

View file

@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.io.{File, PrintWriter}
import java.nio.file.{Files, Paths}
import org.apache.zeppelin.rinterpreter.rscala.RClient
import org.apache.zeppelin.rinterpreter.rscala.RClient._
import org.scalatest.Matchers._
import org.scalatest._
/** Walks step by step through launching an R process, feeding it the rzeppelin
  * bootstrap snippet, connecting the Scala-side sockets and creating/closing an
  * RContext. The steps communicate through the vars declared between them, so
  * they rely on being executed in declaration order.
  */
class RContextInitTest extends FlatSpec {
  import scala.sys.process._

  // Assigned from the ProcessIO stdin callback and polled from the test body.
  // NOTE(review): the callback presumably runs on a thread other than the test
  // thread, hence @volatile so the polling loop below can observe the write.
  @volatile var cmd: PrintWriter = null
  val command = RClient.defaultRCmd +: RClient.defaultArguments
  var processCmd : ProcessBuilder = null

  "Process command" should "create a process builder" in {
    processCmd = Process(command)
    processCmd shouldBe a[ProcessBuilder]
  }
  it should "be persistent for testing purposes" in {
    processCmd shouldBe a [ProcessBuilder]
  }

  var processIO : ProcessIO = null
  "Creating Process IO" should "not throw an exception" in {
    processIO = new ProcessIO(
      o => {
        cmd = new PrintWriter(o)
      },
      reader("STDOUT DEBUG: "),
      reader("STDERR DEBUG: "),
      true
    )
    processIO shouldBe a [ProcessIO]
  }

  var portsFile : File = null
  "A temp file " should "be created" in {
    portsFile = File.createTempFile("rscala-", "")
    assertResult(true) {portsFile.exists()}
  }

  var processInstance : Process = null
  "Process instance" should "launch" in {
    processInstance = processCmd.run(processIO)
    assert(true)
  }

  var libpath : String = null
  "RZeppelin R Package" should "be found" in {
    libpath = if (Files.exists(Paths.get("R/lib"))) "R/lib"
    else if (Files.exists(Paths.get("../R/lib"))) "../R/lib"
    else throw new RuntimeException("Could not find rzeppelin - it must be in either R/lib or ../R/lib")
    assert(Files.exists(Paths.get(libpath + "/rzeppelin")))
  }

  var snippet : String = null
  "Creating the snippit" should "be impossible to fail" in {
    // Use a literal char replacement: String.replaceAll interprets its first
    // argument as a regex, and a lone "\" (File.separator on Windows) is an
    // invalid pattern that throws PatternSyntaxException.
    val portsPath = portsFile.getAbsolutePath.replace(File.separatorChar, '/')
    snippet = s"""
library(lib.loc="$libpath", rzeppelin)
rzeppelin:::rServe(rzeppelin:::newSockets('${portsPath}',debug=FALSE,timeout=60))
q(save='no')"""
    assert(true)
  }

  "Cmd" should "stop being null" in {
    // Bounded wait: fail the test instead of hanging the build forever if the
    // process never hands us its stdin.
    val deadline = System.currentTimeMillis() + 60000L
    while (cmd == null && System.currentTimeMillis() < deadline) Thread.sleep(100)
    assert(cmd != null)
  }
  it should "accept the snippet" in {
    cmd.println(snippet)
    cmd.flush()
    assert(true)
  }

  var sockets : ScalaSockets = null
  "Scala Sockets" should "be created and signal OK" in {
    sockets = new ScalaSockets(portsFile.getAbsolutePath)
    sockets.out.writeInt(RClient.Protocol.OK)
    sockets.out.flush()
    assert(true)
  }

  "The R and Scala versions" should "match" in {
    assert(RClient.readString(sockets.in) == org.apache.zeppelin.rinterpreter.rscala.Version)
  }

  var rcon : RContext = null
  "Creating an RContext" should "not fail" in {
    rcon = new RContext(sockets, false)
  }

  "An open RContext" should "destroy safely" in {
    rcon.close()
    assertResult(false) {
      rcon.isOpen
    }
  }
}

View file

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util.Properties
import org.apache.zeppelin.RTest
import org.apache.zeppelin.rinterpreter.rscala.RException
import org.apache.zeppelin.spark.SparkInterpreter
import org.scalatest.Matchers._
import org.scalatest._
/** Exercises an RContext obtained from the RContext companion without Spark:
  * opening it, evaluating simple R expressions, probing for R packages and
  * closing it.
  *
  * NOTE(review): every test body below is written as `in { () => ... }`. That
  * block evaluates to a function literal which is never invoked, so the
  * assertions inside most likely never execute and these tests always pass.
  * The wrappers are left in place because this may be a deliberate way of
  * keeping the suite green on machines without R; confirm, and drop the
  * `() =>` prefixes to make the tests actually run.
  */
class RContextTest extends FlatSpec {
  RContext.resetRcon()

  val rcon = RContext(new Properties(), "test")

  "The RContext Singleton" should "create an RContext without Spark" in { () =>
    rcon shouldBe a[RContext]
  }

  "The RContext" should "be openable without spark" in { () =>
    rcon.open(None)
    assert(rcon.isOpen)
  }

  it should "be able to confirm that stats is available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("stats")
    }
  }

  it should "be able to confirm that a bogus package is not available" taggedAs(RTest) in { () =>
    assertResult(false) {
      rcon.testRPackage("thisisagarbagepackagename")
    }
  }

  it should "be able to add 2 + 2" taggedAs(RTest) in { () =>
    assertResult(4) {
      rcon.evalI0("2 + 2")
    }
  }
  it should "be able to return a vector" taggedAs(RTest) in { () =>
    assertResult(10) {
      rcon.evalI1("1:10").length
    }
  }
  it should "be able to return a string" taggedAs(RTest) in { () =>
    assertResult("hello world") {
      rcon.evalS0("'hello world'")
    }
  }
  it should "be able to return a vector of strings" taggedAs(RTest) in { () =>
    assertResult(26) {
      rcon.evalS1("LETTERS").length
    }
  }

  it should "throw an RException if told to evaluate garbage code" taggedAs(RTest) in { () =>
    intercept[RException] {
      rcon.eval("funkyfunction()")
    }
  }

//  it should "Throw an exception if we try to initialize SparkR without a SQLContext" in {() =>
//
//    intercept[RuntimeException] {
//      rcon.initializeSparkRTest()
//    }
//  }

  it should "have rzeppelin available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("rzeppelin")
    }
  }
  it should "have evaluate available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("evaluate")
    }
  }
  it should "have repr available" taggedAs(RTest) in { () =>
    assertResult(true) {
      rcon.testRPackage("repr")
    }
  }
  it should "also close politely" taggedAs(RTest) in { () =>
    rcon.close()
    // isOpen is compared against a Boolean elsewhere in this suite; the
    // previous expected value of 2 could never equal it.
    assertResult(false) {rcon.isOpen}
  }
}

View file

@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util.Properties
import org.apache.zeppelin.RTest
import org.apache.zeppelin.interpreter.{Interpreter, InterpreterContext, InterpreterResult, InterpreterGroup}
import org.scalatest.Matchers._
import org.scalatest._
import java.util.ArrayList
/** Tests RInterpreter through a minimal concrete subclass: lifecycle
  * (open/close), progress and property accessors, and how rContext instances
  * are shared between interpreters in the same or different InterpreterGroups.
  * Later tests depend on state set up by earlier ones (rint2, rint3).
  */
class RInterpreterTest extends FlatSpec {

  RContext.resetRcon()

  // Smallest possible concrete interpreter: evaluates the text with evalS1 and
  // joins the resulting strings with newlines.
  class RIntTester extends RInterpreter(new Properties(), startSpark = false) {

    def interpret(s: String, interpreterContext: InterpreterContext): InterpreterResult = {
      val result : Array[String] = rContext.evalS1(s)
      new InterpreterResult(InterpreterResult.Code.SUCCESS, result.mkString("\n"))
    }
  }
  val rint = new RIntTester()

  "An RInterpreter" should "exist" in {
    assert(rint != null)
  }

  it should "not complain when we assign it a group" in {
    val grp : InterpreterGroup = new InterpreterGroup("test")
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint)
    grp.put(rint.getClassName(), lst)
    rint.setInterpreterGroup(grp)
  }

  it should "create a fresh rContext when we ask for one" in {
    assert(! rint.getrContext.isOpen)
  }

  it should "open" taggedAs(RTest) in {
    rint.open()
    assert(rint.getrContext.isOpen)
  }

  it should "have rzeppelin available" taggedAs(RTest) in {
    assume(rint.getrContext.isOpen)
    assert(rint.getrContext.testRPackage("rzeppelin"))
  }
  it should "have an rContext able to do simple addition" taggedAs(RTest) in {
    assume(rint.getrContext.isOpen)
    assert(rint.getrContext.evalI0("2 + 2") == 4)
  }

  it should "have a functional completion function" taggedAs(RTest) in {
    val result = rint.hiddenCompletion("hi", 3)
    result should (contain ("hist"))
  }

  it should "have a working progress meter" in {
    rint.getrContext.setProgress(50)
    assertResult(50) {
      rint.getrContext.getProgress
    }
  }

  it should "have persistent properties" in {
    val props = new Properties()
    props.setProperty("hello", "world")
    rint.setProperty(props)
    assertResult("world") {
      rint.getProperty("hello")
    }
  }

  var rint2 : RIntTester = null

  it should "Share RContexts if they share the same InterpreterGroup" in {
    rint2 = new RIntTester()
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint2)
    val grp = rint.getInterpreterGroup()
    grp.put(rint2.getClassName(), lst)
    rint2.setInterpreterGroup(grp)
    rint2.open()
    rint.getrContext should be theSameInstanceAs rint2.getrContext
  }

  "Opening the second RInterpreter" should "not have closed the first RContext" in {
    assert(rint.getrContext.isOpen)
  }

  var rint3 : RIntTester = null

  "An RInterpreter in a different InterpreterGroup" should "have a different R Context" in {
    rint3 = new RIntTester()
    val grp : InterpreterGroup = new InterpreterGroup("othertest")
    val lst : ArrayList[Interpreter] = new ArrayList[Interpreter]()
    lst.add(rint3)
    grp.put(rint3.getClassName(), lst)
    rint3.setInterpreterGroup(grp)
    rint3.open()
    rint3.getrContext shouldNot be theSameInstanceAs rint2.getrContext
  }

  "The first RInterpreter" should "close politely" in {
    rint.close()
    assert(!rint.getrContext.isOpen)
  }

  "and so" should "the other one" in {
    rint2.close()
    assert(!rint2.getrContext.isOpen)
  }

  "and " should "the third one" in {
    rint3.close()
    // Check the interpreter that was just closed; this previously re-checked
    // rint2, so a failure to close rint3's context went unnoticed.
    assert(!rint3.getrContext.isOpen)
  }

//  fixture.sparky.close()

}

View file

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.rinterpreter
import java.util
import java.util.Properties
import org.apache.zeppelin.interpreter.{Interpreter, InterpreterGroup, InterpreterResult}
import org.scalatest.FlatSpec
import java.util.List
import org.scalatest.Matchers._
/** Tests the RRepl and KnitR wrapper interpreters: each should expose the
  * expected inner interpreter, both should share one rContext when placed in
  * the same InterpreterGroup, and KnitR should render a simple chunk to HTML.
  */
class WrapperTest extends FlatSpec {
  RContext.resetRcon()

  // --- RRepl wrapper, registered in a fresh group ---------------------------
  val repl: RRepl = new RRepl(new Properties(), false)
  val group : InterpreterGroup = new InterpreterGroup()
  var lst = new util.LinkedList[Interpreter]()
  lst.add(repl)
  group.put(repl.getClassName(), lst)
  repl.setInterpreterGroup(group)

  "The R REPL" should "exist and be of the right class" in {
    repl shouldBe a[RRepl]
  }

  it should "Have a RRepl Interpreter inside" in {
    repl.getInnerInterpreter shouldBe a[RReplInterpreter]
  }

  val repi = repl.getInnerInterpreter.asInstanceOf[RReplInterpreter]

  it should "have a fresh rContext" in {
    assert(!repi.getrContext.isOpen)
  }

  // --- KnitR wrapper, added to the same group -------------------------------
  val knitr: KnitR = new KnitR(new Properties(), false)
  lst = new util.LinkedList[Interpreter]()
  lst.add(knitr)
  group.put(knitr.getClassName(), lst)
  knitr.setInterpreterGroup(group)

  // Looks up the rContext behind the KnitR wrapper each time it is called.
  private def knitrContext =
    knitr.getInnerInterpreter.asInstanceOf[KnitRInterpreter].getrContext

  "The KnitR wrapper" should "exist and be of the right class" in {
    knitr shouldBe a[KnitR]
  }
  it should "have a KnitRInterpreter inside" in {
    knitr.getInnerInterpreter shouldBe a [KnitRInterpreter]
  }

  it should "share the RContext" in {
    knitrContext should be theSameInstanceAs repi.getrContext
  }

  it should "open without error" in {
    knitr.open()
    assert(knitrContext.isOpen)
  }

  it should "produce HTML in response to a simple query" in {
    val result = knitr.interpret(
      """
        |```{r}
        |2 + 2
        |```
      """.stripMargin, null)
    // On failure, include the interpreter's message in the test output.
    withClue(result.message()) {
      result should have (
        'code (InterpreterResult.Code.SUCCESS),
        'type (InterpreterResult.Type.HTML)
      )
    }
  }

  it should "close properly" in {
    repi.getrContext.close()
    assertResult(false) {
      repi.getrContext.isOpen
    }
  }

  "Just in case there are two rContexts, the other one" should "close properly also" in {
    val rcon = knitrContext
    rcon.close()
    assertResult(false) {
      rcon.isOpen
    }
  }
}

View file

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin
import org.scalatest.Tag
// ScalaTest tag named "RTest"; applied with `taggedAs(RTest)` in the R interpreter suites.
object RTest extends Tag("RTest")
// ScalaTest tag named "SparkTest".
object SparkTest extends Tag("SparkTest")

View file

@ -231,7 +231,6 @@
<scope>provided</scope>
</dependency>
<!--TEST-->
<dependency>
<groupId>org.scalatest</groupId>

View file

@ -72,7 +72,6 @@ public class SparkVersion {
return olderThan(MIN_SUPPORTED_VERSION) || newerThanEquals(UNSUPPORTED_FUTURE_VERSION);
}
/**
 * Static factory: wraps the given version string in a new SparkVersion.
 *
 * @param versionString the version text handed to the SparkVersion constructor
 * @return a new SparkVersion built from {@code versionString}
 */
public static SparkVersion fromVersionString(String versionString) {
return new SparkVersion(versionString);
}
@ -81,6 +80,10 @@ public class SparkVersion {
return this.newerThanEquals(SPARK_1_2_0);
}
/**
 * Reports whether SparkR can be used with this version.
 *
 * @return the result of {@code newerThanEquals(SPARK_1_4_0)}
 */
public boolean isSparkRSupported() {
  return newerThanEquals(SPARK_1_4_0);
}
/**
 * Reports whether the DataFrame API is available in this version.
 *
 * @return the result of {@code newerThanEquals(SPARK_1_4_0)}
 */
public boolean hasDataFrame() {
  return newerThanEquals(SPARK_1_4_0);
}

View file

@ -41,187 +41,193 @@ import com.google.gson.Gson;
* Spark cluster is started by CI server using testing/startSparkCluster.sh
*/
public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
Gson gson = new Gson();
Gson gson = new Gson();
@BeforeClass
public static void init() throws Exception {
AbstractTestRestApi.startUp();
}
@AfterClass
public static void destroy() throws Exception {
AbstractTestRestApi.shutDown();
}
private void waitForFinish(Paragraph p) {
while (p.getStatus() != Status.FINISHED
&& p.getStatus() != Status.ERROR
&& p.getStatus() != Status.ABORT) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
LOG.error("Exception in WebDriverManager while getWebDriver ", e);
}
@BeforeClass
public static void init() throws Exception {
AbstractTestRestApi.startUp();
}
}
@Test
public void basicRDDTransformationAndActionTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
// run markdown paragraph, again
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%spark print(sc.parallelize(1 to 10).reduce(_ + _))");
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
assertEquals("55", p.getResult().message());
ZeppelinServer.notebook.removeNote(note.id());
}
@Test
public void pySparkTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
int sparkVersion = getSparkVersionNumber(note);
if (isPyspark() && sparkVersion >= 12) { // pyspark supported from 1.2.1
// run markdown paragraph, again
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%pyspark print(sc.parallelize(range(1, 11)).reduce(lambda a, b: a + b))");
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
assertEquals("55\n", p.getResult().message());
@AfterClass
public static void destroy() throws Exception {
AbstractTestRestApi.shutDown();
}
ZeppelinServer.notebook.removeNote(note.id());
}
@Test
public void pySparkAutoConvertOptionTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
int sparkVersion = getSparkVersionNumber(note);
if (isPyspark() && sparkVersion >= 14) { // auto_convert enabled from spark 1.4
// run markdown paragraph, again
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%pyspark\nfrom pyspark.sql.functions import *\n"
+ "print(sqlContext.range(0, 10).withColumn('uniform', rand(seed=10) * 3.14).count())");
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
assertEquals("10\n", p.getResult().message());
}
ZeppelinServer.notebook.removeNote(note.id());
}
@Test
public void zRunTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
Paragraph p0 = note.addParagraph();
Map config0 = p0.getConfig();
config0.put("enabled", true);
p0.setConfig(config0);
p0.setText("%spark z.run(1)");
Paragraph p1 = note.addParagraph();
Map config1 = p1.getConfig();
config1.put("enabled", true);
p1.setConfig(config1);
p1.setText("%spark val a=10");
Paragraph p2 = note.addParagraph();
Map config2 = p2.getConfig();
config2.put("enabled", true);
p2.setConfig(config2);
p2.setText("%spark print(a)");
note.run(p0.getId());
waitForFinish(p0);
assertEquals(Status.FINISHED, p0.getStatus());
note.run(p2.getId());
waitForFinish(p2);
assertEquals(Status.FINISHED, p2.getStatus());
assertEquals("10", p2.getResult().message());
ZeppelinServer.notebook.removeNote(note.id());
}
@Test
public void pySparkDepLoaderTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
if (isPyspark() && getSparkVersionNumber(note) >= 14) {
// restart spark interpreter
List<InterpreterSetting> settings =
ZeppelinServer.notebook.getBindedInterpreterSettings(note.id());
for (InterpreterSetting setting : settings) {
if (setting.getGroup().equals("spark")) {
ZeppelinServer.notebook.getInterpreterFactory().restart(setting.id());
break;
/**
 * Blocks until the paragraph reaches a terminal status (FINISHED, ERROR or
 * ABORT), polling every 100 ms.
 *
 * If the waiting thread is interrupted, the interrupt flag is restored and the
 * method returns early; callers assert on the paragraph status afterwards, so
 * an unfinished paragraph still fails the test.
 *
 * @param p the paragraph to wait for
 */
private void waitForFinish(Paragraph p) {
  while (p.getStatus() != Status.FINISHED
      && p.getStatus() != Status.ERROR
      && p.getStatus() != Status.ABORT) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // The old message referred to WebDriverManager, which is unrelated to this wait.
      LOG.error("Interrupted while waiting for paragraph to finish", e);
      // Restore the flag and stop: looping on would swallow the interrupt.
      Thread.currentThread().interrupt();
      return;
    }
  }
}
// load dep
Paragraph p0 = note.addParagraph();
Map config = p0.getConfig();
config.put("enabled", true);
p0.setConfig(config);
p0.setText("%dep z.load(\"com.databricks:spark-csv_2.11:1.2.0\")");
note.run(p0.getId());
waitForFinish(p0);
assertEquals(Status.FINISHED, p0.getStatus());
// write test csv file
File tmpFile = File.createTempFile("test", "csv");
FileUtils.write(tmpFile, "a,b\n1,2");
// load data using libraries from dep loader
Paragraph p1 = note.addParagraph();
p1.setConfig(config);
p1.setText("%pyspark\n" +
"from pyspark.sql import SQLContext\n" +
"print(sqlContext.read.format('com.databricks.spark.csv')" +
".load('"+ tmpFile.getAbsolutePath() +"').count())");
note.run(p1.getId());
waitForFinish(p1);
assertEquals(Status.FINISHED, p1.getStatus());
assertEquals("2\n", p1.getResult().message());
}
}
/**
* Get spark version number as a numerical value.
* eg. 1.1.x => 11, 1.2.x => 12, 1.3.x => 13 ...
*/
private int getSparkVersionNumber(Note note) {
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%spark print(sc.version)");
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
String sparkVersion = p.getResult().message();
System.out.println("Spark version detected " + sparkVersion);
String[] split = sparkVersion.split("\\.");
int version = Integer.parseInt(split[0]) * 10 + Integer.parseInt(split[1]);
return version;
}
}
/**
 * Runs a single %spark paragraph that sums 1 to 10 with an RDD reduce and
 * checks that it finishes with the output "55".
 */
@Test
public void basicRDDTransformationAndActionTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
// add a paragraph, enable it, and run the %spark snippet
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%spark print(sc.parallelize(1 to 10).reduce(_ + _))");
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
assertEquals("55", p.getResult().message());
// clean up the note created for this test
ZeppelinServer.notebook.removeNote(note.id());
}
/**
 * Runs a %pyspark paragraph that sums 1 to 10 and checks for the output "55\n".
 * The paragraph is only run when isPyspark() is true and the detected Spark
 * version number is at least 12; otherwise the test only creates and removes the note.
 */
@Test
public void pySparkTest() throws IOException {
// create new note
Note note = ZeppelinServer.notebook.createNote();
note.setName("note");
// getSparkVersionNumber itself runs a %spark paragraph in this note
int sparkVersion = getSparkVersionNumber(note);
if (isPyspark() && sparkVersion >= 12) { // pyspark supported from 1.2.1
// add a paragraph, enable it, and run the %pyspark snippet
Paragraph p = note.addParagraph();
Map config = p.getConfig();
config.put("enabled", true);
p.setConfig(config);
p.setText("%pyspark print(sc.parallelize(range(1, 11)).reduce(lambda a, b: a + b))");
// p.getRepl("org.apache.zeppelin.spark.SparkInterpreter").open();
note.run(p.getId());
waitForFinish(p);
assertEquals(Status.FINISHED, p.getStatus());
assertEquals("55\n", p.getResult().message());
}
// clean up the note created for this test
ZeppelinServer.notebook.removeNote(note.id());
}
/**
 * Runs a PySpark paragraph that builds a ten-row range, adds a column derived
 * from {@code rand(seed=10) * 3.14}, and checks that the printed row count is 10.
 * The PySpark part is skipped unless pyspark is available and the detected
 * Spark version number is at least 14.
 */
@Test
public void pySparkAutoConvertOptionTest() throws IOException {
  Note scratchNote = ZeppelinServer.notebook.createNote();
  scratchNote.setName("note");

  // The version probe runs unconditionally, before the pyspark check.
  int detectedVersion = getSparkVersionNumber(scratchNote);
  // auto_convert enabled from spark 1.4
  boolean canRunPyspark = isPyspark() && detectedVersion >= 14;

  if (canRunPyspark) {
    String script = "%pyspark\nfrom pyspark.sql.functions import *\n"
        + "print(sqlContext.range(0, 10).withColumn('uniform', rand(seed=10) * 3.14).count())";

    Paragraph countParagraph = scratchNote.addParagraph();
    Map settings = countParagraph.getConfig();
    settings.put("enabled", true);
    countParagraph.setConfig(settings);
    countParagraph.setText(script);

    scratchNote.run(countParagraph.getId());
    waitForFinish(countParagraph);

    assertEquals(Status.FINISHED, countParagraph.getStatus());
    assertEquals("10\n", countParagraph.getResult().message());
  }

  ZeppelinServer.notebook.removeNote(scratchNote.id());
}
/**
 * Builds a note with three Spark paragraphs: one calling {@code z.run(1)},
 * one defining {@code a}, and one printing {@code a}. Only the first and the
 * third are run explicitly; the third is expected to print 10.
 */
@Test
public void zRunTest() throws IOException {
  Note scratchNote = ZeppelinServer.notebook.createNote();

  // Paragraphs are added in this exact order because z.run(1) refers to one by position.
  Paragraph runner = scratchNote.addParagraph();
  Map runnerSettings = runner.getConfig();
  runnerSettings.put("enabled", true);
  runner.setConfig(runnerSettings);
  runner.setText("%spark z.run(1)");

  Paragraph definer = scratchNote.addParagraph();
  Map definerSettings = definer.getConfig();
  definerSettings.put("enabled", true);
  definer.setConfig(definerSettings);
  definer.setText("%spark val a=10");

  Paragraph printer = scratchNote.addParagraph();
  Map printerSettings = printer.getConfig();
  printerSettings.put("enabled", true);
  printer.setConfig(printerSettings);
  printer.setText("%spark print(a)");

  // The defining paragraph is never run directly by this test.
  scratchNote.run(runner.getId());
  waitForFinish(runner);
  assertEquals(Status.FINISHED, runner.getStatus());

  scratchNote.run(printer.getId());
  waitForFinish(printer);
  assertEquals(Status.FINISHED, printer.getStatus());
  assertEquals("10", printer.getResult().message());

  ZeppelinServer.notebook.removeNote(scratchNote.id());
}
/**
 * Loads the spark-csv artifact through a {@code %dep} paragraph and then reads
 * a small CSV file from PySpark with it, expecting a row count of 2.
 * The body is skipped unless pyspark is available and the detected Spark
 * version number is at least 14.
 *
 * @throws IOException if the temporary CSV file cannot be created or written,
 *         or if removing the note fails
 */
@Test
public void pySparkDepLoaderTest() throws IOException {
  // create new note
  Note note = ZeppelinServer.notebook.createNote();

  // The version probe only runs when pyspark is available (short-circuit).
  if (isPyspark() && getSparkVersionNumber(note) >= 14) {
    // Restart the spark interpreter bound to this note.
    // NOTE(review): presumably %dep has to run before the Spark interpreter
    // is (re)initialized; confirm against the dependency loader docs.
    List<InterpreterSetting> settings =
        ZeppelinServer.notebook.getBindedInterpreterSettings(note.id());
    for (InterpreterSetting setting : settings) {
      if (setting.getGroup().equals("spark")) {
        ZeppelinServer.notebook.getInterpreterFactory().restart(setting.id());
        break;
      }
    }

    // load dep
    Paragraph p0 = note.addParagraph();
    Map config = p0.getConfig();
    config.put("enabled", true);
    p0.setConfig(config);
    p0.setText("%dep z.load(\"com.databricks:spark-csv_2.11:1.2.0\")");
    note.run(p0.getId());
    waitForFinish(p0);
    assertEquals(Status.FINISHED, p0.getStatus());

    // write test csv file; schedule it for deletion so test runs do not
    // accumulate files in the temp directory
    File tmpFile = File.createTempFile("test", "csv");
    tmpFile.deleteOnExit();
    FileUtils.write(tmpFile, "a,b\n1,2");

    // load data using libraries from dep loader (reuses the config map of p0)
    Paragraph p1 = note.addParagraph();
    p1.setConfig(config);
    p1.setText("%pyspark\n" +
        "from pyspark.sql import SQLContext\n" +
        "print(sqlContext.read.format('com.databricks.spark.csv')" +
        ".load('"+ tmpFile.getAbsolutePath() +"').count())");
    note.run(p1.getId());
    waitForFinish(p1);
    assertEquals(Status.FINISHED, p1.getStatus());
    assertEquals("2\n", p1.getResult().message());
  }

  // Remove the note like the other tests in this class do, so it does not
  // linger in the shared notebook after this test.
  ZeppelinServer.notebook.removeNote(note.id());
}
/**
 * Runs a one-line Spark paragraph in the given note and converts the printed
 * Spark version into the number {@code major * 10 + minor}.
 * For example 1.1.x yields 11, 1.2.x yields 12 and 1.3.x yields 13.
 * As a side effect the note is renamed to "note".
 */
private int getSparkVersionNumber(Note note) {
  Paragraph versionParagraph = note.addParagraph();
  note.setName("note");

  Map settings = versionParagraph.getConfig();
  settings.put("enabled", true);
  versionParagraph.setConfig(settings);
  versionParagraph.setText("%spark print(sc.version)");

  note.run(versionParagraph.getId());
  waitForFinish(versionParagraph);
  assertEquals(Status.FINISHED, versionParagraph.getStatus());

  String reported = versionParagraph.getResult().message();
  System.out.println("Spark version detected " + reported);

  // Only the first two dot-separated fields contribute to the result.
  String[] fields = reported.split("\\.");
  int major = Integer.parseInt(fields[0]);
  int minor = Integer.parseInt(fields[1]);
  return major * 10 + minor;
}
}

View file

@ -47,6 +47,7 @@
"src-noconflict/mode-sql.js",
"src-noconflict/mode-markdown.js",
"src-noconflict/mode-sh.js",
"src-noconflict/mode-r.js",
"src-noconflict/keybinding-emacs.js",
"src-noconflict/ext-language_tools.js",
"src-noconflict/theme-chrome.js"

View file

@ -124,6 +124,7 @@
</goals>
<configuration>
  <!-- The task name and the flag share one element: "arguments" is a single
       plugin parameter, and declaring it twice would leave only one of the
       two values in effect. -->
  <arguments>build --force</arguments>
</configuration>
</execution>

View file

@ -108,6 +108,7 @@ limitations under the License.
<script src="bower_components/ace-builds/src-noconflict/mode-sql.js"></script>
<script src="bower_components/ace-builds/src-noconflict/mode-markdown.js"></script>
<script src="bower_components/ace-builds/src-noconflict/mode-sh.js"></script>
<script src="bower_components/ace-builds/src-noconflict/mode-r.js"></script>
<script src="bower_components/ace-builds/src-noconflict/keybinding-emacs.js"></script>
<script src="bower_components/ace-builds/src-noconflict/ext-language_tools.js"></script>
<script src="bower_components/ace-builds/src-noconflict/theme-chrome.js"></script>

View file

@ -37,6 +37,7 @@ module.exports = function(config) {
'bower_components/ace-builds/src-noconflict/mode-sql.js',
'bower_components/ace-builds/src-noconflict/mode-markdown.js',
'bower_components/ace-builds/src-noconflict/mode-sh.js',
'bower_components/ace-builds/src-noconflict/mode-r.js',
'bower_components/ace-builds/src-noconflict/keybinding-emacs.js',
'bower_components/ace-builds/src-noconflict/ext-language_tools.js',
'bower_components/ace-builds/src-noconflict/theme-chrome.js',

View file

@ -472,7 +472,9 @@ public class ZeppelinConfiguration extends XMLConfiguration {
+ "org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,"
+ "org.apache.zeppelin.scalding.ScaldingInterpreter,"
+ "org.apache.zeppelin.jdbc.JDBCInterpreter,"
+ "org.apache.zeppelin.hbase.HbaseInterpreter"),
+ "org.apache.zeppelin.hbase.HbaseInterpreter,"
+ "org.apache.zeppelin.rinterpreter.RRepl,"
+ "org.apache.zeppelin.rinterpreter.KnitR"),
ZEPPELIN_INTERPRETER_DIR("zeppelin.interpreter.dir", "interpreter"),
ZEPPELIN_INTERPRETER_LOCALREPO("zeppelin.interpreter.localRepo", "local-repo"),
ZEPPELIN_INTERPRETER_CONNECT_TIMEOUT("zeppelin.interpreter.connect.timeout", 30000),