Merge pull request #1 from apache/master

merge latest commits
This commit is contained in:
gss2002 2016-11-08 11:26:11 -05:00 committed by GitHub
commit 1702cc52c2
291 changed files with 11401 additions and 3647 deletions

5
.gitignore vendored
View file

@ -1,4 +1,5 @@
*.class
*.pyc
# Package Files #
*.jar
@ -6,7 +7,8 @@
*.ear
# interpreter
/interpreter/
/interpreter/*
!/interpreter/lib
# interpreter temp files
spark/derby.log
@ -21,6 +23,7 @@ lens/lens-cli-hist.log
conf/zeppelin-env.sh
conf/zeppelin-env.cmd
conf/zeppelin-site.xml
conf/shiro.ini
conf/keystore
conf/truststore
conf/interpreter.json

View file

@ -36,7 +36,7 @@ matrix:
include:
# Test License compliance using RAT tool
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
env: SCALA_VER="2.11" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
# Test all modules with spark 2.0.0 and scala 2.11
- jdk: "oraclejdk7"
@ -58,18 +58,6 @@ matrix:
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.2.2
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.1.1
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"

View file

@ -2,7 +2,7 @@
**Documentation:** [User Guide](http://zeppelin.apache.org/docs/latest/index.html)<br/>
**Mailing Lists:** [User and Dev mailing list](http://zeppelin.apache.org/community.html)<br/>
**Continuous Integration:** [![Build Status](https://secure.travis-ci.org/apache/zeppelin.png?branch=master)](https://travis-ci.org/apache/zeppelin) <br/>
**Continuous Integration:** [![Build Status](https://travis-ci.org/apache/zeppelin.svg?branch=master)](https://travis-ci.org/apache/zeppelin) <br/>
**Contributing:** [Contribution Guide](https://zeppelin.apache.org/contribution/contributions.html)<br/>
**Issue Tracker:** [Jira](https://issues.apache.org/jira/browse/ZEPPELIN)<br/>
**License:** [Apache 2.0](https://github.com/apache/zeppelin/blob/master/LICENSE)
@ -128,9 +128,6 @@ Available profiles are
-Pspark-1.6
-Pspark-1.5
-Pspark-1.4
-Pspark-1.3
-Pspark-1.2
-Pspark-1.1
-Pcassandra-spark-1.5
-Pcassandra-spark-1.4
-Pcassandra-spark-1.3
@ -192,7 +189,7 @@ enable 3rd party vendor repository (cloudera)
##### `-Pmapr[version]` (optional)
For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.2`, `-Pspark-1.3`, etc.) as needed.
For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.6`, `-Pspark-2.0`, etc.) as needed.
The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com
Available profiles are

View file

@ -135,7 +135,7 @@ public class AlluxioInterpreter extends Interpreter {
private String[] splitAndRemoveEmpty(String st, String splitSeparator) {
String[] voices = st.split(splitSeparator);
ArrayList<String> result = new ArrayList<String>();
ArrayList<String> result = new ArrayList<>();
for (String voice : voices) {
if (!voice.trim().isEmpty()) {
result.add(voice);
@ -145,7 +145,7 @@ public class AlluxioInterpreter extends Interpreter {
}
private String[] splitAndRemoveEmpty(String[] sts, String splitSeparator) {
ArrayList<String> result = new ArrayList<String>();
ArrayList<String> result = new ArrayList<>();
for (String st : sts) {
result.addAll(Arrays.asList(splitAndRemoveEmpty(st, splitSeparator)));
}

View file

@ -16,6 +16,9 @@
"defaultValue": "19998",
"description": "Alluxio master port"
}
},
"editor": {
"editOnDblClick": false
}
}
]
]

View file

@ -93,7 +93,7 @@ public class AlluxioInterpreterTest {
List expectedResultThree = Arrays.asList(
new InterpreterCompletion("copyFromLocal", "copyFromLocal"),
new InterpreterCompletion("copyToLocal", "copyToLocal"));
List expectedResultNone = new ArrayList<String>();
List expectedResultNone = new ArrayList<>();
List<InterpreterCompletion> resultOne = alluxioInterpreter.completion("c", 0);
List<InterpreterCompletion> resultTwo = alluxioInterpreter.completion("co", 0);

View file

@ -4,7 +4,9 @@
"name": "angular",
"className": "org.apache.zeppelin.angular.AngularInterpreter",
"properties": {
},
"editor": {
"editOnDblClick": true
}
}
]

View file

@ -5,7 +5,9 @@
"className": "org.apache.zeppelin.beam.BeamInterpreter",
"defaultInterpreter": true,
"properties": {
},
"editor": {
"editOnDblClick": false
}
}
]

View file

@ -24,7 +24,8 @@
}
},
"editor": {
"language": "sql"
"language": "sql",
"editOnDblClick": false
}
}
]

View file

@ -149,6 +149,28 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
# autodetect HADOOP_CONF_DIR by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
elif [[ -d "/etc/tez/conf" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
else
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
fi
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"

View file

@ -190,6 +190,9 @@
"defaultValue": "true",
"description": "Cassandra socket TCP no delay. Default = true"
}
},
"editor": {
"editOnDblClick": false
}
}
]

View file

@ -32,6 +32,7 @@ kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin in
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command

View file

@ -45,6 +45,10 @@ user3 = password4, role2
#ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
#ldapRealm.contextFactory.authenticationMechanism = SIMPLE
### A sample PAM configuration
#pamRealm=org.apache.zeppelin.realm.PamRealm
#pamRealm.service=sshd
### A sample for configuring ZeppelinHub Realm
#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
@ -78,5 +82,5 @@ admin = *
#/api/interpreter/** = authc, roles[admin]
#/api/configurations/** = authc, roles[admin]
#/api/credential/** = authc, roles[admin]
/** = anon
#/** = authc
#/** = anon
/** = authc

View file

@ -190,7 +190,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter, org.apache.zeppelin.pig.PigQueryInterpreter</value>
<description>Comma-separated interpreter configurations. The first interpreter becomes the default.</description>
</property>

View file

@ -62,6 +62,7 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
@ -118,8 +119,6 @@
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav">
<li>
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
<span class="fa fa-search nav-search-icon"></span>

View file

@ -619,6 +619,10 @@ and (max-width: 1024px) {
.navbar-collapse.collapse {
padding-right: 0;
}
.navbar-fixed-top > .container {
width: 800px;
}
}
/* master branch docs dropdown menu */

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 320 KiB

After

Width:  |  Height:  |  Size: 364 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 199 KiB

After

Width:  |  Height:  |  Size: 280 KiB

View file

@ -62,7 +62,7 @@ When you are ready, just make a pull-request.
## Alternative way
You can directly edit `.md` files in `/docs/` directory at the web interface of github and make pull-request immediatly.
You can directly edit `.md` files in `/docs/` directory at the web interface of github and make pull-request immediately.
## Stay involved
Contributors should join the Zeppelin mailing lists.

View file

@ -34,7 +34,7 @@ Interpreters in the same InterpreterGroup can reference each other. For example,
[InterpreterSetting](https://github.com/apache/zeppelin/blob/master/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSetting.java) is configuration of a given [InterpreterGroup](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/InterpreterGroup.java) and a unit of start/stop interpreter.
All Interpreters in the same InterpreterSetting are launched in a single, separate JVM process. The Interpreter communicates with Zeppelin engine via **[Thrift](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/thrift/RemoteInterpreterService.thrift)**.
In 'Separate Interpreter(scoped / isolated) for each note' mode which you can see at the **Interpreter Setting** menu when you create a new interpreter, new interpreter instance will be created per notebook. But it still runs on the same JVM while they're in the same InterpreterSettings.
In 'Separate Interpreter(scoped / isolated) for each note' mode which you can see at the **Interpreter Setting** menu when you create a new interpreter, new interpreter instance will be created per note. But it still runs on the same JVM while they're in the same InterpreterSettings.
## Make your own Interpreter
@ -48,7 +48,7 @@ There are three locations where you can store your interpreter group, name and o
{ZEPPELIN_INTERPRETER_DIR}/{YOUR_OWN_INTERPRETER_DIR}/interpreter-setting.json
```
Here is an example of `interpreter-setting.json` on your own interpreter. Note that if you don't specify editor object, your interpreter will use plain text mode for syntax highlighting.
Here is an example of `interpreter-setting.json` on your own interpreter.
```json
[
@ -71,7 +71,8 @@ Here is an example of `interpreter-setting.json` on your own interpreter. Note t
}, ...
},
"editor": {
"language": "your-syntax-highlight-language"
"language": "your-syntax-highlight-language",
"editOnDblClick": false
}
},
{
@ -98,15 +99,18 @@ The name of the interpreter is what you later write to identify a paragraph whic
some interpreter specific code...
```
## Programming Languages for Interpreter
If the interpreter uses a specific programming language (like Scala, Python, SQL), it is generally recommended to add a syntax highlighting supported for that to the notebook paragraph editor.
## Editor setting for Interpreter
You can add `editor` object to `interpreter-setting.json` file to specify paragraph editor settings.
To check out the list of languages supported, see the `mode-*.js` files under `zeppelin-web/bower_components/ace-builds/src-noconflict` or from [github.com/ajaxorg/ace-builds](https://github.com/ajaxorg/ace-builds/tree/master/src-noconflict).
### Language
If the interpreter uses a specific programming language (like Scala, Python, SQL), it is generally recommended to add syntax highlighting support for that language to the note paragraph editor.
To check out the list of languages supported, see the `mode-*.js` files under `zeppelin-web/bower_components/ace-builds/src-noconflict` or from [github.com/ajaxorg/ace-builds](https://github.com/ajaxorg/ace-builds/tree/master/src-noconflict).
If you want to add a new set of syntax highlighting,
1. Add the `mode-*.js` file to <code>[zeppelin-web/bower.json](https://github.com/apache/zeppelin/blob/master/zeppelin-web/bower.json)</code> ( when built, <code>[zeppelin-web/src/index.html](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/index.html)</code> will be changed automatically. ).
2. Add `editor` object to `interpreter-setting.json` file. If you want to set your language to `java` for example, add:
1. Add the `mode-*.js` file to <code>[zeppelin-web/bower.json](https://github.com/apache/zeppelin/blob/master/zeppelin-web/bower.json)</code> (when built, <code>[zeppelin-web/src/index.html](https://github.com/apache/zeppelin/blob/master/zeppelin-web/src/index.html)</code> will be changed automatically).
2. Add `language` field to `editor` object. Note that if you don't specify language field, your interpreter will use plain text mode for syntax highlighting. Let's say you want to set your language to `java`, then add:
```
"editor": {
@ -114,6 +118,14 @@ If you want to add a new set of syntax highlighting,
}
```
### Edit on double click
If your interpreter uses a mark-up language such as markdown or HTML, set `editOnDblClick` to `true` so that the text editor opens on paragraph double click and closes on paragraph run. Otherwise set it to `false`.
```
"editor": {
"editOnDblClick": false
}
```
## Install your interpreter binary
Once you have built your interpreter, you can place it under the interpreter directory with all its dependencies.
@ -150,7 +162,7 @@ Now you are done and ready to use your interpreter.
## Use your interpreter
### 0.5.0
Inside of a notebook, `%[INTERPRETER_NAME]` directive will call your interpreter.
Inside of a note, `%[INTERPRETER_NAME]` directive will call your interpreter.
Note that the first interpreter configuration in zeppelin.interpreters will be the default one.
For example,
@ -163,7 +175,7 @@ println(a)
```
### 0.6.0 and later
Inside of a notebook, `%[INTERPRETER_GROUP].[INTERPRETER_NAME]` directive will call your interpreter.
Inside of a note, `%[INTERPRETER_GROUP].[INTERPRETER_NAME]` directive will call your interpreter.
You can omit either [INTERPRETER\_GROUP] or [INTERPRETER\_NAME]. If you omit [INTERPRETER\_NAME], then first available interpreter will be selected in the [INTERPRETER\_GROUP].
Likewise, if you skip [INTERPRETER\_GROUP], then [INTERPRETER\_NAME] will be chosen from default interpreter group.
@ -216,7 +228,7 @@ Checkout some interpreters released with Zeppelin by default.
We welcome contribution to a new interpreter. Please follow these few steps:
- First, check out the general contribution guide [here](https://zeppelin.apache.org/contribution/contributions.html).
- Follow the steps in [Make your own Interpreter](#make-your-own-interpreter) section above.
- Follow the steps in [Make your own Interpreter](#make-your-own-interpreter) section and [Editor setting for Interpreter](#editor-setting-for-interpreter) above.
- Add your interpreter as in the [Configure your interpreter](#configure-your-interpreter) section above; also add it to the example template [zeppelin-site.xml.template](https://github.com/apache/zeppelin/blob/master/conf/zeppelin-site.xml.template).
- Add tests! They are run by [Travis](https://travis-ci.org/apache/zeppelin) for all changes and it is important that they are self-contained.
- Include your interpreter as a module in [`pom.xml`](https://github.com/apache/zeppelin/blob/master/pom.xml).

View file

@ -25,7 +25,7 @@ limitations under the License.
## Text
By default, Apache Zeppelin prints interpreter responce as a plain text using `text` display system.
By default, Apache Zeppelin prints interpreter response as a plain text using `text` display system.
<img src="/assets/themes/zeppelin/img/screenshots/display_text.png" />
@ -41,7 +41,7 @@ With `%html` directive, Zeppelin treats your output as HTML
## Table
If you have data that row seprated by '\n' (newline) and column separated by '\t' (tab) with first row as header row, for example
If you have data with rows separated by '\n' (newline) and columns separated by '\t' (tab), with the first row as the header row, for example
<img src="/assets/themes/zeppelin/img/screenshots/display_table.png" />

View file

@ -55,7 +55,7 @@ Stable binary packages are available on the [Apache Zeppelin Download Page](http
If you downloaded the default package, just unpack it in a directory of your choice and you're ready to go. If you downloaded the *net-install* package, you should manually [install additional interpreters](../manual/interpreterinstallation.html) first. You can also install everything by running `./bin/install-interpreter.sh --all`.
After unpacking, jump to the [Starting Apache Zeppelin with Command Line](#starting-apache-zeppelin-with-command-line).
After unpacking, jump to the [Starting Apache Zeppelin from Command Line](#starting-apache-zeppelin-from-the-command-line).
### Building from Source
@ -178,7 +178,7 @@ chdir /usr/share/zeppelin
exec bin/zeppelin-daemon.sh upstart
```
## Next Steps:
## Next Steps
Congratulations, you have successfully installed Apache Zeppelin! Here are two next steps you might find useful:
@ -188,7 +188,7 @@ Congratulations, you have successfully installed Apache Zeppelin! Here are two n
* If you need more configuration for Apache Zeppelin, jump to the next section: [Apache Zeppelin Configuration](#apache-zeppelin-configuration).
#### If you need more information about Spark or JDBC interpreter settings...
* Apache Zeppelin provides deep integration with [Apache Spark](http://spark.apache.org/). For more informtation, see [Spark Interpreter for Apache Zeppelin](../interpreter/spark.html).
* Apache Zeppelin provides deep integration with [Apache Spark](http://spark.apache.org/). For more information, see [Spark Interpreter for Apache Zeppelin](../interpreter/spark.html).
* You can also use generic JDBC connections in Apache Zeppelin. Go to [Generic JDBC Interpreter for Apache Zeppelin](../interpreter/jdbc.html).
#### If you are in a multi-user environment...
@ -311,13 +311,13 @@ You can configure Apache Zeppelin with either **environment variables** in `conf
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN</td>
<td>zeppelin.notebook.homescreen</td>
<td></td>
<td>Display notebook IDs on the Apache Zeppelin homescreen <br />i.e. 2A94M5J1Z</td>
<td>Display note IDs on the Apache Zeppelin homescreen <br />i.e. 2A94M5J1Z</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE</td>
<td>zeppelin.notebook.homescreen.hide</td>
<td>false</td>
<td>Hide the notebook ID set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For the further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
<td>Hide the note ID set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
</tr>
<tr>
<td>ZEPPELIN_WAR_TEMPDIR</td>

View file

@ -52,3 +52,4 @@ So, copying `notebook` and `conf` directory should be enough.
- From 0.7, we don't use `ZEPPELIN_JAVA_OPTS` as default value of `ZEPPELIN_INTP_JAVA_OPTS` and also the same for `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If users want to configure the JVM options of the interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly. If you don't set `ZEPPELIN_INTP_MEM`, Zeppelin will set it to `-Xms1024m -Xmx1024m -XX:MaxPermSize=512m` by default.
- Mapping from `%jdbc(prefix)` to `%prefix` is no longer available. Instead, you can use %[interpreter alias] with multiple interpreter settings on GUI.
- Usage of `ZEPPELIN_PORT` is not supported in ssl mode. Instead use `ZEPPELIN_SSL_PORT` to configure the ssl port. Value from `ZEPPELIN_PORT` is used only when `ZEPPELIN_SSL` is set to `false`.
- The support on Spark 1.1.x to 1.3.x is deprecated.

View file

@ -75,7 +75,7 @@ into a directory on your host machine, or directly in your virtual machine.
Cloning Zeppelin into the `/scripts/vagrant/zeppelin-dev` directory from the host, will allow the directory to be shared between your host and the guest machine.
Cloning the project again may seem counter intuitive, since this script likley originated from the project repository. Consider copying just the vagrant/zeppelin-dev script from the Zeppelin project as a stand alone directory, then once again clone the specific branch you wish to build.
Cloning the project again may seem counter intuitive, since this script likely originated from the project repository. Consider copying just the vagrant/zeppelin-dev script from the Zeppelin project as a stand alone directory, then once again clone the specific branch you wish to build.
Synced folders enable Vagrant to sync a folder on the host machine to the guest machine, allowing you to continue working on your project's files on your host machine, but use the resources in the guest machine to compile or run your project. _[(1) Synced Folder Description from Vagrant Up](https://docs.vagrantup.com/v2/synced-folders/index.html)_
@ -163,7 +163,7 @@ import matplotlib.pyplot as plt
import numpy as np
import StringIO
# clear out any previous plots on this notebook
# clear out any previous plots on this note
plt.clf()
def show(p):

97
docs/interpreter/pig.md Normal file
View file

@ -0,0 +1,97 @@
---
layout: page
title: "Pig Interpreter for Apache Zeppelin"
description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
group: manual
---
{% include JB/setup %}
# Pig Interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.
## Supported interpreter type
- `%pig.script` (default)
All Pig scripts can run in this type of interpreter, and the display type is plain text.
- `%pig.query`
Almost the same as `%pig.script`. The only difference is that you don't need to add alias in the last statement. And the display type is table.
## Supported runtime mode
- Local
- MapReduce
- Tez (Only Tez 0.7 is supported)
## How to use
### How to setup Pig
- Local Mode
Nothing needs to be done for local mode
- MapReduce Mode
HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
- Tez Mode
HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
### How to configure interpreter
At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default.
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.pig.execType</td>
<td>mapreduce</td>
<td>Execution mode for pig runtime. local | mapreduce | tez </td>
</tr>
<tr>
<td>zeppelin.pig.includeJobStats</td>
<td>false</td>
<td>Whether to display jobStats info in <code>%pig.script</code></td>
</tr>
<tr>
<td>zeppelin.pig.maxResult</td>
<td>1000</td>
<td>max row number displayed in <code>%pig.query</code></td>
</tr>
</table>
### Example
##### pig
```
%pig
raw_data = load 'dataset/sf_crime/train.csv' using PigStorage(',') as (Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y);
b = group raw_data all;
c = foreach b generate COUNT($1);
dump c;
```
##### pig.query
```
%pig.query
b = foreach raw_data generate Category;
c = group b by Category;
foreach c generate group as category, COUNT($1) as count;
```
Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, and do different kinds of query based on the data of `%pig`.

View file

@ -86,9 +86,26 @@ print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))
* Code-completion is currently not implemented.
## Matplotlib integration
The python interpreter can display matplotlib graph with the function `z.show()`.
You need to have matplotlib module installed and a XServer running to use this functionality!
The python interpreter can display matplotlib figures inline automatically using the `pyplot` module:
```python
%python
import matplotlib.pyplot as plt
plt.plot([1, 2, 3])
```
This is the recommended method for using matplotlib from within a Zeppelin notebook. The output of this command will by default be converted to HTML by implicitly making use of the `%html` magic. Additional configuration can be achieved using the builtin `z.configure_mpl()` method. For example,
```python
z.configure_mpl(width=400, height=300, fmt='svg')
plt.plot([1, 2, 3])
```
This will produce a 400x300 image in SVG format; the defaults are 600x400 and PNG, respectively. In the future, another option called `angular` can be used to make it possible to update a plot produced from one paragraph directly from another (the output will be `%angular` instead of `%html`). However, this feature is already available in the `pyspark` interpreter. More details can be found in the included "Zeppelin Tutorial: Python - matplotlib basic" tutorial notebook.
If Zeppelin cannot find the matplotlib backend files (which should usually be found in `$ZEPPELIN_HOME/interpreter/lib/python`) in your `PYTHONPATH`, then the backend will automatically be set to agg, and the (otherwise deprecated) instructions below can be used for more limited inline plotting.
If you are unable to load the inline backend, use `z.show(plt)`:
```python
%python
import matplotlib.pyplot as plt

View file

@ -363,6 +363,11 @@ select * from ${table=defaultTableName} where text like '%${search}%'
To learn more about dynamic form, checkout [Dynamic Form](../manual/dynamicform.html).
## Matplotlib Integration (pyspark)
Both the `python` and `pyspark` interpreters have built-in support for inline visualization using `matplotlib`, a popular plotting library for python. More details can be found in the [python interpreter documentation](../interpreter/python.html), since matplotlib support is identical. More advanced interactive plotting can be done with pyspark through utilizing Zeppelin's built-in [Angular Display System](../displaysystem/back-end-angular.html), as shown below:
<img class="img-responsive" src="../assets/themes/zeppelin/img/docs-img/matplotlibAngularExample.gif" />
## Interpreter setting option
You can choose one of `shared`, `scoped` and `isolated` options when you configure Spark interpreter. Spark interpreter creates a separate Scala compiler for each notebook but shares a single SparkContext in `scoped` mode (experimental). It creates a separate SparkContext for each notebook in `isolated` mode.

View file

@ -85,7 +85,7 @@ If you install one of these interpreters only with `--name` option, installer wi
```
#### Install Spark interpreter built with Scala 2.10
Spark distribution package has been built with Scala 2.10 until 1.6.2. If you have `SPARK_HOME` set pointing to Spark version ealier than 2.0.0, you need to download Spark interpreter packaged with Scala 2.10. To do so, use follow command:
Spark distribution package has been built with Scala 2.10 until 1.6.2. If you have `SPARK_HOME` set pointing to a Spark version earlier than 2.0.0, you need to download the Spark interpreter packaged with Scala 2.10. To do so, use the following command:
```
rm -rf ./interpreter/spark

View file

@ -79,6 +79,52 @@ interpreter.start()
```
The above code will start interpreter thread inside your process. Once the interpreter is started you can configure zeppelin to connect to RemoteInterpreter by checking **Connect to existing process** checkbox and then provide **Host** and **Port** on which interpreter porocess is listening as shown in the image below:
The above code will start interpreter thread inside your process. Once the interpreter is started you can configure zeppelin to connect to RemoteInterpreter by checking **Connect to existing process** checkbox and then provide **Host** and **Port** on which interpreter process is listening as shown in the image below:
<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">
## (Experimental) Interpreter Execution Hooks
Zeppelin allows users to specify additional code to be executed by an interpreter before and after each paragraph's code is executed. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, you may use `z.registerHook()`. For example, enter the following into one paragraph:
```python
%pyspark
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
```
These calls will not take effect until the next time you run a paragraph. In another paragraph, enter
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```
The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```
If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
```
```
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
Finally, the hook registry is internally shared by other interpreters in the same group. This would allow for hook code for one interpreter REPL to be set by another as follows:
```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, the interpreter REPL will become unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Customize Apache Zeppelin homepage"
description: "Apache Zeppelin allows you to use one of the notebooks you create as your Zeppelin Homepage. With that you can brand your Zeppelin installation, adjust the instruction to your users needs and even translate to other languages."
description: "Apache Zeppelin allows you to use one of the notes you create as your Zeppelin Homepage. With that you can brand your Zeppelin installation, adjust the instructions to your users' needs and even translate to other languages."
group: manual
---
<!--
@ -23,53 +23,53 @@ limitations under the License.
<div id="toc"></div>
Apache Zeppelin allows you to use one of the notebooks you create as your Zeppelin Homepage.
Apache Zeppelin allows you to use one of the notes you create as your Zeppelin Homepage.
With that you can brand your Zeppelin installation, adjust the instructions to your users' needs and even translate to other languages.
## How to set a notebook as your Zeppelin homepage
## How to set a note as your Zeppelin homepage
The process for creating your homepage is very simple as shown below:
1. Create a notebook using Zeppelin
2. Set the notebook id in the config file
1. Create a note using Zeppelin
2. Set the note id in the config file
3. Restart Zeppelin
### Create a notebook using Zeppelin
Create a new notebook using Zeppelin,
### Create a note using Zeppelin
Create a new note using Zeppelin,
you can use ```%md``` interpreter for markdown content or any other interpreter you like.
You can also use the display system to generate [text](../displaysystem/basicdisplaysystem.html#text), [html](../displaysystem/basicdisplaysystem.html#html), [table](../displaysystem/basicdisplaysystem.html#table) or
Angular ([backend API](../displaysystem/back-end-angular.html), [frontend API](../displaysystem/front-end-angular.html)).
Run (shift+Enter) the notebook and see the output. Optionally, change the notebook view to report to hide
Run (shift+Enter) the note and see the output. Optionally, change the note view to report to hide
the code sections.
### Set the notebook id in the config file
To set the notebook id in the config file, you should copy it from the last word in the notebook url.
### Set the note id in the config file
To set the note id in the config file, you should copy it from the last word in the note url.
For example,
<img src="/assets/themes/zeppelin/img/screenshots/homepage_notebook_id.png" width="400px" />
Set the notebook id to the ```ZEPPELIN_NOTEBOOK_HOMESCREEN``` environment variable
Set the note id to the ```ZEPPELIN_NOTEBOOK_HOMESCREEN``` environment variable
or ```zeppelin.notebook.homescreen``` property.
You can also set the ```ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE``` environment variable
or ```zeppelin.notebook.homescreen.hide``` property to hide the new notebook from the notebook list.
or ```zeppelin.notebook.homescreen.hide``` property to hide the new note from the note list.
### Restart Zeppelin
Restart your Zeppelin server
```
./bin/zeppelin-deamon stop
./bin/zeppelin-deamon start
./bin/zeppelin-daemon.sh stop
./bin/zeppelin-daemon.sh start
```
That's it! Open your browser and navigate to Apache Zeppelin and see your customized homepage.
<br />
## Show notebooks list in your custom homepage
If you want to display the list of notebooks on your custom Apache Zeppelin homepage all
## Show notes list in your custom homepage
If you want to display the list of notes on your custom Apache Zeppelin homepage all
you need to do is use our %angular support.
Add the following code to a paragraph in you home page and run it... walla! you have your notebooks list.
Add the following code to a paragraph in your home page and run it... Voila! You have your notes list.
```javascript
println(
@ -89,7 +89,7 @@ println(
""")
```
After running the notebook you will see output similar to this one:
After running the note you will see output similar to this one:
<img src="/assets/themes/zeppelin/img/screenshots/homepage_notebook_list.png" />
The main trick here relies on linking the ```<div>``` to the controller:
@ -99,4 +99,4 @@ The main trick here relays in linking the ```<div>``` to the controller:
```
Once we have ```home``` as our controller variable in our ```<div></div>```
we can use ```home.notes.list``` to get access to the notebook list.
we can use ```home.notes.list``` to get access to the note list.

View file

@ -18,6 +18,12 @@ See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Install with flink and spark cluster
<div id="toc"></div>
This tutorial is extremely entry-level. It assumes no prior knowledge of Linux, git, or other tools. If you carefully type what I tell you when I tell you, you should be able to get Zeppelin running.
## Installing Zeppelin with Flink and Spark in cluster mode

View file

@ -400,6 +400,16 @@ The role of registered interpreters, settings and interpreters group are describ
<td>Fail code</td>
<td> 500 </td>
</tr>
<tr>
<td>Sample JSON input (Optional)</td>
<td>
<pre>
{
"noteId": "2AVQJVC8N"
}
</pre>
</td>
</tr>
<tr>
<td>Sample JSON response</td>
<td>

View file

@ -35,13 +35,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
Notebooks REST API supports the following operations: List, Create, Get, Delete, Clone, Run, Export, Import as detailed in the following tables.
### List of the notebooks
### List of the notes
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method lists the available notebooks on your server.
Notebook JSON contains the ```name``` and ```id``` of all notebooks.
<td>This ```GET``` method lists the available notes on your server.
Notebook JSON contains the ```name``` and ```id``` of all notes.
</td>
</tr>
<tr>
@ -77,13 +77,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br/>
### Create a new notebook
### Create a new note
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method creates a new notebook using the given name or default name if none given.
The body field of the returned JSON contains the new notebook id.
<td>This ```POST``` method creates a new note using the given name or default name if none given.
The body field of the returned JSON contains the new note id.
</td>
</tr>
<tr>
@ -100,13 +100,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
<tr>
<td> sample JSON input (without paragraphs) </td>
<td><pre>{"name": "name of new notebook"}</pre></td>
<td><pre>{"name": "name of new note"}</pre></td>
</tr>
<tr>
<td> sample JSON input (with initial paragraphs) </td>
<td><pre>
{
"name": "name of new notebook",
"name": "name of new note",
"paragraphs": [
{
"title": "paragraph title1",
@ -131,18 +131,18 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br/>
### Get an existing notebook information
### Get an existing note information
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method retrieves an existing notebook's information using the given id.
The body field of the returned JSON contain information about paragraphs in the notebook.
<td>This ```GET``` method retrieves an existing note's information using the given id.
The body field of the returned JSON contains information about paragraphs in the note.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -228,17 +228,17 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br/>
### Delete a notebook
### Delete a note
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```DELETE``` method deletes a notebook by the given notebook id.
<td>This ```DELETE``` method deletes a note by the given note id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -255,19 +255,19 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br/>
### Clone a notebook
### Clone a note
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method clones a notebook by the given id and create a new notebook using the given name
<td>This ```POST``` method clones a note by the given id and creates a new note using the given name
or default name if none given.
The body field of the returned JSON contains the new notebook id.
The body field of the returned JSON contains the new note id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -279,7 +279,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
<tr>
<td> sample JSON input </td>
<td><pre>{"name": "name of new notebook"}</pre></td>
<td><pre>{"name": "name of new note"}</pre></td>
</tr>
<tr>
<td> sample JSON response </td>
@ -299,14 +299,14 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<tr>
<td>Description</td>
<td>
This ```POST``` method runs all paragraphs in the given notebook id. <br />
If you can not find Notebook id 404 returns.
This ```POST``` method runs all paragraphs in the given note id. <br />
If the note id cannot be found, a 404 error is returned.
If there is a problem with the interpreter, a 412 error is returned.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -345,12 +345,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```DELETE``` method stops all paragraphs in the given notebook id.
<td>This ```DELETE``` method stops all paragraphs in the given note id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -372,13 +372,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method gets the status of all paragraphs by the given notebook id.
<td>This ```GET``` method gets the status of all paragraphs by the given note id.
The body field of the returned JSON contains an array composed of the paragraph id, paragraph status, paragraph finish date and paragraph started date.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -418,13 +418,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method gets the status of a single paragraph by the given notebook and paragraph id.
<td>This ```GET``` method gets the status of a single paragraph by the given note and paragraph id.
The body field of the returned JSON contains an array composed of the paragraph id, paragraph status, paragraph finish date and paragraph started date.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -455,12 +455,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method runs the paragraph asynchronously by given notebook and paragraph id. This API always return SUCCESS even if the execution of the paragraph fails later because the API is asynchronous
<td>This ```POST``` method runs the paragraph asynchronously by the given note and paragraph id. This API always returns SUCCESS, even if the execution of the paragraph fails later, because the API is asynchronous.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -474,7 +474,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<td> sample JSON input (optional, only needed if you want to update dynamic form's value) </td>
<td><pre>
{
"name": "name of new notebook",
"name": "name of new note",
"params": {
"formLabel1": "value1",
"formLabel2": "value2"
@ -493,12 +493,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td> This ```POST``` method runs the paragraph synchronously by given notebook and paragraph id. This API can return SUCCESS or ERROR depending on the outcome of the paragraph execution
<td>This ```POST``` method runs the paragraph synchronously by given note and paragraph id. This API can return SUCCESS or ERROR depending on the outcome of the paragraph execution
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/run/[noteId]/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -512,7 +512,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<td> sample JSON input (optional, only needed if you want to update dynamic form's value) </td>
<td><pre>
{
"name": "name of new notebook",
"name": "name of new note",
"params": {
"formLabel1": "value1",
"formLabel2": "value2"
@ -543,12 +543,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```DELETE``` method stops the paragraph by given notebook and paragraph id.
<td>This ```DELETE``` method stops the paragraph by given note and paragraph id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[notebookId]/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/job/[noteId]/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -570,12 +570,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method adds cron job by the given notebook id.
<td>This ```POST``` method adds cron job by the given note id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -587,7 +587,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
<tr>
<td> sample JSON input </td>
<td><pre>{"cron": "cron expression of notebook"}</pre></td>
<td><pre>{"cron": "cron expression of note"}</pre></td>
</tr>
<tr>
<td> sample JSON response </td>
@ -602,12 +602,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```DELETE``` method removes cron job by the given notebook id.
<td>This ```DELETE``` method removes cron job by the given note id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -630,13 +630,13 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method gets cron job expression of given notebook id.
<td>This ```GET``` method gets cron job expression of given note id.
The body field of the returned JSON contains the cron expression.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/cron/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -653,7 +653,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br />
### Full text search through the paragraphs in all notebooks
### Full text search through the paragraphs in all notes
<table class="table-configuration">
<col width="200">
<tr>
@ -681,7 +681,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
"body": [
{
"id": "<noteId>/paragraph/<paragraphId>",
"name":"Notebook Name",
"name":"Note Name",
"snippet":"",
"text":""
}
@ -702,7 +702,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]/paragraph```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]/paragraph```</td>
</tr>
<tr>
<td>Success code</td>
@ -752,7 +752,7 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]/paragraph/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]/paragraph/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -814,12 +814,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method moves a paragraph to the specific index (order) from the notebook.
<td>This ```POST``` method moves a paragraph to the specific index (order) from the note.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]/paragraph/[paragraphId]/move/[newIndex]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]/paragraph/[paragraphId]/move/[newIndex]```</td>
</tr>
<tr>
<td>Success code</td>
@ -842,12 +842,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
<col width="200">
<tr>
<td>Description</td>
<td>This ```DELETE``` method deletes a paragraph by the given notebook and paragraph id.
<td>This ```DELETE``` method deletes a paragraph by the given note and paragraph id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[notebookId]/paragraph/[paragraphId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]/paragraph/[paragraphId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -864,17 +864,17 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br />
### Export a notebook
### Export a note
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```GET``` method exports a notebook by the given id and gernerates a JSON
<td>This ```GET``` method exports a note by the given id and generates a JSON
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/export/[notebookId]```</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/export/[noteId]```</td>
</tr>
<tr>
<td>Success code</td>
@ -914,12 +914,12 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</table>
<br />
### Import a notebook
### Import a note
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```POST``` method imports a notebook from the notebook JSON input
<td>This ```POST``` method imports a note from the note JSON input
</td>
</tr>
<tr>
@ -972,3 +972,39 @@ If you work with Apache Zeppelin and find a need for an additional REST API, ple
</tr>
</tr>
</table>
<br />
### Clear all paragraph results
<table class="table-configuration">
<col width="200">
<tr>
<td>Description</td>
<td>This ```PUT``` method clears all paragraph results from the note with the given id.
</td>
</tr>
<tr>
<td>URL</td>
<td>```http://[zeppelin-server]:[zeppelin-port]/api/notebook/[noteId]/clear```</td>
</tr>
<tr>
<td>Success code</td>
<td>200</td>
</tr>
<tr>
<td>Forbidden code</td>
<td>401</td>
</tr>
<tr>
<td>Not Found code</td>
<td>404</td>
</tr>
<tr>
<td>Fail code</td>
<td>500</td>
</tr>
<tr>
<td>sample JSON response</td>
<td><pre>{"status": "OK"}</pre></td>
</tr>
</tr>
</table>

View file

@ -85,7 +85,7 @@ This instruction based on Ubuntu 14.04 LTS but may work with other OS with few c
}
location /ws { # For websocket support
proxy_pass http://zeppelin;
proxy_pass http://zeppelin/ws;
proxy_http_version 1.1;
proxy_set_header Upgrade websocket;
proxy_set_header Connection upgrade;
@ -130,4 +130,4 @@ This instruction based on Ubuntu 14.04 LTS but may work with other OS with few c
Another option is to have an authentication server that can verify user credentials in an LDAP server.
If an incoming request to the Zeppelin server does not have a cookie with user information encrypted with the authentication server public key, the user
is redirected to the authentication server. Once the user is verified, the authentication server redirects the browser to a specific URL in the Zeppelin server which sets the authentication cookie in the browser.
The end result is that all requests to the Zeppelin web server have the authentication cookie which contains user and groups information.
The end result is that all requests to the Zeppelin web server have the authentication cookie which contains user and groups information.

View file

@ -31,7 +31,11 @@ When you connect to Apache Zeppelin, you will be asked to enter your credentials
## Security Setup
You can setup **Zeppelin notebook authentication** in some simple steps.
### 1. Secure the HTTP channel
### 1. Enable Shiro
By default in `conf`, you will find `shiro.ini.template`. This file is used as an example, and it is strongly recommended
to create a `shiro.ini` file by running the following command: `cp conf/shiro.ini.template conf/shiro.ini`.
### 2. Secure the HTTP channel
To secure the HTTP channel, you have to change both **anon** and **authc** settings in `conf/shiro.ini`. In here, **anon** means "the access is anonymous" and **authc** means "formed auth security".
The default status of them is
@ -49,10 +53,10 @@ Deactivate the line "/** = anon" and activate the line "/** = authc" in `conf/sh
For the further information about `shiro.ini` file format, please refer to [Shiro Configuration](http://shiro.apache.org/configuration.html#Configuration-INISections).
### 2. Secure the Websocket channel
### 3. Secure the Websocket channel
Set the property **zeppelin.anonymous.allowed** to **false** in `conf/zeppelin-site.xml`. If you don't have this file yet, just copy `conf/zeppelin-site.xml.template` to `conf/zeppelin-site.xml`.
### 3. Start Zeppelin
### 4. Start Zeppelin
```
bin/zeppelin-daemon.sh start (or restart)
@ -60,7 +64,7 @@ bin/zeppelin-daemon.sh start (or restart)
Then you can browse Zeppelin at [http://localhost:8080](http://localhost:8080).
### 4. Login
### 5. Login
Finally, you can login using one of the below **username/password** combinations.
<center><img src="../assets/themes/zeppelin/img/docs-img/zeppelin-login.png"></center>
@ -94,7 +98,7 @@ ldapRealm.contextFactory.url = ldap://ldap.test.com:389
ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
ldapRealm.contextFactory.authenticationMechanism = SIMPLE
```
also define roles/groups that you want to have in system, like below;
```
@ -143,6 +147,19 @@ ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
ldapRealm.contextFactory.authenticationMechanism = SIMPLE
```
### PAM
[PAM](https://en.wikipedia.org/wiki/Pluggable_authentication_module) authentication support allows the reuse of existing authentication
modules on the host where Zeppelin is running. On a typical system, modules are configured per service (for example sshd, passwd, etc.) under `/etc/pam.d/`. You can
either reuse one of these services or create your own for Zeppelin. Activating PAM authentication requires two parameters:
1. realm: The Shiro realm being used
2. service: The service configured under `/etc/pam.d/` to be used. The name here needs to be the same as the file name under `/etc/pam.d/`
```
[main]
pamRealm=org.apache.zeppelin.realm.PamRealm
pamRealm.service=sshd
```
### ZeppelinHub
[ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronizes your Apache Zeppelin notebooks and enables you to collaborate easily.
@ -159,8 +176,8 @@ securityManager.realms = $zeppelinHubRealm
> Note: ZeppelinHub is not related to the Apache Zeppelin project.
## Secure your Zeppelin information (optional)
By default, anyone who defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
Sometimes you might want to hide these information for your use case.
By default, anyone who is defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
Sometimes you might want to hide this information for your use case.
Since Shiro provides **url-based security**, you can hide the information by commenting or uncommenting these below lines in `conf/shiro.ini`.
```
@ -171,9 +188,8 @@ Since Shiro provides **url-based security**, you can hide the information by com
/api/credential/** = authc, roles[admin]
```
In this case, only who have `admin` role can see **Interpreter Setting**, **Credential** and **Configuration** information.
In this case, only users who have the `admin` role can see **Interpreter Setting**, **Credential** and **Configuration** information.
If you want to grant this permission to other users, you can change **roles[ ]** as you defined at `[users]` section.
<br/>
> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file. This documentation is originally from [SECURITY-README.md](https://github.com/apache/zeppelin/blob/master/SECURITY-README.md).

View file

@ -22,6 +22,7 @@ import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@ -35,7 +36,6 @@ import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.elasticsearch.action.delete.DeleteResponse;
@ -48,6 +48,8 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
@ -437,14 +439,37 @@ public class ElasticsearchInterpreter extends Interpreter {
resMsg = XContentHelper.toString((InternalSingleBucketAggregation) agg).toString();
}
else if (agg instanceof InternalMultiBucketAggregation) {
final StringBuffer buffer = new StringBuffer("key\tdoc_count");
final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
final InternalMultiBucketAggregation multiBucketAgg = (InternalMultiBucketAggregation) agg;
for (MultiBucketsAggregation.Bucket bucket : multiBucketAgg.getBuckets()) {
buffer.append("\n")
.append(bucket.getKeyAsString())
.append("\t")
.append(bucket.getDocCount());
try {
final XContentBuilder builder = XContentFactory.jsonBuilder();
bucket.toXContent(builder, null);
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(builder.string());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
catch (IOException e) {
logger.error("Processing bucket: " + e.getMessage(), e);
}
}
final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);
for (Map<String, Object> bucket : buckets) {
buffer.append("\n");
for (String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}
resType = InterpreterResult.Type.TABLE;

View file

@ -28,6 +28,9 @@
"defaultValue": "10",
"description": "The size of the result set of a search query"
}
},
"editor": {
"editOnDblClick": false
}
}
]

View file

@ -21,7 +21,12 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.zeppelin.interpreter.InterpreterResult;
@ -178,6 +183,11 @@ public class ElasticsearchInterpreterTest {
res = interpreter.interpret("search /logs { \"aggs\" : { \"status_count\" : " +
" { \"terms\" : { \"field\" : \"status\" } } } }", null);
assertEquals(Code.SUCCESS, res.code());
res = interpreter.interpret("search /logs { \"aggs\" : { " +
" \"length\" : { \"terms\": { \"field\": \"status\" }, " +
" \"aggs\" : { \"sum_length\" : { \"sum\" : { \"field\" : \"content_length\" } }, \"sum_status\" : { \"sum\" : { \"field\" : \"status\" } } } } } }", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test

View file

@ -22,6 +22,9 @@
"defaultValue": "1000",
"description": "Maximum number of lines of results fetched"
}
},
"editor": {
"editOnDblClick": false
}
}
]

View file

@ -123,7 +123,7 @@ public class HDFSFileInterpreterTest extends TestCase {
* Store command results from curl against a real file system
*/
class MockFileSystem {
HashMap<String, String> mfs = new HashMap<String, String>();
HashMap<String, String> mfs = new HashMap<>();
void addListStatusData() {
mfs.put("/?op=LISTSTATUS",
"{\"FileStatuses\":{\"FileStatus\":[\n" +

View file

@ -34,7 +34,7 @@
<description>Zeppelin flink support</description>
<properties>
<flink.version>1.1.2</flink.version>
<flink.version>1.1.3</flink.version>
<flink.akka.version>2.3.7</flink.akka.version>
<scala.macros.version>2.0.1</scala.macros.version>
</properties>

View file

@ -27,8 +27,12 @@ import java.net.URL;
import java.net.URLClassLoader;
import java.util.*;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.scala.FlinkILoop;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.instance.ActorGateway;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.apache.flink.runtime.util.EnvironmentInformation;
import org.apache.zeppelin.interpreter.Interpreter;
@ -47,10 +51,12 @@ import scala.Option;
import scala.Some;
import scala.collection.JavaConversions;
import scala.collection.immutable.Nil;
import scala.concurrent.duration.FiniteDuration;
import scala.runtime.AbstractFunction0;
import scala.tools.nsc.Settings;
import scala.tools.nsc.interpreter.IMain;
import scala.tools.nsc.interpreter.Results;
import scala.tools.nsc.settings.MutableSettings;
import scala.tools.nsc.settings.MutableSettings.BooleanSetting;
import scala.tools.nsc.settings.MutableSettings.PathSetting;
@ -175,12 +181,18 @@ public class FlinkInterpreter extends Interpreter {
pathSettings.v_$eq(classpath);
settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);
settings.explicitParentLoader_$eq(new Some<ClassLoader>(Thread.currentThread()
settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread()
.getContextClassLoader()));
BooleanSetting b = (BooleanSetting) settings.usejavacp();
b.v_$eq(true);
settings.scala$tools$nsc$settings$StandardScalaSettings$_setter_$usejavacp_$eq(b);
// To prevent 'File name too long' error on some file system.
MutableSettings.IntSetting numClassFileSetting = settings.maxClassfileName();
numClassFileSetting.v_$eq(128);
settings.scala$tools$nsc$settings$ScalaSettings$_setter_$maxClassfileName_$eq(
numClassFileSetting);
return settings;
}
@ -197,7 +209,7 @@ public class FlinkInterpreter extends Interpreter {
}
private List<File> classPath(ClassLoader cl) {
List<File> paths = new LinkedList<File>();
List<File> paths = new LinkedList<>();
if (cl == null) {
return paths;
}
@ -217,7 +229,7 @@ public class FlinkInterpreter extends Interpreter {
public Object getLastObject() {
Object obj = imain.lastRequest().lineRep().call(
"$result",
JavaConversions.asScalaBuffer(new LinkedList<Object>()));
JavaConversions.asScalaBuffer(new LinkedList<>()));
return obj;
}
@ -334,6 +346,20 @@ public class FlinkInterpreter extends Interpreter {
/**
 * Cancels the jobs that are currently running on the local Flink cluster.
 *
 * <p>Outside of local mode this method does nothing.
 *
 * @param context the interpreter context of the paragraph being cancelled (not read here)
 */
@Override
public void cancel(InterpreterContext context) {
  if (!localMode()) {
    return;
  }
  // In local mode every running job can be cancelled,
  // because the local cluster can only run one job at a time.
  for (JobID runningJob : this.localFlinkCluster.getCurrentlyRunningJobsJava()) {
    logger.info("Stop job: " + runningJob);
    cancelJobLocalMode(runningJob);
  }
}
/**
 * Asks the leader of the local Flink cluster to cancel the given job.
 *
 * <p>The timeout taken from the cluster configuration is used both to look up the
 * leader gateway and for the request itself. The result of the request is not inspected.
 *
 * @param jobID identifier of the job to cancel
 */
private void cancelJobLocalMode(JobID jobID) {
  final FiniteDuration askTimeout =
      AkkaUtils.getTimeout(this.localFlinkCluster.configuration());
  final ActorGateway leaderGateway = this.localFlinkCluster.getLeaderGateway(askTimeout);
  final JobManagerMessages.CancelJob cancelRequest = new JobManagerMessages.CancelJob(jobID);
  leaderGateway.ask(cancelRequest, askTimeout);
}
@Override

View file

@ -18,7 +18,8 @@
}
},
"editor": {
"language": "scala"
"language": "scala",
"editOnDblClick": false
}
}
]

View file

@ -21,7 +21,6 @@ import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
@ -38,7 +37,7 @@ import com.gemstone.gemfire.cache.query.Struct;
import com.gemstone.gemfire.pdx.PdxInstance;
/**
* Apache Geode OQL Interpreter (http://geode.incubator.apache.org)
* Apache Geode OQL Interpreter (http://geode.apache.org)
*
* <ul>
* <li>{@code geode.locator.host} - The Geode Locator {@code <HOST>} to connect to.</li>
@ -87,30 +86,12 @@ public class GeodeOqlInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(GeodeOqlInterpreter.class);
public static final String DEFAULT_PORT = "10334";
public static final String DEFAULT_HOST = "localhost";
public static final String DEFAULT_MAX_RESULT = "1000";
private static final char NEWLINE = '\n';
private static final char TAB = '\t';
private static final char WHITESPACE = ' ';
private static final String TABLE_MAGIC_TAG = "%table ";
public static final String LOCATOR_HOST = "geode.locator.host";
public static final String LOCATOR_PORT = "geode.locator.port";
public static final String MAX_RESULT = "geode.max.result";
static {
Interpreter.register(
"oql",
"geode",
GeodeOqlInterpreter.class.getName(),
new InterpreterPropertyBuilder().add(LOCATOR_HOST, DEFAULT_HOST, "The Geode Locator Host.")
.add(LOCATOR_PORT, DEFAULT_PORT, "The Geode Locator Port")
.add(MAX_RESULT, DEFAULT_MAX_RESULT, "Max number of OQL result to display.").build());
}
private ClientCache clientCache = null;
private QueryService queryService = null;
private Exception exceptionOnConnect;
@ -122,8 +103,8 @@ public class GeodeOqlInterpreter extends Interpreter {
protected ClientCache getClientCache() {
String locatorHost = getProperty(LOCATOR_HOST);
int locatorPort = Integer.valueOf(getProperty(LOCATOR_PORT));
String locatorHost = getProperty("geode.locator.host");
int locatorPort = Integer.valueOf(getProperty("geode.locator.port"));
ClientCache clientCache =
new ClientCacheFactory().addPoolLocator(locatorHost, locatorPort).create();
@ -139,7 +120,7 @@ public class GeodeOqlInterpreter extends Interpreter {
close();
try {
maxResult = Integer.valueOf(getProperty(MAX_RESULT));
maxResult = Integer.valueOf(getProperty("geode.max.result"));
clientCache = getClientCache();
queryService = clientCache.getQueryService();

View file

@ -0,0 +1,30 @@
[
{
"group": "geode",
"name": "oql",
"className": "org.apache.zeppelin.geode.GeodeOqlInterpreter",
"properties": {
"geode.locator.host": {
"envName": null,
"propertyName": "geode.locator.host",
"defaultValue": "localhost",
"description": "The Geode Locator Host."
},
"geode.locator.port": {
"envName": null,
"propertyName": "geode.locator.port",
"defaultValue": "10334",
"description": "The Geode Locator Port."
},
"geode.max.result": {
"envName": null,
"propertyName": "geode.max.result",
"defaultValue": "1000",
"description": "Max number of OQL result to display."
}
},
"editor": {
"language": "sql"
}
}
]

View file

@ -58,9 +58,9 @@ public class GeodeOqlInterpreterTest {
public void testOpenCommandIndempotency() {
Properties properties = new Properties();
properties.put(LOCATOR_HOST, DEFAULT_HOST);
properties.put(LOCATOR_PORT, DEFAULT_PORT);
properties.put(MAX_RESULT, DEFAULT_MAX_RESULT);
properties.put("geode.locator.host", "localhost");
properties.put("geode.locator.port", "10334");
properties.put("geode.max.result", "1000");
GeodeOqlInterpreter spyGeodeOqlInterpreter = spy(new GeodeOqlInterpreter(properties));

View file

@ -20,6 +20,9 @@
"defaultValue": "false",
"description": "Disable checks for unit and manual tests"
}
},
"editor": {
"editOnDblClick": false
}
}
]

View file

@ -178,7 +178,7 @@ public class IgniteInterpreter extends Interpreter {
public Object getLastObject() {
Object obj = imain.lastRequest().lineRep().call(
"$result",
JavaConversions.asScalaBuffer(new LinkedList<Object>()));
JavaConversions.asScalaBuffer(new LinkedList<>()));
return obj;
}

View file

@ -0,0 +1,312 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file provides a static (non-interactive) matplotlib plotting backend
# for zeppelin notebooks for use with the python/pyspark interpreters
from __future__ import print_function
import uuid
import warnings
import base64
from io import BytesIO
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
import mpl_config
import matplotlib
from matplotlib._pylab_helpers import Gcf
from matplotlib.backends.backend_agg import new_figure_manager, FigureCanvasAgg
from matplotlib.backend_bases import ShowBase, FigureManagerBase
from matplotlib.figure import Figure
########################################################################
#
# The following functions and classes are for pylab and implement
# window/figure managers, etc...
#
########################################################################
class Show(ShowBase):
    """
    Callable that renders every open figure into the paragraph output.

    Keyword arguments are forwarded unchanged to each figure manager's
    show() (for example the figure width and height, in units supported by
    the div tag). ``block`` is accepted but never read. ``close`` decides
    whether all figures are destroyed after being shown; when it is None the
    value of mpl_config.get('close') is used instead.
    """
    def __call__(self, close=None, block=None, **kwargs):
        should_close = mpl_config.get('close') if close is None else close
        try:
            open_managers = Gcf.get_all_fig_managers()
            if open_managers:
                # Announce the output type exactly once so the magic is not
                # printed verbatim when several figures are displayed from
                # the same paragraph.
                print('%angular' if mpl_config.get('angular') else '%html')

                # Render each open figure in turn
                for fig_manager in open_managers:
                    fig_manager.show(**kwargs)
        finally:
            # Tear down every figure when closing was requested.
            if should_close and Gcf.get_all_fig_managers():
                Gcf.destroy_all()
class FigureCanvasZInline(FigureCanvasAgg):
    """
    The canvas the figure renders into. Calls the draw and print fig
    methods, creates the renderers, etc...
    """
    def get_bytes(self, **kwargs):
        """
        Return the figure as a base64 ``data:image/<fmt>;base64,...`` URI.

        Should only be used with raster formats (png/jpg). The format is
        taken from the ``format`` keyword argument, falling back to
        mpl_config.get('format'). All keyword arguments are forwarded to
        print_figure().

        Raises ValueError if the format is svg.
        """
        # Make sure format is correct
        fmt = kwargs.get('format', mpl_config.get('format'))
        if fmt == 'svg':
            raise ValueError("get_bytes() does not support svg, use png or jpg")

        # Render the image into an in-memory byte buffer
        buf = BytesIO()
        try:
            self.print_figure(buf, **kwargs)
            # b64encode returns bytes; decode to text so the URI can be
            # assembled with ordinary string formatting. Interpolating a
            # str into a bytes literal with %s raises TypeError on Python 3.
            encoded = base64.b64encode(buf.getvalue()).decode('ascii')
        finally:
            buf.close()

        return "data:image/%s;base64,%s" % (fmt, encoded)

    def get_svg(self, **kwargs):
        """
        Return the svg markup of the figure as a text string.

        Should only be used with the svg format. The format is taken from
        the ``format`` keyword argument, falling back to
        mpl_config.get('format'). All keyword arguments are forwarded to
        print_figure().

        Raises ValueError if the format is not svg.
        """
        # Make sure format is correct
        fmt = kwargs.get('format', mpl_config.get('format'))
        if fmt != 'svg':
            raise ValueError("get_svg() does not support png or jpg, use svg")

        # For SVG the data string has to be unicode, not bytes
        buf = StringIO()
        try:
            self.print_figure(buf, **kwargs)
            svg_str = buf.getvalue()
        finally:
            buf.close()
        return svg_str

    def draw_idle(self, *args, **kwargs):
        """
        Called when the figure gets updated (eg through a plotting command).
        This is overridden to allow open figures to be reshown after they
        are updated when mpl_config.get('close') is False.
        """
        if not self._is_idle_drawing:
            with self._idle_draw_cntx():
                self.draw(*args, **kwargs)
                draw_if_interactive()
class FigureManagerZInline(FigureManagerBase):
    """
    Figure manager for the pylab interface. Each instance carries a unique
    figure id and remembers whether its figure has already been shown.
    """
    def __init__(self, canvas, num):
        FigureManagerBase.__init__(self, canvas, num)
        self._shown = False
        self.fig_id = "figure_{0}".format(uuid.uuid4().hex)

    def angular_bind(self, **kwargs):
        """
        Bind the figure data to Zeppelin's Angular Object Registry.

        Returns None for the svg format. Otherwise returns the figure's
        data string, or an ``{{figure_id}}`` placeholder when the data was
        bound: that requires mpl_config 'angular' to be set, 'close' to be
        unset and the configured context to expose angularBind. If the
        context lacks angularBind a warning is issued and the raw data
        string is returned.
        """
        # Binding is not available for SVG, so bail out for that format
        if kwargs.get('format', mpl_config.get('format')) == 'svg':
            return

        # Figure data as a string
        src = self.canvas.get_bytes(**kwargs)

        # Whether zeppelin's angular display system should be used
        angular = mpl_config.get('angular')

        # ZeppelinContext instance (requires PY4J)
        context = mpl_config.get('context')

        # Automatic closing must be off, otherwise using the angular
        # display system is pointless
        close = mpl_config.get('close')

        if close or not angular:
            return src

        if not hasattr(context, 'angularBind'):
            warnings.warn("Cannot bind figure to Angular Object Registry. "
                          "Check if PY4J is installed.")
            return src

        # Bind under the figure id so that several open figures can coexist
        context.angularBind(self.fig_id, src)

        # Zeppelin substitutes the bound value for this {{}} placeholder,
        # even when it is updated from another paragraph
        return "{{%s}}" %self.fig_id

    def angular_unbind(self):
        """
        Remove this figure from the angular display system, if the
        configured context supports unbinding.
        """
        context = mpl_config.get('context')
        if hasattr(context, 'angularUnbind'):
            context.angularUnbind(self.fig_id)

    def destroy(self):
        """
        Called when close=True or implicitly by pyplot.close().
        Overridden to automatically clean up the angular object registry.
        """
        self.angular_unbind()

    def show(self, **kwargs):
        """
        Display the figure the first time; afterwards redraw it and rebind
        its data instead of printing it again.
        """
        if self._shown:
            self.canvas.draw_idle()
            self.angular_bind(**kwargs)
        else:
            zdisplay(self.canvas.figure, **kwargs)

        self._shown = True
def draw_if_interactive():
    """
    Mark the active figure as needing to be shown again after a plotting
    command, so that the next show() re-displays it.

    Only has an effect in interactive mode, outside angular mode, when a
    figure is active and automatic closing is disabled.
    """
    manager = Gcf.get_active()
    interactive = matplotlib.is_interactive()
    angular = mpl_config.get('angular')

    # Reshowing is pointless in angular mode, and impossible without
    # interactive mode or an active figure.
    can_reshow = interactive and not angular and manager is not None

    # With close disabled the figure stays open, and this call implies it
    # has been updated, so allow it to be shown again.
    if can_reshow and not mpl_config.get('close'):
        manager._shown = False
def new_figure_manager(num, *args, **kwargs):
    """
    Build a figure and wrap it in a new figure manager.

    The figure class may be supplied through the ``FigureClass`` keyword
    (defaults to Figure); the remaining positional and keyword arguments
    are passed to its constructor.
    """
    # If a main-level app ever needs to be created, this (and
    # new_figure_manager_given_figure) is the usual place to do it.
    figure_cls = kwargs.pop('FigureClass', Figure)
    figure = figure_cls(*args, **kwargs)
    return new_figure_manager_given_figure(num, figure)
def new_figure_manager_given_figure(num, figure):
    """
    Wrap an existing figure in an inline canvas and return the figure
    manager created for that canvas.
    """
    return FigureManagerZInline(FigureCanvasZInline(figure), num)
########################################################################
#
# Backend specific functions
#
########################################################################
def zdisplay(fig, **kwargs):
    """
    Publishes a matplotlib figure to the notebook paragraph output.

    ``width`` and ``height`` keyword arguments (in units supported by the
    div tag, default 'auto') size the output; for svg they are replaced by
    the configured pixel width and height. The remaining keyword arguments
    are forwarded to the canvas / figure manager. The format comes from the
    ``format`` keyword, falling back to mpl_config.get('format').

    Raises ValueError if the format is not among the supported formats.
    """
    # kwargs can be width or height (in units supported by div tag)
    width = kwargs.pop('width', 'auto')
    height = kwargs.pop('height', 'auto')
    fmt = kwargs.get('format', mpl_config.get('format'))

    # Check if format is supported
    supported_formats = mpl_config.get('supported_formats')
    if fmt not in supported_formats:
        raise ValueError("Unsupported format %s" %fmt)

    # For SVG the data string has to be unicode, not bytes
    if fmt == 'svg':
        img = fig.canvas.get_svg(**kwargs)

        # This is needed to ensure the SVG image is the correct size.
        # We should find a better way to do this...
        width = '{}px'.format(mpl_config.get('width'))
        height = '{}px'.format(mpl_config.get('height'))
    else:
        # Express the image as a data string (or an angular placeholder)
        src = fig.canvas.manager.angular_bind(**kwargs)
        # CSS declarations are "property:value"; "width=" is not a valid
        # declaration.
        img = "<img src={src} style='width:{width};height:{height}'>"
        img = img.format(src=src, width=width, height=height)

    # Print the image to the notebook paragraph via the %html magic.
    # The wrapper must be closed with </div>, not a second opening <div>.
    html = "<div style='width:{width};height:{height}'>{img}</div>"
    print(html.format(width=width, height=height, img=img))
def displayhook():
    """
    Show all figures when matplotlib is in interactive mode; otherwise do
    nothing. Intended to be called after a paragraph has executed.
    """
    if not matplotlib.is_interactive():
        return
    show()
########################################################################
#
# Now just provide the standard names that backend.__init__ is expecting
#
########################################################################
# Create a reference to the show function we are using. This is what actually
# gets called by matplotlib.pyplot.show().
show = Show()
# Default FigureCanvas and FigureManager classes to use from the backend.
# These are the names matplotlib's backend loader looks up, per the section
# banner in this file; keep them pointing at the inline implementations.
FigureCanvas = FigureCanvasZInline
FigureManager = FigureManagerZInline

View file

@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This module provides utilities for users to configure the inline plotting
# backend through a PyZeppelinContext instance (eg, through z.configure_mpl())
import matplotlib
def configure(**kwargs):
    """
    Generic configure function.
    Usage: configure(prop1='foo', prop2='bar', ...)
    Currently supported zeppelin-specific properties are:
        interactive - If true show all figures without explicit call to show()
                      via a post-execute hook.
        angular - If true, bind figures to angular display system.
        close - If true, close all figures once shown.
        width, height - Default width / height of the figure in pixels.
        fontsize - Font size.
        dpi - dpi of the figure.
        format - Figure format; must be one of supported_formats.
        supported_formats - List of supported figure formats.
        context - ZeppelinContext instance (requires PY4J)

    If applying the new settings fails (for example ValueError for an
    unsupported format) the previous configuration is restored and the
    exception is re-raised.
    """
    previous = dict(_config)
    _config.update(**kwargs)

    try:
        # Broadcast relevant changes to matplotlib RC
        _on_config_change()
    except Exception:
        # Do not leave a rejected setting behind: every later lookup would
        # keep failing on it. Restore the old values and re-sync matplotlib.
        _config.clear()
        _config.update(previous)
        _on_config_change()
        raise
def get(key):
    """
    Look up a single configuration entry.

    Raises KeyError if the key has never been configured.
    """
    value = _config[key]
    return value
def _on_config_change():
    """
    Push the current configuration into matplotlib's rcParams.

    Raises ValueError if the configured format is not one of the supported
    formats; in that case no rcParams are modified.
    """
    # Validate before touching rcParams so that a bad format cannot leave
    # matplotlib with only part of the new settings applied.
    fmt = _config['format']
    supported_formats = _config['supported_formats']
    if fmt not in supported_formats:
        raise ValueError("Unsupported format %s" %fmt)

    # dpi
    dpi = _config['dpi']
    matplotlib.rcParams['savefig.dpi'] = dpi
    matplotlib.rcParams['figure.dpi'] = dpi

    # Width and height (configured in pixels, figsize is in inches)
    width = float(_config['width']) / dpi
    height = float(_config['height']) / dpi
    matplotlib.rcParams['figure.figsize'] = (width, height)

    # Font size
    fontsize = _config['fontsize']
    matplotlib.rcParams['font.size'] = fontsize

    # Default Figure Format
    matplotlib.rcParams['savefig.format'] = fmt

    # Interactive mode
    interactive = _config['interactive']
    matplotlib.interactive(interactive)
def _init_config():
    """
    Seed the configuration from matplotlib's current rcParams plus the
    zeppelin-specific defaults (close on show, no angular binding, no
    context, png/jpg/svg as supported formats).
    """
    dpi = matplotlib.rcParams['savefig.dpi']
    # savefig.dpi is not always numeric: matplotlib also accepts the string
    # 'figure' (meaning "use figure.dpi"). Multiplying by it below would
    # fail, so fall back to the figure dpi in that case.
    if not isinstance(dpi, (int, float)):
        dpi = matplotlib.rcParams['figure.dpi']

    fmt = matplotlib.rcParams['savefig.format']
    width, height = matplotlib.rcParams['figure.figsize']
    fontsize = matplotlib.rcParams['font.size']

    _config['dpi'] = dpi
    _config['format'] = fmt
    # figsize is in inches; the configuration stores pixels
    _config['width'] = width*dpi
    _config['height'] = height*dpi
    _config['fontsize'] = fontsize
    _config['close'] = True
    _config['interactive'] = matplotlib.is_interactive()
    _config['angular'] = False
    _config['supported_formats'] = ['png', 'jpg', 'svg']
    _config['context'] = None
# Module-level configuration store read by get() and updated by configure().
_config = {}
# Populate the store with defaults as soon as the module is imported.
_init_config()

View file

@ -66,7 +66,8 @@
}
},
"editor": {
"language": "sql"
"language": "sql",
"editOnDblClick": false
}
}
]

View file

@ -56,24 +56,6 @@ public class KylinInterpreter extends Interpreter {
static final Pattern KYLIN_TABLE_FORMAT_REGEX_LABEL = Pattern.compile("\"label\":\"(.*?)\"");
static final Pattern KYLIN_TABLE_FORMAT_REGEX = Pattern.compile("\"results\":\\[\\[\"(.*?)\"]]");
static {
Interpreter.register(
"kylin",
"kylin",
KylinInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(KYLIN_USERNAME, "ADMIN", "username for kylin user")
.add(KYLIN_PASSWORD, "KYLIN", "password for kylin user")
.add(KYLIN_QUERY_API_URL, "http://<host>:<port>/kylin/api/query", "Kylin API.")
.add(KYLIN_QUERY_PROJECT, "default", "kylin project name")
.add(KYLIN_QUERY_OFFSET, "0", "kylin query offset")
.add(KYLIN_QUERY_LIMIT, "5000", "kylin query limit")
.add(KYLIN_QUERY_ACCEPT_PARTIAL, "true", "The kylin query partial flag").build());
}
public KylinInterpreter(Properties property) {
super(property);
}

View file

@ -0,0 +1,55 @@
[
{
"group": "kylin",
"name": "kylin",
"className": "org.apache.zeppelin.kylin.KylinInterpreter",
"properties": {
"kylin.api.url": {
"envName": null,
"propertyName": "kylin.api.url",
"defaultValue": "http://localhost:7070/kylin/api/query",
"description": "Kylin API"
},
"kylin.api.user": {
"envName": null,
"propertyName": "kylin.api.user",
"defaultValue": "ADMIN",
"description": "username for kylin user"
},
"kylin.api.password": {
"envName": null,
"propertyName": "kylin.api.password",
"defaultValue": "KYLIN",
"description": "password for kylin user"
},
"kylin.query.project": {
"envName": null,
"propertyName": "kylin.query.project",
"defaultValue": "default",
"description": "kylin project name"
},
"kylin.query.offset": {
"envName": null,
"propertyName": "kylin.query.offset",
"defaultValue": "0",
"description": "kylin query offset"
},
"kylin.query.limit": {
"envName": null,
"propertyName": "kylin.query.limit",
"defaultValue": "5000",
"description": "kylin query limit"
},
"kylin.query.ispartial": {
"envName": null,
"propertyName": "kylin.query.ispartial",
"defaultValue": "true",
"description": "The kylin query partial flag"
}
},
"editor": {
"language": "sql",
"editOnDblClick": false
}
}
]

View file

@ -20,8 +20,7 @@ import org.apache.http.client.methods.HttpPost;
import org.apache.http.message.AbstractHttpMessage;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.kylin.KylinInterpreter;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.ByteArrayInputStream;
@ -33,19 +32,23 @@ import java.util.Properties;
import static org.junit.Assert.assertEquals;
public class KylinInterpreterTest {
@Before
public void setUp() throws Exception {
}
static final Properties kylinProperties = new Properties();
@After
public void tearDown() throws Exception {
@BeforeClass
public static void setUpClass() {
kylinProperties.put("kylin.api.url", "http://localhost:7070/kylin/api/query");
kylinProperties.put("kylin.api.user", "ADMIN");
kylinProperties.put("kylin.api.password", "KYLIN");
kylinProperties.put("kylin.query.project", "default");
kylinProperties.put("kylin.query.offset", "0");
kylinProperties.put("kylin.query.limit", "5000");
kylinProperties.put("kylin.query.ispartial", "true");
}
@Test
public void test(){
KylinInterpreter t = new MockKylinInterpreter(new Properties());
KylinInterpreter t = new MockKylinInterpreter(kylinProperties);
InterpreterResult result = t.interpret(
"select a.date,sum(b.measure) as measure from kylin_fact_table a " +
"inner join kylin_lookup_table b on a.date=b.date group by a.date", null);
@ -198,4 +201,4 @@ class MockEntity implements HttpEntity{
public void consumeContent() throws IOException {
}
}
}

View file

@ -76,9 +76,9 @@ public class LensInterpreter extends Interpreter {
private static Pattern s_queryExecutePattern = Pattern.compile(".*query\\s+execute\\s+(.*)");
private static Map<String, ExecutionDetail> s_paraToQH =
new ConcurrentHashMap<String, ExecutionDetail> (); //tracks paragraphID -> Lens QueryHandle
new ConcurrentHashMap<> (); //tracks paragraphId -> Lens QueryHandle
private static Map<LensClient, Boolean> s_clientMap =
new ConcurrentHashMap<LensClient, Boolean>();
new ConcurrentHashMap<>();
private int m_maxResults;
private int m_maxThreads;

View file

@ -40,6 +40,9 @@
<achilles.version>3.2.4-Zeppelin</achilles.version>
<assertj.version>1.7.0</assertj.version>
<mockito.version>1.9.5</mockito.version>
<livy.version>0.2.0</livy.version>
<spark.version>1.5.2</spark.version>
<hadoop.version>2.6.0</hadoop.version>
</properties>
<dependencies>
@ -106,8 +109,238 @@
<version>4.3.0.RELEASE</version>
</dependency>
<dependency>
<groupId>com.cloudera.livy</groupId>
<artifactId>livy-integration-test</artifactId>
<version>${livy.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.cloudera.livy</groupId>
<artifactId>livy-test-lib</artifactId>
<version>${livy.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.10</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.cloudera.livy</groupId>
<artifactId>livy-core</artifactId>
<version>${livy.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_2.10</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.10</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<!-- spark-yarn was declared twice with identical coordinates; one declaration is enough. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<!-- Use the shared hadoop.version property (2.6.0) like the other hadoop test dependencies. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<repositories>
<repository>
<id>ossrh</id>
<name>ossrh repository</name>
<url>https://oss.sonatype.org/content/repositories/releases/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
@ -165,6 +398,62 @@
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>2.16</version>
<executions>
<execution>
<goals>
<goal>integration-test</goal>
<goal>verify</goal>
</goals>
</execution>
</executions>
<configuration>
<systemPropertyVariables>
<java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
</systemPropertyVariables>
<argLine>-Xmx2048m</argLine>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.8</version>
<executions>
<!-- Cleans up files that tests append to (because we have two test plugins). -->
<execution>
<id>pre-test-clean</id>
<phase>generate-test-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<delete file="${project.build.directory}/unit-tests.log"
quiet="true"/>
<delete file="${project.build.directory}/jacoco.exec" quiet="true"/>
<delete dir="${project.build.directory}/tmp" quiet="true"/>
</target>
</configuration>
</execution>
<!-- Create the temp directory to be used by tests. -->
<execution>
<id>create-tmp-dir</id>
<phase>generate-test-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<mkdir dir="${project.build.directory}/tmp"/>
</target>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

View file

@ -57,7 +57,7 @@ public class LivyHelper {
public Integer createSession(InterpreterContext context, String kind) throws Exception {
try {
Map<String, String> conf = new HashMap<String, String>();
Map<String, String> conf = new HashMap<>();
Iterator<Entry<Object, Object>> it = property.entrySet().iterator();
while (it.hasNext()) {
@ -321,6 +321,7 @@ public class LivyHelper {
+ userSessionMap.get(context.getAuthenticationInfo().getUser())
+ "/statements/" + id,
"GET", null, context.getParagraphId());
LOGGER.debug("statement {} response: {}", id, json);
try {
Map jsonMap = gson.fromJson(json,
new TypeToken<Map>() {
@ -351,16 +352,16 @@ public class LivyHelper {
ResponseEntity<String> response = null;
try {
if (method.equals("POST")) {
HttpEntity<String> entity = new HttpEntity<String>(jsonData, headers);
HttpEntity<String> entity = new HttpEntity<>(jsonData, headers);
response = restTemplate.exchange(targetURL, HttpMethod.POST, entity, String.class);
paragraphHttpMap.put(paragraphId, response);
} else if (method.equals("GET")) {
HttpEntity<String> entity = new HttpEntity<String>(headers);
HttpEntity<String> entity = new HttpEntity<>(headers);
response = restTemplate.exchange(targetURL, HttpMethod.GET, entity, String.class);
paragraphHttpMap.put(paragraphId, response);
} else if (method.equals("DELETE")) {
HttpEntity<String> entity = new HttpEntity<String>(headers);
HttpEntity<String> entity = new HttpEntity<>(headers);
response = restTemplate.exchange(targetURL, HttpMethod.DELETE, entity, String.class);
}
} catch (HttpClientErrorException e) {

View file

@ -93,8 +93,18 @@
"description": "Whether display app info"
}
},
"option": {
"remote": true,
"port": -1,
"perNote": "shared",
"perUser": "scoped",
"isExistingProcess": false,
"setPermission": false,
"users": []
},
"editor": {
"language": "scala"
"language": "scala",
"editOnDblClick": false
}
},
{
@ -114,8 +124,18 @@
"description": "Execute multiple SQL concurrently if set true."
}
},
"option": {
"remote": true,
"port": -1,
"perNote": "shared",
"perUser": "scoped",
"isExistingProcess": false,
"setPermission": false,
"users": []
},
"editor": {
"language": "sql"
"language": "sql",
"editOnDblClick": false
}
},
{
@ -124,8 +144,18 @@
"className": "org.apache.zeppelin.livy.LivyPySparkInterpreter",
"properties": {
},
"option": {
"remote": true,
"port": -1,
"perNote": "shared",
"perUser": "scoped",
"isExistingProcess": false,
"setPermission": false,
"users": []
},
"editor": {
"language": "python"
"language": "python",
"editOnDblClick": false
}
},
{
@ -134,8 +164,18 @@
"className": "org.apache.zeppelin.livy.LivySparkRInterpreter",
"properties": {
},
"option": {
"remote": true,
"port": -1,
"perNote": "shared",
"perUser": "scoped",
"isExistingProcess": false,
"setPermission": false,
"users": []
},
"editor": {
"language": "r"
"language": "r",
"editOnDblClick": false
}
}
]

View file

@ -28,6 +28,7 @@ import org.junit.runner.RunWith;
import org.mockito.Answers;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Properties;
@ -63,14 +64,14 @@ public class LivyHelperTest {
livyHelper.property = properties;
livyHelper.paragraphHttpMap = new HashMap<>();
livyHelper.gson = new GsonBuilder().setPrettyPrinting().create();
livyHelper.LOGGER = LoggerFactory.getLogger(LivyHelper.class);
doReturn("{\"id\":1,\"state\":\"idle\",\"kind\":\"spark\",\"proxyUser\":\"null\",\"log\":[]}")
.when(livyHelper)
.executeHTTP(
livyHelper.property.getProperty("zeppelin.livy.url") + "/sessions",
"POST",
"{\"kind\": \"spark\", \"proxyUser\": \"null\"}",
"{\"kind\": \"spark\", \"conf\": {}, \"proxyUser\": null}",
null
);
@ -80,7 +81,7 @@ public class LivyHelperTest {
.executeHTTP(
livyHelper.property.getProperty("zeppelin.livy.url") + "/sessions/1/statements",
"POST",
"{\"code\": \"print(1)\" }",
"{\"code\": \"print(1)\"}",
null
);

View file

@ -0,0 +1,242 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.livy;
import com.cloudera.livy.test.framework.Cluster;
import com.cloudera.livy.test.framework.Cluster$;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
 * Integration test that drives the Livy based interpreters against the Livy test cluster.
 *
 * <p>Every test returns immediately (and therefore passes) when the environment variables
 * {@code LIVY_HOME} or {@code SPARK_HOME} are not set, see {@link #checkPreCondition()}.
 */
public class LivyIntegrationTest {

  private static final Logger LOGGER = LoggerFactory.getLogger(LivyIntegrationTest.class);
  private static Cluster cluster;
  private static Properties properties;

  /**
   * Obtains the Livy test cluster and prepares the interpreter properties that point to its
   * endpoint. Does nothing when the precondition is not met.
   */
  @BeforeClass
  public static void setUp() {
    if (checkPreCondition()) {
      cluster = Cluster$.MODULE$.get();
      LOGGER.info("Starting livy at {}", cluster.livyEndpoint());
      properties = new Properties();
      properties.setProperty("zeppelin.livy.url", cluster.livyEndpoint());
      properties.setProperty("zeppelin.livy.create.session.retries", "120");
      properties.setProperty("zeppelin.livy.spark.sql.maxResult", "100");
    }
  }

  /** Cleans up the cluster, if one was obtained in {@link #setUp()}. */
  @AfterClass
  public static void tearDown() {
    if (cluster == null) {
      return;
    }
    cluster.cleanUp();
  }

  /**
   * Checks whether the environment required by this test is available.
   *
   * @return {@code true} only when both LIVY_HOME and SPARK_HOME are set
   */
  public static boolean checkPreCondition() {
    boolean livyHomeMissing = System.getenv("LIVY_HOME") == null;
    if (livyHomeMissing) {
      LOGGER.warn("livy integration is skipped because LIVY_HOME is not set");
      return false;
    }
    boolean sparkHomeMissing = System.getenv("SPARK_HOME") == null;
    if (sparkHomeMissing) {
      LOGGER.warn("livy integration is skipped because SPARK_HOME is not set");
      return false;
    }
    return true;
  }

  @Test
  public void testSparkInterpreter() {
    if (!checkPreCondition()) {
      return;
    }
    LivySparkInterpreter sparkInterpreter = new LivySparkInterpreter(properties);
    MyInterpreterOutputListener outputListener = new MyInterpreterOutputListener();
    InterpreterContext context = newContext(outputListener);
    sparkInterpreter.open();

    InterpreterResult result = sparkInterpreter.interpret("sc.version", context);
    assertResult(result, InterpreterResult.Type.TEXT);

    // RDD api
    outputListener.reset();
    result = sparkInterpreter.interpret("sc.parallelize(1 to 10).sum()", context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertTrue(outputListener.getOutputAppended().contains("Double = 55.0"));

    // DataFrame api
    outputListener.reset();
    sparkInterpreter.interpret("val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n"
        + "import sqlContext.implicits._", context);
    result = sparkInterpreter.interpret("val df=sqlContext.createDataFrame(Seq((\"hello\",20)))\n"
        + "df.collect()", context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertTrue(outputListener.getOutputAppended()
        .contains("Array[org.apache.spark.sql.Row] = Array([hello,20])"));
    sparkInterpreter.interpret("df.registerTempTable(\"df\")", context);

    // LivySparkSQLInterpreter, which shares the same SparkContext with LivySparkInterpreter
    outputListener.reset();
    LivySparkSQLInterpreter sqlInterpreter = new LivySparkSQLInterpreter(properties);
    sqlInterpreter.open();
    result = sqlInterpreter.interpret("select * from df", context);
    assertResult(result, InterpreterResult.Type.TABLE);
    // TODO (zjffdu): the \t at the end of each line is not necessary, it is a bug of
    // LivySparkSQLInterpreter
    assertEquals("_1\t_2\t\nhello\t20\t\n", result.message());

    // single line comment
    outputListener.reset();
    String singleLineComment = "// my comment";
    result = sparkInterpreter.interpret(singleLineComment, context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertNull(result.message());

    // multiple line comment
    outputListener.reset();
    String multipleLineComment = "/* multiple \n" + "line \n" + "comment */";
    result = sparkInterpreter.interpret(multipleLineComment, context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertNull(result.message());

    // multi-line string
    outputListener.reset();
    String multiLineString = "val str = \"\"\"multiple\n"
        + "line\"\"\"\n"
        + "println(str)";
    result = sparkInterpreter.interpret(multiLineString, context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertNull(result.message());
    assertTrue(outputListener.getOutputAppended().contains("multiple\nline"));

    // case class
    outputListener.reset();
    String caseClassCode = "case class Person(id:Int, \n"
        + "name:String)\n"
        + "val p=Person(1, \"name_a\")";
    result = sparkInterpreter.interpret(caseClassCode, context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertNull(result.message());
    assertTrue(outputListener.getOutputAppended().contains("defined class Person"));

    // object class
    outputListener.reset();
    String objectClassCode = "object Person {}";
    result = sparkInterpreter.interpret(objectClassCode, context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertNull(result.message());
    assertTrue(outputListener.getOutputAppended().contains("defined module Person"));
  }

  @Test
  public void testPySparkInterpreter() {
    if (!checkPreCondition()) {
      return;
    }
    LivyPySparkInterpreter pysparkInterpreter = new LivyPySparkInterpreter(properties);
    MyInterpreterOutputListener outputListener = new MyInterpreterOutputListener();
    InterpreterContext context = newContext(outputListener);
    pysparkInterpreter.open();

    InterpreterResult result = pysparkInterpreter.interpret("sc.version", context);
    assertResult(result, InterpreterResult.Type.TEXT);

    // RDD api
    outputListener.reset();
    result = pysparkInterpreter.interpret("sc.range(1, 10).sum()", context);
    assertResult(result, InterpreterResult.Type.TEXT);
    assertEquals("45", result.message());

    // DataFrame api
    outputListener.reset();
    pysparkInterpreter.interpret("from pyspark.sql import SQLContext\n"
        + "sqlContext = SQLContext(sc)", context);
    result = pysparkInterpreter.interpret("df=sqlContext.createDataFrame([(\"hello\",20)])\n"
        + "df.collect()", context);
    // The message is checked before type and code here, keep that order.
    assertTrue(result.message().contains("[Row(_1=u'hello', _2=20)]"));
    assertEquals(InterpreterResult.Type.TEXT, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  }

  @Test
  public void testSparkRInterpreter() {
    if (!checkPreCondition()) {
      return;
    }
    // TODO (zjffdu): Livy's SparkRInterpreter has some issues, do it after the livy-0.3 release.
  }

  /** Builds the interpreter context used by the tests; output goes to the given listener. */
  private static InterpreterContext newContext(MyInterpreterOutputListener listener) {
    AuthenticationInfo authInfo = new AuthenticationInfo("user1");
    InterpreterOutput output = new InterpreterOutput(listener);
    return new InterpreterContext("noteId", "paragraphId", "title",
        "text", authInfo, null, null, null, null, null, output);
  }

  /** Asserts that the result is a SUCCESS of the expected type (code is checked first). */
  private static void assertResult(InterpreterResult result, InterpreterResult.Type expected) {
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertEquals(expected, result.type());
  }

  /** Output listener that logs and accumulates everything appended to / updated on the output. */
  public static class MyInterpreterOutputListener implements InterpreterOutputListener {
    private final StringBuilder outputAppended = new StringBuilder();
    private final StringBuilder outputUpdated = new StringBuilder();

    @Override
    public void onAppend(InterpreterOutput out, byte[] line) {
      LOGGER.info("onAppend:" + new String(line));
      outputAppended.append(new String(line));
    }

    @Override
    public void onUpdate(InterpreterOutput out, byte[] output) {
      LOGGER.info("onUpdate:" + new String(output));
      outputUpdated.append(new String(output));
    }

    public String getOutputAppended() {
      return outputAppended.toString();
    }

    public String getOutputUpdated() {
      return outputUpdated.toString();
    }

    /** Discards everything collected so far. */
    public void reset() {
      outputAppended.setLength(0);
      outputUpdated.setLength(0);
    }
  }
}

View file

@ -0,0 +1,24 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.rootLogger = INFO, stdout
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
log4j.logger.org.apache.zeppelin.livy=DEBUG

View file

@ -12,7 +12,8 @@
}
},
"editor": {
"language": "markdown"
"language": "markdown",
"editOnDblClick": true
}
}
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

184
pig/pom.xml Normal file
View file

@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-pig</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Pig Interpreter</name>
<description>Zeppelin interpreter for Apache Pig</description>
<url>http://zeppelin.apache.org</url>
<properties>
<pig.version>0.16.0</pig.version>
<hadoop.version>2.6.0</hadoop.version>
<tez.version>0.7.0</tez.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<classifier>h2</classifier>
<version>${pig.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-api</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-common</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-dag</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-runtime-library</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-runtime-internals</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-mapreduce</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-yarn-timeline-history-with-acls</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/pig
</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/pig
</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.PigServer;
import org.apache.pig.backend.BackendException;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
import org.apache.pig.backend.hadoop.executionengine.Launcher;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.reflect.Field;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Common base class of the Pig interpreters.
 *
 * <p>It implements the parts that do not depend on how a paragraph is executed: cancelling the
 * jobs of a running paragraph, reporting its progress and providing the scheduler. Subclasses
 * are expected to put a {@link PigScriptListener} into {@link #listenerMap} (keyed by paragraph
 * id) while a paragraph is running and to supply the {@link PigServer} through
 * {@link #getPigServer()}.
 */
public abstract class BasePigInterpreter extends Interpreter {
  private static final Logger LOGGER = LoggerFactory.getLogger(BasePigInterpreter.class);

  /** Script listeners of the paragraphs that are currently registered, keyed by paragraph id. */
  protected ConcurrentHashMap<String, PigScriptListener> listenerMap = new ConcurrentHashMap<>();

  public BasePigInterpreter(Properties property) {
    super(property);
  }

  /**
   * Kills every job that the listener of the given paragraph has recorded so far.
   *
   * <p>Failures are logged and never propagated; a failure to kill one job does not prevent the
   * remaining jobs from being killed.
   *
   * @param context context of the paragraph to cancel
   */
  @Override
  public void cancel(InterpreterContext context) {
    String paragraphId = context.getParagraphId();
    LOGGER.info("Cancel paragraph:" + paragraphId);
    PigScriptListener listener = listenerMap.get(paragraphId);
    if (listener == null) {
      LOGGER.warn("No PigScriptListener found, can not cancel paragraph:" + paragraphId);
      return;
    }
    Set<String> jobIds = listener.getJobIds();
    if (jobIds.isEmpty()) {
      LOGGER.info("No job is started, so can not cancel paragraph:" + paragraphId);
      return;
    }
    PigServer server = getPigServer();
    if (server == null) {
      // Subclasses may drop the PigServer when they are closed; nothing can be killed then.
      LOGGER.warn("PigServer is not available, can not cancel paragraph:" + paragraphId);
      return;
    }

    // The launcher does not depend on the job, so it is looked up once for all jobs.
    Launcher launcher;
    try {
      launcher = getLauncher(server);
    } catch (NoSuchFieldException | IllegalAccessException e) {
      LOGGER.error("Fail to cancel paragraph:" + paragraphId, e);
      return;
    }

    for (String jobId : jobIds) {
      LOGGER.info("Kill jobId:" + jobId);
      try {
        // It doesn't work for Tez Engine due to PIG-5035
        launcher.killJob(jobId, new Configuration());
      } catch (BackendException e) {
        LOGGER.error("Fail to cancel paragraph:" + paragraphId, e);
      }
    }
  }

  /**
   * Reads the non-public {@code launcher} field of the execution engine via reflection.
   */
  private static Launcher getLauncher(PigServer server)
      throws NoSuchFieldException, IllegalAccessException {
    HExecutionEngine engine = (HExecutionEngine) server.getPigContext().getExecutionEngine();
    Field launcherField = HExecutionEngine.class.getDeclaredField("launcher");
    launcherField.setAccessible(true);
    return (Launcher) launcherField.get(engine);
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  /**
   * @return the progress reported by the listener of the paragraph, or 0 when no listener is
   *         registered for it
   */
  @Override
  public int getProgress(InterpreterContext context) {
    PigScriptListener listener = listenerMap.get(context.getParagraphId());
    return listener == null ? 0 : listener.getProgress();
  }

  /**
   * @return a FIFO scheduler whose name is built from the {@link PigInterpreter} class name and
   *         the hash code of this instance
   */
  @Override
  public Scheduler getScheduler() {
    return SchedulerFactory.singleton().createOrGetFIFOScheduler(
        PigInterpreter.class.getName() + this.hashCode());
  }

  /**
   * @return the PigServer used by this interpreter
   */
  public abstract PigServer getPigServer();
}

View file

@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigServer;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.tools.pigstats.*;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
/**
* Pig interpreter for Zeppelin.
*/
public class PigInterpreter extends BasePigInterpreter {
private static Logger LOGGER = LoggerFactory.getLogger(PigInterpreter.class);
private PigServer pigServer;
private boolean includeJobStats = false;
public PigInterpreter(Properties property) {
super(property);
}
@Override
public void open() {
String execType = getProperty("zeppelin.pig.execType");
if (execType == null) {
execType = "mapreduce";
}
String includeJobStats = getProperty("zeppelin.pig.includeJobStats");
if (includeJobStats != null) {
this.includeJobStats = Boolean.parseBoolean(includeJobStats);
}
try {
pigServer = new PigServer(execType);
} catch (IOException e) {
LOGGER.error("Fail to initialize PigServer", e);
throw new RuntimeException("Fail to initialize PigServer", e);
}
}
@Override
public void close() {
pigServer = null;
}
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
// remember the origial stdout, because we will redirect stdout to capture
// the pig dump output.
PrintStream originalStdOut = System.out;
ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
File tmpFile = null;
try {
tmpFile = PigUtils.createTempPigScript(cmd);
System.setOut(new PrintStream(bytesOutput));
// each thread should its own ScriptState & PigStats
ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
// reset PigStats, otherwise you may get the PigStats of last job in the same thread
// because PigStats is ThreadLocal variable
PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
PigScriptListener scriptListener = new PigScriptListener();
ScriptState.get().registerListener(scriptListener);
listenerMap.put(contextInterpreter.getParagraphId(), scriptListener);
pigServer.registerScript(tmpFile.getAbsolutePath());
} catch (IOException e) {
if (e instanceof FrontendException) {
FrontendException fe = (FrontendException) e;
if (!fe.getMessage().contains("Backend error :")) {
// If the error message contains "Backend error :", that means the exception is from
// backend.
LOGGER.error("Fail to run pig script.", e);
return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
}
}
PigStats stats = PigStats.get();
if (stats != null) {
String errorMsg = PigUtils.extactJobStats(stats);
if (errorMsg != null) {
LOGGER.error("Fail to run pig script, " + errorMsg);
return new InterpreterResult(Code.ERROR, errorMsg);
}
}
LOGGER.error("Fail to run pig script.", e);
return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
} finally {
System.setOut(originalStdOut);
listenerMap.remove(contextInterpreter.getParagraphId());
if (tmpFile != null) {
tmpFile.delete();
}
}
StringBuilder outputBuilder = new StringBuilder();
PigStats stats = PigStats.get();
if (stats != null && includeJobStats) {
String jobStats = PigUtils.extactJobStats(stats);
if (jobStats != null) {
outputBuilder.append(jobStats);
}
}
outputBuilder.append(bytesOutput.toString());
return new InterpreterResult(Code.SUCCESS, outputBuilder.toString());
}
public PigServer getPigServer() {
return pigServer;
}
}

View file

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.ScriptState;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
/**
 * Pig interpreter that renders the result of a query as a Zeppelin table.
 *
 * <p>The last line of the paragraph is assigned to a paragraph specific alias, the script is
 * registered on the PigServer of the {@link PigInterpreter} in the same session, and the tuples
 * of that alias are returned as tab separated rows. At most {@code zeppelin.pig.maxResult} rows
 * are returned.
 */
public class PigQueryInterpreter extends BasePigInterpreter {
  private static final Logger LOGGER = LoggerFactory.getLogger(PigQueryInterpreter.class);

  private PigServer pigServer;
  private int maxResult;

  public PigQueryInterpreter(Properties properties) {
    super(properties);
  }

  /**
   * Picks up the PigServer of the PigInterpreter in the same session and reads the row limit.
   *
   * @throws NumberFormatException if {@code zeppelin.pig.maxResult} is unset or not an integer
   */
  @Override
  public void open() {
    pigServer = getPigInterpreter().getPigServer();
    maxResult = Integer.parseInt(getProperty("zeppelin.pig.maxResult"));
  }

  @Override
  public void close() {
    // Nothing to release here, the PigServer is taken from PigInterpreter.
  }

  /**
   * Runs the query and returns the relation defined by its last line as a table.
   *
   * @param st pig statements; the last line must be an expression that can be assigned to an
   *           alias
   * @param context context of the paragraph being run
   * @return SUCCESS with a "%table" message containing a header line and at most maxResult
   *         rows, or ERROR with the job statistics / stack trace when an IOException is thrown
   */
  @Override
  public InterpreterResult interpret(String st, InterpreterContext context) {
    // '-' is invalid for pig alias
    String alias = "paragraph_" + context.getParagraphId().replace("-", "_");
    String[] lines = st.split("\n");
    List<String> queries = new ArrayList<>();
    for (int i = 0; i < lines.length; ++i) {
      if (i == lines.length - 1) {
        lines[i] = alias + " = " + lines[i];
      }
      queries.add(lines[i]);
    }

    StringBuilder resultBuilder = new StringBuilder("%table ");
    File tmpScriptFile = null;
    try {
      tmpScriptFile = PigUtils.createTempPigScript(queries);
      // each thread should have its own ScriptState & PigStats
      ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
      // reset PigStats, otherwise you may get the PigStats of last job in the same thread
      // because PigStats is ThreadLocal variable
      PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
      PigScriptListener scriptListener = new PigScriptListener();
      ScriptState.get().registerListener(scriptListener);
      listenerMap.put(context.getParagraphId(), scriptListener);
      pigServer.registerScript(tmpScriptFile.getAbsolutePath());

      // Header line built from the schema, when the schema is known.
      Schema schema = pigServer.dumpSchema(alias);
      boolean schemaKnown = (schema != null);
      if (schemaKnown) {
        for (int i = 0; i < schema.size(); ++i) {
          Schema.FieldSchema field = schema.getField(i);
          resultBuilder.append(field.alias);
          if (i != schema.size() - 1) {
            resultBuilder.append("\t");
          }
        }
        resultBuilder.append("\n");
      }

      Iterator<Tuple> iter = pigServer.openIterator(alias);
      boolean firstRow = true;
      int index = 0;
      // Strictly less than: exactly maxResult rows are emitted, not maxResult + 1.
      while (iter.hasNext() && index < maxResult) {
        index++;
        Tuple tuple = iter.next();
        if (firstRow && !schemaKnown) {
          // Without a schema, generic column names are derived from the first tuple.
          for (int i = 0; i < tuple.size(); ++i) {
            resultBuilder.append("c_" + i + "\t");
          }
          resultBuilder.append("\n");
          firstRow = false;
        }
        resultBuilder.append(StringUtils.join(tuple, "\t"));
        resultBuilder.append("\n");
      }
      if (index >= maxResult && iter.hasNext()) {
        resultBuilder.append("\n<font color=red>Results are limited by " + maxResult + ".</font>");
      }
    } catch (IOException e) {
      return toErrorResult(e);
    } finally {
      listenerMap.remove(context.getParagraphId());
      // The script is only needed while it is being registered, do not leave it behind.
      if (tmpScriptFile != null && !tmpScriptFile.delete()) {
        LOGGER.warn("Fail to delete temp pig script:" + tmpScriptFile.getAbsolutePath());
      }
    }
    return new InterpreterResult(Code.SUCCESS, resultBuilder.toString());
  }

  /**
   * Extracts the error in the following order:
   * 1. FrontendException, which happens in the query compilation phase,
   * 2. PigStats, which describes an execution error,
   * 3. other errors.
   */
  private InterpreterResult toErrorResult(IOException e) {
    if (e instanceof FrontendException) {
      String message = e.getMessage();
      // "Backend error :" marks an exception coming from the backend; an exception without a
      // message can not be attributed to the backend.
      if (message == null || !message.contains("Backend error :")) {
        LOGGER.error("Fail to run pig query.", e);
        return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
      }
    }
    PigStats stats = PigStats.get();
    if (stats != null) {
      String errorMsg = PigUtils.extactJobStats(stats);
      if (errorMsg != null) {
        LOGGER.error("Fail to run pig query, " + errorMsg);
        return new InterpreterResult(Code.ERROR, errorMsg);
      }
    }
    LOGGER.error("Fail to run pig query.", e);
    return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
  }

  @Override
  public PigServer getPigServer() {
    return this.pigServer;
  }

  /**
   * Finds the PigInterpreter of the same session by unwrapping the interpreter wrappers, and
   * opens it through its LazyOpenInterpreter wrapper when there is one.
   */
  private PigInterpreter getPigInterpreter() {
    LazyOpenInterpreter lazy = null;
    Interpreter p = getInterpreterInTheSameSessionByClassName(PigInterpreter.class.getName());
    while (p instanceof WrappedInterpreter) {
      if (p instanceof LazyOpenInterpreter) {
        lazy = (LazyOpenInterpreter) p;
      }
      p = ((WrappedInterpreter) p).getInnerInterpreter();
    }
    PigInterpreter pig = (PigInterpreter) p;
    if (lazy != null) {
      lazy.open();
    }
    return pig;
  }
}

View file

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.pig.impl.plan.OperatorPlan;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigProgressNotificationListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashSet;
import java.util.Set;
/**
 * {@link PigProgressNotificationListener} that remembers the ids of the jobs reported as
 * started and the latest progress value reported for the script.
 *
 * <p>All other notifications are ignored.
 */
public class PigScriptListener implements PigProgressNotificationListener {

  private static final Logger LOGGER = LoggerFactory.getLogger(PigScriptListener.class);

  // ids collected from jobStartedNotification, in no particular order
  private final Set<String> jobIds = new HashSet<>();
  // last value passed to progressUpdatedNotification
  private int progress;

  @Override
  public void initialPlanNotification(String scriptId, OperatorPlan<?> plan) {
  }

  @Override
  public void launchStartedNotification(String scriptId, int numJobsToLaunch) {
  }

  @Override
  public void jobsSubmittedNotification(String scriptId, int numJobsSubmitted) {
  }

  /** Records the id assigned to the started job. */
  @Override
  public void jobStartedNotification(String scriptId, String assignedJobId) {
    this.jobIds.add(assignedJobId);
  }

  @Override
  public void jobFinishedNotification(String scriptId, JobStats jobStats) {
  }

  @Override
  public void jobFailedNotification(String scriptId, JobStats jobStats) {
  }

  @Override
  public void outputCompletedNotification(String scriptId, OutputStats outputStats) {
  }

  /** Stores the reported progress value so that it can be read via {@link #getProgress()}. */
  @Override
  public void progressUpdatedNotification(String scriptId, int progress) {
    // parameterized form: the message is only built when debug logging is enabled
    LOGGER.debug("scriptId:{}, progress:{}", scriptId, progress);
    this.progress = progress;
  }

  @Override
  public void launchCompletedNotification(String scriptId, int numJobsSucceeded) {
  }

  /**
   * @return the live set of job ids recorded so far (not a copy)
   */
  public Set<String> getJobIds() {
    return jobIds;
  }

  /**
   * @return the most recently reported progress value, 0 if none was reported yet
   */
  public int getProgress() {
    return progress;
  }
}

View file

@ -0,0 +1,292 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigRunner;
import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType;
import org.apache.pig.tools.pigstats.InputStats;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.mapreduce.MRJobStats;
import org.apache.pig.tools.pigstats.mapreduce.SimplePigStats;
import org.apache.pig.tools.pigstats.tez.TezDAGStats;
import org.apache.pig.tools.pigstats.tez.TezPigScriptStats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
 * Static helpers used by the Pig interpreters: writing a script to a temporary file, and
 * turning {@link PigStats} into a printable report or a list of job ids.
 *
 * <p>Several values are read from private fields of the Pig stats classes via reflection,
 * so these helpers depend on the field names used by the Pig version on the classpath.
 */
public class PigUtils {

  private static final Logger LOGGER = LoggerFactory.getLogger(PigUtils.class);

  protected static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";

  // "<right aligned label>: <left aligned value>" row used by the Tez report
  private static final String TEZ_ROW_FORMAT = "%1$20s: %2$-100s%n";

  /**
   * Writes {@code content} to a newly created temporary file.
   *
   * @param content text of the pig script
   * @return the created file, as an absolute file
   * @throws IOException if the file can not be created or written
   */
  public static File createTempPigScript(String content) throws IOException {
    File tmpFile = File.createTempFile("zeppelin", "pig");
    LOGGER.debug("Create pig script file:{}", tmpFile.getAbsolutePath());
    // try-with-resources so the writer is closed even when the write fails
    try (FileWriter writer = new FileWriter(tmpFile)) {
      IOUtils.write(content, writer);
    }
    return tmpFile.getAbsoluteFile();
  }

  /**
   * Joins {@code lines} with newlines and writes them to a temporary file.
   *
   * @see #createTempPigScript(String)
   */
  public static File createTempPigScript(List<String> lines) throws IOException {
    return createTempPigScript(StringUtils.join(lines, "\n"));
  }

  /**
   * Builds the printable statistics report for the given stats object.
   *
   * @param stats {@link SimplePigStats} or {@link TezPigScriptStats}
   * @return the report; may be {@code null} when no report can be produced
   * @throws RuntimeException if {@code stats} is of any other type
   */
  public static String extactJobStats(PigStats stats) {
    if (stats instanceof SimplePigStats) {
      return extractFromSimplePigStats((SimplePigStats) stats);
    } else if (stats instanceof TezPigScriptStats) {
      return extractFromTezPigStats((TezPigScriptStats) stats);
    } else {
      throw new RuntimeException("Unrecognized stats type:" + stats.getClass().getSimpleName());
    }
  }

  /**
   * Builds the statistics report of a MapReduce/local run.
   *
   * @return the report, {@code null} if the return code is unknown or the stats have no pig
   *         context, or an error text with a stack trace if the stats can not be read
   */
  public static String extractFromSimplePigStats(SimplePigStats stats) {
    try {
      String userId = (String) readField(PigStats.class, "userId", stats);
      long startTime = (Long) readField(PigStats.class, "startTime", stats);
      long endTime = (Long) readField(PigStats.class, "endTime", stats);

      if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
        LOGGER.warn("unknown return code, can't display the results");
        return null;
      }
      if (stats.getPigContext() == null) {
        LOGGER.warn("unknown exec type, don't display the results");
        return null;
      }

      // SimpleDateFormat is not thread-safe, hence a new instance per call
      SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
      StringBuilder sb = new StringBuilder();
      sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
      sb.append(stats.getHadoopVersion()).append("\t")
          .append(stats.getPigVersion()).append("\t")
          .append(userId).append("\t")
          .append(sdf.format(new Date(startTime))).append("\t")
          .append(sdf.format(new Date(endTime))).append("\t")
          .append(stats.getFeatures()).append("\n");
      sb.append("\n");

      int returnCode = stats.getReturnCode();
      boolean partialFailure = (returnCode == PigRunner.ReturnCode.PARTIAL_FAILURE);
      if (returnCode == PigRunner.ReturnCode.SUCCESS) {
        sb.append("Success!\n");
      } else if (partialFailure) {
        sb.append("Some jobs have failed! Stop running all dependent jobs\n");
      } else {
        sb.append("Failed!\n");
      }
      sb.append("\n");

      PigStats.JobGraph jobPlan = (PigStats.JobGraph) readField(PigStats.class, "jobPlan", stats);

      if (returnCode == PigRunner.ReturnCode.SUCCESS || partialFailure) {
        sb.append("Job Stats (time in seconds):\n");
        sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
        for (JobStats js : jobPlan.getSuccessfulJobs()) {
          sb.append(js.getDisplayString());
        }
        sb.append("\n");
      }
      if (returnCode == PigRunner.ReturnCode.FAILURE || partialFailure) {
        sb.append("Failed Jobs:\n");
        sb.append(MRJobStats.FAILURE_HEADER).append("\n");
        for (JobStats js : jobPlan.getFailedJobs()) {
          sb.append(js.getDisplayString());
        }
        sb.append("\n");
      }

      sb.append("Input(s):\n");
      for (InputStats is : stats.getInputStats()) {
        sb.append(is.getDisplayString());
      }
      sb.append("\n");
      sb.append("Output(s):\n");
      for (OutputStats ds : stats.getOutputStats()) {
        sb.append(ds.getDisplayString());
      }

      sb.append("\nCounters:\n");
      sb.append("Total records written : ").append(stats.getRecordWritten()).append("\n");
      sb.append("Total bytes written : ").append(stats.getBytesWritten()).append("\n");
      sb.append("Spillable Memory Manager spill count : ")
          .append(stats.getSMMSpillCount()).append("\n");
      sb.append("Total bags proactively spilled: ")
          .append(stats.getProactiveSpillCountObjects()).append("\n");
      sb.append("Total records proactively spilled: ")
          .append(stats.getProactiveSpillCountRecords()).append("\n");
      sb.append("\nJob DAG:\n").append(jobPlan.toString());

      return "Script Statistics: \n" + sb.toString();
    } catch (Exception e) {
      LOGGER.error("Can not extract message from SimplePigStats", e);
      return "Can not extract message from SimplePigStats," + ExceptionUtils.getStackTrace(e);
    }
  }

  /**
   * Builds the statistics report of a Tez run.
   *
   * @return the report, or an error text with a stack trace if the stats can not be read
   */
  private static String extractFromTezPigStats(TezPigScriptStats stats) {
    try {
      String userId = (String) readField(PigStats.class, "userId", stats);
      long startTime = (Long) readField(PigStats.class, "startTime", stats);
      long endTime = (Long) readField(PigStats.class, "endTime", stats);

      SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
      StringBuilder sb = new StringBuilder();
      sb.append("\n");
      sb.append(String.format(TEZ_ROW_FORMAT, "HadoopVersion", stats.getHadoopVersion()));
      sb.append(String.format(TEZ_ROW_FORMAT, "PigVersion", stats.getPigVersion()));
      sb.append(String.format(TEZ_ROW_FORMAT, "TezVersion", TezExecType.getTezVersion()));
      sb.append(String.format(TEZ_ROW_FORMAT, "UserId", userId));
      sb.append(String.format(TEZ_ROW_FORMAT, "FileName", stats.getFileName()));
      sb.append(String.format(TEZ_ROW_FORMAT, "StartedAt", sdf.format(new Date(startTime))));
      sb.append(String.format(TEZ_ROW_FORMAT, "FinishedAt", sdf.format(new Date(endTime))));
      sb.append(String.format(TEZ_ROW_FORMAT, "Features", stats.getFeatures()));
      sb.append("\n");

      int returnCode = stats.getReturnCode();
      boolean partialFailure = (returnCode == PigRunner.ReturnCode.PARTIAL_FAILURE);
      if (returnCode == PigRunner.ReturnCode.SUCCESS) {
        sb.append("Success!\n");
      } else if (partialFailure) {
        sb.append("Some tasks have failed! Stop running all dependent tasks\n");
      } else {
        sb.append("Failed!\n");
      }
      sb.append("\n");

      // Print diagnostic info in case of failure
      boolean failed = (returnCode == PigRunner.ReturnCode.FAILURE) || partialFailure;
      if (failed && stats.getErrorMessage() != null) {
        String[] lines = stats.getErrorMessage().split("\n");
        for (int i = 0; i < lines.length; i++) {
          String s = lines[i].trim();
          // the first line always carries the label, later blank lines are dropped
          if (i == 0 || !StringUtils.isEmpty(s)) {
            sb.append(String.format(TEZ_ROW_FORMAT, i == 0 ? "ErrorMessage" : "", s));
          }
        }
        sb.append("\n");
      }

      int count = 0;
      for (TezDAGStats dagStats : readTezDagStats(stats).values()) {
        sb.append("\n");
        sb.append("DAG ").append(count++).append(":\n");
        sb.append(dagStats.getDisplayString());
        sb.append("\n");
      }

      sb.append("Input(s):\n");
      for (InputStats is : stats.getInputStats()) {
        sb.append(is.getDisplayString().trim()).append("\n");
      }
      sb.append("\n");
      sb.append("Output(s):\n");
      for (OutputStats os : stats.getOutputStats()) {
        sb.append(os.getDisplayString().trim()).append("\n");
      }
      return "Script Statistics:\n" + sb.toString();
    } catch (Exception e) {
      LOGGER.error("Can not extract message from TezPigScriptStats", e);
      return "Can not extract message from TezPigScriptStats," + ExceptionUtils.getStackTrace(e);
    }
  }

  /**
   * Collects the job ids found in the given stats object.
   *
   * @param stat {@link SimplePigStats} or {@link TezPigScriptStats}
   * @throws RuntimeException if {@code stat} is of any other type or can not be read
   */
  public static List<String> extractJobIds(PigStats stat) {
    if (stat instanceof SimplePigStats) {
      return extractJobIdsFromSimplePigStats((SimplePigStats) stat);
    } else if (stat instanceof TezPigScriptStats) {
      return extractJobIdsFromTezPigStats((TezPigScriptStats) stat);
    } else {
      throw new RuntimeException("Unrecognized stats type:" + stat.getClass().getSimpleName());
    }
  }

  /**
   * Returns the id of every job in the job plan of {@code stat}.
   *
   * @throws RuntimeException if the job plan can not be read
   */
  public static List<String> extractJobIdsFromSimplePigStats(SimplePigStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
      PigStats.JobGraph jobPlan = (PigStats.JobGraph) readField(PigStats.class, "jobPlan", stat);
      for (JobStats js : jobPlan.getJobList()) {
        jobIds.add(js.getJobId());
      }
      return jobIds;
    } catch (Exception e) {
      LOGGER.error("Can not extract jobIds from SimplePigStats", e);
      throw new RuntimeException("Can not extract jobIds from SimplePigStats", e);
    }
  }

  /**
   * Returns the job id of every Tez DAG recorded in {@code stat}.
   *
   * @throws RuntimeException if the DAG stats can not be read
   */
  public static List<String> extractJobIdsFromTezPigStats(TezPigScriptStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
      for (TezDAGStats dagStats : readTezDagStats(stat).values()) {
        LOGGER.debug("Tez JobId:{}", dagStats.getJobId());
        jobIds.add(dagStats.getJobId());
      }
      return jobIds;
    } catch (Exception e) {
      LOGGER.error("Can not extract jobIds from TezPigScriptStats", e);
      throw new RuntimeException("Can not extract jobIds from TezPigScriptStats", e);
    }
  }

  /** Reads the (possibly private) field {@code fieldName} of {@code owner} from {@code target}. */
  private static Object readField(Class<?> owner, String fieldName, Object target)
      throws ReflectiveOperationException {
    Field field = owner.getDeclaredField(fieldName);
    field.setAccessible(true);
    return field.get(target);
  }

  /** Reads the private {@code tezDAGStatsMap} field of the given Tez stats. */
  @SuppressWarnings("unchecked") // the cast relies on the field's declared type in Pig
  private static Map<String, TezDAGStats> readTezDagStats(TezPigScriptStats stats)
      throws ReflectiveOperationException {
    return (Map<String, TezDAGStats>)
        readField(TezPigScriptStats.class, "tezDAGStatsMap", stats);
  }
}

View file

@ -0,0 +1,48 @@
[
{
"group": "pig",
"name": "script",
"className": "org.apache.zeppelin.pig.PigInterpreter",
"properties": {
"zeppelin.pig.execType": {
"envName": null,
"propertyName": "zeppelin.pig.execType",
"defaultValue": "mapreduce",
"description": "local | mapreduce | tez"
},
"zeppelin.pig.includeJobStats": {
"envName": null,
"propertyName": "zeppelin.pig.includeJobStats",
"defaultValue": "false",
"description": "flag to include job stats in output"
}
},
"editor": {
"language": "pig",
"editOnDblClick": false
}
},
{
"group": "pig",
"name": "query",
"className": "org.apache.zeppelin.pig.PigQueryInterpreter",
"properties": {
"zeppelin.pig.execType": {
"envName": null,
"propertyName": "zeppelin.pig.execType",
"defaultValue": "mapreduce",
"description": "local | mapreduce | tez"
},
"zeppelin.pig.maxResult": {
"envName": null,
"propertyName": "zeppelin.pig.maxResult",
"defaultValue": "1000",
"description": "max row number for %pig.query"
}
},
"editor": {
"language": "pig",
"editOnDblClick": false
}
}
]

View file

@ -0,0 +1,155 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link PigInterpreter} running with exec type {@code local}.
 */
public class PigInterpreterTest {

  private PigInterpreter pigInterpreter;
  private InterpreterContext context;

  @Before
  public void setUp() {
    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "local");
    pigInterpreter = new PigInterpreter(properties);
    pigInterpreter.open();
    context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null,
        null, null);
  }

  @After
  public void tearDown() {
    pigInterpreter.close();
  }

  @Test
  public void testBasics() throws IOException {
    File tmpFile = createDataFile("1\tandy\n"
        + "2\tpeter\n");

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Syntax error, unexpected symbol at or near 'a'"));

    // execution error
    pigscript = "a = load 'invalid_path';"
        + "dump a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Input path does not exist"));
  }

  @Test
  public void testIncludeJobStats() throws IOException {
    // release the interpreter opened by setUp() before replacing it, tearDown() only
    // closes the instance the field points to at the end of the test
    pigInterpreter.close();

    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "local");
    properties.put("zeppelin.pig.includeJobStats", "true");
    pigInterpreter = new PigInterpreter(properties);
    pigInterpreter.open();

    File tmpFile = createDataFile("1\tandy\n"
        + "2\tpeter\n");

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("Counters:"));
    assertTrue(result.message().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().contains("Counters:"));
    assertTrue(result.message().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().contains("Counters:"));
    assertTrue(result.message().contains("Syntax error, unexpected symbol at or near 'a'"));

    // execution error
    pigscript = "a = load 'invalid_path';"
        + "dump a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Counters:"));
    assertTrue(result.message().contains("Input path does not exist"));
  }

  /**
   * Writes {@code content} to a new temporary file that is removed when the JVM exits.
   */
  private static File createDataFile(String content) throws IOException {
    File tmpFile = File.createTempFile("zeppelin", "test");
    tmpFile.deleteOnExit();
    // try-with-resources so the writer is closed even when the write fails
    try (FileWriter writer = new FileWriter(tmpFile)) {
      IOUtils.write(content, writer);
    }
    return tmpFile;
  }
}

View file

@ -0,0 +1,153 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link PigQueryInterpreter}. A {@link PigInterpreter} and a
 * {@link PigQueryInterpreter} are put into the same interpreter group so that the query
 * interpreter can reach the relations defined through the script interpreter.
 */
public class PigQueryInterpreterTest {

  private PigInterpreter pigInterpreter;
  private PigQueryInterpreter pigQueryInterpreter;
  private InterpreterContext context;

  @Before
  public void setUp() {
    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "local");
    properties.put("zeppelin.pig.maxResult", "20");

    pigInterpreter = new PigInterpreter(properties);
    pigQueryInterpreter = new PigQueryInterpreter(properties);
    List<Interpreter> interpreters = new ArrayList<>();
    interpreters.add(pigInterpreter);
    interpreters.add(pigQueryInterpreter);
    InterpreterGroup group = new InterpreterGroup();
    group.put("note_id", interpreters);
    pigInterpreter.setInterpreterGroup(group);
    pigQueryInterpreter.setInterpreterGroup(group);
    pigInterpreter.open();
    pigQueryInterpreter.open();

    context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null,
        null, null);
  }

  @After
  public void tearDown() {
    pigInterpreter.close();
    pigQueryInterpreter.close();
  }

  @Test
  public void testBasics() throws IOException {
    File tmpFile = createDataFile("andy\tmale\t10\n"
        + "peter\tmale\t20\n"
        + "amy\tfemale\t14\n");

    // run script in PigInterpreter
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (name, gender, age);\n"
        + "a2 = load 'invalid_path' as (name, gender, age);\n"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(InterpreterResult.Type.TEXT, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertTrue(result.message().contains("(andy,male,10)\n(peter,male,20)\n(amy,female,14)"));

    // run single line query in PigQueryInterpreter
    String query = "foreach a generate name, age;";
    result = pigQueryInterpreter.interpret(query, context);
    assertEquals(InterpreterResult.Type.TABLE, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertEquals("name\tage\nandy\t10\npeter\t20\namy\t14\n", result.message());

    // run multiple line query in PigQueryInterpreter
    query = "b = group a by gender;\nforeach b generate group as gender, COUNT($1) as count;";
    result = pigQueryInterpreter.interpret(query, context);
    assertEquals(InterpreterResult.Type.TABLE, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertEquals("gender\tcount\nmale\t2\nfemale\t1\n", result.message());

    // syntax error in PigQueryInterpreter
    query = "b = group a by invalid_column;\nforeach b generate group as gender, COUNT($1) as count;";
    result = pigQueryInterpreter.interpret(query, context);
    assertEquals(InterpreterResult.Type.TEXT, result.type());
    assertEquals(InterpreterResult.Code.ERROR, result.code());
    assertTrue(result.message().contains("Projected field [invalid_column] does not exist in schema"));

    // execution error in PigQueryInterpreter
    query = "foreach a2 generate name, age;";
    result = pigQueryInterpreter.interpret(query, context);
    assertEquals(InterpreterResult.Type.TEXT, result.type());
    assertEquals(InterpreterResult.Code.ERROR, result.code());
    assertTrue(result.message().contains("Input path does not exist"));
  }

  @Test
  public void testMaxResult() throws IOException {
    // 30 rows, more than the limit of 20 configured in setUp()
    StringBuilder content = new StringBuilder();
    for (int i = 0; i < 30; ++i) {
      content.append(i + "\tname_" + i + "\n");
    }
    File tmpFile = createDataFile(content.toString());

    // run script in PigInterpreter
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id, name);";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(InterpreterResult.Type.TEXT, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    // empty output
    assertTrue(result.message().isEmpty());

    // run single line query in PigQueryInterpreter
    String query = "foreach a generate id;";
    result = pigQueryInterpreter.interpret(query, context);
    assertEquals(InterpreterResult.Type.TABLE, result.type());
    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
    assertTrue(result.message().contains("id\n0\n1\n2"));
    assertTrue(result.message().contains("Results are limited by 20"));
  }

  /**
   * Writes {@code content} to a new temporary file that is removed when the JVM exits.
   */
  private static File createDataFile(String content) throws IOException {
    File tmpFile = File.createTempFile("zeppelin", "test");
    tmpFile.deleteOnExit();
    // try-with-resources so the writer is closed even when the write fails
    try (FileWriter writer = new FileWriter(tmpFile)) {
      IOUtils.write(content, writer);
    }
    return tmpFile;
  }
}

View file

@ -0,0 +1,22 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.rootLogger = INFO, stdout
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n

View file

@ -62,6 +62,7 @@
<module>shell</module>
<module>livy</module>
<module>hbase</module>
<module>pig</module>
<module>postgresql</module>
<module>jdbc</module>
<module>file</module>
@ -459,6 +460,9 @@
<fileset>
<directory>interpreter</directory>
<followSymlinks>false</followSymlinks>
<excludes>
<exclude>lib/**</exclude>
</excludes>
</fileset>
</filesets>
</configuration>

View file

@ -324,7 +324,7 @@ public class PostgreSqlInterpreter extends Interpreter {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor) {
List<CharSequence> candidates = new ArrayList<CharSequence>();
List<CharSequence> candidates = new ArrayList<>();
if (sqlCompleter != null && sqlCompleter.complete(buf, cursor, candidates) >= 0) {
List completion = Lists.transform(candidates, sequenceToStringTransformer);
return completion;

View file

@ -65,7 +65,7 @@ public class SqlCompleter extends StringsCompleter {
}
};
private Set<String> modelCompletions = new HashSet<String>();
private Set<String> modelCompletions = new HashSet<>();
public SqlCompleter(Set<String> allCompletions, Set<String> dataModelCompletions) {
super(allCompletions);
@ -150,7 +150,7 @@ public class SqlCompleter extends StringsCompleter {
keywords += "," + driverKeywords.toUpperCase();
}
Set<String> completions = new TreeSet<String>();
Set<String> completions = new TreeSet<>();
// Add the keywords from the current JDBC connection
@ -193,7 +193,7 @@ public class SqlCompleter extends StringsCompleter {
public static Set<String> getDataModelMetadataCompletions(Connection connection)
throws SQLException {
Set<String> completions = new TreeSet<String>();
Set<String> completions = new TreeSet<>();
getColumnNames(connection.getMetaData(), completions);
getSchemaNames(connection.getMetaData(), completions);
return completions;

View file

@ -39,7 +39,7 @@ public class SqlCompleterTest extends BasicJDBCTestCaseAdapter {
private Logger logger = LoggerFactory.getLogger(SqlCompleterTest.class);
private final static Set<String> EMPTY = new HashSet<String>();
private final static Set<String> EMPTY = new HashSet<>();
private CompleterTester tester;
@ -157,7 +157,7 @@ public class SqlCompleterTest extends BasicJDBCTestCaseAdapter {
private void expectedCompletions(String buffer, int cursor, Set<String> expected) {
ArrayList<CharSequence> candidates = new ArrayList<CharSequence>();
ArrayList<CharSequence> candidates = new ArrayList<>();
completer.complete(buffer, cursor, candidates);

View file

@ -47,6 +47,6 @@ mvn -Dpython.test.exclude='' test -pl python -am
* JavaBuilder can't send a SIGINT signal to interrupt paragraph execution. Therefore the interpreter directly sends a `kill SIGINT PID` to the Python process to interrupt execution. The Python process catches the SIGINT signal with code defined in bootstrap.py
* Matplotlib display feature is made with SVG export (in string) and then displays it with html code.
* Matplotlib figures are displayed inline with the notebook automatically using a built-in backend for zeppelin in conjunction with a post-execute hook.
* `%python.sql` support for Pandas DataFrames is optional and provided using https://github.com/yhat/pandasql if user have one installed
* `%python.sql` support for Pandas DataFrames is optional and provided using https://github.com/yhat/pandasql if the user has it installed

View file

@ -36,7 +36,8 @@
<py4j.version>0.9.2</py4j.version>
<python.test.exclude>
**/PythonInterpreterWithPythonInstalledTest.java,
**/PythonInterpreterPandasSqlTest.java
**/PythonInterpreterPandasSqlTest.java,
**/PythonInterpreterMatplotlibTest.java
</python.test.exclude>
</properties>

View file

@ -32,6 +32,8 @@ import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.scheduler.Scheduler;
@ -68,7 +70,13 @@ public class PythonInterpreter extends Interpreter {
@Override
public void open() {
LOG.info("Starting Python interpreter .....");
// Add matplotlib display hook
InterpreterGroup intpGroup = getInterpreterGroup();
if (intpGroup != null && intpGroup.getInterpreterHookRegistry() != null) {
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
}
LOG.info("Starting Python interpreter ---->");
LOG.info("Python path is set to:" + property.getProperty(ZEPPELIN_PYTHON));
maxResult = Integer.valueOf(getProperty(MAX_RESULT));
@ -111,7 +119,7 @@ public class PythonInterpreter extends Interpreter {
@Override
public void close() {
LOG.info("closing Python interpreter .....");
LOG.info("closing Python interpreter <----");
try {
if (process != null) {
process.close();
@ -134,11 +142,9 @@ public class PythonInterpreter extends Interpreter {
InterpreterResult result;
if (pythonErrorIn(output)) {
result = new InterpreterResult(Code.ERROR, output);
result = new InterpreterResult(Code.ERROR, output.replaceAll("\\.\\.\\.", ""));
} else {
// TODO(zjffdu), we should not do string replacement operation in the result, as it is
// possible that the output contains the kind of pattern itself, e.g. print("...")
result = new InterpreterResult(Code.SUCCESS, output.replaceAll("\\.\\.\\.", ""));
result = new InterpreterResult(Code.SUCCESS, output);
}
return result;
}

View file

@ -91,11 +91,6 @@ public class PythonProcess {
String line = null;
while (!(line = reader.readLine()).contains(STATEMENT_END)) {
logger.debug("Read line from python shell : " + line);
if (line.equals("...")) {
logger.warn("Syntax error ! ");
output.append("Syntax error ! ");
break;
}
output.append(line + "\n");
}
return output.toString();

View file

@ -16,10 +16,11 @@
# PYTHON 2 / 3 compatibility :
# bootstrap.py must be runnable with Python 2 or 3
# Remove interactive mode displayhook
import os
import sys
import signal
import base64
import warnings
from io import BytesIO
try:
from StringIO import StringIO
@ -117,6 +118,7 @@ class PyZeppelinContext(object):
def __init__(self):
self.max_result = 1000
self._displayhook = lambda *args: None
def input(self, name, defaultValue=""):
print(self.errorMsg)
@ -164,7 +166,7 @@ class PyZeppelinContext(object):
#)
body_buf.close(); header_buf.close()
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
**kwargs):
"""Matplotlib show function
"""
@ -187,6 +189,39 @@ class PyZeppelinContext(object):
html = "%html <div style='width:{width};height:{height}'>{img}<div>"
print(html.format(width=width, height=height, img=img_str))
img.close()
def configure_mpl(self, **kwargs):
    """Forward the given keyword options to ``mpl_config.configure``.

    The ``mpl_config`` module is imported lazily, at call time, so merely
    defining this method does not require the module to be importable.
    """
    import mpl_config as _mpl_config
    _mpl_config.configure(**kwargs)
def _setup_matplotlib(self):
# If we don't have matplotlib installed don't bother continuing
try:
import matplotlib
except ImportError:
pass
# Make sure custom backends are available in the PYTHONPATH
rootdir = os.environ.get('ZEPPELIN_HOME', os.getcwd())
mpl_path = os.path.join(rootdir, 'interpreter', 'lib', 'python')
if mpl_path not in sys.path:
sys.path.append(mpl_path)
# Finally check if backend exists, and if so configure as appropriate
try:
matplotlib.use('module://backend_zinline')
import backend_zinline
# Everything looks good so make config assuming that we are using
# an inline backend
self._displayhook = backend_zinline.displayhook
self.configure_mpl(width=600, height=400, dpi=72,
fontsize=10, interactive=True, format='png')
except ImportError:
# Fall back to Agg if no custom backend installed
matplotlib.use('Agg')
warnings.warn("Unable to load inline matplotlib backend, "
"falling back to Agg")
z = PyZeppelinContext()
z._setup_matplotlib()

View file

@ -18,13 +18,19 @@
}
},
"editor": {
"language": "python"
"language": "python",
"editOnDblClick": false
}
},
{
"group": "python",
"name": "sql",
"className": "org.apache.zeppelin.python.PythonInterpreterPandasSql",
"properties": { }
"properties": {
},
"editor":{
"language": "sql",
"editOnDblClick": false
}
}
]

View file

@ -0,0 +1,169 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.python;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.util.*;

import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * In order for this test to work, test env must have installed:
 * <ol>
 *   <li>Python</li>
 *   <li>Matplotlib</li>
 * </ol>
 *
 * Your PYTHONPATH should also contain the directory of the Matplotlib
 * backend files. Usually these can be found in $ZEPPELIN_HOME/interpreter/lib/python.
 *
 * To run manually on such environment, use:
 * <code>
 * mvn -Dpython.test.exclude='' test -pl python -am
 * </code>
 */
public class PythonInterpreterMatplotlibTest {

  private InterpreterGroup intpGroup;
  private PythonInterpreter python;
  private InterpreterContext context;

  /** Opens a fresh interpreter and builds a minimal context before every test. */
  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("zeppelin.python", "python");
    p.setProperty("zeppelin.python.maxResult", "100");

    intpGroup = new InterpreterGroup();

    python = new PythonInterpreter(p);
    python.setInterpreterGroup(intpGroup);
    python.open();

    List<Interpreter> interpreters = new LinkedList<>();
    interpreters.add(python);
    intpGroup.put("note", interpreters);

    context = new InterpreterContext("note", "id", "title", "text", new AuthenticationInfo(),
        new HashMap<String, Object>(), new GUI(),
        new AngularObjectRegistry(intpGroup.getId(), null), null,
        new LinkedList<InterpreterContextRunner>(), new InterpreterOutput(
            new InterpreterOutputListener() {
              @Override public void onAppend(InterpreterOutput out, byte[] line) {}
              @Override public void onUpdate(InterpreterOutput out, byte[] output) {}
            }));
  }

  /**
   * Closes the interpreter opened in {@link #setUp()}. Without this every test
   * would leave its interpreter open, since setUp() opens a new one each time.
   */
  @After
  public void tearDown() throws Exception {
    if (python != null) {
      python.close();
    }
  }

  @Test
  public void dependenciesAreInstalled() {
    // matplotlib
    InterpreterResult ret = python.interpret("import matplotlib", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());

    // inline backend
    ret = python.interpret("import backend_zinline", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
  }

  @Test
  public void showPlot() {
    // Simple plot test: only the result of the final show() is inspected.
    python.interpret("import matplotlib.pyplot as plt", context);
    python.interpret("z.configure_mpl(interactive=False)", context);
    python.interpret("plt.plot([1, 2, 3])", context);
    InterpreterResult ret = python.interpret("plt.show()", context);

    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.HTML, ret.type());
    assertTrue(ret.message().contains("data:image/png;base64"));
    assertTrue(ret.message().contains("<div>"));
  }

  @Test
  // Test for when configuration is set to auto-close figures after show().
  public void testClose() {
    InterpreterResult ret;
    InterpreterResult ret1;
    InterpreterResult ret2;
    python.interpret("import matplotlib.pyplot as plt", context);
    python.interpret("z.configure_mpl(interactive=False)", context);
    python.interpret("plt.plot([1, 2, 3])", context);
    ret1 = python.interpret("plt.show()", context);

    // Second call to show() should print nothing, and Type should be TEXT.
    // This is because when close=True, there should be no living instances
    // of FigureManager, causing show() to return before setting the output
    // type to HTML.
    ret = python.interpret("plt.show()", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.TEXT, ret.type());
    assertTrue(ret.message().equals(""));

    // Now test that new plot is drawn. It should be identical to the
    // previous one.
    python.interpret("plt.plot([1, 2, 3])", context);
    ret2 = python.interpret("plt.show()", context);
    assertTrue(ret1.message().equals(ret2.message()));
  }

  @Test
  // Test for when configuration is set to not auto-close figures after show().
  public void testNoClose() {
    InterpreterResult ret;
    InterpreterResult ret1;
    InterpreterResult ret2;
    python.interpret("import matplotlib.pyplot as plt", context);
    python.interpret("z.configure_mpl(interactive=False, close=False)", context);
    python.interpret("plt.plot([1, 2, 3])", context);
    ret1 = python.interpret("plt.show()", context);

    // Second call to show() should print nothing, and Type should be HTML.
    // This is because when close=False, there should be living instances
    // of FigureManager, causing show() to set the output
    // type to HTML even though the figure is inactive.
    ret = python.interpret("plt.show()", context);
    assertEquals(ret.message(), InterpreterResult.Code.SUCCESS, ret.code());
    assertEquals(ret.message(), Type.HTML, ret.type());
    assertTrue(ret.message().equals(""));

    // Now test that plot can be reshown if it is updated. It should be
    // different from the previous one because it will plot the same line
    // again but in a different color.
    python.interpret("plt.plot([1, 2, 3])", context);
    ret2 = python.interpret("plt.show()", context);
    assertTrue(!ret1.message().equals(ret2.message()));
  }
}

View file

@ -56,6 +56,8 @@ public class PythonInterpreterWithPythonInstalledTest {
//System.out.println("\nInterpreter response: \n" + ret.message());
assertEquals(InterpreterResult.Code.ERROR, ret.code());
assertTrue(ret.message().length() > 0);
realPython.close();
}
@Test
@ -73,6 +75,36 @@ public class PythonInterpreterWithPythonInstalledTest {
//System.out.println("\nInterpreter response: \n" + ret.message());
assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
assertTrue(ret.message().length() > 0);
realPython.close();
}
/**
 * Checks that a literal "..." in program output is returned unchanged with a
 * SUCCESS code, and that an incomplete block yields an ERROR result carrying
 * the interpreter's IndentationError text.
 */
@Test
public void testZeppelin1555() {
  //given
  PythonInterpreter realPython = new PythonInterpreter(
      PythonInterpreterTest.getPythonTestProperties());
  realPython.open();

  // Close in finally so a failing assertion cannot leave the interpreter open.
  try {
    //when
    InterpreterResult ret1 = realPython.interpret("print \"...\"", null);

    //then
    //System.out.println("\nInterpreter response: \n" + ret1.message());
    assertEquals(InterpreterResult.Code.SUCCESS, ret1.code());
    assertEquals("...\n", ret1.message());

    //when
    InterpreterResult ret2 = realPython.interpret("for i in range(5):", null);

    //then
    //System.out.println("\nInterpreter response: \n" + ret2.message());
    assertEquals(InterpreterResult.Code.ERROR, ret2.code());
    assertEquals(" File \"<stdin>\", line 2\n" +
        " \n" +
        " ^\n" +
        "IndentationError: expected an indented block\n", ret2.message());
  } finally {
    realPython.close();
  }
}
}

View file

@ -140,7 +140,7 @@ import matplotlib.pyplot as plt
import numpy as np
import StringIO
# clear out any previous plots on this notebook
# clear out any previous plots on this note
plt.clf()
def show(p):

View file

@ -60,7 +60,7 @@ public class ShellInterpreter extends Interpreter {
@Override
public void open() {
LOGGER.info("Command timeout property: {}", getProperty(TIMEOUT_PROPERTY));
executors = new ConcurrentHashMap<String, DefaultExecutor>();
executors = new ConcurrentHashMap<>();
if (!StringUtils.isAnyEmpty(getProperty("zeppelin.shell.auth.type"))) {
ShellSecurityImpl.createSecureConfiguration(getProperty(), shell);
}

View file

@ -30,7 +30,8 @@
}
},
"editor": {
"language": "sh"
"language": "sh",
"editOnDblClick": false
}
}
]

View file

@ -525,9 +525,9 @@
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.1</py4j.version>
<py4j.version>0.10.3</py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>

View file

@ -37,7 +37,7 @@
<jsoup.version>1.8.2</jsoup.version>
<mockito.version>1.10.19</mockito.version>
<powermock.version>1.6.4</powermock.version>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
</properties>
<dependencies>
@ -450,38 +450,6 @@
</build>
<profiles>
<profile>
<id>spark-1.1</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.1.1</spark.version>
<akka.version>2.2.3-shaded-protobuf</akka.version>
</properties>
</profile>
<profile>
<id>spark-1.2</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
</profile>
<profile>
<id>spark-1.3</id>
<properties>
<spark.version>1.3.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
@ -519,9 +487,9 @@
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.1</py4j.version>
<py4j.version>0.10.3</py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>

View file

@ -153,7 +153,7 @@ public class DepInterpreter extends Interpreter {
settings.scala$tools$nsc$settings$ScalaSettings$_setter_$classpath_$eq(pathSettings);
// set classloader for scala compiler
settings.explicitParentLoader_$eq(new Some<ClassLoader>(Thread.currentThread()
settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread()
.getContextClassLoader()));
BooleanSetting b = (BooleanSetting) settings.usejavacp();
@ -219,7 +219,7 @@ public class DepInterpreter extends Interpreter {
public Object getLastObject() {
IMain.Request r = (IMain.Request) Utils.invokeMethod(intp, "lastRequest");
Object obj = r.lineRep().call("$result",
JavaConversions.asScalaBuffer(new LinkedList<Object>()));
JavaConversions.asScalaBuffer(new LinkedList<>()));
return obj;
}
@ -290,7 +290,7 @@ public class DepInterpreter extends Interpreter {
Candidates ret = c.complete(buf, cursor);
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<InterpreterCompletion>();
List<InterpreterCompletion> completions = new LinkedList<>();
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate));
@ -298,7 +298,7 @@ public class DepInterpreter extends Interpreter {
return completions;
} else {
return new LinkedList<InterpreterCompletion>();
return new LinkedList<>();
}
}
@ -314,7 +314,7 @@ public class DepInterpreter extends Interpreter {
}
private List<File> classPath(ClassLoader cl) {
List<File> paths = new LinkedList<File>();
List<File> paths = new LinkedList<>();
if (cl == null) {
return paths;
}

View file

@ -48,10 +48,12 @@ import org.apache.spark.sql.SQLContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.spark.dep.SparkDependencyContext;
import org.slf4j.Logger;
@ -111,11 +113,16 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
@Override
public void open() {
// Add matplotlib display hook
InterpreterGroup intpGroup = getInterpreterGroup();
if (intpGroup != null && intpGroup.getInterpreterHookRegistry() != null) {
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
}
DepInterpreter depInterpreter = getDepInterpreter();
// load libraries from Dependency Interpreter
URL [] urls = new URL[0];
List<URL> urlList = new LinkedList<URL>();
List<URL> urlList = new LinkedList<>();
if (depInterpreter != null) {
SparkDependencyContext depc = depInterpreter.getDependencyContext();
@ -165,6 +172,15 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
}
}
/**
 * Builds the environment map for the python process, starting from
 * {@code EnvironmentUtils.getProcEnvironment()}.
 *
 * If that environment has no {@code PYTHONPATH} entry, one is added from the
 * {@code spark.submit.pyFiles} Spark setting with commas turned into colons.
 * An existing {@code PYTHONPATH} is left untouched.
 *
 * @return the environment map, possibly with {@code PYTHONPATH} added
 * @throws IOException declared for {@code EnvironmentUtils.getProcEnvironment()}
 */
private Map setupPySparkEnv() throws IOException {
  Map procEnv = EnvironmentUtils.getProcEnvironment();
  if (procEnv.containsKey("PYTHONPATH")) {
    return procEnv;
  }
  String pyFiles = getSparkConf().get("spark.submit.pyFiles");
  procEnv.put("PYTHONPATH", pyFiles.replaceAll(",", ":"));
  return procEnv;
}
private void createGatewayServerAndStartScript() {
// create python script
createPythonScript();
@ -196,10 +212,8 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
executor.setStreamHandler(streamHandler);
executor.setWatchdog(new ExecuteWatchdog(ExecuteWatchdog.INFINITE_TIMEOUT));
try {
Map env = EnvironmentUtils.getProcEnvironment();
Map env = setupPySparkEnv();
executor.execute(cmd, env, this);
pythonscriptRunning = true;
} catch (IOException e) {

View file

@ -49,6 +49,7 @@ import org.apache.spark.ui.jobs.JobProgressListener;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import org.apache.zeppelin.interpreter.InterpreterProperty;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
@ -101,6 +102,7 @@ public class SparkInterpreter extends Interpreter {
private SparkConf conf;
private static SparkContext sc;
private static SQLContext sqlc;
private static InterpreterHookRegistry hooks;
private static SparkEnv env;
private static Object sparkSession; // spark 2.x
private static JobProgressListener sparkListener;
@ -479,7 +481,7 @@ public class SparkInterpreter extends Interpreter {
//Only one of py4j-0.9-src.zip and py4j-0.8.2.1-src.zip should exist
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip",
"py4j-0.10.1-src.zip"};
"py4j-0.10.1-src.zip", "py4j-0.10.3-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib : pythonLibs) {
File libFile = new File(pysparkPath, lib);
@ -502,6 +504,7 @@ public class SparkInterpreter extends Interpreter {
conf.set("spark.files", conf.get("spark.yarn.dist.files"));
}
conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs));
conf.set("spark.submit.pyFiles", Joiner.on(",").join(pythonLibUris));
}
// Distributes needed libraries to workers
@ -594,7 +597,7 @@ public class SparkInterpreter extends Interpreter {
}
String[] argsArray = args.split(" ");
LinkedList<String> argList = new LinkedList<String>();
LinkedList<String> argList = new LinkedList<>();
for (String arg : argsArray) {
argList.add(arg);
}
@ -717,7 +720,7 @@ public class SparkInterpreter extends Interpreter {
// set classloader for scala compiler
settings.explicitParentLoader_$eq(new Some<ClassLoader>(Thread.currentThread()
settings.explicitParentLoader_$eq(new Some<>(Thread.currentThread()
.getContextClassLoader()));
BooleanSetting b = (BooleanSetting) settings.usejavacp();
b.v_$eq(true);
@ -813,8 +816,10 @@ public class SparkInterpreter extends Interpreter {
sqlc = getSQLContext();
dep = getDependencyResolver();
hooks = getInterpreterGroup().getInterpreterHookRegistry();
z = new ZeppelinContext(sc, sqlc, null, dep,
z = new ZeppelinContext(sc, sqlc, null, dep, hooks,
Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
@ -953,7 +958,7 @@ public class SparkInterpreter extends Interpreter {
}
private List<File> classPath(ClassLoader cl) {
List<File> paths = new LinkedList<File>();
List<File> paths = new LinkedList<>();
if (cl == null) {
return paths;
}
@ -974,7 +979,7 @@ public class SparkInterpreter extends Interpreter {
public List<InterpreterCompletion> completion(String buf, int cursor) {
if (completer == null) {
logger.warn("Can't find completer");
return new LinkedList<InterpreterCompletion>();
return new LinkedList<>();
}
if (buf.length() < cursor) {
@ -990,7 +995,7 @@ public class SparkInterpreter extends Interpreter {
Candidates ret = c.complete(completionText, cursor);
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<InterpreterCompletion>();
List<InterpreterCompletion> completions = new LinkedList<>();
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate));
@ -1063,7 +1068,7 @@ public class SparkInterpreter extends Interpreter {
return null;
}
Object obj = r.lineRep().call("$result",
JavaConversions.asScalaBuffer(new LinkedList<Object>()));
JavaConversions.asScalaBuffer(new LinkedList<>()));
return obj;
}

View file

@ -28,11 +28,14 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.catalyst.expressions.Attribute;
import org.apache.zeppelin.annotation.ZeppelinApi;
import org.apache.zeppelin.annotation.Experimental;
import org.apache.zeppelin.display.AngularObject;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.AngularObjectWatcher;
@ -41,6 +44,7 @@ import org.apache.zeppelin.display.Input.ParamOption;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import org.apache.zeppelin.spark.dep.SparkDependencyResolver;
import org.apache.zeppelin.resource.Resource;
import org.apache.zeppelin.resource.ResourcePool;
@ -53,19 +57,33 @@ import scala.Unit;
* Spark context for zeppelin.
*/
public class ZeppelinContext {
// Map interpreter class name (to be used by hook registry) from
// given replName in paragraph
private static final Map<String, String> interpreterClassMap;
static {
interpreterClassMap = new HashMap<>();
interpreterClassMap.put("spark", "org.apache.zeppelin.spark.SparkInterpreter");
interpreterClassMap.put("sql", "org.apache.zeppelin.spark.SparkSqlInterpreter");
interpreterClassMap.put("dep", "org.apache.zeppelin.spark.DepInterpreter");
interpreterClassMap.put("pyspark", "org.apache.zeppelin.spark.PySparkInterpreter");
}
private SparkDependencyResolver dep;
private InterpreterContext interpreterContext;
private int maxResult;
private List<Class> supportedClasses;
private InterpreterHookRegistry hooks;
public ZeppelinContext(SparkContext sc, SQLContext sql,
InterpreterContext interpreterContext,
SparkDependencyResolver dep,
InterpreterHookRegistry hooks,
int maxResult) {
this.sc = sc;
this.sqlContext = sql;
this.interpreterContext = interpreterContext;
this.dep = dep;
this.hooks = hooks;
this.maxResult = maxResult;
this.supportedClasses = new ArrayList<>();
try {
@ -116,7 +134,7 @@ public class ZeppelinContext {
@ZeppelinApi
public scala.collection.Iterable<Object> checkbox(String name,
scala.collection.Iterable<Tuple2<Object, String>> options) {
List<Object> allChecked = new LinkedList<Object>();
List<Object> allChecked = new LinkedList<>();
for (Tuple2<Object, String> option : asJavaIterable(options)) {
allChecked.add(option._1());
}
@ -382,7 +400,7 @@ public class ZeppelinContext {
@ZeppelinApi
public List<String> listParagraphs() {
List<String> paragraphs = new LinkedList<String>();
List<String> paragraphs = new LinkedList<>();
for (InterpreterContextRunner r : interpreterContext.getRunners()) {
paragraphs.add(r.getParagraphId());
@ -697,6 +715,90 @@ public class ZeppelinContext {
registry.remove(name, noteId, null);
}
/**
 * Resolve the interpreter class name for a name entered in a paragraph.
 *
 * @param replName repl name such as {@code pyspark} or {@code spark.sql}; if it already
 *                 equals one of the mapped interpreter class names it is returned as is
 * @return the mapped interpreter class name, or {@code null} when nothing is mapped
 */
public String getClassNameFromReplName(String replName) {
  // Already a known class name: hand it straight back.
  if (interpreterClassMap.containsValue(replName)) {
    return replName;
  }
  // Drop the "spark." qualifier so that e.g. "spark.sql" is looked up as "sql".
  String key = replName.contains("spark.") ? replName.replace("spark.", "") : replName;
  return interpreterClassMap.get(key);
}
/**
 * Register code on a hook event of the named interpreter, for the current note.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param cmd The code to be executed by the interpreter on given event
 * @param replName Name of the interpreter; resolved to a class name through
 *                 {@link #getClassNameFromReplName(String)}
 */
@Experimental
public void registerHook(String event, String cmd, String replName) {
  hooks.register(interpreterContext.getNoteId(), getClassNameFromReplName(replName), event, cmd);
}
/**
 * Register code on a hook event of the current repl, i.e. the class name
 * reported by the interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param cmd The code to be executed by the interpreter on given event
 */
@Experimental
public void registerHook(String event, String cmd) {
  registerHook(event, cmd, interpreterContext.getClassName());
}
/**
 * Look up the code registered on a hook event of the named interpreter,
 * for the current note.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param replName Name of the interpreter; resolved to a class name through
 *                 {@link #getClassNameFromReplName(String)}
 * @return whatever the hook registry returns for that note, class and event
 */
@Experimental
public String getHook(String event, String replName) {
  return hooks.get(interpreterContext.getNoteId(), getClassNameFromReplName(replName), event);
}
/**
 * Look up the code registered on a hook event of the current repl, i.e. the
 * class name reported by the interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @return the result of {@link #getHook(String, String)} for the current repl
 */
@Experimental
public String getHook(String event) {
  return getHook(event, interpreterContext.getClassName());
}
/**
 * Unbind the code registered on a hook event of the named interpreter,
 * for the current note.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param replName Name of the interpreter; resolved to a class name through
 *                 {@link #getClassNameFromReplName(String)}
 */
@Experimental
public void unregisterHook(String event, String replName) {
  hooks.unregister(interpreterContext.getNoteId(), getClassNameFromReplName(replName), event);
}
/**
 * Unbind the code registered on a hook event of the current repl, i.e. the
 * class name reported by the interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 */
@Experimental
public void unregisterHook(String event) {
  unregisterHook(event, interpreterContext.getClassName());
}
/**
* Add object into resource pool

View file

@ -141,6 +141,9 @@ public class ZeppelinR implements ExecuteResultHandler {
cmd.addArgument(Integer.toString(port));
cmd.addArgument(libPath);
cmd.addArgument(Integer.toString(sparkVersion.toNumber()));
// dump out the R command to facilitate manually running it, e.g. for fault diagnosis purposes
logger.debug(cmd.toString());
executor = new DefaultExecutor();
outputStream = new SparkOutputStream(logger);

View file

@ -49,16 +49,16 @@ import scala.Console;
*
*/
public class SparkDependencyContext {
List<Dependency> dependencies = new LinkedList<Dependency>();
List<Repository> repositories = new LinkedList<Repository>();
List<Dependency> dependencies = new LinkedList<>();
List<Repository> repositories = new LinkedList<>();
List<File> files = new LinkedList<File>();
List<File> filesDist = new LinkedList<File>();
List<File> files = new LinkedList<>();
List<File> filesDist = new LinkedList<>();
private RepositorySystem system = Booter.newRepositorySystem();
private RepositorySystemSession session;
private RemoteRepository mavenCentral = Booter.newCentralRepository();
private RemoteRepository mavenLocal = Booter.newLocalRepository();
private List<RemoteRepository> additionalRepos = new LinkedList<RemoteRepository>();
private List<RemoteRepository> additionalRepos = new LinkedList<>();
public SparkDependencyContext(String localRepoPath, String additionalRemoteRepository) {
session = Booter.newRepositorySystemSession(system, localRepoPath);
@ -88,11 +88,11 @@ public class SparkDependencyContext {
public void reset() {
Console.println("DepInterpreter(%dep) deprecated. "
+ "Remove dependencies and repositories through GUI interpreter menu instead.");
dependencies = new LinkedList<Dependency>();
repositories = new LinkedList<Repository>();
dependencies = new LinkedList<>();
repositories = new LinkedList<>();
files = new LinkedList<File>();
filesDist = new LinkedList<File>();
files = new LinkedList<>();
filesDist = new LinkedList<>();
}
private void addRepoFromProperty(String listOfRepo) {

View file

@ -114,7 +114,7 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
}
// NOTE: Must use reflection until this is exposed/fixed upstream in Scala
List<String> classPaths = new LinkedList<String>();
List<String> classPaths = new LinkedList<>();
for (URL url : urls) {
classPaths.add(url.getPath());
}
@ -151,7 +151,7 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
private MergedClassPath<AbstractFile> mergeUrlsIntoClassPath(JavaPlatform platform, URL[] urls) {
IndexedSeq<ClassPath<AbstractFile>> entries =
((MergedClassPath<AbstractFile>) platform.classPath()).entries();
List<ClassPath<AbstractFile>> cp = new LinkedList<ClassPath<AbstractFile>>();
List<ClassPath<AbstractFile>> cp = new LinkedList<>();
for (int i = 0; i < entries.size(); i++) {
cp.add(entries.apply(i));
@ -200,7 +200,7 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
return loadFromMvn(artifact, excludes, addSparkContext);
} else {
loadFromFs(artifact, addSparkContext);
LinkedList<String> libs = new LinkedList<String>();
LinkedList<String> libs = new LinkedList<>();
libs.add(artifact);
return libs;
}
@ -224,8 +224,8 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
private List<String> loadFromMvn(String artifact, Collection<String> excludes,
boolean addSparkContext) throws Exception {
List<String> loadedLibs = new LinkedList<String>();
Collection<String> allExclusions = new LinkedList<String>();
List<String> loadedLibs = new LinkedList<>();
Collection<String> allExclusions = new LinkedList<>();
allExclusions.addAll(excludes);
allExclusions.addAll(Arrays.asList(exclusions));
@ -244,8 +244,8 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
}
}
List<URL> newClassPathList = new LinkedList<URL>();
List<File> files = new LinkedList<File>();
List<URL> newClassPathList = new LinkedList<>();
List<File> files = new LinkedList<>();
for (ArtifactResult artifactResult : listOfArtifact) {
logger.info("Load " + artifactResult.getArtifact().getGroupId() + ":"
+ artifactResult.getArtifact().getArtifactId() + ":"
@ -302,7 +302,7 @@ public class SparkDependencyResolver extends AbstractDependencyResolver {
}
public static Collection<String> inferScalaVersion(Collection<String> artifact) {
List<String> list = new LinkedList<String>();
List<String> list = new LinkedList<>();
for (String a : artifact) {
list.add(inferScalaVersion(a));
}

Some files were not shown because too many files have changed in this diff Show more