Merge pull request #7 from apache/master

merge master
This commit is contained in:
mfelgamal 2016-08-16 20:33:46 +03:00 committed by GitHub
commit 858f1e1014
234 changed files with 5643 additions and 1942 deletions

3
.gitignore vendored
View file

@ -105,3 +105,6 @@ tramp
.\#*
*.swp
**/dependency-reduced-pom.xml
# Generated by zeppelin-examples
/helium

View file

@ -33,37 +33,41 @@ addons:
matrix:
include:
# Test all modules with spark 2.0.0 and scala 2.11
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples" BUILD_FLAG="package -Dscala-2.10 -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test all modules with scala 2.11
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Dscala-2.11 -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SCALA_VER="2.11" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
env: SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.4.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.2.2
- jdk: "oraclejdk7"
env: SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test spark module for 1.1.1
- jdk: "oraclejdk7"
env: SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
env: TEST_SELENIUM="true" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
before_install:
- "ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin"
@ -72,6 +76,7 @@ before_install:
- R -e "install.packages('knitr', repos = 'http://cran.us.r-project.org', lib='~/R')"
- export R_LIBS='~/R'
- "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
- ./dev/change_scala_version.sh $SCALA_VER
install:
- mvn $BUILD_FLAG $PROFILE -B
@ -95,6 +100,7 @@ after_failure:
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.log
- cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out
- cat zeppelin-web/npm-debug.log
- cat spark-*/logs/*
after_script:
- ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER

View file

@ -202,7 +202,7 @@ Zeppelin uses Travis for CI. In the project root there is .travis.yml that confi
```
cd zeppelin-server
HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME mvn exec:java -Dexec.mainClass="com.nflabs.zeppelin.server.ZeppelinServer" -Dexec.args=""
HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args=""
```
or use daemon script

View file

@ -242,7 +242,8 @@ The following components are provided under the MIT-style license. See project l
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(MIT Style) jekyll-table-of-contents (https://github.com/ghiculescu/jekyll-table-of-contents) - https://github.com/ghiculescu/jekyll-table-of-contents/blob/master/LICENSE.txt
(MIT Style) lunr.js (https://github.com/olivernn/lunr.js) - https://github.com/olivernn/lunr.js/blob/v0.7.1/LICENSE
========================================================================
Apache licenses
========================================================================
@ -251,6 +252,7 @@ The following components are provided under the Apache License. See project link
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Software under ./bigquery/* was developed at Google (http://www.google.com/). Licensed under the Apache v2.0 License.
========================================================================
BSD 3-Clause licenses
@ -270,4 +272,4 @@ BSD 2-Clause licenses
The following components are provided under the BSD 2-Clause license. See file headers and project links for details.
(BSD 2 Clause) portions of SQLLine (http://sqlline.sourceforge.net/) - http://sqlline.sourceforge.net/#license
jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java
jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java

1
NOTICE
View file

@ -4,5 +4,4 @@ Copyright 2015 - 2016 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Portions of this software were developed at NFLabs, Inc. (http://www.nflabs.com)

View file

@ -93,9 +93,9 @@ _Notes:_
#### Install maven
```
wget http://www.eu.apache.org/dist/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
sudo tar -zxf apache-maven-3.3.3-bin.tar.gz -C /usr/local/
sudo ln -s /usr/local/apache-maven-3.3.3/bin/mvn /usr/local/bin/mvn
wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
sudo tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/
sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
```
_Notes:_
@ -124,6 +124,7 @@ Set spark major version
Available profiles are
```
-Pspark-2.0
-Pspark-1.6
-Pspark-1.5
-Pspark-1.4
@ -157,6 +158,16 @@ Available profiles are
minor version can be adjusted by `-Dhadoop.version=x.x.x`
##### `-Pscala-[version] (optional)`
set scala version (default 2.10)
Available profiles are
```
-Pscala-2.10
-Pscala-2.11
```
##### `-Pyarn` (optional)
enable YARN support for local mode
@ -199,14 +210,18 @@ Available profiles are
Build examples under zeppelin-examples directory
#### Example
Here're some examples:
```sh
# basic build
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark
# build with spark-2.0, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
# build with spark-1.6, scala-2.10
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr
# spark-cassandra integration
mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests
@ -292,6 +307,7 @@ For configuration details check __`./conf`__ subdirectory.
To produce a Zeppelin package compiled with Scala 2.11, use the -Pscala-2.11 profile:
```
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests clean install
```

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Alluxio interpreter</name>
<url>http://www.apache.org</url>
<properties>
<alluxio.version>1.0.0</alluxio.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Angular interpreter</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

109
bigquery/README.md Normal file
View file

@ -0,0 +1,109 @@
# Overview
BigQuery interpreter for Apache Zeppelin
# Prerequisites
You can follow the instructions at [Apache Zeppelin on Dataproc](https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/apache-zeppelin/README.MD) to bring up Zeppelin on Google dataproc.
You could also install and bring up Zeppelin on Google Compute Engine.
# Unit Tests
BigQuery Unit tests are excluded as these tests depend on the BigQuery external service. This is because BigQuery does not have a local mock at this point.
If you would like to run these tests manually, please follow these steps:
* [Create a new project](https://support.google.com/cloud/answer/6251787?hl=en)
* [Create a Google Compute Engine instance](https://cloud.google.com/compute/docs/instances/create-start-instance)
* Copy the project ID that you created and add it to the property "projectId" in `resources/constants.json`
* Run the command `mvn <options> -Dbigquery.test.exclude='' test -pl bigquery -am`
# Interpreter Configuration
Configure the following properties during Interpreter creation.
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.bigquery.project_id</td>
<td> </td>
<td>Google Project Id</td>
</tr>
<tr>
<td>zeppelin.bigquery.wait_time</td>
<td>5000</td>
<td>Query Timeout in Milliseconds</td>
</tr>
<tr>
<td>zeppelin.bigquery.max_no_of_rows</td>
<td>100000</td>
<td>Max result set size</td>
</tr>
</table>
# Connection
The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables.
# Google BigQuery API Javadoc
[API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
[Source](http://central.maven.org/maven2/com/google/apis/google-api-services-bigquery/v2-rev265-1.21.0/google-api-services-bigquery-v2-rev265-1.21.0-sources.jar)
We have used the curated veneer version of the Java APIs versus the [Idiomatic Java client](https://github.com/GoogleCloudPlatform/gcloud-java/tree/master/gcloud-java-bigquery) to build the interpreter. This is mainly for usability reasons.
# Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
# Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For Example, SQL to query for top 10 departure delays across airports using the flights public dataset
```bash
%bigquery.sql
SELECT departure_airport,sum(case when departure_delay>0 then 1 else 0 end) as no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
group by departure_airport
order by 2 desc
limit 10
```
Another Example, SQL to query for most commonly used java packages from the github data hosted in BigQuery
```bash
%bigquery.sql
SELECT
package,
COUNT(*) count
FROM (
SELECT
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
id
FROM (
SELECT
SPLIT(content, '\n') line,
id
FROM
[bigquery-public-data:github_repos.sample_contents]
WHERE
content CONTAINS 'import'
AND sample_path LIKE '%.java'
HAVING
LEFT(line, 6)='import' )
GROUP BY
package,
id )
GROUP BY
1
ORDER BY
count DESC
LIMIT
40
```
# Sample Screenshot
![Zeppelin BigQuery](https://cloud.githubusercontent.com/assets/10060731/16938817/b9213ea0-4db6-11e6-8c3b-8149a0bdf874.png)

176
bigquery/pom.xml Normal file
View file

@ -0,0 +1,176 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-bigquery</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: BigQuery interpreter</name>
<dependencies>
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-bigquery</artifactId>
<version>v2-rev265-1.21.0</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
<artifactId>google-oauth-client</artifactId>
<version>${project.oauth.version}</version>
</dependency>
<dependency>
<groupId>com.google.http-client</groupId>
<artifactId>google-http-client-jackson2</artifactId>
<version>${project.http.version}</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
<artifactId>google-oauth-client-jetty</artifactId>
<version>${project.oauth.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<project.http.version>1.21.0</project.http.version>
<project.oauth.version>1.21.0</project.oauth.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<bigquery.test.exclude>**/BigQueryInterpreterTest.java</bigquery.test.exclude>
</properties>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>${bigquery.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/bqsql</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/bqsql</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>
org.apache.zeppelin.bigquery.BigQueryInterpreter
</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,338 @@
/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.bigquery;
import static org.apache.commons.lang.StringUtils.containsIgnoreCase;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;
import com.google.api.client.json.GenericJson;
import com.google.api.services.bigquery.Bigquery.Datasets;
import com.google.api.services.bigquery.BigqueryRequest;
import com.google.api.services.bigquery.model.DatasetList;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.QueryRequest;
import com.google.api.services.bigquery.model.QueryResponse;
import com.google.api.services.bigquery.model.JobCancelResponse;
import com.google.gson.Gson;
import java.io.IOException;
import java.util.Collection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Properties;
import java.util.Set;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import java.io.PrintStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
 * BigQuery interpreter for Zeppelin.
 *
 * <p>Interpreter properties:</p>
 * <ul>
 * <li>{@code zeppelin.bigquery.project_id} - Project ID in GCP</li>
 * <li>{@code zeppelin.bigquery.wait_time} - Query Timeout in ms</li>
 * <li>{@code zeppelin.bigquery.max_no_of_rows} - Max Result size</li>
 * </ul>
 *
 * <p>How to use:</p>
 * <pre>{@code
 * %bigquery.sql
 * SELECT departure_airport,
 *   sum(case when departure_delay>0 then 1 else 0 end) as no_of_delays
 * FROM [bigquery-samples:airline_ontime_data.flights]
 * group by departure_airport
 * order by 2 desc
 * limit 10
 * }</pre>
 *
 * <p>NOTE(review): the BigQuery client and the id of the most recent job are held in static
 * fields, so they are shared by every interpreter instance in the JVM. {@link #close()} on one
 * instance drops the client for all of them, and {@link #cancel(InterpreterContext)} cancels
 * whichever job was submitted last. Confirm that this sharing is intended.</p>
 */
public class BigQueryInterpreter extends Interpreter {

  // Static so that the static helpers (pollJob, getPages) can log as well.
  private static final Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class);
  private static final char NEWLINE = '\n';
  private static final char TAB = '\t';

  // Message carried by the NullPointerException that printRows raises for unusable responses.
  private static final String SQL_ERROR_MESSAGE = "SQL Execution returned an error!";

  // volatile: required for the double-checked locking in open() to publish the client safely.
  private static volatile Bigquery service = null;
  //Mutex created to create the singleton in thread-safe fashion.
  private static final Object serviceLock = new Object();

  static final String PROJECT_ID = "zeppelin.bigquery.project_id";
  static final String WAIT_TIME = "zeppelin.bigquery.wait_time";
  static final String MAX_ROWS = "zeppelin.bigquery.max_no_of_rows";

  // Written by run() and read by cancel(), which may execute on different threads.
  private static volatile String jobId = null;
  private static volatile String projectId = null;

  private static final List<InterpreterCompletion> NO_COMPLETION = new ArrayList<>();
  // Failure recorded by the last open() attempt of this instance; null when it succeeded.
  private Exception exceptionOnConnect;

  private static final Function<CharSequence, String> sequenceToStringTransformer =
      new Function<CharSequence, String>() {
        @Override
        public String apply(CharSequence seq) {
          return seq.toString();
        }
      };

  public BigQueryInterpreter(Properties property) {
    super(property);
  }

  /**
   * Creates the shared BigQuery client if it does not exist yet.
   *
   * <p>A failure is logged and remembered in {@code exceptionOnConnect}; it is not thrown.</p>
   */
  @Override
  public void open() {
    if (service == null) {
      synchronized (serviceLock) {
        if (service == null) {
          try {
            service = createAuthorizedClient();
            exceptionOnConnect = null;
            logger.info("Opened BigQuery SQL Connection");
          } catch (IOException e) {
            logger.error("Cannot open connection", e);
            exceptionOnConnect = e;
            close();
          }
        }
      }
    }
  }

  /**
   * Builds a BigQuery client from the application default credentials, adding the BigQuery
   * scopes when the credential reports that scoping is required.
   *
   * @return the authorized client
   * @throws IOException if the application default credentials cannot be obtained
   */
  private static Bigquery createAuthorizedClient() throws IOException {
    HttpTransport transport = new NetHttpTransport();
    JsonFactory jsonFactory = new JacksonFactory();
    GoogleCredential credential = GoogleCredential.getApplicationDefault(transport, jsonFactory);

    if (credential.createScopedRequired()) {
      Collection<String> bigqueryScopes = BigqueryScopes.all();
      credential = credential.createScoped(bigqueryScopes);
    }

    return new Bigquery.Builder(transport, jsonFactory, credential)
        .setApplicationName("Zeppelin/1.0 (GPN:Apache Zeppelin;)").build();
  }

  /**
   * Renders one page of query results as tab separated text: a header line with the column
   * names followed by one line per row. Every cell, including the last of a line, is followed
   * by a tab.
   *
   * @param response the page to render
   * @return the rendered page; only the header line when the page has no rows
   * @throws NullPointerException with the message "SQL Execution returned an error!" when the
   *         response is null or carries no schema
   */
  public static String printRows(final GetQueryResultsResponse response) {
    return formatPage(response, true);
  }

  /**
   * Shared renderer behind {@link #printRows(GetQueryResultsResponse)}.
   *
   * @param response the page to render
   * @param includeHeader whether the column-name line is emitted before the rows
   * @return the rendered text
   * @throws NullPointerException with the message "SQL Execution returned an error!" when a
   *         required part of the response is missing
   */
  private static String formatPage(final GetQueryResultsResponse response,
      final boolean includeHeader) {
    StringBuilder msg = new StringBuilder();
    try {
      if (includeHeader) {
        for (TableFieldSchema schem : response.getSchema().getFields()) {
          msg.append(schem.getName());
          msg.append(TAB);
        }
        msg.append(NEWLINE);
      }
      // A query that matches nothing has no row list at all; that is an empty result, not an
      // error.
      List<TableRow> rows = response.getRows();
      if (rows != null) {
        for (TableRow row : rows) {
          for (TableCell field : row.getF()) {
            // append(Object) renders a null value as "null" instead of throwing.
            msg.append(field.getV());
            msg.append(TAB);
          }
          msg.append(NEWLINE);
        }
      }
      return msg.toString();
    } catch (NullPointerException ex) {
      // Kept from the original contract: callers recognise failures by this exception/message.
      throw new NullPointerException(SQL_ERROR_MESSAGE);
    }
  }

  /**
   * Polls a job until its state is "DONE". Future use.
   *
   * @param request the prepared job lookup, executed once per poll
   * @param interval pause between polls in milliseconds
   * @return the job as returned by the final poll
   * @throws IOException if a poll request fails
   * @throws InterruptedException if the thread is interrupted while waiting
   */
  public static Job pollJob(final Bigquery.Jobs.Get request, final long interval)
      throws IOException, InterruptedException {
    Job job = request.execute();
    while (!job.getStatus().getState().equals("DONE")) {
      logger.info("Job is {} waiting {} milliseconds...", job.getStatus().getState(), interval);
      Thread.sleep(interval);
      job = request.execute();
    }
    return job;
  }

  /**
   * Pages through the results of an arbitrary BigQuery request.
   *
   * <p>Each call to {@code next()} executes the request. While the response contains a
   * {@code pageToken} entry the token is set on the request for the following call; otherwise
   * iteration ends. If a request fails with an {@link IOException} the failure is logged,
   * {@code next()} returns {@code null} and iteration ends.</p>
   *
   * @param requestTemplate the request to execute for the first page
   * @param <T> the response type of the request
   * @return an iterator over the pages; {@code remove()} is not supported
   */
  public static <T extends GenericJson> Iterator<T> getPages(
      final BigqueryRequest<T> requestTemplate) {
    class PageIterator implements Iterator<T> {
      private BigqueryRequest<T> request;
      private boolean hasNext = true;

      public PageIterator(final BigqueryRequest<T> requestTemplate) {
        this.request = requestTemplate;
      }

      @Override
      public boolean hasNext() {
        return hasNext;
      }

      @Override
      public T next() {
        if (!hasNext) {
          throw new NoSuchElementException();
        }
        try {
          T response = request.execute();
          if (response.containsKey("pageToken")) {
            request = request.set("pageToken", response.get("pageToken"));
          } else {
            hasNext = false;
          }
          return response;
        } catch (IOException e) {
          logger.error("Failed to fetch a page of BigQuery results", e);
          // Stop here; otherwise a caller looping on hasNext() would retry forever.
          hasNext = false;
          return null;
        }
      }

      @Override
      public void remove() {
        // Pages are read-only; the previous implementation fetched another page instead.
        throw new UnsupportedOperationException();
      }
    }

    return new PageIterator(requestTemplate);
  }

  /**
   * Runs the SQL on BigQuery and converts the result into a {@code %table} interpreter result.
   *
   * @param sql the statement to run
   * @return SUCCESS with the table text, or ERROR with a message when the connection is
   *         missing, a numeric property is invalid, the request fails or a page is unusable
   */
  private InterpreterResult executeSql(String sql) {
    if (service == null) {
      String reason = exceptionOnConnect == null
          ? "connection is not open" : exceptionOnConnect.getMessage();
      return new InterpreterResult(Code.ERROR, "Cannot connect to BigQuery: " + reason);
    }

    String projId = getProperty(PROJECT_ID);
    long wTime;
    long maxRows;
    try {
      wTime = Long.parseLong(getProperty(WAIT_TIME));
      maxRows = Long.parseLong(getProperty(MAX_ROWS));
    } catch (NumberFormatException ex) {
      logger.error("Invalid numeric interpreter property", ex);
      return new InterpreterResult(Code.ERROR,
          "Invalid value for " + WAIT_TIME + " or " + MAX_ROWS + ": " + ex.getMessage());
    }

    Iterator<GetQueryResultsResponse> pages;
    try {
      pages = run(sql, projId, wTime, maxRows);
    } catch (IOException ex) {
      logger.error(ex.getMessage(), ex);
      return new InterpreterResult(Code.ERROR, ex.getMessage());
    }

    StringBuilder finalmessage = new StringBuilder("%table ");
    try {
      // The header belongs on the first line only; repeating it for every page would put
      // header lines between the data rows of the table.
      boolean headerWritten = false;
      while (pages.hasNext()) {
        finalmessage.append(formatPage(pages.next(), !headerWritten));
        headerWritten = true;
      }
      return new InterpreterResult(Code.SUCCESS, finalmessage.toString());
    } catch (NullPointerException ex) {
      return new InterpreterResult(Code.ERROR, ex.getMessage());
    }
  }

  /**
   * Submits the query and returns an iterator over its result pages. The job and project ids
   * of the submitted job are remembered so that it can be cancelled later.
   *
   * @param queryString the SQL to run
   * @param projId the project the query is submitted under
   * @param wTime value passed to {@code QueryRequest.setTimeoutMs}
   * @param maxRows value passed to {@code QueryRequest.setMaxResults}
   * @return iterator over the result pages of the submitted job
   * @throws IOException if submitting the query fails
   */
  public static Iterator<GetQueryResultsResponse> run(final String queryString,
      final String projId, final long wTime, final long maxRows)
      throws IOException {
    QueryResponse query = service.jobs().query(
        projId,
        new QueryRequest().setTimeoutMs(wTime).setQuery(queryString).setMaxResults(maxRows))
        .execute();
    // Use locals for the follow-up request: the static fields may be overwritten by another
    // query before they would be read back.
    String submittedJobId = query.getJobReference().getJobId();
    String submittedProjectId = query.getJobReference().getProjectId();
    jobId = submittedJobId;
    projectId = submittedProjectId;
    GetQueryResults getRequest = service.jobs().getQueryResults(
        submittedProjectId,
        submittedJobId);
    return getPages(getRequest);
  }

  /** Drops the shared BigQuery client. */
  @Override
  public void close() {
    logger.info("Close bqsql connection!");

    service = null;
  }

  /**
   * Executes the paragraph text as a single BigQuery SQL statement.
   *
   * @param sql the statement to run
   * @param contextInterpreter the paragraph context (not used)
   * @return the result of the statement
   */
  @Override
  public InterpreterResult interpret(String sql, InterpreterContext contextInterpreter) {
    logger.info("Run SQL command '{}'", sql);
    return executeSql(sql);
  }

  /** Returns a FIFO scheduler keyed by this class name and this instance's hash code. */
  @Override
  public Scheduler getScheduler() {
    return SchedulerFactory.singleton().createOrGetFIFOScheduler(
        BigQueryInterpreter.class.getName() + this.hashCode());
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  /** Progress is not tracked; always returns 0. */
  @Override
  public int getProgress(InterpreterContext context) {
    return 0;
  }

  /**
   * Asks BigQuery to cancel the most recently submitted job, if one is remembered.
   *
   * @param context the paragraph context (not used)
   */
  @Override
  public void cancel(InterpreterContext context) {
    logger.info("Trying to Cancel current query statement.");

    // Snapshot the shared state so the null checks and the request see the same values.
    Bigquery client = service;
    String currentJobId = jobId;
    String currentProjectId = projectId;

    if (client != null && currentJobId != null && currentProjectId != null) {
      try {
        client.jobs().cancel(currentProjectId, currentJobId).execute();
        jobId = null;
        logger.info("Query Execution cancelled");
      } catch (IOException ex) {
        logger.error("Could not cancel the SQL execution", ex);
      }
    } else {
      logger.info("Query Execution was already cancelled");
    }
  }

  /** Completion is not supported; always returns an empty list. */
  @Override
  public List<InterpreterCompletion> completion(String buf, int cursor) {
    return NO_COMPLETION;
  }
}

View file

@ -0,0 +1,5 @@
{
"projectId": "google.com:babupe-df-test",
"oneQuery": "select 1",
"wrongQuery": "select bad syntax"
}

View file

@ -0,0 +1,27 @@
[
{
"group": "bigquery",
"name": "sql",
"className": "org.apache.zeppelin.bigquery.BigQueryInterpreter",
"properties": {
"zeppelin.bigquery.project_id": {
"envName": null,
"propertyName": "zeppelin.bigquery.project_id",
"defaultValue": " ",
"description": "Google Project ID"
},
"zeppelin.bigquery.wait_time": {
"envName": null,
"propertyName": "zeppelin.bigquery.wait_time",
"defaultValue": "5000",
"description": "Query timeout in Milliseconds"
},
"zeppelin.bigquery.max_no_of_rows": {
"envName": null,
"propertyName": "zeppelin.bigquery.max_no_of_rows",
"defaultValue": "100000",
"description": "Maximum number of rows to fetch from BigQuery"
}
}
}
]

View file

@ -0,0 +1,118 @@
/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.bigquery;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.Before;
import org.junit.Test;
import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Tests for {@link BigQueryInterpreter}.
 *
 * <p>The project id and the queries to run are read from {@code /constants.json}
 * on the test classpath. NOTE(review): the tests open a real interpreter against
 * the configured project, so they presumably need network access and valid
 * Google credentials - confirm before enabling them in CI.
 */
public class BigQueryInterpreterTest {

  /** Fixture values deserialized by Gson from {@code /constants.json}. */
  protected static class Constants {
    private String projectId;
    private String oneQuery;
    private String wrongQuery;

    /** @return the Google project id the interpreter is configured with */
    public String getProjectId() {
      return projectId;
    }

    /** @return the query the fixture stores under {@code oneQuery} */
    public String getOne() {
      return oneQuery;
    }

    /** @return the query the fixture stores under {@code wrongQuery} */
    public String getWrong() {
      return wrongQuery;
    }
  }

  /** Lazily loaded fixture, shared by every instance JUnit creates. */
  @SuppressWarnings("checkstyle:abbreviationaswordinname")
  protected static Constants CONSTANTS = null;

  /**
   * Loads {@link #CONSTANTS} on first construction.
   *
   * @throws FileNotFoundException if {@code /constants.json} is not on the classpath
   * @throws JsonSyntaxException if the fixture is not valid JSON for {@link Constants}
   * @throws JsonIOException if Gson fails to read the fixture
   */
  public BigQueryInterpreterTest()
      throws JsonSyntaxException, JsonIOException, FileNotFoundException {
    if (CONSTANTS == null) {
      InputStream is = this.getClass().getResourceAsStream("/constants.json");
      if (is == null) {
        // getResourceAsStream signals "missing" with null; fail with a clear
        // message instead of a NullPointerException inside InputStreamReader.
        throw new FileNotFoundException("/constants.json not found on the test classpath");
      }
      // Decode explicitly as UTF-8 so the result does not depend on the platform charset.
      InputStreamReader reader =
          new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8);
      try {
        CONSTANTS = new Gson().fromJson(reader, Constants.class);
      } finally {
        try {
          reader.close();
        } catch (java.io.IOException e) {
          // Best effort: parsing has already succeeded or failed by now, and a
          // failure to close a read-only classpath stream must not mask that outcome.
        }
      }
    }
  }

  private InterpreterGroup intpGroup;
  private BigQueryInterpreter bqInterpreter;
  // NOTE(review): context is never assigned in this class, so interpret() below
  // receives null. Confirm BigQueryInterpreter tolerates that, or build a real
  // InterpreterContext in setUp().
  private InterpreterContext context;

  /** Creates and opens a fresh interpreter configured from the fixture before each test. */
  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("zeppelin.bigquery.project_id", CONSTANTS.getProjectId());
    p.setProperty("zeppelin.bigquery.wait_time", "5000");
    p.setProperty("zeppelin.bigquery.max_no_of_rows", "100");

    intpGroup = new InterpreterGroup();

    bqInterpreter = new BigQueryInterpreter(p);
    bqInterpreter.setInterpreterGroup(intpGroup);
    bqInterpreter.open();
  }

  /** The fixture's {@code oneQuery} must succeed and come back as a TABLE result. */
  @Test
  public void sqlSuccess() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getOne(), context);

    assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
    // JUnit's contract is assertEquals(expected, actual); the swapped order
    // produced a misleading failure message.
    assertEquals(InterpreterResult.Type.TABLE, ret.type());
  }

  /** The fixture's {@code wrongQuery} must be reported as an ERROR result. */
  @Test
  public void badSqlSyntaxFails() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getWrong(), context);

    assertEquals(InterpreterResult.Code.ERROR, ret.code());
  }
}

View file

@ -74,6 +74,13 @@ function addEachJarInDirRecursive(){
fi
}
# Prepends every regular file below directory $1 whose name ends in "jar"
# (symlinks are followed) to ZEPPELIN_INTP_CLASSPATH, one entry per file.
# Does nothing when $1 is not a directory.
function addEachJarInDirRecursiveForIntp(){
  if [[ -d "${1}" ]]; then
    local jar
    # Read NUL-delimited names so paths containing spaces, newlines or glob
    # characters stay intact; the previous $(find ...) expansion split and
    # globbed them. Process substitution keeps the loop in the current shell,
    # so the assignment below is visible to the caller.
    while IFS= read -r -d '' jar; do
      ZEPPELIN_INTP_CLASSPATH="$jar:$ZEPPELIN_INTP_CLASSPATH"
    done < <(find -L "${1}" -type f -name '*jar' -print0)
  fi
}
function addJarInDir(){
if [[ -d "${1}" ]]; then
@ -81,6 +88,12 @@ function addJarInDir(){
fi
}
# Puts the wildcard entry "<dir>/*" at the front of ZEPPELIN_INTP_CLASSPATH
# when $1 names an existing directory; otherwise leaves the variable untouched.
function addJarInDirForIntp() {
  # Nothing to add for a missing directory; still report success.
  [[ -d "${1}" ]] || return 0
  ZEPPELIN_INTP_CLASSPATH="${1}/*:${ZEPPELIN_INTP_CLASSPATH}"
}
ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils
function getZeppelinVersion(){

View file

@ -46,6 +46,14 @@ if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"!ZEPPELIN_INTERPRETER_JAR!"
)
REM add test classes for unittest
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\test-classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\test-classes"
)
if exist "%ZEPPELIN_HOME%\zeppelin-zengine\target\test-classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-zengine\target\test-classes"
)
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%"

View file

@ -53,18 +53,27 @@ fi
. "${bin}/common.sh"
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH=""
# construct classpath
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
else
ZEPPELIN_INTERPRETER_JAR="$(ls ${ZEPPELIN_HOME}/lib/zeppelin-interpreter*.jar)"
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}"
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}"
fi
addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDir "${INTERPRETER_DIR}"
# add test classes for unittest
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes"
fi
if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes"
fi
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDirForIntp "${INTERPRETER_DIR}"
HOSTNAME=$(hostname)
ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
@ -85,7 +94,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}"
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
@ -96,14 +105,14 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
# Apache
addEachJarInDirRecursive "${HADOOP_HOME}/share"
addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share"
# CDH
addJarInDir "${HADOOP_HOME}"
addJarInDir "${HADOOP_HOME}/lib"
addJarInDirForIntp "${HADOOP_HOME}"
addJarInDirForIntp "${HADOOP_HOME}/lib"
fi
addJarInDir "${INTERPRETER_DIR}/dep"
addJarInDirForIntp "${INTERPRETER_DIR}/dep"
pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
py4j=($pattern)
@ -127,29 +136,29 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
if [[ -n "${HBASE_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_CONF_DIR}"
elif [[ -n "${HBASE_HOME}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_HOME}/conf"
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_HOME}/conf"
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
fi
addJarInDir "${LOCAL_INTERPRETER_REPO}"
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
if [[ -n "${SPARK_SUBMIT}" ]]; then
${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &
${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &
else
${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
fi
pid=$!

View file

@ -27,12 +27,11 @@
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-cassandra</artifactId>
<artifactId>zeppelin-cassandra_2.10</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Cassandra interpreter</name>
<description>Zeppelin cassandra support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<cassandra.driver.version>3.0.1</cassandra.driver.version>

View file

@ -17,19 +17,20 @@
#
# [name] [maven artifact] [description]
alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.0 Alluxio interpreter
angular org.apache.zeppelin:zeppelin-angular:0.6.0 HTML and AngularJS view rendering
cassandra org.apache.zeppelin:zeppelin-cassandra:0.6.0 Cassandra interpreter
elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.0 Elasticsearch interpreter
file org.apache.zeppelin:zeppelin-file:0.6.0 HDFS file interpreter
flink org.apache.zeppelin:zeppelin-flink:0.6.0 Flink interpreter
hbase org.apache.zeppelin:zeppelin-hbase:0.6.0 Hbase interpreter
ignite org.apache.zeppelin:zeppelin-ignite:0.6.0 Ignite interpreter
jdbc org.apache.zeppelin:zeppelin-jdbc:0.6.0 Jdbc interpreter
kylin org.apache.zeppelin:zeppelin-kylin:0.6.0 Kylin interpreter
lens org.apache.zeppelin:zeppelin-lens:0.6.0 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.0 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.0 Markdown support
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.0 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.0 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.0 Shell command
alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.1 Alluxio interpreter
angular org.apache.zeppelin:zeppelin-angular:0.6.1 HTML and AngularJS view rendering
bigquery org.apache.zeppelin:zeppelin-bigquery:0.6.1 BigQuery interpreter
cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1 Cassandra interpreter built with Scala 2.11
elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.1 Elasticsearch interpreter
file org.apache.zeppelin:zeppelin-file:0.6.1 HDFS file interpreter
flink org.apache.zeppelin:zeppelin-flink_2.11:0.6.1 Flink interpreter built with Scala 2.11
hbase org.apache.zeppelin:zeppelin-hbase:0.6.1 Hbase interpreter
ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.6.1 Ignite interpreter built with Scala 2.11
jdbc org.apache.zeppelin:zeppelin-jdbc:0.6.1 Jdbc interpreter
kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin interpreter
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command

View file

@ -42,6 +42,11 @@ user3 = password4, role2
#ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
#ldapRealm.contextFactory.authenticationMechanism = SIMPLE
### A sample for configuring ZeppelinHub Realm
#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
#securityManager.realms = $zeppelinHubRealm
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager

View file

@ -35,6 +35,7 @@ REM set ZEPPELIN_IDENT_STRING REM A string representing this instance of zep
REM set ZEPPELIN_NICENESS REM The scheduling priority for daemons. Defaults to 0.
REM set ZEPPELIN_INTERPRETER_LOCALREPO REM Local repository for interpreter's additional dependency loading
REM set ZEPPELIN_NOTEBOOK_STORAGE REM Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
REM set ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC REM If there are multiple notebook storages, should we treat the first one as the only source of truth?
REM Spark interpreter configuration
@ -62,7 +63,7 @@ REM
REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext if set true. true by default.
REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default.
REM set ZEPPELIN_SPARK_IMPORTIMPLICIT REM Import implicits, UDF collection, and sql if set true. true by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of SparkSQL result to display. 1000 by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of Spark SQL results to display. 1000 by default.
REM ZeppelinHub connection configuration
REM

View file

@ -36,6 +36,7 @@
# export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0.
# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
# export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
# export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth?
#### Spark interpreter configuration ####
@ -62,7 +63,7 @@
# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
# export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default.
# export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of SparkSQL result to display. 1000 by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL results to display. 1000 by default.
# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000

View file

@ -164,6 +164,12 @@
<description>notebook persistence layer implementation</description>
</property>
<property>
<name>zeppelin.notebook.one.way.sync</name>
<value>false</value>
<description>If there are multiple notebook storages, should we treat the first one as the only source of truth?</description>
</property>
<property>
<name>zeppelin.interpreter.dir</name>
<value>interpreter</value>
@ -178,7 +184,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>

67
dev/change_scala_version.sh Executable file
View file

@ -0,0 +1,67 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort the script as soon as any command exits with a non-zero status.
set -e
# Scala binary versions this script accepts as its <version> argument.
VALID_VERSIONS=( 2.10 2.11 )
# Prints the command-line help, including the accepted versions from
# VALID_VERSIONS, to stderr and terminates the script with status 1.
usage() {
  # "$0" is quoted so a script path containing spaces is handed to basename
  # as a single argument.
  echo "Usage: $(basename "$0") [-h|--help] <version>
where :
-h| --help Display this help text
valid version values : ${VALID_VERSIONS[*]}
" 1>&2
  exit 1
}
# Show the usage text (usage exits with status 1) unless exactly one argument
# was given, and also when that argument is a help flag.
if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then
usage
fi
# Scala binary version requested on the command line.
TO_VERSION=$1
# Returns 0 when $1 is one of VALID_VERSIONS; otherwise prints an error to
# stderr and terminates the script with status 1.
check_scala_version() {
  # Quoted "${VALID_VERSIONS[@]}" iterates over the array elements exactly,
  # without word splitting or glob expansion, and the loop variable is local
  # so it no longer leaks into the caller's scope.
  local candidate
  for candidate in "${VALID_VERSIONS[@]}"; do
    [ "$candidate" = "$1" ] && return 0
  done
  echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2
  exit 1
}
# Reject anything not listed in VALID_VERSIONS (check_scala_version exits with
# status 1 on a mismatch).
check_scala_version "$TO_VERSION"
# Only two versions are accepted, so the suffix being replaced is always the
# other one.
if [ $TO_VERSION = "2.11" ]; then
FROM_VERSION="2.10"
else
FROM_VERSION="2.11"
fi
# Portable stand-in for "sed -i": applies sed expression $1 to file $2 by
# writing the result to "$2.tmp" and then moving it over the original.
# When sed fails, its exit status is returned and the original is not replaced.
sed_i() {
  local expression="$1"
  local target="$2"
  local scratch="$2.tmp"

  sed -e "$expression" "$target" > "$scratch" || return
  mv "$scratch" "$target"
}
export -f sed_i
# Repository root, resolved relative to this script. Quoted so a checkout path
# containing spaces survives.
BASEDIR="$(dirname "$0")/.."
# Rewrite the Scala suffix of artifactId entries in every pom.xml outside of
# target directories. The sed expression and the file name are handed to the
# child bash as positional parameters instead of being spliced into the command
# string, so unusual file names are never re-parsed as shell code.
# sed_i is available in the child shell because it was exported with `export -f`.
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
  -exec bash -c 'sed_i "$1" "$2"' _ \
  's/\(artifactId.*\)_'"$FROM_VERSION"'/\1_'"$TO_VERSION"'/g' {} \;
# Also update <scala.binary.version> in parent POM
# Match any scala binary version to ensure idempotency
sed_i '1,/<scala\.binary\.version>[0-9]*\.[0-9]*</s/<scala\.binary\.version>[0-9]*\.[0-9]*</<scala.binary.version>'"$TO_VERSION"'</' \
  "$BASEDIR/pom.xml"

View file

@ -66,6 +66,7 @@ function make_binary_release() {
cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
./dev/change_scala_version.sh 2.11
echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
if [[ $? -ne 0 ]]; then
@ -102,8 +103,8 @@ function make_binary_release() {
git_clone
make_source_package
make_binary_release all "-Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
make_binary_release netinst "-Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell"
make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11"
make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell,!bigquery"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"

View file

@ -44,7 +44,7 @@ NC='\033[0m' # No Color
RELEASE_VERSION="$1"
GIT_TAG="$2"
PUBLISH_PROFILES="-Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
PUBLISH_PROFILES="-Pbuild-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
PROJECT_OPTIONS="-pl !zeppelin-distribution"
NEXUS_STAGING="https://repository.apache.org/service/local/staging"
NEXUS_PROFILE="153446d1ac37c4"
@ -92,13 +92,27 @@ function publish_to_maven() {
tmp_repo="$(mktemp -d /tmp/zeppelin-repo-XXXXX)"
echo "mvn clean install -Ppublish-distr \
-Dmaven.repo.local=${tmp_repo} \
# build with scala-2.10
echo "mvn clean install -DskipTests \
-Dmaven.repo.local=${tmp_repo} -Pscala-2.10 \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
mvn clean install -Ppublish-distr -Dmaven.repo.local="${tmp_repo}" \
mvn clean install -DskipTests -Dmaven.repo.local="${tmp_repo}" -Pscala-2.10 \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
if [[ $? -ne 0 ]]; then
echo "Build failed."
echo "Build with scala 2.10 failed."
exit 1
fi
# build with scala-2.11
"${BASEDIR}/change_scala_version.sh" 2.11
echo "mvn clean install -DskipTests \
-Dmaven.repo.local=${tmp_repo} -Pscala-2.11 \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}"
mvn clean install -DskipTests -Dmaven.repo.local="${tmp_repo}" -Pscala-2.11 \
${PUBLISH_PROFILES} ${PROJECT_OPTIONS}
if [[ $? -ne 0 ]]; then
echo "Build with scala 2.11 failed."
exit 1
fi

View file

@ -27,7 +27,7 @@ GEM
maruku (~> 0.6.0)
pygments.rb (~> 0.5.0)
redcarpet (~> 2.3.0)
safe_yaml (~> 0.9.7)
safe_yaml (~> 1.0.4)
kramdown (1.2.0)
liquid (2.5.4)
listen (1.3.1)
@ -47,7 +47,7 @@ GEM
ffi (>= 0.5.0)
rdiscount (2.1.7)
redcarpet (2.3.0)
safe_yaml (0.9.7)
safe_yaml (1.0.4)
syntax (1.0.0)
yajl-ruby (1.1.0)

View file

@ -1,4 +1,4 @@
## Zeppelin documentation
## Apache Zeppelin documentation
This readme will walk you through building the Zeppelin documentation, which is included here with the Zeppelin source code.
@ -6,15 +6,17 @@ This readme will walk you through building the Zeppelin documentation, which is
## Build documentation
See https://help.github.com/articles/using-jekyll-with-pages#installing-jekyll
**tl;dr version:**
**Requirements**
```
ruby --version >= 1.9.3
ruby --version >= 2.0.0
gem install bundler
# go to /docs under your Zeppelin source
bundle install
```
For further information about the requirements, please see [here](https://help.github.com/articles/setting-up-your-github-pages-site-locally-with-jekyll/#requirements).
*On OS X 10.9 you may need to do "xcode-select --install"*

View file

@ -32,7 +32,6 @@
<li><a href="{{BASE_PATH}}/manual/notebookashomepage.html">Customize Zeppelin Homepage</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>More</b></span></li>
<li><a href="{{BASE_PATH}}/install/virtual_machine.html">Zeppelin on Vagrant VM</a></li>
<li><a href="{{BASE_PATH}}/install/upgrade.html">Upgrade Zeppelin Version</a></li>
</ul>
</li>
@ -48,6 +47,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Available Interpreters</b></span></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
@ -102,6 +102,10 @@
<li><a href="{{BASE_PATH}}/security/notebook_authorization.html">Notebook Authorization</a></li>
<li><a href="{{BASE_PATH}}/security/datasource_authorization.html">Data Source Authorization</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Advanced</b></span></li>
<li><a href="{{BASE_PATH}}/install/virtual_machine.html">Zeppelin on Vagrant VM</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-standalone-mode">Zeppelin on Spark Cluster Mode (Standalone)</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Contribute</b></span></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelinapplication.html">Writing Zeppelin Application (Experimental)</a></li>
@ -110,7 +114,14 @@
</ul>
</li>
</ul>
<ul class="nav navbar-nav">
<li>
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
<span class="fa fa-search nav-search-icon"></span>
</a>
</li>
</ul>
</nav><!--/.navbar-collapse -->
</div>
</div>

View file

@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{ page.title }}</title>
<title>Apache Zeppelin {{ site.ZEPPELIN_VERSION }} Documentation: {{ page.title }}</title>
{% if page.description %}<meta name="description" content="{{ page.description }}">{% endif %}
<meta name="author" content="{{ site.author.name }}">
@ -34,6 +34,8 @@
<script src="{{ ASSET_PATH }}/js/docs.js"></script>
<script src="{{ ASSET_PATH }}/js/anchor.min.js"></script>
<script src="{{ ASSET_PATH }}/js/toc.js"></script>
<script src="{{ ASSET_PATH }}/js/lunr.min.js"></script>
<script src="{{ ASSET_PATH }}/js/search.js"></script>
<!-- atom & rss feed -->
<link href="{{ BASE_PATH }}{{ site.JB.atom_path }}" type="application/atom+xml" rel="alternate" title="Sitewide ATOM Feed">

View file

@ -569,6 +569,30 @@ a.anchorjs-link:hover { text-decoration: none; }
margin-left: -18px;
}
/* Search Page */
/* Large bold black text for paragraphs inside #search. */
#search p {
font-size: 30px;
font-weight: bold;
color: black;
}
/* Smaller, normal-weight paragraphs inside the #search_results container. */
#search_results p {
font-size: 13px;
font-weight: 400;
}
/* Underlined, slightly larger links inside #search_results. */
#search_results a {
vertical-align: super;
font-size: 16px;
text-decoration: underline;
}
/* Small green text for elements with class "link" inside #search_results. */
#search_results .link {
font-size: 13px;
color: #008000;
padding-bottom: 3px;
}
/* Custom, iPhone Retina */
@media only screen and (max-width: 480px) {
.jumbotron h1 {

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,71 @@
---
---
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
jQuery(function() {
window.idx = lunr(function () {
this.field('id');
this.field('title');
this.field('content', { boost: 10 });
this.field('group');
});
window.data = $.getJSON('/search_data.json');
window.data.then(function(loaded_data){
$.each(loaded_data, function(index, value){
window.idx.add(
$.extend({ "id": index }, value)
);
});
});
$("#site_search").keyup(function(event){
event.preventDefault();
var query = $("#search_box").val();
var results = window.idx.search(query);
display_search_results(results);
});
$('html').bind('keypress', function(event){
// Since keyup() is operated at the above, disable 'Enter Key' press.
if(event.keyCode == 13) {
return false;
}
});
function display_search_results(results) {
var $search_results = $("#search_results");
var zeppelin_version = {{site.ZEPPELIN_VERSION | jsonify}};
window.data.then(function(loaded_data) {
if (results.length) {
$search_results.empty();
$search_results.prepend('<p class="">Found '+results.length+' result(s)</p><hr>');
results.forEach(function(result) {
var item = loaded_data[result.ref];
var appendString = '<a href="'+item.url+'">'+item.title+'</a><div class="link">'+'https://zeppelin.apache.org/docs/'+zeppelin_version+item.url+'</div><p>'+item.excerpt+'</p><br/>';
$search_results.append(appendString);
});
} else {
$search_results.html('<p>Your search did not match any documents.<br/>Make sure that all words are spelled correctly or try more general keywords.</p>');
}
});
}
});

View file

@ -1,9 +1,23 @@
---
layout: page
title: "How to contribute"
description: "How to contribute"
title: "Contributing to Apache Zeppelin (Code)"
description: "How can you contribute to the Apache Zeppelin project? This document covers everything from setting up your development environment to making a pull request on GitHub."
group: development
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Contributing to Apache Zeppelin ( Code )

View file

@ -1,9 +1,23 @@
---
layout: page
title: "How to contribute (website)"
description: "How to contribute (website)"
title: "Contributing to Apache Zeppelin (Website)"
description: "How can you contribute to the Apache Zeppelin project website? This document covers everything from building the Zeppelin documentation site to making a pull request on GitHub."
group: development
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Contributing to Apache Zeppelin ( Website )

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Writing Zeppelin Application"
description: ""
title: "Writing a new Application(Experimental)"
description: "Apache Zeppelin Application is a package that runs on the Interpreter process and displays its output inside of the notebook. Making your own Application in Apache Zeppelin is quite easy."
group: development
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Writing Zeppelin Interpreter"
description: ""
title: "Writing a New Interpreter"
description: "Apache Zeppelin Interpreter is a language backend. Every Interpreter belongs to an InterpreterGroup. Interpreters in the same InterpreterGroup can reference each other."
group: development
---
<!--
@ -156,7 +156,6 @@ println(a)
### 0.6.0 and later
Inside of a notebook, `%[INTERPRETER_GROUP].[INTERPRETER_NAME]` directive will call your interpreter.
Note that the first interpreter configuration in zeppelin.interpreters will be the default one.
You can omit either [INTERPRETER\_GROUP] or [INTERPRETER\_NAME]. If you omit [INTERPRETER\_NAME], then first available interpreter will be selected in the [INTERPRETER\_GROUP].
Likewise, if you skip [INTERPRETER\_GROUP], then [INTERPRETER\_NAME] will be chosen from default interpreter group.

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Angular (backend API)"
description: "Angular (backend API)"
title: "Back-end Angular API in Apache Zeppelin"
description: "Apache Zeppelin provides a gateway between your interpreter and your compiled AngularJS view templates. You can not only update scope variables from your interpreter but also watch them in the interpreter, which is a JVM process."
group: display
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Basic Display System"
description: ""
title: "Basic Display System in Apache Zeppelin"
description: "There are 3 basic display systems in Apache Zeppelin. By default, Zeppelin prints interpreter response as plain text using text display system. With %html directive, Zeppelin treats your output as HTML. You can also simply use %table display system to leverage Zeppelin's built in visualization."
group: display
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Angular (frontend API)"
description: "Angular (frontend API)"
title: "Front-end Angular API in Apache Zeppelin"
description: "In addition to the back-end API to handle Angular objects binding, Apache Zeppelin exposes a simple AngularJS z object on the front-end side to expose the same capabilities."
group: display
---
<!--

View file

@ -1,8 +1,8 @@
---
layout: page
title: Overview
tagline: Less Development, More analysis!
group: nav-right
title:
description:
group:
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
@ -49,7 +49,7 @@ Adding new language-backend is really simple. Learn [how to create your own inte
#### Apache Spark integration
Especially, Apache Zeppelin provides built-in [Apache Spark](http://spark.apache.org/) integration. You don't need to build a separate module, plugin or library for it.
<img class="img-responsive" src="./assets/themes/zeppelin/img/spark_logo.jpg" width="140px" />
<img class="img-responsive" src="./assets/themes/zeppelin/img/spark_logo.png" width="140px" />
Apache Zeppelin with Spark integration provides
@ -62,7 +62,7 @@ For the further information about Apache Spark in Apache Zeppelin, please see [S
<br />
## Data visualization
Some basic charts are already included in Apache Zeppelin. Visualizations are not limited to SparkSQL query, any output from any language backend can be recognized and visualized.
Some basic charts are already included in Apache Zeppelin. Visualizations are not limited to Spark SQL query, any output from any language backend can be recognized and visualized.
<div class="row">
<div class="col-md-6">
@ -133,7 +133,6 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Publish your Paragraph](./manual/publish.html) results into your external website
* [Customize Zeppelin Homepage](./manual/notebookashomepage.html) with one of your notebooks
* More
* [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html): a guide for installing Apache Zeppelin on Vagrant virtual machine
* [Upgrade Apache Zeppelin Version](./install/upgrade.html): a manual procedure of upgrading Apache Zeppelin version
####Interpreter
@ -168,6 +167,9 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Shiro Authentication](./security/shiroauthentication.html)
* [Notebook Authorization](./security/notebook_authorization.html)
* [Data Source Authorization](./security/datasource_authorization.html)
* Advanced
* [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html)
* [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode)
* Contribute
* [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html)
* [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html)

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Getting Started"
description: ""
title: "Quick Start"
description: "This page will help you to get started and guide you through installation of Apache Zeppelin, running it in the command line and basic configuration options."
group: install
---
<!--
@ -93,8 +93,12 @@ mvn clean package -DskipTests [Options]
Here are some examples with several options
```
# basic build
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark
# build with spark-2.0, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
# build with spark-1.6, scala-2.10
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr
# spark-cassandra integration
mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests
@ -370,6 +374,12 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</td>
<td>Comma separated list of notebook storage</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC</td>
<td>zeppelin.notebook.one.way.sync</td>
<td>false</td>
<td>If there are multiple notebook storages, should we treat the first one as the only source of truth?</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETERS</td>
<td>zeppelin.interpreters</td>
@ -377,7 +387,10 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>org.apache.zeppelin.spark.SparkInterpreter,<br />org.apache.zeppelin.spark.PySparkInterpreter,<br />org.apache.zeppelin.spark.SparkSqlInterpreter,<br />org.apache.zeppelin.spark.DepInterpreter,<br />org.apache.zeppelin.markdown.Markdown,<br />org.apache.zeppelin.shell.ShellInterpreter,<br />
...
</td>
<td>Comma separated interpreter configurations [Class] <br /> The first interpreter will be a default value. <br /> It means only the first interpreter in this list can be available without <code>%interpreter_name</code> annotation in notebook paragraph. </td>
<td>
Comma separated interpreter configurations [Class] <br/>
<span style="font-style:italic">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0</span>
</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETER_DIR</td>
@ -391,4 +404,4 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>1024000</td>
<td>Size in characters of the maximum text message to be received by websocket.</td>
</tr>
</table>
</table>

View file

@ -0,0 +1,74 @@
---
layout: page
title: "Apache Zeppelin on Spark cluster mode"
description: ""
group: install
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Apache Zeppelin on Spark Cluster Mode
<div id="toc"></div>
## Overview
[Apache Spark](http://spark.apache.org/) has supported three cluster manager types ([Standalone](http://spark.apache.org/docs/latest/spark-standalone.html), [Apache Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html) and [Hadoop YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)) so far.
This document will guide you through building and configuring the environment for the 3 types of Spark cluster manager with Apache Zeppelin using [Docker](https://www.docker.com/) scripts.
So [install docker](https://docs.docker.com/engine/installation/) on the machine first.
## Spark standalone mode
[Spark standalone](http://spark.apache.org/docs/latest/spark-standalone.html) is a simple cluster manager included with Spark that makes it easy to set up a cluster.
You can simply set up Spark standalone environment with below steps.
> **Note :** Since Apache Zeppelin and Spark use the same `8080` port for their web UI, you might need to change `zeppelin.server.port` in `conf/zeppelin-site.xml`.
### 1. Build Docker file
You can find docker script files under `scripts/docker/spark-cluster-managers`.
```
cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_standalone
docker build -t "spark_standalone" .
```
### 2. Run docker
```
docker run -it \
-p 8080:8080 \
-p 7077:7077 \
-p 8888:8888 \
-p 8081:8081 \
-h sparkmaster \
--name spark_standalone \
spark_standalone bash;
```
### 3. Configure Spark interpreter in Zeppelin
Set Spark master as `spark://localhost:7077` in Zeppelin **Interpreters** setting page.
<img src="../assets/themes/zeppelin/img/docs-img/standalone_conf.png" />
### 4. Run Zeppelin with Spark interpreter
After running a single paragraph with Spark interpreter in Zeppelin, browse `http://localhost:8080` and check whether Spark cluster is running well or not.
<img src="../assets/themes/zeppelin/img/docs-img/spark_ui.png" />
You can also simply verify that Spark is running well in Docker with below command.
```
ps -ef | grep spark
```

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Manual upgrade procedure for Zeppelin"
description: ""
title: "Manual Zeppelin version upgrade procedure"
description: "This document will guide you through the procedure of manually upgrading your Apache Zeppelin instance to a newer version. Apache Zeppelin keeps backward compatibility for the notebook file format."
group: install
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Install A Zeppelin ready Virtual Machine"
description: ""
title: "Apache Zeppelin on Vagrant Virtual Machine"
description: "Apache Zeppelin provides a script for running a virtual machine for development through Vagrant. The script will create a virtual machine with core dependencies pre-installed, required for developing Apache Zeppelin."
group: install
---
<!--
@ -19,17 +19,17 @@ limitations under the License.
-->
{% include JB/setup %}
# Vagrant Virtual Machine for Apache Zeppelin
# Apache Zeppelin on Vagrant Virtual Machine
<div id="toc"></div>
## Overview
Apache Zeppelin distribution includes a scripts directory
Apache Zeppelin distribution includes a script directory
`scripts/vagrant/zeppelin-dev`
This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source.
This script creates a virtual machine that launches a repeatable, known set of core dependencies required for developing Zeppelin. It can also be used to run an existing Zeppelin build if you don't plan to build from source.
For PySpark users, this script includes several helpful [Python Libraries](#python-extras).
For SparkR users, this script includes several helpful [R Libraries](#r-extras).
@ -88,7 +88,7 @@ By default, Vagrant will share your project directory (the directory with the Va
Running the following commands in the guest machine should display these expected versions:
`node --version` should report *v0.12.7*
`mvn --version` should report *Apache Maven 3.3.3* and *Java version: 1.7.0_85*
`mvn --version` should report *Apache Maven 3.3.9* and *Java version: 1.7.0_85*
The virtual machine consists of:
@ -96,7 +96,7 @@ The virtual machine consists of:
- Node.js 0.12.7
- npm 2.11.3
- ruby 1.9.3 + rake, make and bundler (only required if building jekyll documentation)
- Maven 3.3.3
- Maven 3.3.9
- Git
- Unzip
- libfontconfig to avoid PhantomJS missing dependency issues

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Install Zeppelin to connect with existing YARN cluster"
description: ""
description: "This page describes how to pre-configure a bare metal node, configure Apache Zeppelin and connect it to existing YARN cluster running Hortonworks flavour of Hadoop."
group: install
---
<!--

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Alluxio Interpreter"
description: "Alluxio Interpreter"
title: "Alluxio Interpreter for Apache Zeppelin"
description: "Alluxio is a memory-centric distributed storage system enabling reliable data sharing at memory-speed across cluster frameworks."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Alluxio Interpreter for Apache Zeppelin

View file

@ -0,0 +1,127 @@
---
layout: page
title: "BigQuery Interpreter for Apache Zeppelin"
description: "BigQuery is a highly scalable no-ops data warehouse in the Google Cloud Platform."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# BigQuery Interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[BigQuery](https://cloud.google.com/bigquery/what-is-bigquery) is a highly scalable no-ops data warehouse in the Google Cloud Platform. Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google BigQuery solves this problem by enabling super-fast SQL queries against append-only tables using the processing power of Google's infrastructure. Simply move your data into BigQuery and let us handle the hard work. You can control access to both the project and your data based on your business needs, such as giving others the ability to view or query your data.
## Configuration
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.bigquery.project_id</td>
<td> </td>
<td>Google Project Id</td>
</tr>
<tr>
<td>zeppelin.bigquery.wait_time</td>
<td>5000</td>
<td>Query Timeout in Milliseconds</td>
</tr>
<tr>
<td>zeppelin.bigquery.max_no_of_rows</td>
<td>100000</td>
<td>Max result set size</td>
</tr>
</table>
## BigQuery API
Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
## Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
### Setup service account credentials
In order to run BigQuery interpreter outside of Google Cloud Engine you need to provide authentication credentials,
by [following these instructions](https://developers.google.com/identity/protocols/application-default-credentials):
- Go to the [API Console Credentials page](https://console.developers.google.com/project/_/apis/credentials)
- From the project drop-down, select your project.
- On the `Credentials` page, select the `Create credentials` drop-down, then select `Service account key`.
- From the Service account drop-down, select an existing service account or create a new one.
- For `Key type`, select the `JSON` key option, then select `Create`. The file automatically downloads to your computer.
- Put the `*.json` file you just downloaded in a directory of your choosing. This directory must be private (you can't let anyone get access to this), but accessible to your Zeppelin instance.
- Set the environment variable `GOOGLE_APPLICATION_CREDENTIALS` to the path of the JSON file downloaded.
* either through the GUI: in the interpreter configuration page, property names in CAPITAL_CASE set up env vars
* or through `zeppelin-env.sh`: just add it to the end of the file.
## Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For Example, SQL to query for top 10 departure delays across airports using the flights public dataset
```bash
%bigquery.sql
SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
group by departure_airport
order by 2 desc
limit 10
```
Another Example, SQL to query for most commonly used java packages from the github data hosted in BigQuery
```bash
%bigquery.sql
SELECT
package,
COUNT(*) count
FROM (
SELECT
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
id
FROM (
SELECT
SPLIT(content, '\n') line,
id
FROM
[bigquery-public-data:github_repos.sample_contents]
WHERE
content CONTAINS 'import'
AND sample_path LIKE '%.java'
HAVING
LEFT(line, 6)='import' )
GROUP BY
package,
id )
GROUP BY
1
ORDER BY
count DESC
LIMIT
40
```
## Technical description
For in-depth technical details on current implementation please refer to [bigquery/README.md](https://github.com/apache/zeppelin/blob/master/bigquery/README.md).

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Cassandra Interpreter"
description: "Cassandra Interpreter"
title: "Cassandra CQL Interpreter for Apache Zeppelin"
description: "Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Cassandra CQL Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Elasticsearch Interpreter"
description: ""
title: "Elasticsearch Interpreter for Apache Zeppelin"
description: "Elasticsearch is a highly scalable open-source full-text search and analytics engine."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Elasticsearch Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Flink Interpreter"
description: ""
title: "Flink Interpreter for Apache Zeppelin"
description: "Apache Flink is an open source platform for distributed stream and batch data processing."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Flink interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Geode OQL Interpreter"
description: ""
title: "Geode/Gemfire OQL Interpreter for Apache Zeppelin"
description: "Apache Geode (incubating) provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Geode/Gemfire OQL Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "HBase Shell Interpreter"
description: ""
title: "HBase Shell Interpreter for Apache Zeppelin"
description: "HBase Shell is a JRuby IRB client for Apache HBase. This interpreter provides all capabilities of Apache HBase shell within Apache Zeppelin."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# HBase Shell Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "HDFS File System Interpreter"
description: ""
title: "HDFS File System Interpreter for Apache Zeppelin"
description: "Hadoop File System is a distributed, fault tolerant file system part of the hadoop project and is often used as storage for distributed processing engines like Hadoop MapReduce and Apache Spark or underlying file systems like Alluxio."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# HDFS File System Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Hive Interpreter"
description: ""
title: "Hive Interpreter for Apache Zeppelin"
description: "Apache Hive data warehouse software facilitates querying and managing large datasets residing in distributed storage. Hive provides a mechanism to project structure onto this data and query the data using a SQL-like language called HiveQL. At the same time this language also allows traditional map/reduce programmers to plug in their custom mappers and reducers when it is inconvenient or inefficient to express this logic in HiveQL."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Hive Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Ignite Interpreter"
description: "Ignite user guide"
title: "Ignite Interpreter for Apache Zeppelin"
description: "Apache Ignite in-memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Ignite Interpreter for Apache Zeppelin

View file

@ -1,13 +1,25 @@
---
layout: page
title: "Generic JDBC Interpreter"
description: "JDBC user guide"
title: "Generic JDBC Interpreter for Apache Zeppelin"
description: "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Generic JDBC Interpreter for Apache Zeppelin
# Generic JDBC Interpreter for Apache Zeppelin
<div id="toc"></div>
@ -21,7 +33,8 @@ This interpreter lets you create a JDBC connection to any data source, by now it
* Redshift
* Apache Hive
* Apache Phoenix
* Apache Drill (Details on using [Drill JDBC Driver](https://drill.apache.org/docs/using-the-jdbc-driverde* Apache Tajo
* Apache Drill (Details on using [Drill JDBC Driver](https://drill.apache.org/docs/using-the-jdbc-driver))
* Apache Tajo
If someone else used another database please report how it works to improve functionality.

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Lens Interpreter"
description: "Lens user guide"
title: "Lens Interpreter for Apache Zeppelin"
description: "Apache Lens provides a Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with traditional data warehouses to appear like one."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Lens Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Livy Interpreter"
description: ""
title: "Livy Interpreter for Apache Zeppelin"
description: "Livy is an open source REST interface for interacting with Spark from anywhere. It supports executing snippets of code or programs in a Spark context that runs locally or in YARN."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Livy Interpreter for Apache Zeppelin
@ -30,7 +43,7 @@ We added some common configurations for spark, and you can set any configuration
This link contains all spark configurations: http://spark.apache.org/docs/latest/configuration.html#available-properties.
And instead of starting property with `spark.` it should be replaced with `livy.spark.`.
Example: `spark.master` to `livy.spark.master`
<table class="table-configuration">
<tr>
<th>Property</th>
@ -50,7 +63,7 @@ Example: `spark.master` to `livy.spark.master`
<tr>
<td>zeppelin.livy.spark.maxResult</td>
<td>1000</td>
<td>Max number of SparkSQL result to display.</td>
<td>Max number of Spark SQL result to display.</td>
</tr>
<tr>
<td>livy.spark.driver.cores</td>
@ -102,8 +115,31 @@ Example: `spark.master` to `livy.spark.master`
<td></td>
<td>Upper bound for the number of executors.</td>
</tr>
<tr>
<td>livy.spark.jars.packages</td>
<td></td>
<td>Adding extra libraries to livy interpreter</td>
</tr>
</table>
## Adding External libraries
You can load dynamic libraries into the Livy interpreter by setting the `livy.spark.jars.packages` property to a comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The format for the coordinates should be groupId:artifactId:version.
Example
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Example</th>
<th>Description</th>
</tr>
<tr>
<td>livy.spark.jars.packages</td>
<td>io.spray:spray-json_2.10:1.3.1</td>
<td>Adding extra libraries to livy interpreter</td>
</tr>
</table>
## How to use
Basically, you can use

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Markdown Interpreter"
description: "Markdown Interpreter"
title: "Markdown Interpreter for Apache Zeppelin"
description: "Markdown is a plain text formatting syntax designed so that it can be converted to HTML. Apache Zeppelin uses markdown4j."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Markdown Interpreter for Apache Zeppelin
@ -12,7 +25,7 @@ group: interpreter
## Overview
[Markdown](http://daringfireball.net/projects/markdown/) is a plain text formatting syntax designed so that it can be converted to HTML.
Zeppelin uses markdown4j. For more examples and extension support, please checkout [here](https://code.google.com/p/markdown4j/).
Apache Zeppelin uses markdown4j. For more examples and extension support, please check out [here](https://code.google.com/p/markdown4j/).
In Zeppelin notebook, you can use ` %md ` in the beginning of a paragraph to invoke the Markdown interpreter and generate static html from Markdown plain text.
In Zeppelin, Markdown interpreter is enabled by default.

View file

@ -1,9 +1,22 @@
---
layout: page
title: "PostgreSQL and HAWQ Interpreter"
description: ""
title: "PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache Zeppelin"
description: "Apache Zeppelin supports PostgreSQL, Apache HAWQ(incubating) and Greenplum SQL data processing engines."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache Zeppelin

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Python Interpreter"
description: "Python Interpreter"
title: "Python 2 & 3 Interpreter for Apache Zeppelin"
description: "Python is a programming language that lets you work quickly and integrate systems more effectively."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Python 2 & 3 Interpreter for Apache Zeppelin
@ -74,7 +87,7 @@ print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))
## Matplotlib integration
The python interpreter can display matplotlib graph with the function `z.show()`.
You need to have matplotlib module installed and a XServer running to use this functionality !
You need to have the matplotlib module installed and an X server running to use this functionality!
```python
%python
@ -84,12 +97,12 @@ plt.figure()
z.show(plt)
plt.close()
```
z.show function can take optional parameters to adapt graph width and height
The `z.show()` function can take optional parameters to adapt graph dimensions (width and height) as well as output format (png or optionally svg).
```python
%python
z.show(plt, width='50px')
z.show(plt, height='150px')
z.show(plt, height='150px', fmt='svg')
```
<img class="img-responsive" src="../assets/themes/zeppelin/img/docs-img/pythonMatplotlib.png" />

View file

@ -1,9 +1,22 @@
---
layout: page
title: "R Interpreter"
description: ""
title: "R Interpreter for Apache Zeppelin"
description: "R is a free software environment for statistical computing and graphics."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# R Interpreter for Apache Zeppelin
@ -104,7 +117,7 @@ And vice versa:
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
* (Note that `%spark.r` and `$r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
* (Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.)
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.

View file

@ -1,9 +1,22 @@
---
layout: page
title: "Scalding Interpreter"
description: ""
title: "Scalding Interpreter for Apache Zeppelin"
description: "Scalding is an open source Scala library for writing MapReduce jobs."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Scalding Interpreter for Apache Zeppelin

View file

@ -1,40 +1,68 @@
---
layout: page
title: "Shell Interpreter"
description: "Shell Interpreter"
group: interpreter
---
{% include JB/setup %}
# Shell interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
Shell interpreter uses [Apache Commons Exec](https://commons.apache.org/proper/commons-exec) to execute external processes.
In Zeppelin notebook, you can use ` %sh ` in the beginning of a paragraph to invoke system shell and run commands.
> **Note :** Currently each command runs as the user Zeppelin server is running as.
## Configuration
At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property value for Shell interpreter.
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
<th>Description</th>
</tr>
<tr>
<td>shell.command.timeout.millisecs</td>
<td>60000</td>
<td>Shell command time out in millisecs</td>
</tr>
</table>
## Example
The following example demonstrates the basic usage of Shell in a Zeppelin notebook.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/shell-example.png" />
---
layout: page
title: "Shell interpreter for Apache Zeppelin"
description: "Shell interpreter uses Apache Commons Exec to execute external processes."
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Shell interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
Shell interpreter uses [Apache Commons Exec](https://commons.apache.org/proper/commons-exec) to execute external processes.
In Zeppelin notebook, you can use ` %sh ` in the beginning of a paragraph to invoke system shell and run commands.
> **Note :** Currently each command runs as the user Zeppelin server is running as.
## Configuration
At the "Interpreters" menu in Zeppelin dropdown menu, you can set the property value for Shell interpreter.
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
<th>Description</th>
</tr>
<tr>
<td>shell.command.timeout.millisecs</td>
<td>60000</td>
<td>Shell command time out in millisecs</td>
</tr>
<tr>
<td>zeppelin.shell.auth.type</td>
<td></td>
<td>Supported authentication methods are SIMPLE and KERBEROS</td>
</tr>
<tr>
<td>zeppelin.shell.principal</td>
<td></td>
<td>The principal name to load from the keytab</td>
</tr>
<tr>
<td>zeppelin.shell.keytab.location</td>
<td></td>
<td>The path to the keytab file</td>
</tr>
</table>
## Example
The following example demonstrates the basic usage of Shell in a Zeppelin notebook.
<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/shell-example.png" />
If you need further information about **Zeppelin Interpreter Setting** for using Shell interpreter, please read [What is interpreter setting?](../manual/interpreters.html#what-is-interpreter-setting) section first.

View file

@ -1,11 +1,23 @@
---
layout: page
title: "Spark Interpreter Group"
description: ""
title: "Apache Spark Interpreter for Apache Zeppelin"
description: "Apache Spark is a fast and general-purpose cluster computing system. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs."
group: interpreter
---
{% include JB/setup %}
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Spark Interpreter for Apache Zeppelin
@ -79,7 +91,7 @@ You can also set other Spark properties which are not listed in the table. For a
</tr>
<tr>
<td>spark.executor.memory </td>
<td>512m</td>
<td>1g</td>
<td>Executor memory per worker instance. <br/> ex) 512m, 32g</td>
</tr>
<tr>
@ -105,7 +117,7 @@ You can also set other Spark properties which are not listed in the table. For a
<tr>
<td>zeppelin.spark.maxResult</td>
<td>1000</td>
<td>Max number of SparkSQL result to display.</td>
<td>Max number of Spark SQL result to display.</td>
</tr>
<tr>
<td>zeppelin.spark.printREPLOutput</td>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Dependency Management"
description: ""
title: "Dependency Management for Apache Spark Interpreter"
description: "Include external libraries to Apache Spark Interpreter by setting dependencies in interpreter menu."
group: manual
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Dynamic Form"
description: ""
title: "Dynamic Form in Apache Zeppelin"
description: "Apache Zeppelin dynamically creates input forms. Depending on language backend, there're two different ways to create dynamic form."
group: manual
---
<!--
@ -28,7 +28,7 @@ Custom language backend can select which type of form creation it wants to use.
## Using form Templates
This mode creates form using simple template language. It's simple and easy to use. For example Markdown, Shell, SparkSql language backend uses it.
This mode creates form using simple template language. It's simple and easy to use. For example Markdown, Shell, Spark SQL language backend uses it.
### Text input form

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Dynamic Interpreter Loading"
description: ""
title: "Dynamic Interpreter Loading using REST API"
description: "Apache Zeppelin provides a pluggable interpreter architecture which results in a wide variety of supported backend systems. In this page, we will introduce dynamic interpreter loading using REST API."
group: manual
---
<!--
@ -23,7 +23,7 @@ limitations under the License.
<div id="toc"></div>
Zeppelin provides pluggable interpreter architecture which results in a wide and variety of the supported backend system. In this section, we will introduce **Dynamic interpreter loading** using **REST API**. This concept actually comes from [Zeppelin Helium Proposal](https://cwiki.apache.org/confluence/display/ZEPPELIN/Helium+proposal).
Apache Zeppelin provides a pluggable interpreter architecture which results in a wide variety of supported backend systems. In this section, we will introduce **Dynamic interpreter loading** using **REST API**. This concept actually comes from [Zeppelin Helium Proposal](https://cwiki.apache.org/confluence/display/ZEPPELIN/Helium+proposal).
Before we start, if you are not familiar with the concept of **Zeppelin interpreter**, you can check out [Overview of Zeppelin interpreter](../manual/interpreters.html) first.
## Overview

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Interpreter Installation"
description: ""
title: "Interpreter Installation in Netinst Binary Package"
description: "Apache Zeppelin provides an Interpreter Installation mechanism for those who downloaded the Zeppelin netinst binary package, or who just want to install additional 3rd party interpreters."
group: manual
---
<!--
@ -47,6 +47,52 @@ You can get full list of community managed interpreters by running
./bin/install-interpreter.sh --list
```
#### Install interpreter built with Scala 2.10
From version 0.6.1, Zeppelin supports both Scala 2.10 and 2.11 for several interpreters as below:
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Maven Artifact for Scala 2.10</th>
<th>Maven Artifact for Scala 2.11</th>
</tr>
<tr>
<td>cassandra</td>
<td>org.apache.zeppelin:zeppelin-cassandra_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1</td>
</tr>
<tr>
<td>flink</td>
<td>org.apache.zeppelin:zeppelin-flink_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-flink_2.11:0.6.1</td>
</tr>
<tr>
<td>ignite</td>
<td>org.apache.zeppelin:zeppelin-ignite_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-ignite_2.11:0.6.1</td>
</tr>
<tr>
<td>spark</td>
<td>org.apache.zeppelin:zeppelin-spark_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-spark_2.11:0.6.1</td>
</tr>
</table>
If you install one of these interpreters only with `--name` option, installer will download interpreter built with Scala 2.11 by default. If you want to specify Scala version, you will need to add `--artifact` option. Here is the example of installing flink interpreter built with Scala 2.10.
```
./bin/install-interpreter.sh --name flink --artifact org.apache.zeppelin:zeppelin-flink_2.10:0.6.1
```
#### Install Spark interpreter built with Scala 2.10
Spark distribution package has been built with Scala 2.10 until 1.6.2. If you have `SPARK_HOME` set pointing to a Spark version earlier than 2.0.0, you need to download the Spark interpreter packaged with Scala 2.10. To do so, use the following command:
```
rm -rf ./interpreter/spark
./bin/install-interpreter.sh --name spark --artifact org.apache.zeppelin:zeppelin-spark_2.10:0.6.1
```
<br />
Once you have installed interpreters, you need to restart Zeppelin. And then [create interpreter setting](../manual/interpreters.html#what-is-zeppelin-interpreter) and [bind it with your notebook](../manual/interpreters.html#what-is-zeppelin-interpreter-setting).
@ -85,82 +131,87 @@ You can also find the below community managed interpreter list in `conf/interpre
</tr>
<tr>
<td>alluxio</td>
<td>org.apache.zeppelin:zeppelin-alluxio:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-alluxio:0.6.1</td>
<td>Alluxio interpreter</td>
</tr>
<tr>
<td>angular</td>
<td>org.apache.zeppelin:zeppelin-angular:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-angular:0.6.1</td>
<td>HTML and AngularJS view rendering</td>
</tr>
<tr>
<td>bigquery</td>
<td>org.apache.zeppelin:zeppelin-bigquery:0.6.1</td>
<td>BigQuery interpreter</td>
</tr>
<tr>
<td>cassandra</td>
<td>org.apache.zeppelin:zeppelin-cassandra:0.6.0</td>
<td>Cassandra interpreter</td>
<td>org.apache.zeppelin:zeppelin-cassandra\_2.11:0.6.1</td>
<td>Cassandra interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>elasticsearch</td>
<td>org.apache.zeppelin:zeppelin-elasticsearch:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-elasticsearch:0.6.1</td>
<td>Elasticsearch interpreter</td>
</tr>
<tr>
<td>file</td>
<td>org.apache.zeppelin:zeppelin-file:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-file:0.6.1</td>
<td>HDFS file interpreter</td>
</tr>
<tr>
<td>flink</td>
<td>org.apache.zeppelin:zeppelin-flink:0.6.0</td>
<td>Flink interpreter</td>
<td>org.apache.zeppelin:zeppelin-flink\_2.11:0.6.1</td>
<td>Flink interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>hbase</td>
<td>org.apache.zeppelin:zeppelin-hbase:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-hbase:0.6.1</td>
<td>Hbase interpreter</td>
</tr>
<tr>
<td>ignite</td>
<td>org.apache.zeppelin:zeppelin-ignite:0.6.0</td>
<td>Ignite interpreter</td>
<td>org.apache.zeppelin:zeppelin-ignite\_2.11:0.6.1</td>
<td>Ignite interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>jdbc</td>
<td>org.apache.zeppelin:zeppelin-jdbc:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-jdbc:0.6.1</td>
<td>Jdbc interpreter</td>
</tr>
<tr>
<td>kylin</td>
<td>org.apache.zeppelin:zeppelin-kylin:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-kylin:0.6.1</td>
<td>Kylin interpreter</td>
</tr>
<tr>
<td>lens</td>
<td>org.apache.zeppelin:zeppelin-lens:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-lens:0.6.1</td>
<td>Lens interpreter</td>
</tr>
<tr>
<td>livy</td>
<td>org.apache.zeppelin:zeppelin-livy:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-livy:0.6.1</td>
<td>Livy interpreter</td>
</tr>
<tr>
<td>md</td>
<td>org.apache.zeppelin:zeppelin-markdown:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-markdown:0.6.1</td>
<td>Markdown support</td>
</tr>
<tr>
<td>postgresql</td>
<td>org.apache.zeppelin:zeppelin-postgresql:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-postgresql:0.6.1</td>
<td>Postgresql interpreter</td>
</tr>
<tr>
<td>python</td>
<td>org.apache.zeppelin:zeppelin-python:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-python:0.6.1</td>
<td>Python interpreter</td>
</tr>
<tr>
<td>shell</td>
<td>org.apache.zeppelin:zeppelin-shell:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-shell:0.6.1</td>
<td>Shell command</td>
</tr>
</table>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Interpreters"
description: ""
title: "Interpreters in Apache Zeppelin"
description: "This document explains about the role of interpreters, interpreters group and interpreter settings in Apache Zeppelin. The concept of Zeppelin interpreter allows any language/data-processing-backend to be plugged into Zeppelin."
group: manual
---
<!--
@ -27,7 +27,7 @@ limitations under the License.
In this section, we will explain about the role of interpreters, interpreters group and interpreter settings in Zeppelin.
The concept of Zeppelin interpreter allows any language/data-processing-backend to be plugged into Zeppelin.
Currently, Zeppelin supports many interpreters such as Scala ( with Apache Spark ), Python ( with Apache Spark ), SparkSQL, JDBC, Markdown, Shell and so on.
Currently, Zeppelin supports many interpreters such as Scala ( with Apache Spark ), Python ( with Apache Spark ), Spark SQL, JDBC, Markdown, Shell and so on.
## What is Zeppelin interpreter?
Zeppelin Interpreter is a plug-in which enables Zeppelin users to use a specific language/data-processing-backend. For example, to use Scala code in Zeppelin, you need `%spark` interpreter.
@ -51,7 +51,7 @@ Each notebook can be bound to multiple Interpreter Settings using setting icon o
## What is interpreter group?
Every Interpreter belongs to an **Interpreter Group**. Interpreter Group is a unit of start/stop interpreter.
By default, every interpreter is belonged to a single group, but the group might contain more interpreters. For example, Spark interpreter group is including Spark support, pySpark, SparkSQL and the dependency loader.
By default, every interpreter belongs to a single group, but the group might contain more interpreters. For example, the Spark interpreter group includes Spark support, pySpark, Spark SQL and the dependency loader.
Technically, Zeppelin interpreters from the same group are running in the same JVM. For more information about this, please checkout [here](../development/writingzeppelininterpreter.html).

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Notebook as Homepage"
description: ""
title: "Customize Apache Zeppelin homepage"
description: "Apache Zeppelin allows you to use one of the notebooks you create as your Zeppelin Homepage. With that you can brand your Zeppelin installation, adjust the instruction to your users' needs and even translate to other languages."
group: manual
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Publish your Paragraph"
description: ""
title: "How can you publish your paragraph"
description: "Apache Zeppelin provides a feature for publishing your notebook paragraph results. Using this feature, you can show Zeppelin notebook paragraph results in your own website."
group: manual
---
<!--

View file

@ -1,8 +1,8 @@
---
layout: page
title: "Please contribute"
description: ""
group: development
title:
description:
group:
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Explore Apache Zeppelin UI"
description: "Description of Zeppelin UI Layout"
description: "If you are new to Apache Zeppelin, this document will guide you about the basic components of Zeppelin one by one."
group: quickstart
---
<!--
@ -17,6 +17,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Explore Apache Zeppelin UI
<div id="toc"></div>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Tutorial"
description: "Tutorial is valid for Spark 1.3 and higher"
title: "Apache Zeppelin Tutorial"
description: "This tutorial page contains a short walk-through tutorial that uses Apache Spark backend. Please note that this tutorial is valid for Spark 1.3 and higher."
group: quickstart
---
<!--
@ -17,6 +17,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Zeppelin Tutorial
<div id="toc"></div>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Configuration REST API"
description: ""
title: "Apache Zeppelin Configuration REST API"
description: "This page contains Apache Zeppelin Configuration REST API information."
group: rest-api
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Credentials REST API"
description: ""
title: "Apache Zeppelin Credential REST API"
description: "This page contains Apache Zeppelin Credential REST API information."
group: rest-api
---
<!--

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Interpreter REST API"
description: ""
title: "Apache Zeppelin Interpreter REST API"
description: "This page contains Apache Zeppelin Interpreter REST API information."
group: rest-api
---
<!--
@ -75,7 +75,7 @@ The role of registered interpreters, settings and interpreters group are describ
"className": "org.apache.zeppelin.spark.SparkInterpreter",
"properties": {
"spark.executor.memory": {
"defaultValue": "512m",
"defaultValue": "1g",
"description": "Executor memory per worker instance. ex) 512m, 32g"
},
"spark.cores.max": {
@ -92,7 +92,7 @@ The role of registered interpreters, settings and interpreters group are describ
"properties": {
"zeppelin.spark.maxResult": {
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
}
},
"path": "/zeppelin/interpreter/spark"
@ -154,7 +154,7 @@ The role of registered interpreters, settings and interpreters group are describ
"group": "spark",
"properties": {
"spark.cores.max": "",
"spark.executor.memory": "512m",
"spark.executor.memory": "1g",
},
"interpreterGroup": [
{
@ -460,4 +460,4 @@ The role of registered interpreters, settings and interpreters group are describ
<td> 500 </td>
</tr>
</table>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Notebook REST API"
description: ""
title: "Apache Zeppelin Notebook REST API"
description: "This page contains Apache Zeppelin Notebook REST API information."
group: rest-api
---
<!--

View file

@ -21,7 +21,7 @@ limitations under the License.
<div class="row">
<div class="col-md-3">
<a href="assets/themes/zeppelin/img/screenshots/sparksql.png"><img class="thumbnail" src="assets/themes/zeppelin/img/screenshots/sparksql.png" /></a>
<center>SparkSQL with inline visualization</center>
<center>Spark SQL with inline visualization</center>
</div>
<div class="col-md-3">
<a href="assets/themes/zeppelin/img/screenshots/spark.png"><img class="thumbnail" src="assets/themes/zeppelin/img/screenshots/spark.png" /></a>

43
docs/search.md Normal file
View file

@ -0,0 +1,43 @@
---
layout: page
title:
description:
group:
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
<div id="home-search" class="home">
<span id="search">
<p><i class="glyphicon glyphicon-search"></i> &nbsp;Search Docs</p>
</span>
<br/>
<form role="search" action="{{BASE_PATH}}/search.html" id="site_search">
<div class="input-group" id="search-container">
<input type="text" class="form-control" size="16px" name="q" placeholder="Search all pages" id="search_box">
<span class="input-group-btn">
<button type="reset" class="btn btn-default">
<i class="glyphicon glyphicon-remove" style="color:#777"></i>
</button>
</span>
</div>
</form>
<br/><br/>
<div id="search_results"></div>
</div>

17
docs/search_data.json Normal file
View file

@ -0,0 +1,17 @@
---
layout: null
---
{
{% for page in site.pages %}{% if page.title != nil %}
"{{ page.url | slugify }}": {
"title": "{{ page.title | xml_escape }}",
"content" : "{{page.content | strip_html | strip_newlines | escape | remove: "\"}}",
"url": " {{ page.url | xml_escape }}",
"group": "{{ page.group }}",
"excerpt": {{ page.description | strip_html | truncatewords: 40 | jsonify }}
}
{% unless forloop.last %},{% endunless %}
{% endif %}
{% endfor %}
}

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Authentication for NGINX"
description: "Authentication for NGINX"
description: "There are multiple ways to enable authentication in Apache Zeppelin. This page describes HTTP basic auth using NGINX."
group: security
---
<!--
@ -17,11 +17,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Authentication for NGINX
<div id="toc"></div>
Authentication is company-specific.
There are multiple ways to enable authentication in Apache Zeppelin. This page describes HTTP basic auth using NGINX.
One option is to use [Basic Access Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
## HTTP Basic Authentication using NGINX

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Data Source Authorization"
description: "Data Source Authorization"
title: "Data Source Authorization in Apache Zeppelin"
description: "Apache Zeppelin supports protected data sources. In case of a MySql database, every user can set up their own credentials to access it."
group: security
---
<!--
@ -17,6 +17,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Data Source Authorization in Apache Zeppelin
<div id="toc"></div>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Notebook Authorization"
description: "Notebook Authorization"
title: "Notebook Authorization in Apache Zeppelin"
description: "This page will guide you how you can set the permission for Zeppelin notebooks. This document assumes that Apache Shiro authentication was set up."
group: security
---
<!--
@ -17,6 +17,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Zeppelin Notebook Authorization
<div id="toc"></div>

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Shiro Security for Apache Zeppelin"
description: ""
title: "Apache Shiro Authentication for Apache Zeppelin"
description: "Apache Shiro is a powerful and easy-to-use Java security framework that performs authentication, authorization, cryptography, and session management. This document explains step by step how Shiro can be used for Zeppelin notebook authentication."
group: security
---
<!--
@ -19,7 +19,7 @@ limitations under the License.
-->
{% include JB/setup %}
# Shiro authentication for Apache Zeppelin
# Apache Shiro authentication for Apache Zeppelin
<div id="toc"></div>
@ -105,6 +105,33 @@ finance = *
group1 = *
```
## Configure Realm (optional)
Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses [IniRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/text/IniRealm.html) (users and groups are configurable in `conf/shiro.ini` file under `[users]` and `[group]` sections). You can also leverage Shiro Realms like [JndiLdapRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/ldap/JndiLdapRealm.html), [JdbcRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/jdbc/JdbcRealm.html) or create [your own](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/AuthorizingRealm.html).
To learn more about Apache Shiro Realm, please check [this documentation](http://shiro.apache.org/realm.html).
We also provide community custom Realms.
### Active Directory
TBD
### LDAP
TBD
### ZeppelinHub
[ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronizes your Apache Zeppelin notebooks and enables you to collaborate easily.
To enable login with your ZeppelinHub credential, apply the following change in `conf/shiro.ini` under `[main]` section.
```
### A sample for configuring ZeppelinHub Realm
zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
securityManager.realms = $zeppelinHubRealm
```
> Note: ZeppelinHub is not related to the Apache Zeppelin project.
## Secure your Zeppelin information (optional)
By default, anyone who is defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
Sometimes you might want to hide this information for your use case.
@ -123,3 +150,4 @@ If you want to grant this permission to other users, you can change **roles[ ]**
<br/>
> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file. This documentation is originally from [SECURITY-README.md](https://github.com/apache/zeppelin/blob/master/SECURITY-README.md).

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Storage"
description: "Notebook Storage option for Zeppelin"
title: "Notebook Storage for Apache Zeppelin"
description: "Apache Zeppelin has a pluggable notebook storage mechanism controlled by zeppelin.notebook.storage configuration option with multiple implementations."
group: storage
---
<!--
@ -17,6 +17,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Notebook storage options for Apache Zeppelin
<div id="toc"></div>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Elasticsearch interpreter</name>
<url>http://www.apache.org</url>
<properties>
<elasticsearch.version>2.3.3</elasticsearch.version>

View file

@ -30,7 +30,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: File System Interpreters</name>
<url>http://www.apache.org</url>
<dependencies>
<dependency>

View file

@ -37,17 +37,6 @@ public class HDFSFileInterpreter extends FileInterpreter {
static final String HDFS_USER = "hdfs.user";
static final String HDFS_MAXLENGTH = "hdfs.maxlength";
static {
Interpreter.register(
"hdfs",
"file",
HDFSFileInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(HDFS_URL, "http://localhost:50070/webhdfs/v1/", "The URL for WebHDFS")
.add(HDFS_USER, "hdfs", "The WebHDFS user")
.add(HDFS_MAXLENGTH, "1000", "Maximum number of lines of results fetched").build());
}
Exception exceptionOnConnect = null;
HDFSCommand cmd = null;
Gson gson = null;

View file

@ -0,0 +1,27 @@
[
{
"group": "file",
"name": "hdfs",
"className": "org.apache.zeppelin.file.HDFSFileInterpreter",
"properties": {
"hdfs.url": {
"envName": null,
"propertyName": "hdfs.url",
"defaultValue": "http://localhost:50070/webhdfs/v1/",
"description": "The URL for WebHDFS"
},
"hdfs.user": {
"envName": null,
"propertyName": "hdfs.user",
"defaultValue": "hdfs",
"description": "The WebHDFS user"
},
"hdfs.maxlength": {
"envName": null,
"propertyName": "hdfs.maxlength",
"defaultValue": "1000",
"description": "Maximum number of lines of results fetched"
}
}
}
]

View file

@ -27,12 +27,11 @@
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-flink</artifactId>
<artifactId>zeppelin-flink_2.10</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Flink</name>
<description>Zeppelin flink support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<flink.version>1.0.3</flink.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Geode interpreter</name>
<url>http://geode.incubator.apache.org/</url>
<properties>
<geode.version>1.0.0-incubating-SNAPSHOT</geode.version>

View file

@ -30,7 +30,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: HBase interpreter</name>
<url>http://www.apache.org</url>
<properties>
<hbase.hbase.version>1.0.0</hbase.hbase.version>

Some files were not shown because too many files have changed in this diff Show more