Merge branch 'master' into ZEPPELIN-960

This commit is contained in:
CloverHearts 2016-08-08 11:22:37 +09:00
commit ff0f21337b
126 changed files with 2935 additions and 1175 deletions

3
.gitignore vendored
View file

@ -105,3 +105,6 @@ tramp
.\#*
*.swp
**/dependency-reduced-pom.xml
# Generated by zeppelin-examples
/helium

View file

@ -33,13 +33,13 @@ addons:
matrix:
include:
# Test all modules with spark-2.0.0-preview and scala 2.11
# Test all modules with spark 2.0.0 and scala 2.11
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" SPARK_VER="2.0.0-preview" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Dspark.version=2.0.0-preview -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""
# Test all modules with scala 2.11
- jdk: "oraclejdk7"
@ -104,4 +104,3 @@ after_failure:
after_script:
- ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER

View file

@ -202,7 +202,7 @@ Zeppelin uses Travis for CI. In the project root there is .travis.yml that confi
```
cd zeppelin-server
HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME mvn exec:java -Dexec.mainClass="com.nflabs.zeppelin.server.ZeppelinServer" -Dexec.args=""
HADOOP_HOME=YOUR_HADOOP_HOME JAVA_HOME=YOUR_JAVA_HOME mvn exec:java -Dexec.mainClass="org.apache.zeppelin.server.ZeppelinServer" -Dexec.args=""
```
or use daemon script

View file

@ -251,6 +251,7 @@ The following components are provided under the Apache License. See project link
The text of each license is also included at licenses/LICENSE-[project]-[version].txt.
(Apache 2.0) Bootstrap v3.0.2 (http://getbootstrap.com/) - https://github.com/twbs/bootstrap/blob/v3.0.2/LICENSE
(Apache 2.0) Software under ./bigquery/* was developed at Google (http://www.google.com/). Licensed under the Apache v2.0 License.
========================================================================
BSD 3-Clause licenses
@ -270,4 +271,4 @@ BSD 2-Clause licenses
The following components are provided under the BSD 2-Clause license. See file headers and project links for details.
(BSD 2 Clause) portions of SQLLine (http://sqlline.sourceforge.net/) - http://sqlline.sourceforge.net/#license
jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java
jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java

1
NOTICE
View file

@ -4,5 +4,4 @@ Copyright 2015 - 2016 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Portions of this software were developed at NFLabs, Inc. (http://www.nflabs.com)

View file

@ -217,6 +217,7 @@ Here're some examples:
```sh
# build with spark-2.0, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
# build with spark-1.6, scala-2.10
@ -306,6 +307,7 @@ For configuration details check __`./conf`__ subdirectory.
To produce a Zeppelin package compiled with Scala 2.11, use the -Pscala-2.11 profile:
```
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests clean install
```

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Alluxio interpreter</name>
<url>http://www.apache.org</url>
<properties>
<alluxio.version>1.0.0</alluxio.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Angular interpreter</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

109
bigquery/README.md Normal file
View file

@ -0,0 +1,109 @@
# Overview
BigQuery interpreter for Apache Zeppelin
# Prerequisites
You can follow the instructions at [Apache Zeppelin on Dataproc](https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/apache-zeppelin/README.MD) to bring up Zeppelin on Google dataproc.
You could also install and bring up Zeppelin on Google Compute Engine.
# Unit Tests
BigQuery Unit tests are excluded as these tests depend on the BigQuery external service. This is because BigQuery does not have a local mock at this point.
If you would like to run these tests manually, follow these steps:
* [Create a new project](https://support.google.com/cloud/answer/6251787?hl=en)
* [Create a Google Compute Engine instance](https://cloud.google.com/compute/docs/instances/create-start-instance)
* Copy the project ID that you created and add it to the property "projectId" in `resources/constants.json`
* Run the command `mvn <options> -Dbigquery.test.exclude='' test -pl bigquery -am`
# Interpreter Configuration
Configure the following properties during Interpreter creation.
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.bigquery.project_id</td>
<td> </td>
<td>Google Project Id</td>
</tr>
<tr>
<td>zeppelin.bigquery.wait_time</td>
<td>5000</td>
<td>Query Timeout in Milliseconds</td>
</tr>
<tr>
<td>zeppelin.bigquery.max_no_of_rows</td>
<td>100000</td>
<td>Max result set size</td>
</tr>
</table>
# Connection
The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables.
# Google BigQuery API Javadoc
[API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
[Source](http://central.maven.org/maven2/com/google/apis/google-api-services-bigquery/v2-rev265-1.21.0/google-api-services-bigquery-v2-rev265-1.21.0-sources.jar)
We have used the curated veneer version of the Java APIs rather than the [Idiomatic Java client](https://github.com/GoogleCloudPlatform/gcloud-java/tree/master/gcloud-java-bigquery) to build the interpreter. This is mainly for usability reasons.
# Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
# Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For Example, SQL to query for top 10 departure delays across airports using the flights public dataset
```bash
%bigquery.sql
SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
group by departure_airport
order by 2 desc
limit 10
```
Another Example, SQL to query for most commonly used java packages from the github data hosted in BigQuery
```bash
%bigquery.sql
SELECT
package,
COUNT(*) count
FROM (
SELECT
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
id
FROM (
SELECT
SPLIT(content, '\n') line,
id
FROM
[bigquery-public-data:github_repos.sample_contents]
WHERE
content CONTAINS 'import'
AND sample_path LIKE '%.java'
HAVING
LEFT(line, 6)='import' )
GROUP BY
package,
id )
GROUP BY
1
ORDER BY
count DESC
LIMIT
40
```
# Sample Screenshot
![Zeppelin BigQuery](https://cloud.githubusercontent.com/assets/10060731/16938817/b9213ea0-4db6-11e6-8c3b-8149a0bdf874.png)

176
bigquery/pom.xml Normal file
View file

@ -0,0 +1,176 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-bigquery</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: BigQuery interpreter</name>
<dependencies>
<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-bigquery</artifactId>
<version>v2-rev265-1.21.0</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
<artifactId>google-oauth-client</artifactId>
<version>${project.oauth.version}</version>
</dependency>
<dependency>
<groupId>com.google.http-client</groupId>
<artifactId>google-http-client-jackson2</artifactId>
<version>${project.http.version}</version>
</dependency>
<dependency>
<groupId>com.google.oauth-client</groupId>
<artifactId>google-oauth-client-jetty</artifactId>
<version>${project.oauth.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<project.http.version>1.21.0</project.http.version>
<project.oauth.version>1.21.0</project.oauth.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<bigquery.test.exclude>**/BigQueryInterpreterTest.java</bigquery.test.exclude>
</properties>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>${bigquery.test.exclude}</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/bqsql</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/bqsql</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>
org.apache.zeppelin.bigquery.BigQueryInterpreter
</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,338 @@
/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.bigquery;
import static org.apache.commons.lang.StringUtils.containsIgnoreCase;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.BigqueryScopes;
import com.google.api.client.json.GenericJson;
import com.google.api.services.bigquery.Bigquery.Datasets;
import com.google.api.services.bigquery.BigqueryRequest;
import com.google.api.services.bigquery.model.DatasetList;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.QueryRequest;
import com.google.api.services.bigquery.model.QueryResponse;
import com.google.api.services.bigquery.model.JobCancelResponse;
import com.google.gson.Gson;
import java.io.IOException;
import java.util.Collection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Properties;
import java.util.Set;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import java.io.PrintStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* BigQuery interpreter for Zeppelin.
*
* <ul>
* <li>{@code zeppelin.bigquery.project_id} - Project ID in GCP</li>
* <li>{@code zeppelin.bigquery.wait_time} - Query Timeout in ms</li>
* <li>{@code zeppelin.bigquery.max_no_of_rows} - Max Result size</li>
* </ul>
*
* <p>
* How to use: <br/>
* {@code %bigquery.sql<br/>
* {@code
* SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
* FROM [bigquery-samples:airline_ontime_data.flights]
* group by departure_airport
* order by 2 desc
* limit 10
* }
* </p>
*
*/
public class BigQueryInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class);
private static final char NEWLINE = '\n';
private static final char TAB = '\t';
private static Bigquery service = null;
//Mutex created to create the singleton in thread-safe fashion.
private static Object serviceLock = new Object();
static final String PROJECT_ID = "zeppelin.bigquery.project_id";
static final String WAIT_TIME = "zeppelin.bigquery.wait_time";
static final String MAX_ROWS = "zeppelin.bigquery.max_no_of_rows";
private static String jobId = null;
private static String projectId = null;
private static final List NO_COMPLETION = new ArrayList<>();
private Exception exceptionOnConnect;
private static final Function<CharSequence, String> sequenceToStringTransformer =
new Function<CharSequence, String>() {
public String apply(CharSequence seq) {
return seq.toString();
}
};
public BigQueryInterpreter(Properties property) {
super(property);
}
//Function to return valid BigQuery Service
@Override
public void open() {
if (service == null) {
synchronized (serviceLock) {
if (service == null) {
try {
service = createAuthorizedClient();
exceptionOnConnect = null;
logger.info("Opened BigQuery SQL Connection");
} catch (IOException e) {
logger.error("Cannot open connection", e);
exceptionOnConnect = e;
close();
}
}
}
}
}
//Function that Creates an authorized client to Google Bigquery.
private static Bigquery createAuthorizedClient() throws IOException {
HttpTransport transport = new NetHttpTransport();
JsonFactory jsonFactory = new JacksonFactory();
GoogleCredential credential = GoogleCredential.getApplicationDefault(transport, jsonFactory);
if (credential.createScopedRequired()) {
Collection<String> bigqueryScopes = BigqueryScopes.all();
credential = credential.createScoped(bigqueryScopes);
}
return new Bigquery.Builder(transport, jsonFactory, credential)
.setApplicationName("Zeppelin/1.0 (GPN:Apache Zeppelin;)").build();
}
//Function that generates and returns the schema and the rows as string
public static String printRows(final GetQueryResultsResponse response) {
StringBuilder msg = null;
msg = new StringBuilder();
try {
for (TableFieldSchema schem: response.getSchema().getFields()) {
msg.append(schem.getName());
msg.append(TAB);
}
msg.append(NEWLINE);
for (TableRow row : response.getRows()) {
for (TableCell field : row.getF()) {
msg.append(field.getV().toString());
msg.append(TAB);
}
msg.append(NEWLINE);
}
return msg.toString();
} catch ( NullPointerException ex ) {
throw new NullPointerException("SQL Execution returned an error!");
}
}
//Function to poll a job for completion. Future use
public static Job pollJob(final Bigquery.Jobs.Get request, final long interval)
throws IOException, InterruptedException {
Job job = request.execute();
while (!job.getStatus().getState().equals("DONE")) {
System.out.println("Job is "
+ job.getStatus().getState()
+ " waiting " + interval + " milliseconds...");
Thread.sleep(interval);
job = request.execute();
}
return job;
}
//Function to page through the results of an arbitrary bigQuery request
public static <T extends GenericJson> Iterator<T> getPages(
final BigqueryRequest<T> requestTemplate) {
class PageIterator implements Iterator<T> {
private BigqueryRequest<T> request;
private boolean hasNext = true;
public PageIterator(final BigqueryRequest<T> requestTemplate) {
this.request = requestTemplate;
}
public boolean hasNext() {
return hasNext;
}
public T next() {
if (!hasNext) {
throw new NoSuchElementException();
}
try {
T response = request.execute();
if (response.containsKey("pageToken")) {
request = request.set("pageToken", response.get("pageToken"));
} else {
hasNext = false;
}
return response;
} catch (IOException e) {
return null;
}
}
public void remove() {
this.next();
}
}
return new PageIterator(requestTemplate);
}
//Function to call bigQuery to run SQL and return results to the Interpreter for output
private InterpreterResult executeSql(String sql) {
int counter = 0;
StringBuilder finalmessage = null;
finalmessage = new StringBuilder("%table ");
String projId = getProperty(PROJECT_ID);
long wTime = Long.parseLong(getProperty(WAIT_TIME));
long maxRows = Long.parseLong(getProperty(MAX_ROWS));
Iterator<GetQueryResultsResponse> pages;
try {
pages = run(sql, projId, wTime, maxRows);
} catch ( IOException ex ) {
logger.error(ex.getMessage());
return new InterpreterResult(Code.ERROR, ex.getMessage());
}
try {
while (pages.hasNext()) {
finalmessage.append(printRows(pages.next()));
}
return new InterpreterResult(Code.SUCCESS, finalmessage.toString());
} catch ( NullPointerException ex ) {
return new InterpreterResult(Code.ERROR, ex.getMessage());
}
}
//Function to run the SQL on bigQuery service
public static Iterator<GetQueryResultsResponse> run(final String queryString,
final String projId, final long wTime, final long maxRows)
throws IOException {
try {
QueryResponse query = service.jobs().query(
projId,
new QueryRequest().setTimeoutMs(wTime).setQuery(queryString).setMaxResults(maxRows))
.execute();
jobId = query.getJobReference().getJobId();
projectId = query.getJobReference().getProjectId();
GetQueryResults getRequest = service.jobs().getQueryResults(
projectId,
jobId);
return getPages(getRequest);
} catch (IOException ex) {
throw ex;
}
}
@Override
public void close() {
logger.info("Close bqsql connection!");
service = null;
}
@Override
public InterpreterResult interpret(String sql, InterpreterContext contextInterpreter) {
logger.info("Run SQL command '{}'", sql);
return executeSql(sql);
}
@Override
public Scheduler getScheduler() {
return SchedulerFactory.singleton().createOrGetFIFOScheduler(
BigQueryInterpreter.class.getName() + this.hashCode());
}
@Override
public FormType getFormType() {
return FormType.SIMPLE;
}
@Override
public int getProgress(InterpreterContext context) {
return 0;
}
@Override
public void cancel(InterpreterContext context) {
logger.info("Trying to Cancel current query statement.");
if (service != null && jobId != null && projectId != null) {
try {
Bigquery.Jobs.Cancel request = service.jobs().cancel(projectId, jobId);
JobCancelResponse response = request.execute();
jobId = null;
logger.info("Query Execution cancelled");
} catch (IOException ex) {
logger.error("Could not cancel the SQL execution");
}
} else {
logger.info("Query Execution was already cancelled");
}
}
@Override
public List<InterpreterCompletion> completion(String buf, int cursor) {
return NO_COMPLETION;
}
}

View file

@ -0,0 +1,5 @@
{
"projectId": "google.com:babupe-df-test",
"oneQuery": "select 1",
"wrongQuery": "select bad syntax"
}

View file

@ -0,0 +1,27 @@
[
{
"group": "bigquery",
"name": "sql",
"className": "org.apache.zeppelin.bigquery.BigQueryInterpreter",
"properties": {
"zeppelin.bigquery.project_id": {
"envName": null,
"propertyName": "zeppelin.bigquery.project_id",
"defaultValue": " ",
"description": "Google Project ID"
},
"zeppelin.bigquery.wait_time": {
"envName": null,
"propertyName": "zeppelin.bigquery.wait_time",
"defaultValue": "5000",
"description": "Query timeout in Milliseconds"
},
"zeppelin.bigquery.max_no_of_rows": {
"envName": null,
"propertyName": "zeppelin.bigquery.max_no_of_rows",
"defaultValue": "100000",
"description": "Maximum number of rows to fetch from BigQuery"
}
}
}
]

View file

@ -0,0 +1,118 @@
/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.bigquery;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.Before;
import org.junit.Test;
import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Tests for {@link BigQueryInterpreter} that run real queries against BigQuery.
 *
 * <p>The project id and the queries are read from {@code /constants.json} on the
 * test classpath.
 */
public class BigQueryInterpreterTest {

  /** Values loaded from {@code constants.json}; the fields are populated by Gson. */
  protected static class Constants {
    private String projectId;
    private String oneQuery;
    private String wrongQuery;

    public String getProjectId() {
      return projectId;
    }

    public String getOne() {
      return oneQuery;
    }

    public String getWrong() {
      return wrongQuery;
    }
  }

  @SuppressWarnings("checkstyle:abbreviationaswordinname")
  protected static Constants CONSTANTS = null;

  /**
   * Loads {@code constants.json} the first time a test instance is created.
   *
   * @throws FileNotFoundException if {@code /constants.json} is not on the classpath
   * @throws JsonIOException if the file cannot be read or closed
   * @throws JsonSyntaxException if the file is not valid JSON for {@link Constants}
   */
  public BigQueryInterpreterTest()
      throws JsonSyntaxException, JsonIOException, FileNotFoundException {
    if (CONSTANTS == null) {
      InputStream is = this.getClass().getResourceAsStream("/constants.json");
      if (is == null) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message.
        throw new FileNotFoundException("/constants.json was not found on the test classpath");
      }
      try (InputStreamReader reader =
          new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8)) {
        CONSTANTS = new Gson().fromJson(reader, Constants.class);
      } catch (java.io.IOException e) {
        throw new JsonIOException("Could not read /constants.json", e);
      }
    }
  }

  private InterpreterGroup intpGroup;
  private BigQueryInterpreter bqInterpreter;
  // NOTE(review): never assigned, so the tests pass null to interpret() - confirm this is intended.
  private InterpreterContext context;

  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("zeppelin.bigquery.project_id", CONSTANTS.getProjectId());
    p.setProperty("zeppelin.bigquery.wait_time", "5000");
    p.setProperty("zeppelin.bigquery.max_no_of_rows", "100");

    intpGroup = new InterpreterGroup();

    bqInterpreter = new BigQueryInterpreter(p);
    bqInterpreter.setInterpreterGroup(intpGroup);
    bqInterpreter.open();
  }

  @Test
  public void sqlSuccess() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getOne(), context);

    assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
    // Expected value first, so a failure message reports the values the right way round.
    assertEquals(InterpreterResult.Type.TABLE, ret.type());
  }

  @Test
  public void badSqlSyntaxFails() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getWrong(), context);

    assertEquals(InterpreterResult.Code.ERROR, ret.code());
  }
}

View file

@ -74,6 +74,13 @@ function addEachJarInDirRecursive(){
fi
}
# Prepends every file matching '*jar' found under directory $1 (searched
# recursively, following symlinks) to ZEPPELIN_INTP_CLASSPATH.
# Does nothing when $1 is not a directory.
function addEachJarInDirRecursiveForIntp(){
  if [[ -d "${1}" ]]; then
    local jar
    # Read NUL-delimited names so that a path containing whitespace stays one
    # classpath entry instead of being split into several words.
    while IFS= read -r -d '' jar; do
      ZEPPELIN_INTP_CLASSPATH="$jar:$ZEPPELIN_INTP_CLASSPATH"
    done < <(find -L "${1}" -type f -name '*jar' -print0)
  fi
}
function addJarInDir(){
if [[ -d "${1}" ]]; then
@ -81,6 +88,12 @@ function addJarInDir(){
fi
}
# Puts the wildcard entry "$1/*" at the front of ZEPPELIN_INTP_CLASSPATH.
# Leaves the classpath untouched when $1 is not a directory.
function addJarInDirForIntp() {
  local dir="${1}"
  [[ -d "${dir}" ]] || return 0
  ZEPPELIN_INTP_CLASSPATH="${dir}/*:${ZEPPELIN_INTP_CLASSPATH}"
}
ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils
function getZeppelinVersion(){

View file

@ -46,6 +46,14 @@ if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"!ZEPPELIN_INTERPRETER_JAR!"
)
REM add test classes for unittest
if exist "%ZEPPELIN_HOME%\zeppelin-interpreter\target\test-classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-interpreter\target\test-classes"
)
if exist "%ZEPPELIN_HOME%\zeppelin-zengine\target\test-classes" (
set ZEPPELIN_CLASSPATH=%ZEPPELIN_CLASSPATH%;"%ZEPPELIN_HOME%\zeppelin-zengine\target\test-classes"
)
call "%bin%\functions.cmd" ADDJARINDIR "%ZEPPELIN_HOME%\zeppelin-interpreter\target\lib"
call "%bin%\functions.cmd" ADDJARINDIR "%INTERPRETER_DIR%"

View file

@ -53,18 +53,27 @@ fi
. "${bin}/common.sh"
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH=""
# construct classpath
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
else
ZEPPELIN_INTERPRETER_JAR="$(ls ${ZEPPELIN_HOME}/lib/zeppelin-interpreter*.jar)"
ZEPPELIN_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}"
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_INTERPRETER_JAR}"
fi
addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDir "${INTERPRETER_DIR}"
# add test classes for unittest
if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/test-classes"
fi
if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/test-classes"
fi
addJarInDirForIntp "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
addJarInDirForIntp "${INTERPRETER_DIR}"
HOSTNAME=$(hostname)
ZEPPELIN_SERVER=org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
@ -85,7 +94,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
# This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
ZEPPELIN_CLASSPATH+=${SPARK_APP_JAR}
ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}"
pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"
py4j=($pattern)
@ -96,14 +105,14 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
# add Hadoop jars into classpath
if [[ -n "${HADOOP_HOME}" ]]; then
# Apache
addEachJarInDirRecursive "${HADOOP_HOME}/share"
addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share"
# CDH
addJarInDir "${HADOOP_HOME}"
addJarInDir "${HADOOP_HOME}/lib"
addJarInDirForIntp "${HADOOP_HOME}"
addJarInDirForIntp "${HADOOP_HOME}/lib"
fi
addJarInDir "${INTERPRETER_DIR}/dep"
addJarInDirForIntp "${INTERPRETER_DIR}/dep"
pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip"
py4j=($pattern)
@ -127,29 +136,29 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HADOOP_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
export SPARK_CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
fi
elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
if [[ -n "${HBASE_CONF_DIR}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_CONF_DIR}"
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_CONF_DIR}"
elif [[ -n "${HBASE_HOME}" ]]; then
ZEPPELIN_CLASSPATH+=":${HBASE_HOME}/conf"
ZEPPELIN_INTP_CLASSPATH+=":${HBASE_HOME}/conf"
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
fi
addJarInDir "${LOCAL_INTERPRETER_REPO}"
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
if [[ -n "${SPARK_SUBMIT}" ]]; then
${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &
${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &
else
${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
fi
pid=$!

View file

@ -32,7 +32,6 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Cassandra interpreter</name>
<description>Zeppelin cassandra support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<cassandra.driver.version>3.0.1</cassandra.driver.version>

View file

@ -17,19 +17,20 @@
#
# [name] [maven artifact] [description]
alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.0 Alluxio interpreter
angular org.apache.zeppelin:zeppelin-angular:0.6.0 HTML and AngularJS view rendering
cassandra org.apache.zeppelin:zeppelin-cassandra:0.6.0 Cassandra interpreter
elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.0 Elasticsearch interpreter
file org.apache.zeppelin:zeppelin-file:0.6.0 HDFS file interpreter
flink org.apache.zeppelin:zeppelin-flink:0.6.0 Flink interpreter
hbase org.apache.zeppelin:zeppelin-hbase:0.6.0 Hbase interpreter
ignite org.apache.zeppelin:zeppelin-ignite:0.6.0 Ignite interpreter
jdbc org.apache.zeppelin:zeppelin-jdbc:0.6.0 Jdbc interpreter
kylin org.apache.zeppelin:zeppelin-kylin:0.6.0 Kylin interpreter
lens org.apache.zeppelin:zeppelin-lens:0.6.0 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.0 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.0 Markdown support
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.0 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.0 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.0 Shell command
alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.1 Alluxio interpreter
angular org.apache.zeppelin:zeppelin-angular:0.6.1 HTML and AngularJS view rendering
bigquery org.apache.zeppelin:zeppelin-bigquery:0.6.1 BigQuery interpreter
cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1 Cassandra interpreter built with Scala 2.11
elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.1 Elasticsearch interpreter
file org.apache.zeppelin:zeppelin-file:0.6.1 HDFS file interpreter
flink org.apache.zeppelin:zeppelin-flink_2.11:0.6.1 Flink interpreter built with Scala 2.11
hbase org.apache.zeppelin:zeppelin-hbase:0.6.1 Hbase interpreter
ignite org.apache.zeppelin:zeppelin-ignite_2.11:0.6.1 Ignite interpreter built with Scala 2.11
jdbc org.apache.zeppelin:zeppelin-jdbc:0.6.1 Jdbc interpreter
kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin interpreter
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command

View file

@ -42,6 +42,11 @@ user3 = password4, role2
#ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM
#ldapRealm.contextFactory.authenticationMechanism = SIMPLE
### A sample for configuring ZeppelinHub Realm
#zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
#zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
#securityManager.realms = $zeppelinHubRealm
sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager

View file

@ -62,7 +62,7 @@ REM
REM set ZEPPELIN_SPARK_USEHIVECONTEXT REM Use HiveContext instead of SQLContext if set true. true by default.
REM set ZEPPELIN_SPARK_CONCURRENTSQL REM Execute multiple SQL concurrently if set true. false by default.
REM set ZEPPELIN_SPARK_IMPORTIMPLICIT REM Import implicits, UDF collection, and sql if set true. true by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of SparkSQL result to display. 1000 by default.
REM set ZEPPELIN_SPARK_MAXRESULT REM Max number of Spark SQL result to display. 1000 by default.
REM ZeppelinHub connection configuration
REM

View file

@ -62,7 +62,7 @@
# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
# export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default.
# export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of SparkSQL result to display. 1000 by default.
# export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL result to display. 1000 by default.
# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000

View file

@ -178,7 +178,7 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter</value>
<description>Comma separated interpreter configurations. First interpreter become a default</description>
</property>

View file

@ -66,6 +66,7 @@ function make_binary_release() {
cp -r "${WORKING_DIR}/zeppelin" "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
cd "${WORKING_DIR}/zeppelin-${RELEASE_VERSION}-bin-${BIN_RELEASE_NAME}"
./dev/change_scala_version.sh 2.11
echo "mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}"
mvn clean package -Pbuild-distr -DskipTests ${BUILD_FLAGS}
if [[ $? -ne 0 ]]; then
@ -102,8 +103,8 @@ function make_binary_release() {
git_clone
make_source_package
make_binary_release all "-Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
make_binary_release netinst "-Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell"
make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11"
make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell"
# remove non release files and dirs
rm -rf "${WORKING_DIR}/zeppelin"

View file

@ -32,7 +32,6 @@
<li><a href="{{BASE_PATH}}/manual/notebookashomepage.html">Customize Zeppelin Homepage</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>More</b><span></li>
<li><a href="{{BASE_PATH}}/install/virtual_machine.html">Zeppelin on Vagrant VM</a></li>
<li><a href="{{BASE_PATH}}/install/upgrade.html">Upgrade Zeppelin Version</a></li>
</ul>
</li>
@ -48,6 +47,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Available Interpreters</b><span></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
@ -102,6 +102,10 @@
<li><a href="{{BASE_PATH}}/security/notebook_authorization.html">Notebook Authorization</a></li>
<li><a href="{{BASE_PATH}}/security/datasource_authorization.html">Data Source Authorization</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Advanced</b><span></li>
<li><a href="{{BASE_PATH}}/install/virtual_machine.html">Zeppelin on Vagrant VM</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-standalone-mode">Zeppelin on Spark Cluster Mode (Standalone)</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Contribute</b><span></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelinapplication.html">Writing Zeppelin Application (Experimental)</a></li>
@ -113,4 +117,4 @@
</nav><!--/.navbar-collapse -->
</div>
</div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View file

@ -156,7 +156,6 @@ println(a)
### 0.6.0 and later
Inside of a notebook, `%[INTERPRETER_GROUP].[INTERPRETER_NAME]` directive will call your interpreter.
Note that the first interpreter configuration in zeppelin.interpreters will be the default one.
You can omit either [INTERPRETER\_GROUP] or [INTERPRETER\_NAME]. If you omit [INTERPRETER\_NAME], then first available interpreter will be selected in the [INTERPRETER\_GROUP].
Likewise, if you skip [INTERPRETER\_GROUP], then [INTERPRETER\_NAME] will be chosen from default interpreter group.

View file

@ -49,7 +49,7 @@ Adding new language-backend is really simple. Learn [how to create your own inte
#### Apache Spark integration
Especially, Apache Zeppelin provides built-in [Apache Spark](http://spark.apache.org/) integration. You don't need to build a separate module, plugin or library for it.
<img class="img-responsive" src="./assets/themes/zeppelin/img/spark_logo.jpg" width="140px" />
<img class="img-responsive" src="./assets/themes/zeppelin/img/spark_logo.png" width="140px" />
Apache Zeppelin with Spark integration provides
@ -62,7 +62,7 @@ For the further information about Apache Spark in Apache Zeppelin, please see [S
<br />
## Data visualization
Some basic charts are already included in Apache Zeppelin. Visualizations are not limited to SparkSQL query, any output from any language backend can be recognized and visualized.
Some basic charts are already included in Apache Zeppelin. Visualizations are not limited to Spark SQL queries; any output from any language backend can be recognized and visualized.
<div class="row">
<div class="col-md-6">
@ -133,7 +133,6 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Publish your Paragraph](./manual/publish.html) results into your external website
* [Customize Zeppelin Homepage](./manual/notebookashomepage.html) with one of your notebooks
* More
* [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html): a guide for installing Apache Zeppelin on Vagrant virtual machine
* [Upgrade Apache Zeppelin Version](./install/upgrade.html): a manual procedure of upgrading Apache Zeppelin version
####Interpreter
@ -168,6 +167,9 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Shiro Authentication](./security/shiroauthentication.html)
* [Notebook Authorization](./security/notebook_authorization.html)
* [Data Source Authorization](./security/datasource_authorization.html)
* Advanced
* [Apache Zeppelin on Vagrant VM](./install/virtual_machine.html)
* [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode)
* Contribute
* [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html)
* [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html)

View file

@ -94,6 +94,7 @@ Here are some examples with several options
```
# build with spark-2.0, scala-2.11
./dev/change_scala_version.sh 2.11
mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11
# build with spark-1.6, scala-2.10
@ -380,7 +381,10 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>org.apache.zeppelin.spark.SparkInterpreter,<br />org.apache.zeppelin.spark.PySparkInterpreter,<br />org.apache.zeppelin.spark.SparkSqlInterpreter,<br />org.apache.zeppelin.spark.DepInterpreter,<br />org.apache.zeppelin.markdown.Markdown,<br />org.apache.zeppelin.shell.ShellInterpreter,<br />
...
</td>
<td>Comma separated interpreter configurations [Class] <br /> The first interpreter will be a default value. <br /> It means only the first interpreter in this list can be available without <code>%interpreter_name</code> annotation in notebook paragraph. </td>
<td>
Comma separated interpreter configurations [Class] <br/>
<span style="font-style:italic">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0</span>
</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETER_DIR</td>
@ -394,4 +398,4 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>1024000</td>
<td>Size in characters of the maximum text message to be received by websocket.</td>
</tr>
</table>
</table>

View file

@ -0,0 +1,74 @@
---
layout: page
title: "Apache Zeppelin on Spark cluster mode"
description: ""
group: install
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Apache Zeppelin on Spark Cluster Mode
<div id="toc"></div>
## Overview
[Apache Spark](http://spark.apache.org/) has supported three cluster manager types([Standalone](http://spark.apache.org/docs/latest/spark-standalone.html), [Apache Mesos](http://spark.apache.org/docs/latest/running-on-mesos.html) and [Hadoop YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)) so far.
This document describes how to build and configure the environment on the 3 types of Spark cluster manager with Apache Zeppelin using [Docker](https://www.docker.com/) scripts.
So [install docker](https://docs.docker.com/engine/installation/) on the machine first.
## Spark standalone mode
[Spark standalone](http://spark.apache.org/docs/latest/spark-standalone.html) is a simple cluster manager included with Spark that makes it easy to set up a cluster.
You can simply set up Spark standalone environment with below steps.
> **Note :** Since Apache Zeppelin and Spark use same `8080` port for their web UI, you might need to change `zeppelin.server.port` in `conf/zeppelin-site.xml`.
### 1. Build Docker file
You can find docker script files under `scripts/docker/spark-cluster-managers`.
```
cd $ZEPPELIN_HOME/scripts/docker/spark-cluster-managers/spark_standalone
docker build -t "spark_standalone" .
```
### 2. Run docker
```
docker run -it \
-p 8080:8080 \
-p 7077:7077 \
-p 8888:8888 \
-p 8081:8081 \
-h sparkmaster \
--name spark_standalone \
spark_standalone bash;
```
### 3. Configure Spark interpreter in Zeppelin
Set Spark master as `spark://localhost:7077` in Zeppelin **Interpreters** setting page.
<img src="../assets/themes/zeppelin/img/docs-img/standalone_conf.png" />
### 4. Run Zeppelin with Spark interpreter
After running a single paragraph with Spark interpreter in Zeppelin, browse `http://localhost:8080` and check whether Spark cluster is running well or not.
<img src="../assets/themes/zeppelin/img/docs-img/spark_ui.png" />
You can also simply verify that Spark is running well in Docker with below command.
```
ps -ef | grep spark
```

View file

@ -0,0 +1,113 @@
---
layout: page
title: "BigQuery Interpreter"
description: ""
group: interpreter
---
# BigQuery Interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[BigQuery](https://cloud.google.com/bigquery/what-is-bigquery) is a highly scalable no-ops data warehouse in the Google Cloud Platform. Querying massive datasets can be time consuming and expensive without the right hardware and infrastructure. Google BigQuery solves this problem by enabling super-fast SQL queries against append-only tables using the processing power of Google's infrastructure. Simply move your data into BigQuery and let us handle the hard work. You can control access to both the project and your data based on your business needs, such as giving others the ability to view or query your data.
## Configuration
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.bigquery.project_id</td>
<td> </td>
<td>Google Project Id</td>
</tr>
<tr>
<td>zeppelin.bigquery.wait_time</td>
<td>5000</td>
<td>Query Timeout in Milliseconds</td>
</tr>
<tr>
<td>zeppelin.bigquery.max_no_of_rows</td>
<td>100000</td>
<td>Max result set size</td>
</tr>
</table>
## BigQuery API
Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
## Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
### Setup service account credentials
In order to run BigQuery interpreter outside of Google Cloud Engine you need to provide authentication credentials,
by [following these instructions](https://developers.google.com/identity/protocols/application-default-credentials):
- Go to the [API Console Credentials page](https://console.developers.google.com/project/_/apis/credentials)
- From the project drop-down, select your project.
- On the `Credentials` page, select the `Create credentials` drop-down, then select `Service account key`.
- From the Service account drop-down, select an existing service account or create a new one.
- For `Key type`, select the `JSON` key option, then select `Create`. The file automatically downloads to your computer.
- Put the `*.json` file you just downloaded in a directory of your choosing. This directory must be private (you can't let anyone get access to this), but accessible to your Zeppelin instance.
- Set the environment variable `GOOGLE_APPLICATION_CREDENTIALS` to the path of the JSON file downloaded.
* either through the GUI: in the interpreter configuration page, property names in CAPITAL_CASE set up env vars
* or through `zeppelin-env.sh`: just add it to the end of the file.
## Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For Example, SQL to query for top 10 departure delays across airports using the flights public dataset
```bash
%bigquery.sql
SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
group by departure_airport
order by 2 desc
limit 10
```
Another Example, SQL to query for most commonly used java packages from the github data hosted in BigQuery
```bash
%bigquery.sql
SELECT
package,
COUNT(*) count
FROM (
SELECT
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
id
FROM (
SELECT
SPLIT(content, '\n') line,
id
FROM
[bigquery-public-data:github_repos.sample_contents]
WHERE
content CONTAINS 'import'
AND sample_path LIKE '%.java'
HAVING
LEFT(line, 6)='import' )
GROUP BY
package,
id )
GROUP BY
1
ORDER BY
count DESC
LIMIT
40
```
## Technical description
For in-depth technical details on current implementation please refer to [bigquery/README.md](https://github.com/apache/zeppelin/blob/master/bigquery/README.md).

View file

@ -50,7 +50,7 @@ Example: `spark.master` to `livy.spark.master`
<tr>
<td>zeppelin.livy.spark.maxResult</td>
<td>1000</td>
<td>Max number of SparkSQL result to display.</td>
<td>Max number of Spark SQL result to display.</td>
</tr>
<tr>
<td>livy.spark.driver.cores</td>

View file

@ -104,7 +104,7 @@ And vice versa:
* The `knitr` environment is persistent. If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.
* (Note that `%spark.r` and `$r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.
* (Note that `%spark.r` and `%r` are two different ways of calling the same interpreter, as are `%spark.knitr` and `%knitr`. By default, Zeppelin puts the R interpreters in the `%spark.` Interpreter Group.)
* Using the `%r` interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is `hist()`, all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.

View file

@ -79,7 +79,7 @@ You can also set other Spark properties which are not listed in the table. For a
</tr>
<tr>
<td>spark.executor.memory </td>
<td>512m</td>
<td>1g</td>
<td>Executor memory per worker instance. <br/> ex) 512m, 32g</td>
</tr>
<tr>
@ -105,7 +105,7 @@ You can also set other Spark properties which are not listed in the table. For a
<tr>
<td>zeppelin.spark.maxResult</td>
<td>1000</td>
<td>Max number of SparkSQL result to display.</td>
<td>Max number of Spark SQL result to display.</td>
</tr>
<tr>
<td>zeppelin.spark.printREPLOutput</td>

View file

@ -28,7 +28,7 @@ Custom language backend can select which type of form creation it wants to use.
## Using form Templates
This mode creates form using simple template language. It's simple and easy to use. For example Markdown, Shell, SparkSql language backend uses it.
This mode creates form using simple template language. It's simple and easy to use. For example Markdown, Shell, Spark SQL language backend uses it.
### Text input form

View file

@ -47,6 +47,52 @@ You can get full list of community managed interpreters by running
./bin/install-interpreter.sh --list
```
#### Install interpreter built with Scala 2.10
From version 0.6.1, Zeppelin supports both Scala 2.10 and 2.11 for several interpreters as below:
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Maven Artifact for Scala 2.10</th>
<th>Maven Artifact for Scala 2.11</th>
</tr>
<tr>
<td>cassandra</td>
<td>org.apache.zeppelin:zeppelin-cassandra_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1</td>
</tr>
<tr>
<td>flink</td>
<td>org.apache.zeppelin:zeppelin-flink_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-flink_2.11:0.6.1</td>
</tr>
<tr>
<td>ignite</td>
<td>org.apache.zeppelin:zeppelin-ignite_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-ignite_2.11:0.6.1</td>
</tr>
<tr>
<td>spark</td>
<td>org.apache.zeppelin:zeppelin-spark_2.10:0.6.1</td>
<td>org.apache.zeppelin:zeppelin-spark_2.11:0.6.1</td>
</tr>
</table>
If you install one of these interpreters only with `--name` option, installer will download interpreter built with Scala 2.11 by default. If you want to specify Scala version, you will need to add `--artifact` option. Here is the example of installing flink interpreter built with Scala 2.10.
```
./bin/install-interpreter.sh --name flink --artifact org.apache.zeppelin:zeppelin-flink_2.10:0.6.1
```
#### Install Spark interpreter built with Scala 2.10
Spark distribution package has been built with Scala 2.10 until 1.6.2. If you have `SPARK_HOME` set pointing to Spark version earlier than 2.0.0, you need to download Spark interpreter packaged with Scala 2.10. To do so, use the following command:
```
rm -rf ./interpreter/spark
./bin/install-interpreter.sh --name spark --artifact org.apache.zeppelin:zeppelin-spark_2.10:0.6.1
```
<br />
Once you have installed interpreters, you need to restart Zeppelin. And then [create interpreter setting](../manual/interpreters.html#what-is-zeppelin-interpreter) and [bind it with your notebook](../manual/interpreters.html#what-is-zeppelin-interpreter-setting).
@ -85,82 +131,87 @@ You can also find the below community managed interpreter list in `conf/interpre
</tr>
<tr>
<td>alluxio</td>
<td>org.apache.zeppelin:zeppelin-alluxio:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-alluxio:0.6.1</td>
<td>Alluxio interpreter</td>
</tr>
<tr>
<td>angular</td>
<td>org.apache.zeppelin:zeppelin-angular:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-angular:0.6.1</td>
<td>HTML and AngularJS view rendering</td>
</tr>
<tr>
<td>bigquery</td>
<td>org.apache.zeppelin:zeppelin-bigquery:0.6.1</td>
<td>BigQuery interpreter</td>
</tr>
<tr>
<td>cassandra</td>
<td>org.apache.zeppelin:zeppelin-cassandra:0.6.0</td>
<td>Cassandra interpreter</td>
<td>org.apache.zeppelin:zeppelin-cassandra\_2.11:0.6.1</td>
<td>Cassandra interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>elasticsearch</td>
<td>org.apache.zeppelin:zeppelin-elasticsearch:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-elasticsearch:0.6.1</td>
<td>Elasticsearch interpreter</td>
</tr>
<tr>
<td>file</td>
<td>org.apache.zeppelin:zeppelin-file:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-file:0.6.1</td>
<td>HDFS file interpreter</td>
</tr>
<tr>
<td>flink</td>
<td>org.apache.zeppelin:zeppelin-flink:0.6.0</td>
<td>Flink interpreter</td>
<td>org.apache.zeppelin:zeppelin-flink\_2.11:0.6.1</td>
<td>Flink interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>hbase</td>
<td>org.apache.zeppelin:zeppelin-hbase:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-hbase:0.6.1</td>
<td>Hbase interpreter</td>
</tr>
<tr>
<td>ignite</td>
<td>org.apache.zeppelin:zeppelin-ignite:0.6.0</td>
<td>Ignite interpreter</td>
<td>org.apache.zeppelin:zeppelin-ignite\_2.11:0.6.1</td>
<td>Ignite interpreter built with Scala 2.11</td>
</tr>
<tr>
<td>jdbc</td>
<td>org.apache.zeppelin:zeppelin-jdbc:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-jdbc:0.6.1</td>
<td>Jdbc interpreter</td>
</tr>
<tr>
<td>kylin</td>
<td>org.apache.zeppelin:zeppelin-kylin:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-kylin:0.6.1</td>
<td>Kylin interpreter</td>
</tr>
<tr>
<td>lens</td>
<td>org.apache.zeppelin:zeppelin-lens:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-lens:0.6.1</td>
<td>Lens interpreter</td>
</tr>
<tr>
<td>livy</td>
<td>org.apache.zeppelin:zeppelin-livy:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-livy:0.6.1</td>
<td>Livy interpreter</td>
</tr>
<tr>
<td>md</td>
<td>org.apache.zeppelin:zeppelin-markdown:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-markdown:0.6.1</td>
<td>Markdown support</td>
</tr>
<tr>
<td>postgresql</td>
<td>org.apache.zeppelin:zeppelin-postgresql:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-postgresql:0.6.1</td>
<td>Postgresql interpreter</td>
</tr>
<tr>
<td>python</td>
<td>org.apache.zeppelin:zeppelin-python:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-python:0.6.1</td>
<td>Python interpreter</td>
</tr>
<tr>
<td>shell</td>
<td>org.apache.zeppelin:zeppelin-shell:0.6.0</td>
<td>org.apache.zeppelin:zeppelin-shell:0.6.1</td>
<td>Shell command</td>
</tr>
</table>

View file

@ -27,7 +27,7 @@ limitations under the License.
In this section, we will explain about the role of interpreters, interpreters group and interpreter settings in Zeppelin.
The concept of Zeppelin interpreter allows any language/data-processing-backend to be plugged into Zeppelin.
Currently, Zeppelin supports many interpreters such as Scala ( with Apache Spark ), Python ( with Apache Spark ), SparkSQL, JDBC, Markdown, Shell and so on.
Currently, Zeppelin supports many interpreters such as Scala ( with Apache Spark ), Python ( with Apache Spark ), Spark SQL, JDBC, Markdown, Shell and so on.
## What is Zeppelin interpreter?
Zeppelin Interpreter is a plug-in which enables Zeppelin users to use a specific language/data-processing-backend. For example, to use Scala code in Zeppelin, you need `%spark` interpreter.
@ -51,7 +51,7 @@ Each notebook can be bound to multiple Interpreter Settings using setting icon o
## What is interpreter group?
Every Interpreter belongs to an **Interpreter Group**. Interpreter Group is a unit of start/stop interpreter.
By default, every interpreter is belonged to a single group, but the group might contain more interpreters. For example, Spark interpreter group is including Spark support, pySpark, SparkSQL and the dependency loader.
By default, every interpreter belongs to a single group, but the group might contain more interpreters. For example, the Spark interpreter group includes Spark support, pySpark, Spark SQL and the dependency loader.
Technically, Zeppelin interpreters from the same group are running in the same JVM. For more information about this, please checkout [here](../development/writingzeppelininterpreter.html).

View file

@ -75,7 +75,7 @@ The role of registered interpreters, settings and interpreters group are describ
"className": "org.apache.zeppelin.spark.SparkInterpreter",
"properties": {
"spark.executor.memory": {
"defaultValue": "512m",
"defaultValue": "1g",
"description": "Executor memory per worker instance. ex) 512m, 32g"
},
"spark.cores.max": {
@ -92,7 +92,7 @@ The role of registered interpreters, settings and interpreters group are describ
"properties": {
"zeppelin.spark.maxResult": {
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
}
},
"path": "/zeppelin/interpreter/spark"
@ -154,7 +154,7 @@ The role of registered interpreters, settings and interpreters group are describ
"group": "spark",
"properties": {
"spark.cores.max": "",
"spark.executor.memory": "512m",
"spark.executor.memory": "1g",
},
"interpreterGroup": [
{
@ -460,4 +460,4 @@ The role of registered interpreters, settings and interpreters group are describ
<td> 500 </td>
</tr>
</table>

View file

@ -21,7 +21,7 @@ limitations under the License.
<div class="row">
<div class="col-md-3">
<a href="assets/themes/zeppelin/img/screenshots/sparksql.png"><img class="thumbnail" src="assets/themes/zeppelin/img/screenshots/sparksql.png" /></a>
<center>SparkSQL with inline visualization</center>
<center>Spark SQL with inline visualization</center>
</div>
<div class="col-md-3">
<a href="assets/themes/zeppelin/img/screenshots/spark.png"><img class="thumbnail" src="assets/themes/zeppelin/img/screenshots/spark.png" /></a>

View file

@ -105,6 +105,33 @@ finance = *
group1 = *
```
## Configure Realm (optional)
Realms are responsible for authentication and authorization in Apache Zeppelin. By default, Apache Zeppelin uses [IniRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/text/IniRealm.html) (users and groups are configurable in the `conf/shiro.ini` file under the `[users]` and `[roles]` sections). You can also leverage Shiro Realms like [JndiLdapRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/ldap/JndiLdapRealm.html), [JdbcRealm](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/jdbc/JdbcRealm.html) or create [your own](https://shiro.apache.org/static/latest/apidocs/org/apache/shiro/realm/AuthorizingRealm.html).
To learn more about Apache Shiro Realm, please check [this documentation](http://shiro.apache.org/realm.html).
We also provide community custom Realms.
### Active Directory
TBD
### LDAP
TBD
### ZeppelinHub
[ZeppelinHub](https://www.zeppelinhub.com) is a service that synchronizes your Apache Zeppelin notebooks and enables you to collaborate easily.
To enable login with your ZeppelinHub credentials, apply the following change in `conf/shiro.ini` under the `[main]` section.
```
### A sample for configuring ZeppelinHub Realm
zeppelinHubRealm = org.apache.zeppelin.realm.ZeppelinHubRealm
## Url of ZeppelinHub
zeppelinHubRealm.zeppelinhubUrl = https://www.zeppelinhub.com
securityManager.realms = $zeppelinHubRealm
```
> Note: ZeppelinHub is not related to the Apache Zeppelin project.
## Secure your Zeppelin information (optional)
By default, anyone who is defined in `[users]` can share **Interpreter Setting**, **Credential** and **Configuration** information in Apache Zeppelin.
Sometimes you might want to hide this information for your use case.
@ -123,3 +150,4 @@ If you want to grant this permission to other users, you can change **roles[ ]**
<br/>
> **NOTE :** All of the above configurations are defined in the `conf/shiro.ini` file. This documentation is originally from [SECURITY-README.md](https://github.com/apache/zeppelin/blob/master/SECURITY-README.md).

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Elasticsearch interpreter</name>
<url>http://www.apache.org</url>
<properties>
<elasticsearch.version>2.3.3</elasticsearch.version>

View file

@ -30,7 +30,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: File System Interpreters</name>
<url>http://www.apache.org</url>
<dependencies>
<dependency>

View file

@ -32,7 +32,6 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Flink</name>
<description>Zeppelin flink support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<flink.version>1.0.3</flink.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Geode interpreter</name>
<url>http://geode.incubator.apache.org/</url>
<properties>
<geode.version>1.0.0-incubating-SNAPSHOT</geode.version>

View file

@ -30,7 +30,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: HBase interpreter</name>
<url>http://www.apache.org</url>
<properties>
<hbase.hbase.version>1.0.0</hbase.hbase.version>

View file

@ -30,7 +30,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Ignite interpreter</name>
<url>http://zeppelin.apache.org</url>
<properties>
<ignite.version>1.5.0.final</ignite.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: JDBC interpreter</name>
<url>http://www.apache.org</url>
<properties>
<postgresql.version>9.4-1201-jdbc41</postgresql.version>

View file

@ -32,8 +32,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Kylin interpreter</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Lens interpreter</name>
<url>http://www.apache.org</url>
<properties>
<lens.version>2.5.0-beta</lens.version>

View file

@ -0,0 +1,176 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

View file

@ -33,7 +33,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Livy interpreter</name>
<url>http://zeppelin.apache.org</url>
<properties>
<!--TEST-->

View file

@ -93,7 +93,7 @@
"envName": "ZEPPELIN_LIVY_MAXRESULT",
"propertyName": "zeppelin.livy.spark.sql.maxResult",
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
},
"zeppelin.livy.concurrentSQL": {
"propertyName": "zeppelin.livy.concurrentSQL",
@ -116,4 +116,4 @@
"properties": {
}
}
]
]

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Markdown interpreter</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

16
pom.xml
View file

@ -27,7 +27,7 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin</name>
<description>Zeppelin project</description>
<url>http://zeppelin.apache.org/</url>
<url>http://zeppelin.apache.org</url>
<parent>
<groupId>org.apache</groupId>
@ -72,6 +72,7 @@
<module>lens</module>
<module>cassandra</module>
<module>elasticsearch</module>
<module>bigquery</module>
<module>alluxio</module>
<module>zeppelin-web</module>
<module>zeppelin-server</module>
@ -341,18 +342,17 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
<version>1.4</version>
<version>1.5</version>
<executions>
<execution>
<id>process-remote-resources</id>
<goals>
<goal>process</goal>
</goals>
<configuration>
<resourceBundles>
<!-- Will generate META-INF/DEPENDENCIES META-INF/LICENSE META-INF/NOTICE -->
<resourceBundle>org.apache.apache.resources:apache-jar-resource-bundle:1.5-SNAPSHOT</resourceBundle>
<resourceBundle>org.apache:apache-jar-resource-bundle:1.0</resourceBundle>
</resourceBundles>
</configuration>
</execution>
@ -460,6 +460,7 @@
<exclude>spark-*-bin*/**</exclude>
<exclude>.spark-dist/**</exclude>
<exclude>**/interpreter-setting.json</exclude>
<exclude>**/constants.json</exclude>
<!-- bundled from bootstrap -->
<exclude>docs/assets/themes/zeppelin/bootstrap/**</exclude>
@ -651,7 +652,7 @@
<profile>
<id>scala-2.10</id>
<activation>
<property><name>!scala-2.11</name></property>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<scala.version>2.10.5</scala.version>
@ -661,9 +662,6 @@
<profile>
<id>scala-2.11</id>
<activation>
<property><name>scala-2.11</name></property>
</activation>
<properties>
<scala.version>2.11.7</scala.version>
<scala.binary.version>2.11</scala.binary.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: PostgreSQL interpreter</name>
<url>http://www.apache.org</url>
<properties>
<postgresql.version>9.4-1201-jdbc41</postgresql.version>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Python interpreter</name>
<url>http://zeppelin.apache.org</url>
<properties>
<py4j.version>0.9.2</py4j.version>

View file

@ -33,76 +33,78 @@ def intHandler(signum, frame): # Set the signal handler
signal.signal(signal.SIGINT, intHandler)
def help():
print ('%html')
print ('<h2>Python Interpreter help</h2>')
print ('<h3>Python 2 & 3 compatibility</h3>')
print ('<p>The interpreter is compatible with Python 2 & 3.<br/>')
print ('To change Python version, ')
print ('change in the interpreter configuration the python to the ')
print ('desired version (example : python=/usr/bin/python3)</p>')
print ('<h3>Python modules</h3>')
print ('<p>The interpreter can use all modules already installed ')
print ('(with pip, easy_install, etc)</p>')
print ('<h3>Forms</h3>')
print ('You must install py4j in order to use '
'the form feature (pip install py4j)')
print ('<h4>Input form</h4>')
print ('<pre>print (z.input("f1","defaultValue"))</pre>')
print ('<h4>Selection form</h4>')
print ('<pre>print(z.select("f2", [("o1","1"), ("o2","2")],2))</pre>')
print ('<h4>Checkbox form</h4>')
print ('<pre> print("".join(z.checkbox("f3", [("o1","1"), '
'("o2","2")],["1"])))</pre>')
print ('<h3>Matplotlib graph</h3>')
print ('<div>The interpreter can display matplotlib graph with ')
print ('the function z.show()</div>')
print ('<div> You need to already have matplotlib module installed ')
print ('to use this functionality !</div><br/>')
print ('''<pre>import matplotlib.pyplot as plt
plt.figure()
(.. ..)
z.show(plt)
plt.close()
</pre>''')
print ('<div><br/> z.show function can take optional parameters ')
print ('to adapt graph width and height</div>')
print ("<div><b>example </b>:")
print ('''<pre>z.show(plt,width='50px')
z.show(plt,height='150px') </pre></div>''')
print ('<h3>Pandas DataFrame</h3>')
print ('<div> You need to have Pandas module installed ')
print ('to use this functionality (pip install pandas) !</div><br/>')
print ("""
<div>The interpreter can visualize Pandas DataFrame
with the function z.show()
<pre>
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
z.show(df)
</pre></div>
""")
print ('<h3>SQL over Pandas DataFrame</h3>')
print ('<div> You need to have Pandas&Pandasql modules installed ')
print ('to use this functionality (pip install pandas pandasql) !</div><br/>')
print ("""
<div>Python interpreter group includes %sql interpreter that can query
Pandas DataFrames using SQL and visualize results using Zeppelin Table Display System
print("""%html
<h2>Python Interpreter help</h2>
<pre>
%python
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
</pre>
<br />
<h3>Python 2 & 3 compatibility</h3>
<p>The interpreter is compatible with Python 2 & 3.<br/>
To change Python version,
change in the interpreter configuration the python to the
desired version (example : python=/usr/bin/python3)</p>
<pre>
%python.sql
%sql
SELECT * from df LIMIT 5
</pre></div>
""")
<h3>Python modules</h3>
<p>The interpreter can use all modules already installed
(with pip, easy_install, etc)</p>
<h3>Forms</h3>
You must install py4j in order to use
the form feature (pip install py4j)
<h4>Input form</h4>
<pre>print (z.input("f1","defaultValue"))</pre>
<h4>Selection form</h4>
<pre>print(z.select("f2", [("o1","1"), ("o2","2")],2))</pre>
<h4>Checkbox form</h4>
<pre> print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))</pre>
<h3>Matplotlib graph</h3>
<div>The interpreter can display matplotlib graph with
the function z.show()</div>
<div> You need to already have matplotlib module installed
to use this functionality !</div><br/>
<pre>import matplotlib.pyplot as plt
plt.figure()
(.. ..)
z.show(plt)
plt.close()
</pre>
<div><br/> z.show function can take optional parameters
to adapt graph width and height</div>
<div><b>example </b>:
<pre>z.show(plt,width='50px')
z.show(plt,height='150px') </pre></div>
<h3>Pandas DataFrame</h3>
<div> You need to have Pandas module installed
to use this functionality (pip install pandas) !</div><br/>
<div>The interpreter can visualize Pandas DataFrame
with the function z.show()
<pre>
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
z.show(df)
</pre></div>
<h3>SQL over Pandas DataFrame</h3>
<div> You need to have Pandas&Pandasql modules installed
to use this functionality (pip install pandas pandasql) !</div><br/>
<div>Python interpreter group includes %sql interpreter that can query
Pandas DataFrames using SQL and visualize results using Zeppelin Table Display System
<pre>
%python
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
</pre>
<br />
<pre>
%python.sql
%sql
SELECT * from df LIMIT 5
</pre>
</div>
""")
class PyZeppelinContext(object):
@ -112,18 +114,17 @@ class PyZeppelinContext(object):
errorMsg = "You must install py4j Python module " \
"(pip install py4j) to use Zeppelin dynamic forms features"
def __init__(self, zc):
self.z = zc
def __init__(self):
self.max_result = 1000
def input(self, name, defaultValue=""):
print (self.errorMsg)
print(self.errorMsg)
def select(self, name, options, defaultValue=""):
print (self.errorMsg)
print(self.errorMsg)
def checkbox(self, name, options, defaultChecked=[]):
print (self.errorMsg)
print(self.errorMsg)
def show(self, p, **kwargs):
if hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot":
@ -140,19 +141,19 @@ class PyZeppelinContext(object):
"""
limit = len(df) > self.max_result
header_buf = io.StringIO("")
header_buf.write(df.columns[0])
header_buf.write(str(df.columns[0]))
for col in df.columns[1:]:
header_buf.write("\t")
header_buf.write(col)
header_buf.write(str(col))
header_buf.write("\n")
body_buf = io.StringIO("")
rows = df.head(self.max_result).values if limit else df.values
for row in rows:
body_buf.write(row[0])
body_buf.write(str(row[0]))
for cell in row[1:]:
body_buf.write("\t")
body_buf.write(cell)
body_buf.write(str(cell))
body_buf.write("\n")
body_buf.seek(0); header_buf.seek(0)
#TODO(bzz): fix it, so it shows red notice, as in Spark
@ -162,21 +163,14 @@ class PyZeppelinContext(object):
#)
body_buf.close(); header_buf.close()
def show_matplotlib(self, p, width="0", height="0", **kwargs):
def show_matplotlib(self, p, width="100%", height="100%", **kwargs):
"""Matplotlib show function
"""
img = io.StringIO()
p.savefig(img, format='svg')
img.seek(0)
style = ""
if (width != "0"):
style += 'width:' + width
if (height != "0"):
if (len(style) != 0):
style += ","
style += 'height:' + height
print("%html <div style='" + style + "'>" + img.read() + "<div>")
p.savefig(img, format="svg")
html = "%html <div style='width:{width};height:{height}'>{image}<div>"
print(html.format(width=width, height=height, image=img.getvalue()))
img.close()
z = PyZeppelinContext("")
z = PyZeppelinContext()

View file

@ -25,11 +25,11 @@ java_import(gateway.jvm, "org.apache.zeppelin.display.Input")
class Py4jZeppelinContext(PyZeppelinContext):
"""A context impl that uses Py4j to communicate to JVM
"""
def __init__(self, zc):
super(Py4jZeppelinContext, self).__init__(zc)
def __init__(self, z):
self.z = z
self.paramOption = gateway.jvm.org.apache.zeppelin.display.Input.ParamOption
self.javaList = gateway.jvm.java.util.ArrayList
self.max_result = 1000 #TODO(bzz): read `zeppelin.python.maxResult` from JVM
self.max_result = self.z.getMaxResult()
def input(self, name, defaultValue=""):
return self.z.getGui().input(name, defaultValue)

View file

@ -153,4 +153,24 @@ public class PythonInterpreterPandasSqlTest {
assertTrue(ret.message().length() > 0);
}
/**
 * Checks that {@code z.show()} renders a Pandas DataFrame containing non-text
 * values (an integer column label, NaN and floats) as a TABLE result.
 */
@Test
public void showDataFrame() {
  // given a Pandas DataFrame with non-text data
  python.interpret("import pandas as pd", context);
  python.interpret("import numpy as np", context);
  python.interpret("d1 = {1 : [np.nan, 1, 2, 3], 'two' : [3., 4., 5., 6.7]}", context);
  InterpreterResult created = python.interpret("df1 = pd.DataFrame(d1)", context);
  assertEquals(created.message(), InterpreterResult.Code.SUCCESS, created.code());

  // when the DataFrame is displayed
  InterpreterResult shown = python.interpret("z.show(df1)", context);

  // then it succeeds as a table whose text includes the NaN and float cells
  assertEquals(shown.message(), InterpreterResult.Code.SUCCESS, shown.code());
  assertEquals(shown.message(), Type.TABLE, shown.type());
  String rendered = shown.message();
  assertTrue(rendered.indexOf("nan") > 0);
  assertTrue(rendered.indexOf("6.7") > 0);
}
}

View file

@ -42,7 +42,7 @@ import org.junit.Test;
public class PythonInterpreterWithPythonInstalledTest {
@Test
public void badSqlSyntaxFails() {
public void badPythonSyntaxFails() {
//given
PythonInterpreter realPython = new PythonInterpreter(
PythonInterpreterTest.getPythonTestProperties());
@ -58,4 +58,21 @@ public class PythonInterpreterWithPythonInstalledTest {
assertTrue(ret.message().length() > 0);
}
/**
 * Runs {@code help()} on a real Python interpreter and expects a successful,
 * non-empty result.
 */
@Test
public void goodPythonSyntaxRuns() {
  // given an opened interpreter built from the shared test properties
  PythonInterpreter interpreter =
      new PythonInterpreter(PythonInterpreterTest.getPythonTestProperties());
  interpreter.open();

  // when a valid statement is interpreted (no interpreter context is passed)
  InterpreterResult result = interpreter.interpret("help()", null);

  // then a result comes back, it reports success, and it carries some output
  assertNotNull("Interpreter returned 'null'", result);
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  String output = result.message();
  assertTrue(output.length() > 0);
}
}

View file

@ -380,7 +380,7 @@
<profile>
<id>scala-2.10</id>
<activation>
<property><name>!scala-2.11</name></property>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<extra.source.dir>src/main/scala-2.10</extra.source.dir>
@ -390,9 +390,6 @@
<profile>
<id>scala-2.11</id>
<activation>
<property><name>scala-2.11</name></property>
</activation>
<properties>
<extra.source.dir>src/main/scala-2.11</extra.source.dir>
<extra.testsource.dir>src/test/scala/scala-2.11</extra.testsource.dir>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Scalding interpreter</name>
<url>http://zeppelin.apache.org</url>
<properties>
<hadoop.version>2.6.0</hadoop.version>

View file

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Image that installs OpenJDK 7 and a Spark binary distribution on CentOS 6;
# the entrypoint script copied in below is what the container runs on start.
FROM centos:centos6
MAINTAINER hsshim@nflabs.com

# Spark / Hadoop versions used to build the download URL below.
ENV SPARK_PROFILE 1.6
ENV SPARK_VERSION 1.6.2
ENV HADOOP_PROFILE 2.3
ENV SPARK_HOME /usr/local/spark

# Update the image with the latest packages
RUN yum update -y; yum clean all

# Get utils
RUN yum install -y \
    wget \
    tar \
    curl \
    && \
    yum clean all

# Remove old jdk
# -y is required: image builds are non-interactive, so yum cannot be answered
# at its confirmation prompt.
RUN yum remove -y java; yum remove -y jdk

# install jdk7
RUN yum install -y java-1.7.0-openjdk-devel
ENV JAVA_HOME /usr/lib/jvm/java
ENV PATH $PATH:$JAVA_HOME/bin

# install spark
RUN curl -s http://apache.mirror.cdnetworks.com/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_PROFILE.tgz | tar -xz -C /usr/local/
# Symlink the versioned directory to the path SPARK_HOME points at.
RUN cd /usr/local && ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_PROFILE spark

# update boot script
COPY entrypoint.sh /etc/entrypoint.sh
# Use the owner:group separator; the dotted "root.root" form is deprecated.
RUN chown root:root /etc/entrypoint.sh
RUN chmod 700 /etc/entrypoint.sh

#spark
EXPOSE 8080 7077 8888 8081

ENTRYPOINT ["/etc/entrypoint.sh"]

View file

@ -0,0 +1,31 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Start a Spark master and one slave registered to it, then either run sshd in
# the foreground (first argument "-d") or execute the given arguments as a
# shell command.
export SPARK_MASTER_PORT=7077

# run spark
# Stop here if the Spark sbin directory is missing; otherwise the start
# scripts below would be looked up in whatever the current directory is.
cd /usr/local/spark/sbin || exit 1
./start-master.sh
./start-slave.sh "spark://$(hostname):${SPARK_MASTER_PORT}"

# The default value is only used for the comparison below; with no arguments
# the else-branch runs an empty command string.
CMD=${1:-"exit 0"}
if [[ "$CMD" == "-d" ]]; then
  service sshd stop
  /usr/sbin/sshd -D -d
else
  /bin/bash -c "$*"
fi

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Shell interpreter</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

View file

@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
@ -50,7 +51,7 @@ public class ShellInterpreter extends Interpreter {
private static final String TIMEOUT_PROPERTY = "shell.command.timeout.millisecs";
private final boolean isWindows = System.getProperty("os.name").startsWith("Windows");
private final String shell = isWindows ? "cmd /c" : "bash -c";
private Map<String, DefaultExecutor> executors;
ConcurrentHashMap<String, DefaultExecutor> executors;
public ShellInterpreter(Properties property) {
super(property);
@ -59,7 +60,7 @@ public class ShellInterpreter extends Interpreter {
@Override
public void open() {
LOGGER.info("Command timeout property: {}", getProperty(TIMEOUT_PROPERTY));
executors = new HashMap<String, DefaultExecutor>();
executors = new ConcurrentHashMap<String, DefaultExecutor>();
if (!StringUtils.isAnyEmpty(getProperty("zeppelin.shell.auth.type"))) {
ShellSecurityImpl.createSecureConfiguration(getProperty(), shell);
}
@ -73,7 +74,6 @@ public class ShellInterpreter extends Interpreter {
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
LOGGER.debug("Run shell command '" + cmd + "'");
OutputStream outStream = new ByteArrayOutputStream();
OutputStream errStream = new ByteArrayOutputStream();
CommandLine cmdLine = CommandLine.parse(shell);
// the Windows CMD shell doesn't handle multiline statements,
@ -86,7 +86,7 @@ public class ShellInterpreter extends Interpreter {
try {
DefaultExecutor executor = new DefaultExecutor();
executor.setStreamHandler(new PumpStreamHandler(outStream, errStream));
executor.setStreamHandler(new PumpStreamHandler(outStream, outStream));
executor.setWatchdog(new ExecuteWatchdog(Long.valueOf(getProperty(TIMEOUT_PROPERTY))));
executors.put(contextInterpreter.getParagraphId(), executor);
int exitVal = executor.execute(cmdLine);
@ -97,7 +97,7 @@ public class ShellInterpreter extends Interpreter {
int exitValue = e.getExitValue();
LOGGER.error("Can not run " + cmd, e);
Code code = Code.ERROR;
String message = errStream.toString();
String message = outStream.toString();
if (exitValue == 143) {
code = Code.INCOMPLETE;
message += "Paragraph received a SIGTERM.\n";
@ -109,16 +109,16 @@ public class ShellInterpreter extends Interpreter {
} catch (IOException e) {
LOGGER.error("Can not run " + cmd, e);
return new InterpreterResult(Code.ERROR, e.getMessage());
} finally {
executors.remove(contextInterpreter.getParagraphId());
}
}
@Override
public void cancel(InterpreterContext context) {
for (String paragraphId : executors.keySet()) {
if (paragraphId.equals(context.getParagraphId())) {
DefaultExecutor executor = executors.get(paragraphId);
executor.getWatchdog().destroyProcess();
}
DefaultExecutor executor = executors.remove(context.getParagraphId());
if (executor != null) {
executor.getWatchdog().destroyProcess();
}
}

View file

@ -1,59 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.shell;
import static org.junit.Assert.assertEquals;
import java.util.Properties;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Smoke test for {@link ShellInterpreter}: runs the platform's directory-listing command and
 * expects the interpreter to report success.
 */
public class ShellInterpreterTest {

  /** Same OS check the test has always used: the "os.name" property starts with "Windows". */
  private static final boolean IS_WINDOWS = System.getProperty("os.name").startsWith("Windows");

  private ShellInterpreter shell;

  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("shell.command.timeout.millisecs", "60000");
    shell = new ShellInterpreter(p);
  }

  @After
  public void tearDown() throws Exception {
  }

  /** A directory listing must finish with {@code Code.SUCCESS}. */
  @Test
  public void test() {
    shell.open();
    InterpreterContext context =
        new InterpreterContext("", "1", "", "", null, null, null, null, null, null, null);
    // The result is assigned exactly once; the previous placeholder InterpreterResult(ERROR)
    // was overwritten on every path and never observed.
    InterpreterResult result = shell.interpret(listDirectoryCommand(), context);
    assertEquals(Code.SUCCESS, result.code());
  }

  /** Returns the directory-listing command for the current platform ("dir" or "ls"). */
  private static String listDirectoryCommand() {
    return IS_WINDOWS ? "dir" : "ls";
  }
}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.shell;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Properties;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link ShellInterpreter}: a successful command, cancelling an already finished
 * paragraph, and a script whose first line is an unknown command.
 */
public class ShellInterpreterTest {

  /** Same OS check the tests have always used: the "os.name" property starts with "Windows". */
  private static final boolean IS_WINDOWS = System.getProperty("os.name").startsWith("Windows");

  private ShellInterpreter shell;

  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("shell.command.timeout.millisecs", "60000");
    shell = new ShellInterpreter(p);
  }

  @After
  public void tearDown() throws Exception {
  }

  /**
   * A directory listing must succeed and leave no executor registered; cancelling the finished
   * paragraph afterwards must be harmless.
   */
  @Test
  public void test() {
    shell.open();
    InterpreterContext context = newContext();
    // The result is assigned exactly once; the previous placeholder InterpreterResult(ERROR)
    // was overwritten on every path and never observed.
    InterpreterResult result = shell.interpret(listDirectoryCommand(), context);

    assertEquals(Code.SUCCESS, result.code());
    assertTrue(shell.executors.isEmpty());

    // it should be fine to cancel a statement that has been completed.
    shell.cancel(context);
    assertTrue(shell.executors.isEmpty());
  }

  /**
   * An unknown command followed by a valid one: the paragraph still ends with SUCCESS and the
   * returned message mentions the unknown command.
   */
  @Test
  public void testInvalidCommand() {
    shell.open();
    InterpreterResult result =
        shell.interpret("invalid_command\n" + listDirectoryCommand(), newContext());

    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("invalid_command"));
  }

  /** Builds the minimal context both tests use; only the paragraph id ("1") is set. */
  private static InterpreterContext newContext() {
    return new InterpreterContext("", "1", "", "", null, null, null, null, null, null, null);
  }

  /** Returns the directory-listing command for the current platform ("dir" or "ls"). */
  private static String listDirectoryCommand() {
    return IS_WINDOWS ? "dir" : "ls";
  }
}

View file

@ -33,7 +33,6 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Spark dependencies</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<!-- library version defined in this section brought from spark 1.4.1 and it's dependency.

View file

@ -32,13 +32,12 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Spark</name>
<description>Zeppelin spark support</description>
<url>http://zeppelin.apache.org</url>
<properties>
<jsoup.version>1.8.2</jsoup.version>
<mockito.version>1.10.19</mockito.version>
<powermock.version>1.6.4</powermock.version>
<spark.version>1.6.2</spark.version>
<spark.version>2.0.0</spark.version>
</properties>
<dependencies>
@ -435,11 +434,517 @@
</executions>
</plugin>
<!-- exclude sparkr by default. sparkr is enabled by profile 'sparkr' -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreter.java</exclude>
</excludes>
<testExcludes>
<testExclude>**/SparkRInterpreterTest.java</testExclude>
<testExclude>**/ZeppelinRTest.java</testExclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ZeppelinR.scala</exclude>
<exclude>**/SparkRBackend.scala</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<!-- to deactivate 'exclude-sparkr' automatically when 'spark' is activated -->
<profile>
<id>spark-1.1</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.1.1</spark.version>
<akka.version>2.2.3-shaded-protobuf</akka.version>
</properties>
</profile>
<profile>
<id>cassandra-spark-1.1</id>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.1.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<properties>
<spark.version>1.1.1</spark.version>
<akka.version>2.2.3-shaded-protobuf</akka.version>
</properties>
</profile>
<profile>
<id>spark-1.2</id>
<dependencies>
</dependencies>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
</profile>
<profile>
<id>cassandra-spark-1.2</id>
<properties>
<spark.version>1.2.1</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.2.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.3</id>
<properties>
<spark.version>1.3.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.3</id>
<properties>
<spark.version>1.3.0</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.3.1</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.4</id>
<properties>
<spark.version>1.4.1</spark.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.4.0</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.5</id>
<properties>
<spark.version>1.5.2</spark.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
<dependencies>
</dependencies>
</profile>
<profile>
<id>cassandra-spark-1.5</id>
<properties>
<spark.version>1.5.1</spark.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
<guava.version>16.0.1</guava.version>
</properties>
<dependencies>
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId>
<version>1.5.0</version>
<exclusions>
<exclusion>
<groupId>org.joda</groupId>
<artifactId>joda-convert</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-1.6</id>
<properties>
<spark.version>1.6.1</spark.version>
<py4j.version>0.9</py4j.version>
<akka.group>com.typesafe.akka</akka.group>
<akka.version>2.3.11</akka.version>
<protobuf.version>2.5.0</protobuf.version>
</properties>
</profile>
<profile>
<id>spark-2.0</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.0.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.1</py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>
<profile>
<id>hadoop-0.23</id>
<!-- SPARK-1121: Adds an explicit dependency on Avro to work around a
Hadoop 0.23.X issue -->
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
</dependencies>
<properties>
<hadoop.version>0.23.10</hadoop.version>
</properties>
</profile>
<profile>
<id>hadoop-1</id>
<properties>
<hadoop.version>1.0.4</hadoop.version>
<avro.mapred.classifier>hadoop1</avro.mapred.classifier>
<codehaus.jackson.version>1.8.8</codehaus.jackson.version>
<akka.group>org.spark-project.akka</akka.group>
</properties>
</profile>
<profile>
<id>hadoop-2.2</id>
<properties>
<hadoop.version>2.2.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.3</id>
<properties>
<hadoop.version>2.3.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.4</id>
<properties>
<hadoop.version>2.4.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.6</id>
<properties>
<hadoop.version>2.6.0</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>hadoop-2.7</id>
<properties>
<hadoop.version>2.7.2</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.0</jets3t.version>
<avro.mapred.classifier>hadoop2</avro.mapred.classifier>
</properties>
</profile>
<profile>
<id>mapr3</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>1.0.3-mapr-3.0.3</hadoop.version>
<yarn.version>2.3.0-mapr-4.0.0-FCS</yarn.version>
<jets3t.version>0.7.1</jets3t.version>
</properties>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr40</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.4.1-mapr-1503</hadoop.version>
<yarn.version>2.4.1-mapr-1503</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr41</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.5.1-mapr-1503</hadoop.version>
<yarn.version>2.5.1-mapr-1503</yarn.version>
<jets3t.version>0.7.1</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr50</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.7.0-mapr-1506</hadoop.version>
<yarn.version>2.7.0-mapr-1506</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>mapr51</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<hadoop.version>2.7.0-mapr-1602</hadoop.version>
<yarn.version>2.7.0-mapr-1602</yarn.version>
<jets3t.version>0.9.3</jets3t.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.5-mapr-1503</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>
</profile>
<profile>
<id>yarn</id>
<properties>
<yarn.version>${hadoop.version}</yarn.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
</dependency>
</dependencies>
</profile>
<!-- include sparkr in the build -->
<profile>
<id>sparkr</id>
<build>
@ -454,36 +959,21 @@
<directory>src/main/sparkr-resources</directory>
</resource>
</resources>
</build>
</profile>
<profile>
<id>exclude-sparkr</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreter.java</exclude>
</excludes>
<testExcludes>
<testExclude>**/SparkRInterpreterTest.java</testExclude>
<testExclude>**/ZeppelinRTest.java</testExclude>
</testExcludes>
<excludes combine.self="override"></excludes>
<testExcludes combine.self="override"></testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ZeppelinR.scala</exclude>
<exclude>**/SparkRBackend.scala</exclude>
<excludes combine.self="override">
</excludes>
</configuration>
</plugin>
@ -491,8 +981,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/SparkRInterpreterTest.java</exclude>
<excludes combine.self="override">
</excludes>
</configuration>
</plugin>

View file

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.spark;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Line-oriented {@link OutputStream}: written bytes are buffered until a line terminator
 * (CR, LF or CR LF) arrives, and every completed line is passed to
 * {@link #processLine(String, int)}.
 *
 * <p>Minor modification of LogOutputStream of apache commons exec. In the commons exec version
 * the method flush doesn't throw IOException, so SparkOutputStream can not extend it correctly.
 */
public abstract class LogOutputStream extends OutputStream {
  /** Initial capacity of the line buffer. */
  private static final int INITIAL_SIZE = 132;
  /** Carriage return. */
  private static final int CR = 0x0d;
  /** Line feed. */
  private static final int LF = 0x0a;

  /** Bytes of the line currently being assembled. */
  private final ByteArrayOutputStream buffer;
  /** True when the previous byte was a CR, so that a directly following LF is not a new line. */
  private boolean skip;
  /** Level handed to {@link #processLine(String, int)} together with every line. */
  private final int level;

  /** Creates a stream that reports lines with level 999. */
  public LogOutputStream() {
    this(999);
  }

  /**
   * Creates a stream that reports lines with the given level.
   *
   * @param level value passed as second argument of {@link #processLine(String, int)}
   */
  public LogOutputStream(int level) {
    this.buffer = new ByteArrayOutputStream(INITIAL_SIZE);
    this.skip = false;
    this.level = level;
  }

  /**
   * Writes a single byte; a line terminator completes the buffered line.
   *
   * @param cc the byte to write (only the low eight bits are used)
   */
  @Override
  public void write(int cc) throws IOException {
    handleByte(cc);
  }

  /** Emits the buffered partial line, if there is one. */
  @Override
  public void flush() throws IOException {
    emitPendingLine();
  }

  /** Emits the buffered partial line, if there is one, and closes the stream. */
  @Override
  public void close() throws IOException {
    emitPendingLine();
    super.close();
  }

  /**
   * @return the level this stream was created with
   */
  public int getMessageLevel() {
    return this.level;
  }

  /**
   * Writes {@code len} bytes of {@code b} starting at {@code off}. Runs of ordinary bytes are
   * appended to the line buffer in one go.
   *
   * <p>Line terminators are handled by a private helper rather than by the overridable
   * {@link #write(int)}: a subclass that overrides both write methods to forward bytes elsewhere
   * would otherwise receive every terminator of a bulk write a second time.
   */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    int offset = off;
    int remaining = len;
    while (remaining > 0) {
      // copy the run of non-terminator bytes starting at 'offset'
      final int blockStart = offset;
      while (remaining > 0 && !isLineTerminator(b[offset])) {
        ++offset;
        --remaining;
      }
      final int blockLength = offset - blockStart;
      if (blockLength > 0) {
        this.buffer.write(b, blockStart, blockLength);
      }
      // consume the terminators that follow the run
      while (remaining > 0 && isLineTerminator(b[offset])) {
        handleByte(b[offset]);
        ++offset;
        --remaining;
      }
    }
  }

  /**
   * Passes the buffered bytes to {@link #processLine(String)} and clears the buffer. The bytes
   * are decoded by {@code ByteArrayOutputStream.toString()}, i.e. with the platform default
   * charset.
   */
  protected void processBuffer() {
    this.processLine(this.buffer.toString());
    this.buffer.reset();
  }

  /**
   * Reports one line using the level of this stream.
   *
   * @param line the completed line, without its terminator
   */
  protected void processLine(String line) {
    this.processLine(line, this.level);
  }

  /**
   * Receives every completed line.
   *
   * @param line the completed line, without its terminator
   * @param logLevel the level this stream was created with
   */
  protected abstract void processLine(String line, int logLevel);

  /** Tells whether the byte is CR or LF. */
  private static boolean isLineTerminator(byte c) {
    return c == LF || c == CR;
  }

  /**
   * Core of the single-byte write: buffers ordinary bytes and completes the line on CR or LF.
   * The LF of a CR LF pair is swallowed so the pair counts as one terminator.
   */
  private void handleByte(int cc) {
    final byte c = (byte) cc;
    if (!isLineTerminator(c)) {
      this.buffer.write(cc);
    } else if (!this.skip) {
      this.processBuffer();
    }
    this.skip = c == CR;
  }

  /** Emits the buffered bytes as a line when the buffer is not empty. */
  private void emitPendingLine() {
    if (this.buffer.size() > 0) {
      this.processBuffer();
    }
  }
}

View file

@ -179,7 +179,7 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
cmd.addArgument(Integer.toString(port), false);
cmd.addArgument(Integer.toString(getSparkInterpreter().getSparkVersion().toNumber()), false);
executor = new DefaultExecutor();
outputStream = new SparkOutputStream();
outputStream = new SparkOutputStream(logger);
PipedOutputStream ps = new PipedOutputStream();
in = null;
try {

View file

@ -18,6 +18,7 @@
package org.apache.zeppelin.spark;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
@ -32,6 +33,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.base.Joiner;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkEnv;
@ -65,10 +67,8 @@ import scala.Enumeration.Value;
import scala.collection.Iterator;
import scala.collection.JavaConversions;
import scala.collection.JavaConverters;
import scala.collection.convert.WrapAsJava;
import scala.collection.Seq;
import scala.collection.convert.WrapAsJava$;
import scala.collection.convert.WrapAsScala;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.reflect.io.AbstractFile;
@ -112,7 +112,7 @@ public class SparkInterpreter extends Interpreter {
/**
* completer - org.apache.spark.repl.SparkJLineCompletion (scala 2.10)
*/
private Object completer;
private Object completer = null;
private Map<String, Object> binder;
private SparkVersion sparkVersion;
@ -122,7 +122,7 @@ public class SparkInterpreter extends Interpreter {
public SparkInterpreter(Properties property) {
super(property);
out = new SparkOutputStream();
out = new SparkOutputStream(logger);
}
public SparkInterpreter(Properties property, SparkContext sc) {
@ -236,10 +236,7 @@ public class SparkInterpreter extends Interpreter {
*/
private SQLContext getSQLContext_2() {
if (sqlc == null) {
sqlc = (SQLContext) Utils.invokeMethod(sparkSession, "wrapped");
if (sqlc == null) {
sqlc = (SQLContext) Utils.invokeMethod(sparkSession, "sqlContext");
}
sqlc = (SQLContext) Utils.invokeMethod(sparkSession, "sqlContext");
}
return sqlc;
}
@ -327,6 +324,7 @@ public class SparkInterpreter extends Interpreter {
}
}
setupConfForPySpark(conf);
Class SparkSession = Utils.findClass("org.apache.spark.sql.SparkSession");
Object builder = Utils.invokeStaticMethod(SparkSession, "builder");
Utils.invokeMethod(builder, "config", new Class[]{ SparkConf.class }, new Object[]{ conf });
@ -440,8 +438,12 @@ public class SparkInterpreter extends Interpreter {
conf.set(key, val);
}
}
setupConfForPySpark(conf);
SparkContext sparkContext = new SparkContext(conf);
return sparkContext;
}
//TODO(jongyoul): Move this code into PySparkInterpreter.java
private void setupConfForPySpark(SparkConf conf) {
String pysparkBasePath = getSystemDefault("SPARK_HOME", null, null);
File pysparkPath;
if (null == pysparkBasePath) {
@ -454,7 +456,8 @@ public class SparkInterpreter extends Interpreter {
}
//Only one of the py4j-*-src.zip archives listed below should exist
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip"};
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip",
"py4j-0.10.1-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib : pythonLibs) {
File libFile = new File(pysparkPath, lib);
@ -484,9 +487,6 @@ public class SparkInterpreter extends Interpreter {
if (getProperty("master").equals("yarn-client")) {
conf.set("spark.yarn.isPython", "true");
}
SparkContext sparkContext = new SparkContext(conf);
return sparkContext;
}
static final String toString(Object o) {
@ -524,6 +524,21 @@ public class SparkInterpreter extends Interpreter {
@Override
public void open() {
// set properties and do login before creating any spark stuff for secured cluster
if (getProperty("master").equals("yarn-client")) {
System.setProperty("SPARK_YARN_MODE", "true");
}
if (getProperty().contains("spark.yarn.keytab") &&
getProperty().contains("spark.yarn.principal")) {
try {
String keytab = getProperty().getProperty("spark.yarn.keytab");
String principal = getProperty().getProperty("spark.yarn.principal");
UserGroupInformation.loginUserFromKeytab(principal, keytab);
} catch (IOException e) {
throw new RuntimeException("Can not pass kerberos authentication", e);
}
}
conf = new SparkConf();
URL[] urls = getClassloaderUrls();
@ -703,11 +718,25 @@ public class SparkInterpreter extends Interpreter {
logger.error(e.getMessage(), e);
}
}
}
if (Utils.findClass("org.apache.spark.repl.SparkJLineCompletion", true) != null) {
completer = Utils.instantiateClass(
"org.apache.spark.repl.SparkJLineCompletion",
new Class[]{Utils.findClass("org.apache.spark.repl.SparkIMain")},
new Object[]{intp});
} else if (Utils.findClass(
"scala.tools.nsc.interpreter.PresentationCompilerCompleter", true) != null) {
completer = Utils.instantiateClass(
"scala.tools.nsc.interpreter.PresentationCompilerCompleter",
new Class[]{ IMain.class },
new Object[]{ intp });
} else if (Utils.findClass(
"scala.tools.nsc.interpreter.JLineCompletion", true) != null) {
completer = Utils.instantiateClass(
"scala.tools.nsc.interpreter.JLineCompletion",
new Class[]{ IMain.class },
new Object[]{ intp });
}
if (Utils.isSpark2()) {
@ -886,6 +915,11 @@ public class SparkInterpreter extends Interpreter {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor) {
if (completer == null) {
logger.warn("Can't find completer");
return new LinkedList<InterpreterCompletion>();
}
if (buf.length() < cursor) {
cursor = buf.length();
}
@ -894,22 +928,18 @@ public class SparkInterpreter extends Interpreter {
completionText = "";
cursor = completionText.length();
}
if (Utils.isScala2_10()) {
ScalaCompleter c = (ScalaCompleter) Utils.invokeMethod(completer, "completer");
Candidates ret = c.complete(completionText, cursor);
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<InterpreterCompletion>();
ScalaCompleter c = (ScalaCompleter) Utils.invokeMethod(completer, "completer");
Candidates ret = c.complete(completionText, cursor);
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate));
}
List<String> candidates = WrapAsJava$.MODULE$.seqAsJavaList(ret.candidates());
List<InterpreterCompletion> completions = new LinkedList<InterpreterCompletion>();
return completions;
} else {
return new LinkedList<InterpreterCompletion>();
for (String candidate : candidates) {
completions.add(new InterpreterCompletion(candidate, candidate));
}
return completions;
}
private String getCompletionTargetString(String text, int cursor) {

View file

@ -17,17 +17,20 @@
package org.apache.zeppelin.spark;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.slf4j.Logger;
import java.io.IOException;
import java.io.OutputStream;
/**
* InterpreterOutput can be attached / detached.
*/
public class SparkOutputStream extends OutputStream {
public class SparkOutputStream extends LogOutputStream {
public static Logger logger;
InterpreterOutput interpreterOutput;
public SparkOutputStream() {
public SparkOutputStream(Logger logger) {
this.logger = logger;
}
public InterpreterOutput getInterpreterOutput() {
@ -40,6 +43,7 @@ public class SparkOutputStream extends OutputStream {
@Override
public void write(int b) throws IOException {
super.write(b);
if (interpreterOutput != null) {
interpreterOutput.write(b);
}
@ -47,6 +51,7 @@ public class SparkOutputStream extends OutputStream {
@Override
public void write(byte [] b) throws IOException {
super.write(b);
if (interpreterOutput != null) {
interpreterOutput.write(b);
}
@ -54,13 +59,20 @@ public class SparkOutputStream extends OutputStream {
@Override
public void write(byte [] b, int offset, int len) throws IOException {
super.write(b, offset, len);
if (interpreterOutput != null) {
interpreterOutput.write(b, offset, len);
}
}
/**
 * Logs one completed line of interpreter output at debug level.
 *
 * @param line the completed line handed over by the line-buffering superclass
 * @param level level supplied with the line; not used here
 */
@Override
protected void processLine(String line, int level) {
  // parameterized form: the message is only assembled when debug logging is enabled
  logger.debug("Interpreter output:{}", line);
}
@Override
public void close() throws IOException {
super.close();
if (interpreterOutput != null) {
interpreterOutput.close();
}
@ -68,6 +80,7 @@ public class SparkOutputStream extends OutputStream {
@Override
public void flush() throws IOException {
super.flush();
if (interpreterOutput != null) {
interpreterOutput.flush();
}

View file

@ -21,6 +21,7 @@ import static org.apache.zeppelin.spark.ZeppelinRDisplay.render;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkRBackend;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
@ -70,11 +71,16 @@ public class SparkRInterpreter extends Interpreter {
int port = SparkRBackend.port();
SparkInterpreter sparkInterpreter = getSparkInterpreter();
ZeppelinRContext.setSparkContext(sparkInterpreter.getSparkContext());
SparkContext sc = sparkInterpreter.getSparkContext();
SparkVersion sparkVersion = new SparkVersion(sc.version());
ZeppelinRContext.setSparkContext(sc);
if (Utils.isSpark2()) {
ZeppelinRContext.setSparkSession(sparkInterpreter.getSparkSession());
}
ZeppelinRContext.setSqlContext(sparkInterpreter.getSQLContext());
ZeppelinRContext.setZepplinContext(sparkInterpreter.getZeppelinContext());
zeppelinR = new ZeppelinR(rCmdPath, sparkRLibPath, port);
zeppelinR = new ZeppelinR(rCmdPath, sparkRLibPath, port, sparkVersion);
try {
zeppelinR.open();
} catch (IOException e) {

View file

@ -56,10 +56,16 @@ class Utils {
}
/**
 * Looks up a class by name, delegating to {@link #findClass(String, boolean)} with
 * {@code silence} set to {@code false}.
 *
 * @param name name of the class to look up
 * @return the result of the two-argument overload for {@code name}
 */
static Class findClass(String name) {
return findClass(name, false);
}
static Class findClass(String name, boolean silence) {
try {
return Utils.class.forName(name);
} catch (ClassNotFoundException e) {
logger.error(e.getMessage(), e);
if (!silence) {
logger.error(e.getMessage(), e);
}
return null;
}
}

View file

@ -36,6 +36,7 @@ import java.util.Map;
public class ZeppelinR implements ExecuteResultHandler {
Logger logger = LoggerFactory.getLogger(ZeppelinR.class);
private final String rCmdPath;
private final SparkVersion sparkVersion;
private DefaultExecutor executor;
private SparkOutputStream outputStream;
private PipedOutputStream input;
@ -107,9 +108,11 @@ public class ZeppelinR implements ExecuteResultHandler {
* @param rCmdPath R repl commandline path
* @param libPath sparkr library path
*/
public ZeppelinR(String rCmdPath, String libPath, int sparkRBackendPort) {
public ZeppelinR(String rCmdPath, String libPath, int sparkRBackendPort,
SparkVersion sparkVersion) {
this.rCmdPath = rCmdPath;
this.libPath = libPath;
this.sparkVersion = sparkVersion;
this.port = sparkRBackendPort;
try {
File scriptFile = File.createTempFile("zeppelin_sparkr-", ".R");
@ -137,9 +140,10 @@ public class ZeppelinR implements ExecuteResultHandler {
cmd.addArgument(Integer.toString(hashCode()));
cmd.addArgument(Integer.toString(port));
cmd.addArgument(libPath);
cmd.addArgument(Integer.toString(sparkVersion.toNumber()));
executor = new DefaultExecutor();
outputStream = new SparkOutputStream();
outputStream = new SparkOutputStream(logger);
input = new PipedOutputStream();
PipedInputStream in = new PipedInputStream(input);

View file

@ -27,6 +27,7 @@ public class ZeppelinRContext {
private static SparkContext sparkContext;
private static SQLContext sqlContext;
private static ZeppelinContext zeppelinContext;
private static Object sparkSession;
public static void setSparkContext(SparkContext sparkContext) {
ZeppelinRContext.sparkContext = sparkContext;
@ -40,6 +41,10 @@ public class ZeppelinRContext {
ZeppelinRContext.sqlContext = sqlContext;
}
/**
 * Stores the Spark session so it can later be read back through
 * {@link #getSparkSession()}.
 *
 * @param sparkSession the session object; held as a plain {@code Object}
 *                     (NOTE(review): presumably so this class does not reference a
 *                     Spark 2 only type; confirm)
 */
public static void setSparkSession(Object sparkSession) {
ZeppelinRContext.sparkSession = sparkSession;
}
public static SparkContext getSparkContext() {
return sparkContext;
}
@ -52,4 +57,7 @@ public class ZeppelinRContext {
return zeppelinContext;
}
/**
 * Returns the Spark session previously stored with {@code setSparkSession}.
 *
 * @return the stored session object, or {@code null} if none has been set
 */
public static Object getSparkSession() {
return sparkSession;
}
}

View file

@ -21,6 +21,7 @@ args <- commandArgs(trailingOnly = TRUE)
hashCode <- as.integer(args[1])
port <- as.integer(args[2])
libPath <- args[3]
version <- as.integer(args[4])
rm(args)
print(paste("Port ", toString(port)))
@ -41,6 +42,10 @@ assign(".scStartTime", as.integer(Sys.time()), envir = SparkR:::.sparkREnv)
# setup spark env
assign(".sc", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getSparkContext"), envir = SparkR:::.sparkREnv)
assign("sc", get(".sc", envir = SparkR:::.sparkREnv), envir=.GlobalEnv)
if (version >= 200) {
assign(".sparkRsession", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getSparkSession"), envir = SparkR:::.sparkREnv)
assign("spark", get(".sparkRsession", envir = SparkR:::.sparkREnv), envir = .GlobalEnv)
}
assign(".sqlc", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getSqlContext"), envir = SparkR:::.sparkREnv)
assign("sqlContext", get(".sqlc", envir = SparkR:::.sparkREnv), envir = .GlobalEnv)
assign(".zeppelinContext", SparkR:::callJStatic("org.apache.zeppelin.spark.ZeppelinRContext", "getZeppelinContext"), envir = .GlobalEnv)

View file

@ -46,7 +46,7 @@
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
},
"master": {
"envName": "MASTER",
@ -77,7 +77,7 @@
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
},
"zeppelin.spark.importImplicit": {
"envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",

View file

@ -27,19 +27,20 @@ from pyspark.storagelevel import StorageLevel
from pyspark.accumulators import Accumulator, AccumulatorParam
from pyspark.broadcast import Broadcast
from pyspark.serializers import MarshalSerializer, PickleSerializer
import ast
# for back compatibility
from pyspark.sql import SQLContext, HiveContext, Row
class Logger(object):
def __init__(self):
self.out = ""
pass
def write(self, message):
intp.appendOutput(message)
def reset(self):
self.out = ""
pass
def flush(self):
pass
@ -230,7 +231,7 @@ while True :
try:
stmts = req.statements().split("\n")
jobGroup = req.jobGroup()
final_code = None
final_code = []
for s in stmts:
if s == None:
@ -241,15 +242,27 @@ while True :
if len(s_stripped) == 0 or s_stripped.startswith("#"):
continue
if final_code:
final_code += "\n" + s
else:
final_code = s
final_code.append(s)
if final_code:
compiledCode = compile(final_code, "<string>", "exec")
# use exec mode to compile the statements except the last statement,
# so that the last statement's evaluation will be printed to stdout
sc.setJobGroup(jobGroup, "Zeppelin")
eval(compiledCode)
code = compile('\n'.join(final_code), '<stdin>', 'exec', ast.PyCF_ONLY_AST, 1)
to_run_exec, to_run_single = code.body[:-1], code.body[-1:]
try:
for node in to_run_exec:
mod = ast.Module([node])
code = compile(mod, '<stdin>', 'exec')
exec(code)
for node in to_run_single:
mod = ast.Interactive([node])
code = compile(mod, '<stdin>', 'single')
exec(code)
except:
raise Execution(sys.exc_info())
intp.setStatementsFinished("", False)
except Py4JJavaError:

View file

@ -46,7 +46,7 @@
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
},
"master": {
"envName": "MASTER",
@ -77,7 +77,7 @@
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of SparkSQL result to display."
"description": "Max number of Spark SQL result to display."
},
"zeppelin.spark.importImplicit": {
"envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",

View file

@ -19,16 +19,16 @@ package org.apache.zeppelin.spark;
import static org.junit.Assert.*;
import java.io.BufferedReader;
import java.io.File;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.repl.SparkILoop;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.resource.LocalResourcePool;
import org.apache.zeppelin.resource.WellKnownResourceName;
import org.apache.zeppelin.user.AuthenticationInfo;
@ -42,7 +42,6 @@ import org.junit.Test;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.tools.nsc.interpreter.IMain;
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class SparkInterpreterTest {
@ -282,4 +281,10 @@ public class SparkInterpreterTest {
assertEquals(Code.ERROR, repl2.interpret(ddl, context).code());
repl2.close();
}
/**
 * Checks that requesting completion for {@code "sc."} with the cursor at the end of
 * the text yields at least one candidate.
 */
@Test
public void testCompletion() {
List<InterpreterCompletion> completions = repl.completion("sc.", "sc.".length());
assertTrue(completions.size() > 0);
}
}

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Display system apis</name>
<url>http://zeppelin.apache.org</url>
<dependencyManagement>
<dependencies>
@ -95,10 +94,6 @@
<profiles>
<profile>
<id>scala-2.11</id>
<activation>
<property><name>scala-2.11</name></property>
</activation>
<dependencies>
<dependency>
<groupId>org.scala-lang.modules</groupId>

View file

@ -75,7 +75,7 @@
<dependencies>
<dependency>
<artifactId>zeppelin-server_2.10</artifactId>
<artifactId>zeppelin-server</artifactId>
<groupId>${project.groupId}</groupId>
<version>${project.version}</version>
</dependency>
@ -114,10 +114,6 @@
<profiles>
<profile>
<id>scala-2.11</id>
<activation>
<property><name>scala-2.11</name></property>
</activation>
<dependencyManagement>
<dependencies>
<dependency>

View file

@ -4,6 +4,7 @@
(Apache 2.0) JavaEWAH v0.7.9 (https://github.com/lemire/javaewah) - https://github.com/lemire/javaewah/blob/master/LICENSE-2.0.txt
The following components are provided under Apache License.
@ -73,7 +74,7 @@ The following components are provided under Apache License.
(Apache 2.0) json-flattener (com.github.wnameless:json-flattener:0.1.6 - https://github.com/wnameless/json-flattener)
(Apache 2.0) Spatial4J (com.spatial4j:spatial4j:0.4.1 - https://github.com/spatial4j/spatial4j)
(Apache 2.0) T-Digest (com.tdunning:t-digest:3.0 - https://github.com/tdunning/t-digest)
(Apache 2.0) Netty (io.netty:netty:3.8.0.Final - http://netty.io/)
(Apache 2.0) Netty (io.netty:netty:3.10.5.Final - http://netty.io/)
(Apache 2.0) Lucene Common Analyzers (org.apache.lucene:lucene-analyzers-common:5.3.1 - http://lucene.apache.org/lucene-parent/lucene-analyzers-common)
(Apache 2.0) Lucene Memory (org.apache.lucene:lucene-backward-codecs:5.3.1 - http://lucene.apache.org/lucene-parent/lucene-backward-codecs)
(Apache 2.0) Lucene Core (org.apache.lucene:lucene-core:5.3.1 - http://lucene.apache.org/lucene-parent/lucene-core)
@ -102,7 +103,17 @@ The following components are provided under Apache License.
(Apache 2.0) Roboto Font (https://github.com/google/roboto/)
(Apache 2.0) stream (com.clearspring.analytics:stream:2.7.0) - https://github.com/addthis/stream-lib/blob/v2.7.0/LICENSE.txt
(Apache 2.0) io.dropwizard.metrics:3.1.2 - https://github.com/dropwizard/metrics/blob/v3.1.2/LICENSE
(Apache 2.0) Google BigQuery API for Java (com.google.api.services.bigquery:v2-rev265-1.21.0 - https://cloud.google.com/bigquery/)
(Apache 2.0) Google APIs Client Library for Java (com.google.api-client:1.21.0 - https://github.com/google/google-api-java-client)
(Apache 2.0) The Guava project contains several of Google's core libraries that we rely on in our Java-based projects (com.google.guava:guava-jdk5:17.0 - https://github.com/google/guava)
(Apache 2.0) Google OAuth Client Library for Java (com.google.oauth-client:google-oauth-client:1.21.0 - https://github.com/google/google-oauth-java-client)
(Apache 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client:1.21.0 - https://github.com/google/google-http-java-client/tree/dev/google-http-client)
(Apache 2.0) Google OAuth Jetty Client Library for Java (com.google.oauth-client:google-oauth-client-jetty:1.21.0 - https://github.com/google/google-oauth-java-client/tree/dev/google-oauth-client-jetty)
(Apache 2.0) Google OAuth Client Library for Java6 (com.google.oauth-client:google-oauth-client-java6:1.21.0 - https://github.com/google/google-oauth-java-client/tree/dev/google-oauth-client-java6)
(Apache 2.0) The core jetty server artifact (org.mortbay.jetty:jetty:6.1.26 - http://javadox.com/org.mortbay.jetty/jetty/6.1.26/overview-tree.html)
(Apache 2.0) Utility classes for Jetty (org.mortbay.jetty:jetty-util:6.1.26 - http://javadox.com/org.mortbay.jetty/jetty/6.1.26/overview-tree.html)
(Apache 2.0) Servlet API (org.mortbay.jetty:servlet-api:2.5-20081211 - https://en.wikipedia.org/wiki/Jetty_(web_server))
(Apache 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client-jackson2:1.21.0 - https://github.com/google/google-http-java-client/tree/dev/google-http-client-jackson2)
========================================================================
MIT licenses
@ -171,7 +182,6 @@ The following components are provided under the BSD-style License.
(New BSD License) JGit (org.eclipse.jgit:org.eclipse.jgit:jar:4.1.1.201511131810-r - https://eclipse.org/jgit/)
(New BSD License) Kryo (com.esotericsoftware.kryo:kryo:3.0.3 - http://code.google.com/p/kryo/)
(New BSD License) leveldbjni (org.fusesource.leveldbjni:leveldbjni-all:1.8) - https://github.com/fusesource/leveldbjni/blob/leveldbjni-1.8/license.txt
(New BSD License) MinLog (com.esotericsoftware.minlog:minlog:1.3 - http://code.google.com/p/minlog/)
(New BSD License) ReflectASM (com.esotericsoftware.reflectasm:reflectasm:1.07 - http://code.google.com/p/reflectasm/)
(BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/)

View file

@ -32,7 +32,6 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Examples</name>
<description>Zeppelin examples</description>
<url>http://zeppelin.apache.org</url>
<modules>
<module>zeppelin-example-clock</module>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Example application - Clock</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

View file

@ -31,7 +31,6 @@
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Example application - Horizontal Bar chart</name>
<url>http://zeppelin.apache.org</url>
<dependencies>
<dependency>

View file

@ -34,7 +34,6 @@
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Interpreter</name>
<description>Zeppelin Interpreter</description>
<url>http://zeppelin.apache.org</url>
<properties>
<commons-lang.version>3.4</commons-lang.version>

View file

@ -27,11 +27,10 @@
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-server_2.10</artifactId>
<artifactId>zeppelin-server</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Server</name>
<url>http://www.nflabs.com</url>
<properties>
<cxf.version>2.7.7</cxf.version>
@ -414,10 +413,6 @@
<profiles>
<profile>
<id>scala-2.11</id>
<activation>
<property><name>scala-2.11</name></property>
</activation>
<dependencyManagement>
<dependencies>
<dependency>

View file

@ -0,0 +1,199 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.realm;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.PutMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
import org.apache.commons.lang3.StringUtils;
import org.apache.shiro.authc.AccountException;
import org.apache.shiro.authc.AuthenticationException;
import org.apache.shiro.authc.AuthenticationInfo;
import org.apache.shiro.authc.AuthenticationToken;
import org.apache.shiro.authc.SimpleAuthenticationInfo;
import org.apache.shiro.authc.UsernamePasswordToken;
import org.apache.shiro.authz.AuthorizationInfo;
import org.apache.shiro.realm.AuthorizingRealm;
import org.apache.shiro.subject.PrincipalCollection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.gson.Gson;
import com.google.gson.JsonParseException;
/**
 * A {@code Realm} implementation that uses ZeppelinHub to authenticate users.
 *
 * <p>Credentials are sent as a JSON payload to the ZeppelinHub login endpoint; a 200 (OK)
 * response carrying a user with a non-blank login is treated as a successful
 * authentication. Authorization is not implemented yet.
 */
public class ZeppelinHubRealm extends AuthorizingRealm {
  private static final Logger LOG = LoggerFactory.getLogger(ZeppelinHubRealm.class);
  private static final String DEFAULT_ZEPPELINHUB_URL = "https://www.zeppelinhub.com";
  private static final String USER_LOGIN_API_ENDPOINT = "api/v1/users/login";
  private static final String JSON_CONTENT_TYPE = "application/json";
  private static final String UTF_8_ENCODING = "UTF-8";
  private static final AtomicInteger INSTANCE_COUNT = new AtomicInteger();

  private final HttpClient httpClient;
  private final Gson gson;

  // Starts at the default so that login still works when shiro.ini never sets zeppelinhubUrl.
  private String zeppelinhubUrl = DEFAULT_ZEPPELINHUB_URL;
  private final String name;

  public ZeppelinHubRealm() {
    super();
    LOG.debug("Init ZeppelinhubRealm");
    //TODO(anthonyc): think about more setting for this HTTP client.
    //                eg: if user uses proxy etcetc...
    httpClient = new HttpClient();
    gson = new Gson();
    // Unique per-instance realm name, used when building the authentication info.
    name = getClass().getName() + "_" + INSTANCE_COUNT.getAndIncrement();
  }

  /**
   * Authenticates the given username/password token against ZeppelinHub.
   *
   * @param authToken token holding the credentials; cast to {@link UsernamePasswordToken}
   * @return authentication info whose principal is the login returned by ZeppelinHub
   * @throws AuthenticationException if the username is blank or ZeppelinHub rejects the login
   */
  @Override
  protected AuthenticationInfo doGetAuthenticationInfo(AuthenticationToken authToken)
      throws AuthenticationException {
    UsernamePasswordToken token = (UsernamePasswordToken) authToken;
    if (StringUtils.isBlank(token.getUsername())) {
      throw new AccountException("Empty usernames are not allowed by this realm.");
    }
    String loginPayload = createLoginPayload(token.getUsername(), token.getPassword());
    User user = authenticateUser(loginPayload);
    LOG.debug("{} successfully login via ZeppelinHub", user.login);
    return new SimpleAuthenticationInfo(user.login, token.getPassword(), name);
  }

  @Override
  protected AuthorizationInfo doGetAuthorizationInfo(PrincipalCollection principals) {
    // TODO(xxx): future work will be done here.
    return null;
  }

  @Override
  protected void onInit() {
    super.onInit();
  }

  /**
   * Setter of ZeppelinHub URL, this will be called by Shiro based on zeppelinhubUrl property
   * in shiro.ini file.
   *
   * <p>It will also perform a check of ZeppelinHub url {@link #isZeppelinHubUrlValid},
   * if the url is blank or not valid, the default zeppelinhub url will be used.
   *
   * @param url ZeppelinHub base url
   */
  public void setZeppelinhubUrl(String url) {
    if (StringUtils.isBlank(url)) {
      LOG.warn("Zeppelinhub url is empty, setting up default url {}", DEFAULT_ZEPPELINHUB_URL);
      zeppelinhubUrl = DEFAULT_ZEPPELINHUB_URL;
    } else {
      zeppelinhubUrl = (isZeppelinHubUrlValid(url) ? url : DEFAULT_ZEPPELINHUB_URL);
      LOG.info("Setting up Zeppelinhub url to {}", zeppelinhubUrl);
    }
  }

  /**
   * Send to ZeppelinHub a login request based on the request body which is a JSON that contains 2
   * fields "login" and "password".
   *
   * @param requestBody JSON string of ZeppelinHub payload.
   * @return Account object with login, name (if set in ZeppelinHub), and mail.
   * @throws AuthenticationException if fail to login, or if the response cannot be turned
   *         into a user with a non-blank login.
   */
  protected User authenticateUser(String requestBody) {
    PutMethod put = new PutMethod(Joiner.on("/").join(zeppelinhubUrl, USER_LOGIN_API_ENDPOINT));
    String responseBody;
    try {
      put.setRequestEntity(
          new StringRequestEntity(requestBody, JSON_CONTENT_TYPE, UTF_8_ENCODING));
      int statusCode = httpClient.executeMethod(put);
      if (statusCode != HttpStatus.SC_OK) {
        LOG.error("Cannot login user, HTTP status code is {} instead of 200 (OK)", statusCode);
        throw new AuthenticationException("Couldnt login to ZeppelinHub. "
            + "Login or password incorrect");
      }
      responseBody = put.getResponseBodyAsString();
    } catch (IOException e) {
      LOG.error("Cannot login user", e);
      throw new AuthenticationException(e.getMessage(), e);
    } finally {
      // Always hand the connection back, including on I/O failure.
      put.releaseConnection();
    }

    User account;
    try {
      account = gson.fromJson(responseBody, User.class);
    } catch (JsonParseException e) {
      LOG.error("Cannot deserialize ZeppelinHub response to User instance", e);
      throw new AuthenticationException("Cannot login to ZeppelinHub", e);
    }
    // Gson returns null for an empty body; a user without login cannot be a principal.
    if (account == null || StringUtils.isBlank(account.login)) {
      LOG.error("ZeppelinHub response does not contain a user login");
      throw new AuthenticationException("Cannot login to ZeppelinHub");
    }
    return account;
  }

  /**
   * Create a JSON String that represent login payload.
   *
   * <p>Payload will look like:
   * <pre>
   * {"login":"userLogin", "password":"userpassword"}
   * </pre>
   * Both values are serialized through Gson so that quotes, backslashes and control
   * characters in the credentials are escaped instead of corrupting the JSON document.
   *
   * @param login user login
   * @param pwd user password; {@code null} is sent as an empty password
   * @return the JSON payload
   */
  protected String createLoginPayload(String login, char[] pwd) {
    String password = (pwd == null) ? "" : new String(pwd);
    return "{\"login\":" + gson.toJson(login)
        + ", \"password\":" + gson.toJson(password) + "}";
  }

  /**
   * Perform a Simple URL check by using <code>URI(url).toURL()</code>.
   * If the url is not valid, the try-catch condition will catch the exceptions and return false,
   * otherwise true will be returned.
   *
   * @param url url to check
   * @return true when the url can be converted to a {@link java.net.URL}, false otherwise
   */
  protected boolean isZeppelinHubUrlValid(String url) {
    boolean valid;
    try {
      new URI(url).toURL();
      valid = true;
    } catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) {
      // URI.toURL() reports a non-absolute URI with IllegalArgumentException.
      LOG.error("Zeppelinhub url is not valid, default ZeppelinHub url will be used.", e);
      valid = false;
    }
    return valid;
  }

  /**
   * Helper class that will be use to deserialize ZeppelinHub response.
   */
  protected class User {
    public String login;
    public String email;
    public String name;
  }
}

View file

@ -67,6 +67,25 @@ public class GetUserList {
return userList;
}
/**
 * Get the role names declared in the "roles" section of shiro.ini.
 *
 * @param r realm whose ini configuration is read
 * @return the trimmed role names; empty when the ini has no "roles" section
 */
public List<String> getRolesList(IniRealm r) {
  List<String> roleList = new ArrayList<>();
  // Each key of the "roles" section is a role name; the values are not needed here.
  Map<String, String> iniRoles = r.getIni().get("roles");
  if (iniRoles != null) {
    for (String roleName : iniRoles.keySet()) {
      roleList.add(roleName.trim());
    }
  }
  return roleList;
}
/**
* function to extract users from LDAP
*/

View file

@ -18,9 +18,9 @@
package org.apache.zeppelin.rest;
import org.apache.commons.lang3.StringUtils;
import org.apache.shiro.realm.Realm;
import org.apache.shiro.realm.jdbc.JdbcRealm;
import org.apache.shiro.realm.ldap.AbstractLdapRealm;
import org.apache.shiro.realm.ldap.JndiLdapRealm;
import org.apache.shiro.realm.text.IniRealm;
import org.apache.zeppelin.annotation.ZeppelinApi;
@ -98,6 +98,7 @@ public class SecurityRestApi {
public Response getUserList(@PathParam("searchText") final String searchText) {
List<String> usersList = new ArrayList<>();
List<String> rolesList = new ArrayList<>();
try {
GetUserList getUserListObj = new GetUserList();
Collection realmsList = SecurityUtils.getRealmsList();
@ -107,6 +108,7 @@ public class SecurityRestApi {
String name = realm.getName();
if (name.equals("iniRealm")) {
usersList.addAll(getUserListObj.getUserList((IniRealm) realm));
rolesList.addAll(getUserListObj.getRolesList((IniRealm) realm));
} else if (name.equals("ldapRealm")) {
usersList.addAll(getUserListObj.getUserList((JndiLdapRealm) realm, searchText));
} else if (name.equals("activeDirectoryRealm")) {
@ -120,8 +122,10 @@ public class SecurityRestApi {
} catch (Exception e) {
LOG.error("Exception in retrieving Users from realms ", e);
}
List<String> autoSuggestList = new ArrayList<>();
List<String> autoSuggestUserList = new ArrayList<>();
List<String> autoSuggestRoleList = new ArrayList<>();
Collections.sort(usersList);
Collections.sort(rolesList);
Collections.sort(usersList, new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
@ -134,18 +138,28 @@ public class SecurityRestApi {
}
});
int maxLength = 0;
for (int i = 0; i < usersList.size(); i++) {
String userLowerCase = usersList.get(i).toLowerCase();
String searchTextLowerCase = searchText.toLowerCase();
if (userLowerCase.indexOf(searchTextLowerCase) != -1) {
for (String user : usersList) {
if (StringUtils.containsIgnoreCase(user, searchText)) {
autoSuggestUserList.add(user);
maxLength++;
autoSuggestList.add(usersList.get(i));
}
if (maxLength == 5) {
break;
}
}
return new JsonResponse<>(Response.Status.OK, "", autoSuggestList).build();
for (String role : rolesList) {
if (StringUtils.containsIgnoreCase(role, searchText)) {
autoSuggestRoleList.add(role);
}
}
Map<String, List> returnListMap = new HashMap<>();
returnListMap.put("users", autoSuggestUserList);
returnListMap.put("roles", autoSuggestRoleList);
return new JsonResponse<>(Response.Status.OK, "", returnListMap).build();
}
}

View file

@ -161,12 +161,12 @@ public class AuthenticationIT extends AbstractZeppelinIT {
pollingWait(By.xpath("//span[@tooltip='Note permissions']"),
MAX_BROWSER_TIMEOUT_SEC).click();
pollingWait(By.xpath("//input[@ng-model='permissions.owners']"), MAX_BROWSER_TIMEOUT_SEC)
.sendKeys("finance");
pollingWait(By.xpath("//input[@ng-model='permissions.readers']"), MAX_BROWSER_TIMEOUT_SEC)
.sendKeys("finance");
pollingWait(By.xpath("//input[@ng-model='permissions.writers']"), MAX_BROWSER_TIMEOUT_SEC)
.sendKeys("finance");
pollingWait(By.xpath(".//*[@id='selectOwners']/following::span//input"),
MAX_BROWSER_TIMEOUT_SEC).sendKeys("finance ");
pollingWait(By.xpath(".//*[@id='selectReaders']/following::span//input"),
MAX_BROWSER_TIMEOUT_SEC).sendKeys("finance ");
pollingWait(By.xpath(".//*[@id='selectWriters']/following::span//input"),
MAX_BROWSER_TIMEOUT_SEC).sendKeys("finance ");
pollingWait(By.xpath("//button[@ng-click='savePermissions()']"), MAX_BROWSER_TIMEOUT_SEC)
.sendKeys(Keys.ENTER);

View file

@ -140,6 +140,25 @@ public class SparkParagraphIT extends AbstractZeppelinIT {
paragraph1Result.getText().toString(), CoreMatchers.equalTo("test loop 0\ntest loop 1\ntest loop 2")
);
// the last statement's evaluation result is printed
setTextOfParagraph(2, "%pyspark\\n" +
"sc.version\\n" +
"1+1");
runParagraph(2);
try {
waitForParagraph(2, "FINISHED");
} catch (TimeoutException e) {
waitForParagraph(2, "ERROR");
collector.checkThat("Paragraph from SparkParagraphIT of testPySpark status: ",
"ERROR", CoreMatchers.equalTo("FINISHED")
);
}
WebElement paragraph2Result = driver.findElement(By.xpath(
getParagraphXPath(2) + "//div[@class=\"tableDisplay\"]"));
collector.checkThat("Paragraph from SparkParagraphIT of testPySpark result: ",
paragraph2Result.getText().toString(), CoreMatchers.equalTo("2")
);
} catch (Exception e) {
handleException("Exception in SparkParagraphIT while testPySpark", e);
}

View file

@ -69,7 +69,7 @@ public class SecurityRestApiTest extends AbstractTestRestApi {
get.addRequestHeader("Origin", "http://localhost");
Map<String, Object> resp = gson.fromJson(get.getResponseBodyAsString(),
new TypeToken<Map<String, Object>>(){}.getType());
List<String> userList = (List<String>) resp.get("body");
List<String> userList = (List) ((Map) resp.get("body")).get("users");
collector.checkThat("Search result size", userList.size(),
CoreMatchers.equalTo(1));
collector.checkThat("Search result contains admin", userList.contains("admin"),
@ -80,7 +80,7 @@ public class SecurityRestApiTest extends AbstractTestRestApi {
notUser.addRequestHeader("Origin", "http://localhost");
Map<String, Object> notUserResp = gson.fromJson(notUser.getResponseBodyAsString(),
new TypeToken<Map<String, Object>>(){}.getType());
List<String> emptyUserList = (List<String>) notUserResp.get("body");
List<String> emptyUserList = (List) ((Map) notUserResp.get("body")).get("users");
collector.checkThat("Search result size", emptyUserList.size(),
CoreMatchers.equalTo(0));

Some files were not shown because too many files have changed in this diff Show more