Merge remote-tracking branch 'origin/master' into ZEPPELIN-1483

# Conflicts:
#	zeppelin-server/src/main/java/org/apache/zeppelin/socket/NotebookServer.java
#	zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java
This commit is contained in:
Prabhjyot Singh 2016-10-20 12:15:29 +05:30
commit 2484833d54
195 changed files with 11044 additions and 6276 deletions

View file

@ -44,7 +44,7 @@ matrix:
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pbeam -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test all modules with scala 2.11
- jdk: "oraclejdk7"
@ -52,23 +52,23 @@ matrix:
# Test spark module for 1.5.2
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.4.1
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.3.1
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.3.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.3 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.2.2
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.2.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.2 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test spark module for 1.1.1
- jdk: "oraclejdk7"
env: SCALA_VER="2.10" SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark* -DfailIfNoTests=false"
env: SCALA_VER="2.10" SPARK_VER="1.1.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.1 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
# Test selenium with spark module for 1.6.1
- jdk: "oraclejdk7"
@ -89,7 +89,6 @@ install:
before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
- ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
- tail conf/zeppelin-env.sh
@ -109,6 +108,3 @@ after_failure:
- cat zeppelin-web/npm-debug.log
- cat spark-*/logs/*
after_script:
- ./testing/stopSparkCluster.sh $SPARK_VER $HADOOP_VER

View file

@ -34,9 +34,6 @@ import org.apache.zeppelin.scheduler.SchedulerFactory;
*
*/
public class AngularInterpreter extends Interpreter {
static {
Interpreter.register("angular", AngularInterpreter.class.getName());
}
public AngularInterpreter(Properties property) {
super(property);

View file

@ -0,0 +1,10 @@
[
{
"group": "angular",
"name": "angular",
"className": "org.apache.zeppelin.angular.AngularInterpreter",
"properties": {
}
}
]

25
beam/README.md Normal file
View file

@ -0,0 +1,25 @@
# Overview
Beam interpreter for Apache Zeppelin
# Architecture
The current interpreter implementation supports a static REPL. It compiles the code in memory, executes it, and redirects the output to Zeppelin.
## Building the Beam Interpreter
You first have to build the Beam interpreter by enabling the **beam** profile as follows:
```
mvn clean package -Pbeam -DskipTests
```
### Notice
- The Flink runner comes with binaries compiled for Scala 2.10, so currently only Scala 2.10 is supported.
### Technical overview
* Upon starting an interpreter, an instance of `JavaCompiler` is created.
* When the user runs commands with beam, the `JavaParser` goes through the code to find a class that contains the main method.
* Then it replaces the class name with a random class name to avoid overwriting existing classes during compilation. It also creates new out and err streams so that the output is captured in those streams instead of going to the console, and can be redirected to Zeppelin.
* If there is any error during compilation, it is caught and redirected to Zeppelin.

320
beam/pom.xml Normal file
View file

@ -0,0 +1,320 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.7.0-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-beam</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Beam interpreter</name>
<properties>
<!-- Versions referenced by the Hadoop, Spark and Beam dependencies of this module. -->
<beam.hadoop.version>2.3.0</beam.hadoop.version>
<beam.spark.version>1.6.2</beam.spark.version>
<beam.beam.version>0.2.0-incubating</beam.beam.version>
</properties>
<dependencies>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.1.1.Final</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>${beam.spark.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>netty-all</artifactId>
<groupId>io.netty</groupId>
</exclusion>
<exclusion>
<artifactId>akka-actor_2.10</artifactId>
<groupId>org.spark-project.akka</groupId>
</exclusion>
<exclusion>
<artifactId>akka-remote_2.10</artifactId>
<groupId>org.spark-project.akka</groupId>
</exclusion>
<exclusion>
<artifactId>akka-slf4j_2.10</artifactId>
<groupId>org.spark-project.akka</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.10</artifactId>
<version>${beam.spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${beam.hadoop.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${beam.hadoop.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${beam.hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${beam.hadoop.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<version>${beam.hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${beam.hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${beam.hadoop.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.thoughtworks.qdox</groupId>
<artifactId>qdox</artifactId>
<version>2.0-M3</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-parent</artifactId>
<version>${beam.beam.version}</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-core-java</artifactId>
<version>${beam.beam.version}</version>
<exclusions>
<exclusion>
<artifactId>google-http-client-jackson2</artifactId>
<groupId>com.google.http-client</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-direct-java</artifactId>
<version>${beam.beam.version}</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-flink_2.10</artifactId>
<version>${beam.beam.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>netty-all</artifactId>
<groupId>io.netty</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-flink_2.10-examples</artifactId>
<version>${beam.beam.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
<version>${beam.beam.version}</version>
<exclusions>
<exclusion>
<artifactId>google-http-client-jackson2</artifactId>
<groupId>com.google.http-client</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-spark</artifactId>
<version>${beam.beam.version}</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-exec</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/beam</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/beam</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.beam;
import java.io.File;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Beam interpreter
*
*/
/**
 * Beam interpreter.
 *
 * <p>Each paragraph is handed to {@link StaticRepl}, which compiles the Java source and runs
 * its {@code main} method. The text returned by {@link StaticRepl} becomes the paragraph
 * result.
 */
public class BeamInterpreter extends Interpreter {

  private static final Logger logger = LoggerFactory.getLogger(BeamInterpreter.class);

  public BeamInterpreter(Properties property) {
    super(property);
  }

  @Override
  public void open() {
  }

  /**
   * Deletes every entry of the current working directory whose path ends with
   * {@code .class}, i.e. the files left behind by the compilation process.
   */
  @Override
  public void close() {
    // List the directory exactly once: re-listing while deleting shifts the indices and
    // can skip entries, and listFiles() returns null when the directory cannot be read.
    File[] entries = new File(".").listFiles();
    if (entries == null) {
      logger.warn("Unable to list the working directory, compiled classes are not removed");
      return;
    }
    for (File entry : entries) {
      if (entry.getAbsolutePath().endsWith(".class") && !entry.delete()) {
        logger.warn("Unable to delete compiled class file {}", entry.getAbsolutePath());
      }
    }
  }

  /**
   * Compiles and runs the given paragraph.
   *
   * @param code Java source of the paragraph
   * @param context interpreter context of the paragraph (not used here)
   * @return a {@code SUCCESS} result carrying the text returned by {@link StaticRepl}, or
   *         an {@code ERROR} result carrying the exception message
   */
  @Override
  public InterpreterResult interpret(String code, InterpreterContext context) {
    // a fresh class name per run, used for the class containing the main method
    String generatedClassName = "C" + UUID.randomUUID().toString().replace("-", "");

    try {
      String res = StaticRepl.execute(generatedClassName, code);
      return new InterpreterResult(InterpreterResult.Code.SUCCESS, res);
    } catch (Exception e) {
      logger.error("Exception in Interpreter while interpret", e);
      return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());
    }
  }

  @Override
  public void cancel(InterpreterContext context) {
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  @Override
  public int getProgress(InterpreterContext context) {
    return 0;
  }

  @Override
  public List<InterpreterCompletion> completion(String buf, int cursor) {
    return Collections.emptyList();
  }
}

View file

@ -0,0 +1,185 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.beam;
import javax.tools.Diagnostic;
import javax.tools.DiagnosticCollector;
import javax.tools.JavaCompiler;
import javax.tools.JavaCompiler.CompilationTask;
import javax.tools.JavaFileObject;
import javax.tools.SimpleJavaFileObject;
import javax.tools.ToolProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.thoughtworks.qdox.JavaProjectBuilder;
import com.thoughtworks.qdox.model.JavaClass;
import com.thoughtworks.qdox.model.JavaSource;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.List;
/**
*
* StaticRepl for compling the java code in memory
*
*/
/**
 * Static REPL that compiles a paragraph of Java source and runs its {@code main} method.
 *
 * <p>The class declaring a static {@code main} method is renamed to a caller-supplied name,
 * compiled with the system Java compiler, loaded through a class loader rooted at the
 * current working directory and executed. Meanwhile {@code System.out} and
 * {@code System.err} are replaced by in-memory streams so the output can be handed back to
 * the caller.
 *
 * <p>NOTE: {@code System.out}/{@code System.err} are swapped for the whole JVM while a
 * paragraph runs, so anything printed by other threads in that window lands in the same
 * buffers.
 */
public class StaticRepl {
  private static final Logger logger = LoggerFactory.getLogger(StaticRepl.class);

  /**
   * Compiles {@code code} and runs the static {@code main} method it declares.
   *
   * @param generatedClassName name given to the class that declares {@code main}
   * @param code Java source of the paragraph
   * @return everything the executed code wrote to {@code System.out}
   * @throws Exception if no class declares a static {@code main} method, if no system Java
   *         compiler is available, if compilation fails (the message lists the compiler
   *         diagnostics) or if the execution fails (the message holds the captured
   *         {@code System.err} content)
   */
  public static String execute(String generatedClassName, String code) throws Exception {
    String mainClassName = findMainClassName(code);
    // without a main method there is nothing to execute
    if (mainClassName == null) {
      String message = "There isn't any class containing static main method.";
      logger.error("Exception for Main method: {}", message);
      throw new Exception(message);
    }

    // getSystemJavaCompiler() returns null when no compiler is provided (e.g. plain JRE)
    JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
    if (compiler == null) {
      String message = "No system Java compiler is available, a JDK is required.";
      logger.error("Exception in Interpreter while compilation: {}", message);
      throw new Exception(message);
    }

    // replace name of class containing Main method with generated name
    String renamedCode = renameClass(code, mainClassName, generatedClassName);
    JavaFileObject file = new JavaSourceFromString(generatedClassName, renamedCode);
    Iterable<? extends JavaFileObject> compilationUnits = Arrays.asList(file);
    DiagnosticCollector<JavaFileObject> diagnostics = new DiagnosticCollector<JavaFileObject>();

    // Creating new streams to capture the output data
    ByteArrayOutputStream baosOut = new ByteArrayOutputStream();
    ByteArrayOutputStream baosErr = new ByteArrayOutputStream();
    PrintStream newOut = new PrintStream(baosOut);
    PrintStream newErr = new PrintStream(baosErr);

    // Remember the current streams so they can be restored whatever happens below
    PrintStream oldOut = System.out;
    PrintStream oldErr = System.err;
    System.setOut(newOut);
    System.setErr(newErr);

    try {
      CompilationTask task =
          compiler.getTask(null, null, diagnostics, null, null, compilationUnits);

      if (!task.call()) {
        for (Diagnostic<? extends JavaFileObject> diagnostic : diagnostics.getDiagnostics()) {
          // diagnostics without a line number are not reported
          if (diagnostic.getLineNumber() == -1) {
            continue;
          }
          newErr.println("line " + diagnostic.getLineNumber() + " : "
              + diagnostic.getMessage(null));
        }
        newErr.flush();
        String compilationErrors = baosErr.toString();
        logger.error("Exception in Interpreter while compilation: {}", compilationErrors);
        throw new Exception(compilationErrors);
      }

      runMain(generatedClassName, newErr, baosErr);
      newOut.flush();
      return baosOut.toString();
    } finally {
      // always hand the original streams back, including on unexpected failures
      newOut.flush();
      newErr.flush();
      System.setOut(oldOut);
      System.setErr(oldErr);
    }
  }

  /**
   * Returns the name of the first class in {@code code} that declares a static method named
   * {@code main}, or {@code null} when there is none.
   */
  private static String findMainClassName(String code) {
    JavaProjectBuilder builder = new JavaProjectBuilder();
    JavaSource src = builder.addSource(new StringReader(code));

    // all classes in code (paragraph)
    List<JavaClass> classes = src.getClasses();
    for (JavaClass candidate : classes) {
      int methodCount = candidate.getMethods().size();
      for (int j = 0; j < methodCount; j++) {
        if ("main".equals(candidate.getMethods().get(j).getName())
            && candidate.getMethods().get(j).isStatic()) {
          return candidate.getName();
        }
      }
    }
    return null;
  }

  /**
   * Replaces every whole-word occurrence of {@code oldName} in {@code code} by
   * {@code newName}; longer identifiers that merely contain {@code oldName} are untouched.
   */
  private static String renameClass(String code, String oldName, String newName) {
    return code.replaceAll("\\b" + java.util.regex.Pattern.quote(oldName) + "\\b",
        java.util.regex.Matcher.quoteReplacement(newName));
  }

  /**
   * Loads {@code className} from the current working directory and invokes its static
   * {@code main} method with an empty argument array. On failure the exception is written
   * to {@code err} and rethrown with the content of {@code errBuffer} as message.
   */
  private static void runMain(String className, PrintStream err, ByteArrayOutputStream errBuffer)
      throws Exception {
    try {
      // creating new class loader
      URLClassLoader classLoader =
          URLClassLoader.newInstance(new URL[] { new File("").toURI().toURL() });
      // execute the Main method
      Class.forName(className, true, classLoader)
          .getDeclaredMethod("main", String[].class)
          .invoke(null, (Object) new String[0]);
    } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException
        | InvocationTargetException e) {
      logger.error("Exception in Interpreter while execution", e);
      err.println(e);
      e.printStackTrace(err);
      err.flush();
      throw new Exception(errBuffer.toString(), e);
    }
  }
}
/**
 * Java source file object whose content is a string held in memory.
 */
class JavaSourceFromString extends SimpleJavaFileObject {
  final String code;

  /**
   * @param name class name; dots are turned into slashes to build the source URI
   * @param code source text returned by {@link #getCharContent(boolean)}
   */
  JavaSourceFromString(String name, String code) {
    super(sourceUriFor(name), Kind.SOURCE);
    this.code = code;
  }

  /** Builds the {@code string:///} URI under which the source is registered. */
  private static URI sourceUriFor(String name) {
    String path = name.replace('.', '/');
    return URI.create("string:///" + path + Kind.SOURCE.extension);
  }

  @Override
  public CharSequence getCharContent(boolean ignoreEncodingErrors) {
    return code;
  }
}

View file

@ -0,0 +1,11 @@
[
{
"group": "beam",
"name": "beam",
"className": "org.apache.zeppelin.beam.BeamInterpreter",
"defaultInterpreter": true,
"properties": {
}
}
]

View file

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.beam;
import static org.junit.Assert.assertEquals;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Properties;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
*
* BeamInterpreterTest
*
*/
/**
 * Tests of {@link BeamInterpreter}: a paragraph that runs, a paragraph without a main
 * method and a paragraph that does not compile.
 */
public class BeamInterpreterTest {

  private static BeamInterpreter beam;
  private static InterpreterContext context;

  @BeforeClass
  public static void setUp() {
    beam = new BeamInterpreter(new Properties());
    beam.open();
    context = new InterpreterContext(null, null, null, null, null, null, null, null, null, null,
        null);
  }

  @AfterClass
  public static void tearDown() {
    beam.close();
  }

  /** Writes each line followed by the platform line separator and returns the text. */
  private static String linesOf(String... lines) {
    StringWriter buffer = new StringWriter();
    PrintWriter printer = new PrintWriter(buffer);
    for (String line : lines) {
      printer.println(line);
    }
    printer.close();
    return buffer.toString();
  }

  @Test
  public void testStaticRepl() {
    String source = linesOf(
        "public class HelloWorld {",
        " public static void main(String args[]) {",
        " System.out.println(\"This is in another java file\");",
        " }",
        "}");

    InterpreterResult res = beam.interpret(source, context);

    assertEquals(InterpreterResult.Code.SUCCESS, res.code());
  }

  @Test
  public void testStaticReplWithoutMain() {
    String source = "package org.mdkt;\n"
        + "public class HelloClass {\n"
        + " public String hello() { return \"hello\"; }"
        + "}";

    InterpreterResult res = beam.interpret(source, context);

    assertEquals(InterpreterResult.Code.ERROR, res.code());
  }

  @Test
  public void testStaticReplWithSyntaxError() {
    String source = linesOf(
        "public class HelloWorld {",
        " public static void main(String args[]) {",
        " System.out.prin(\"This is in another java file\");",
        " }",
        "}");

    InterpreterResult res = beam.interpret(source, context);

    assertEquals(InterpreterResult.Code.ERROR, res.code());
  }
}

View file

@ -69,6 +69,10 @@ if not defined ZEPPELIN_MEM (
set ZEPPELIN_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m
)
if not defined ZEPPELIN_INTP_MEM (
set ZEPPELIN_INTP_MEM=-Xms1024m -Xmx1024m -XX:MaxPermSize=512m
)
if not defined ZEPPELIN_JAVA_OPTS (
set ZEPPELIN_JAVA_OPTS=-Dfile.encoding=%ZEPPELIN_ENCODING% %ZEPPELIN_MEM%
) else (

View file

@ -113,10 +113,14 @@ if [[ -z "${ZEPPELIN_ENCODING}" ]]; then
export ZEPPELIN_ENCODING="UTF-8"
fi
if [[ -z "$ZEPPELIN_MEM" ]]; then
if [[ -z "${ZEPPELIN_MEM}" ]]; then
export ZEPPELIN_MEM="-Xms1024m -Xmx1024m -XX:MaxPermSize=512m"
fi
if [[ -z "${ZEPPELIN_INTP_MEM}" ]]; then
export ZEPPELIN_INTP_MEM="-Xms1024m -Xmx1024m -XX:MaxPermSize=512m"
fi
JAVA_OPTS+=" ${ZEPPELIN_JAVA_OPTS} -Dfile.encoding=${ZEPPELIN_ENCODING} ${ZEPPELIN_MEM}"
JAVA_OPTS+=" -Dlog4j.configuration=file://${ZEPPELIN_CONF_DIR}/log4j.properties"
export JAVA_OPTS

View file

@ -149,6 +149,28 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
# autodetect HADOOP_CONF_HOME by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi
if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi
# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
elif [[ -d "/etc/tez/conf" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
else
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
fi
addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"

View file

@ -151,111 +151,6 @@ public class CassandraInterpreter extends Interpreter {
super(properties);
}
static {
LOGGER.info("Bootstrapping Cassandra Interpreter");
Interpreter.register("cassandra", "cassandra", CassandraInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(CASSANDRA_HOSTS, DEFAULT_HOST,
"Comma separated Cassandra hosts (DNS name or " +
"IP address). Default = localhost. Ex: '192.168.0.12,node2,node3'")
.add(CASSANDRA_PORT, DEFAULT_PORT, "Cassandra native port. Default = 9042")
.add(CASSANDRA_PROTOCOL_VERSION, DEFAULT_PROTOCOL_VERSION,
"Cassandra protocol version. Default = 4")
.add(CASSANDRA_CLUSTER_NAME, DEFAULT_CLUSTER, "Cassandra cluster name. " +
"Default = 'Test Cluster'")
.add(CASSANDRA_KEYSPACE_NAME, DEFAULT_KEYSPACE, "Cassandra keyspace name. " +
"Default = 'system'")
.add(CASSANDRA_COMPRESSION_PROTOCOL, DEFAULT_COMPRESSION,
"Cassandra compression protocol. " +
"Available values: NONE, SNAPPY, LZ4. Default = NONE")
.add(CASSANDRA_CREDENTIALS_USERNAME, DEFAULT_CREDENTIAL,
"Cassandra credentials username. " +
"Default = 'none'")
.add(CASSANDRA_CREDENTIALS_PASSWORD, DEFAULT_CREDENTIAL,
"Cassandra credentials password. " +
"Default = 'none'")
.add(CASSANDRA_LOAD_BALANCING_POLICY, DEFAULT_POLICY, "Cassandra Load Balancing Policy. " +
"Default = new TokenAwarePolicy(new DCAwareRoundRobinPolicy())")
.add(CASSANDRA_RETRY_POLICY, DEFAULT_POLICY, "Cassandra Retry Policy. " +
"Default = DefaultRetryPolicy.INSTANCE")
.add(CASSANDRA_RECONNECTION_POLICY, DEFAULT_POLICY, "Cassandra Reconnection Policy. " +
"Default = new ExponentialReconnectionPolicy(1000, 10 * 60 * 1000)")
.add(CASSANDRA_SPECULATIVE_EXECUTION_POLICY, DEFAULT_POLICY,
"Cassandra Speculative Execution Policy. " +
"Default = NoSpeculativeExecutionPolicy.INSTANCE")
.add(CASSANDRA_INTERPRETER_PARALLELISM, DEFAULT_PARALLELISM,
"Cassandra interpreter parallelism" +
".Default = 10")
.add(CASSANDRA_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS,
DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS + ""
, "Cassandra max schema agreement wait in second" +
".Default = ProtocolOptions.DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS")
.add(CASSANDRA_POOLING_NEW_CONNECTION_THRESHOLD_LOCAL,
DEFAULT_NEW_CONNECTION_THRESHOLD_LOCAL,
"Cassandra new connection threshold local. " +
"Protocol V2 and below default = 100" +
"Protocol V3 and above default = 800")
.add(CASSANDRA_POOLING_NEW_CONNECTION_THRESHOLD_REMOTE,
DEFAULT_NEW_CONNECTION_THRESHOLD_REMOTE,
"Cassandra new connection threshold remove. " +
"Protocol V2 and below default = 100" +
"Protocol V3 and above default = 200")
.add(CASSANDRA_POOLING_CORE_CONNECTION_PER_HOST_LOCAL,
DEFAULT_CORE_CONNECTION_PER_HOST_LOCAL,
"Cassandra core connection per host local. " +
"Protocol V2 and below default = 2" +
"Protocol V3 and above default = 1")
.add(CASSANDRA_POOLING_CORE_CONNECTION_PER_HOST_REMOTE,
DEFAULT_CORE_CONNECTION_PER_HOST_REMOTE,
"Cassandra core connection per host remove. " +
"Protocol V2 and below default = 1" +
"Protocol V3 and above default = 1")
.add(CASSANDRA_POOLING_MAX_CONNECTION_PER_HOST_LOCAL,
DEFAULT_MAX_CONNECTION_PER_HOST_LOCAL,
"Cassandra max connection per host local. " +
"Protocol V2 and below default = 8" +
"Protocol V3 and above default = 1")
.add(CASSANDRA_POOLING_MAX_CONNECTION_PER_HOST_REMOTE,
DEFAULT_MAX_CONNECTION_PER_HOST_REMOTE,
"Cassandra max connection per host remote. " +
"Protocol V2 and below default = 2" +
"Protocol V3 and above default = 1")
.add(CASSANDRA_POOLING_MAX_REQUESTS_PER_CONNECTION_LOCAL,
DEFAULT_MAX_REQUEST_PER_CONNECTION_LOCAL,
"Cassandra max request per connection local. " +
"Protocol V2 and below default = 128" +
"Protocol V3 and above default = 1024")
.add(CASSANDRA_POOLING_MAX_REQUESTS_PER_CONNECTION_REMOTE,
DEFAULT_MAX_REQUEST_PER_CONNECTION_REMOTE,
"Cassandra max request per connection remote. " +
"Protocol V2 and below default = 128" +
"Protocol V3 and above default = 256")
.add(CASSANDRA_POOLING_IDLE_TIMEOUT_SECONDS, DEFAULT_IDLE_TIMEOUT,
"Cassandra idle time out in seconds. Default = 120")
.add(CASSANDRA_POOLING_POOL_TIMEOUT_MILLIS, DEFAULT_POOL_TIMEOUT,
"Cassandra pool time out in millisecs. Default = 5000")
.add(CASSANDRA_POOLING_HEARTBEAT_INTERVAL_SECONDS, DEFAULT_HEARTBEAT_INTERVAL,
"Cassandra pool heartbeat interval in secs. Default = 30")
.add(CASSANDRA_QUERY_DEFAULT_CONSISTENCY, DEFAULT_CONSISTENCY,
"Cassandra query default consistency level. Default = ONE")
.add(CASSANDRA_QUERY_DEFAULT_SERIAL_CONSISTENCY, DEFAULT_SERIAL_CONSISTENCY,
"Cassandra query default serial consistency level. Default = SERIAL")
.add(CASSANDRA_QUERY_DEFAULT_FETCH_SIZE, DEFAULT_FETCH_SIZE,
"Cassandra query default fetch size. Default = 5000")
.add(CASSANDRA_SOCKET_CONNECTION_TIMEOUT_MILLIS, DEFAULT_CONNECTION_TIMEOUT,
"Cassandra socket default connection timeout in millisecs. Default = 5000")
.add(CASSANDRA_SOCKET_READ_TIMEOUT_MILLIS, DEFAULT_READ_TIMEOUT,
"Cassandra socket read timeout in millisecs. Default = 12000")
.add(CASSANDRA_SOCKET_TCP_NO_DELAY, DEFAULT_TCP_NO_DELAY,
"Cassandra socket TCP no delay. Default = true")
.build());
}
@Override
public void open() {

View file

@ -0,0 +1,195 @@
[
{
"group": "cassandra",
"name": "cassandra",
"className": "org.apache.zeppelin.cassandra.CassandraInterpreter",
"properties": {
"cassandra.hosts": {
"envName": null,
"propertyName": "cassandra.hosts",
"defaultValue": "localhost",
"description": "Comma separated Cassandra hosts (DNS name or IP address). Default = localhost. Ex: '192.168.0.12,node2,node3'"
},
"cassandra.native.port": {
"envName": null,
"propertyName": "cassandra.native.port",
"defaultValue": "9042",
"description": "Cassandra native port. Default = 9042"
},
"cassandra.protocol.version": {
"envName": null,
"propertyName": "cassandra.protocol.version",
"defaultValue": "4",
"description": "Cassandra protocol version. Default = 4"
},
"cassandra.cluster": {
"envName": null,
"propertyName": "cassandra.cluster",
"defaultValue": "Test Cluster",
"description": "Cassandra cluster name. Default = 'Test Cluster'"
},
"cassandra.keyspace": {
"envName": null,
"propertyName": "cassandra.keyspace",
"defaultValue": "system",
"description": "Cassandra keyspace name. Default = 'system'"
},
"cassandra.compression.protocol": {
"envName": null,
"propertyName": "cassandra.compression.protocol",
"defaultValue": "NONE",
"description": "Cassandra compression protocol. Available values: NONE, SNAPPY, LZ4. Default = NONE"
},
"cassandra.credentials.username": {
"envName": null,
"propertyName": "cassandra.credentials.username",
"defaultValue": "none",
"description": "Cassandra credentials username. Default = 'none'"
},
"cassandra.credentials.password": {
"envName": null,
"propertyName": "cassandra.credentials.password",
"defaultValue": "none",
"description": "Cassandra credentials password. Default = 'none'"
},
"cassandra.load.balancing.policy": {
"envName": null,
"propertyName": "cassandra.load.balancing.policy",
"defaultValue": "DEFAULT",
"description": "Cassandra Load Balancing Policy. Default = new TokenAwarePolicy(new DCAwareRoundRobinPolicy())"
},
"cassandra.retry.policy": {
"envName": null,
"propertyName": "cassandra.retry.policy",
"defaultValue": "DEFAULT",
"description": "Cassandra Retry Policy. Default = DefaultRetryPolicy.INSTANCE"
},
"cassandra.reconnection.policy": {
"envName": null,
"propertyName": "cassandra.reconnection.policy",
"defaultValue": "DEFAULT",
"description": "Cassandra Reconnection Policy. Default = new ExponentialReconnectionPolicy(1000, 10 * 60 * 1000)"
},
"cassandra.speculative.execution.policy": {
"envName": null,
"propertyName": "cassandra.speculative.execution.policy",
"defaultValue": "DEFAULT",
"description": "Cassandra Speculative Execution Policy. Default = NoSpeculativeExecutionPolicy.INSTANCE"
},
"cassandra.interpreter.parallelism": {
"envName": null,
"propertyName": "cassandra.interpreter.parallelism",
"defaultValue": "10",
"description": "Cassandra interpreter parallelism. Default = 10"
},
"cassandra.max.schema.agreement.wait.second": {
"envName": null,
"propertyName": "cassandra.max.schema.agreement.wait.second",
"defaultValue": "10",
"description": "Cassandra max schema agreement wait in seconds. Default = ProtocolOptions.DEFAULT_MAX_SCHEMA_AGREEMENT_WAIT_SECONDS"
},
"cassandra.pooling.new.connection.threshold.local": {
"envName": null,
"propertyName": "cassandra.pooling.new.connection.threshold.local",
"defaultValue": "100",
"description": "Cassandra new connection threshold local. Protocol V2 and below default = 100 Protocol V3 and above default = 800"
},
"cassandra.pooling.new.connection.threshold.remote": {
"envName": null,
"propertyName": "cassandra.pooling.new.connection.threshold.remote",
"defaultValue": "100",
"description": "Cassandra new connection threshold remote. Protocol V2 and below default = 100 Protocol V3 and above default = 200"
},
"cassandra.pooling.core.connection.per.host.local": {
"envName": null,
"propertyName": "cassandra.pooling.core.connection.per.host.local",
"defaultValue": "2",
"description": "Cassandra core connection per host local. Protocol V2 and below default = 2 Protocol V3 and above default = 1"
},
"cassandra.pooling.core.connection.per.host.remote": {
"envName": null,
"propertyName": "cassandra.pooling.core.connection.per.host.remote",
"defaultValue": "1",
"description": "Cassandra core connection per host remote. Protocol V2 and below default = 1 Protocol V3 and above default = 1"
},
"cassandra.pooling.max.connection.per.host.local": {
"envName": null,
"propertyName": "cassandra.pooling.max.connection.per.host.local",
"defaultValue": "8",
"description": "Cassandra max connection per host local. Protocol V2 and below default = 8 Protocol V3 and above default = 1"
},
"cassandra.pooling.max.connection.per.host.remote": {
"envName": null,
"propertyName": "cassandra.pooling.max.connection.per.host.remote",
"defaultValue": "2",
"description": "Cassandra max connection per host remote. Protocol V2 and below default = 2 Protocol V3 and above default = 1"
},
"cassandra.pooling.max.request.per.connection.local": {
"envName": null,
"propertyName": "cassandra.pooling.max.request.per.connection.local",
"defaultValue": "1024",
"description": "Cassandra max request per connection local. Protocol V2 and below default = 128 Protocol V3 and above default = 1024"
},
"cassandra.pooling.max.request.per.connection.remote": {
"envName": null,
"propertyName": "cassandra.pooling.max.request.per.connection.remote",
"defaultValue": "256",
"description": "Cassandra max request per connection remote. Protocol V2 and below default = 128 Protocol V3 and above default = 256"
},
"cassandra.pooling.idle.timeout.seconds": {
"envName": null,
"propertyName": "cassandra.pooling.idle.timeout.seconds",
"defaultValue": "120",
"description": "Cassandra idle time out in seconds. Default = 120"
},
"cassandra.pooling.pool.timeout.millisecs": {
"envName": null,
"propertyName": "cassandra.pooling.pool.timeout.millisecs",
"defaultValue": "5000",
"description": "Cassandra pool time out in millisecs. Default = 5000"
},
"cassandra.pooling.heartbeat.interval.seconds": {
"envName": null,
"propertyName": "cassandra.pooling.heartbeat.interval.seconds",
"defaultValue": "30",
"description": "Cassandra pool heartbeat interval in secs. Default = 30"
},
"cassandra.query.default.consistency": {
"envName": null,
"propertyName": "cassandra.query.default.consistency",
"defaultValue": "ONE",
"description": "Cassandra query default consistency level. Default = ONE"
},
"cassandra.query.default.serial.consistency": {
"envName": null,
"propertyName": "cassandra.query.default.serial.consistency",
"defaultValue": "SERIAL",
"description": "Cassandra query default serial consistency level. Default = SERIAL"
},
"cassandra.query.default.fetchSize": {
"envName": null,
"propertyName": "cassandra.query.default.fetchSize",
"defaultValue": "5000",
"description": "Cassandra query default fetch size. Default = 5000"
},
"cassandra.socket.connection.timeout.millisecs": {
"envName": null,
"propertyName": "cassandra.socket.connection.timeout.millisecs",
"defaultValue": "5000",
"description": "Cassandra socket default connection timeout in millisecs. Default = 5000"
},
"cassandra.socket.read.timeout.millisecs": {
"envName": null,
"propertyName": "cassandra.socket.read.timeout.millisecs",
"defaultValue": "12000",
"description": "Cassandra socket read timeout in millisecs. Default = 12000"
},
"cassandra.socket.tcp.no_delay": {
"envName": null,
"propertyName": "cassandra.socket.tcp.no_delay",
"defaultValue": "true",
"description": "Cassandra socket TCP no delay. Default = true"
}
}
}
]

View file

@ -19,6 +19,7 @@
alluxio org.apache.zeppelin:zeppelin-alluxio:0.6.1 Alluxio interpreter
angular org.apache.zeppelin:zeppelin-angular:0.6.1 HTML and AngularJS view rendering
beam org.apache.zeppelin:zeppelin-beam:0.6.1 Beam interpreter
bigquery org.apache.zeppelin:zeppelin-bigquery:0.6.1 BigQuery interpreter
cassandra org.apache.zeppelin:zeppelin-cassandra_2.11:0.6.1 Cassandra interpreter built with Scala 2.11
elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:0.6.1 Elasticsearch interpreter
@ -31,6 +32,7 @@ kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin in
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command

View file

@ -19,8 +19,8 @@ REM
REM set JAVA_HOME=
REM set MASTER= REM Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
REM set ZEPPELIN_JAVA_OPTS REM Additional jvm options. for example, set ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
REM set ZEPPELIN_MEM REM Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m
REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options.
REM set ZEPPELIN_MEM REM Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
REM set ZEPPELIN_INTP_MEM REM zeppelin interpreter process jvm mem options. Default -Xmx1024m -Xms1024m -XX:MaxPermSize=512m
REM set ZEPPELIN_INTP_JAVA_OPTS REM zeppelin interpreter process jvm options.
REM set ZEPPELIN_LOG_DIR REM Where log files are stored. PWD by default.

View file

@ -19,9 +19,10 @@
# export JAVA_HOME=
# export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
# export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
# export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xmx1024m -XX:MaxPermSize=512m
# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options.
# export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
# export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
# export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options.
# export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true)
# export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default.
# export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default.

View file

@ -31,6 +31,12 @@
<description>Server port.</description>
</property>
<property>
<name>zeppelin.server.ssl.port</name>
<value>8443</value>
<description>Server ssl port. (used when ssl property is set to true)</description>
</property>
<property>
<name>zeppelin.server.context.path</name>
<value>/</value>
@ -184,13 +190,13 @@
<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter, org.apache.zeppelin.pig.PigQueryInterpreter</value>
<description>Comma separated interpreter configurations. The first interpreter becomes the default.</description>
</property>
<property>
<name>zeppelin.interpreter.group.order</name>
<value>spark,md,angular,sh,livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch,scalding,jdbc,hbase,bigquery</value>
<value>spark,md,angular,sh,livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch,scalding,jdbc,hbase,bigquery,beam</value>
<description></description>
</property>

View file

@ -44,7 +44,7 @@ NC='\033[0m' # No Color
RELEASE_VERSION="$1"
GIT_TAG="$2"
PUBLISH_PROFILES="-Pbuild-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr"
PROJECT_OPTIONS="-pl !zeppelin-distribution"
NEXUS_STAGING="https://repository.apache.org/service/local/staging"
NEXUS_PROFILE="153446d1ac37c4"

View file

@ -33,6 +33,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>More</b></span></li>
<li><a href="{{BASE_PATH}}/install/upgrade.html">Upgrade Zeppelin Version</a></li>
<li><a href="{{BASE_PATH}}/quickstart/install_with_flink_and_spark_cluster.html">Install Zeppelin with Flink and Spark Clusters Tutorial</a></li>
</ul>
</li>
<li>
@ -47,6 +48,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Available Interpreters</b></span></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/beam.html">Beam</a></li>
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
@ -60,6 +62,7 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
@ -107,6 +110,7 @@
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-standalone-mode">Zeppelin on Spark Cluster Mode (Standalone)</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-yarn-mode">Zeppelin on Spark Cluster Mode (YARN)</a></li>
<li><a href="{{BASE_PATH}}/install/spark_cluster_mode.html#spark-on-mesos-mode">Zeppelin on Spark Cluster Mode (Mesos)</a></li>
<li><a href="{{BASE_PATH}}/install/cdh.html">Zeppelin on CDH</a></li>
<li role="separator" class="divider"></li>
<li class="title"><span><b>Contribute</b></span></li>
<li><a href="{{BASE_PATH}}/development/writingzeppelininterpreter.html">Writing Zeppelin Interpreter</a></li>
@ -115,8 +119,6 @@
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav">
<li>
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
<span class="fa fa-search nav-search-icon"></span>

View file

@ -619,6 +619,10 @@ and (max-width: 1024px) {
.navbar-collapse.collapse {
padding-right: 0;
}
.navbar-fixed-top > .container {
width: 800px;
}
}
/* master branch docs dropdown menu */

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 168 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 352 KiB

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 168 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 171 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 139 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View file

@ -1,6 +1,6 @@
---
layout: nil
title : Atom Feed
title :
---
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">

View file

@ -172,6 +172,7 @@ Join to our [Mailing list](https://zeppelin.apache.org/community.html) and repor
* [Zeppelin on Spark Cluster Mode (Standalone via Docker)](./install/spark_cluster_mode.html#spark-standalone-mode)
* [Zeppelin on Spark Cluster Mode (YARN via Docker)](./install/spark_cluster_mode.html#spark-on-yarn-mode)
* [Zeppelin on Spark Cluster Mode (Mesos via Docker)](./install/spark_cluster_mode.html#spark-on-mesos-mode)
* [Zeppelin on CDH (via Docker)](./install/cdh.html)
* Contribute
* [Writing Zeppelin Interpreter](./development/writingzeppelininterpreter.html)
* [Writing Zeppelin Application (Experimental)](./development/writingzeppelinapplication.html)

100
docs/install/cdh.md Normal file
View file

@ -0,0 +1,100 @@
---
layout: page
title: "Apache Zeppelin on CDH"
description: "This document will guide you through building and configuring a CDH environment for Apache Zeppelin using Docker scripts."
group: install
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Apache Zeppelin on CDH
<div id="toc"></div>
### 1. Import Cloudera QuickStart Docker image
>[Cloudera](http://www.cloudera.com/) officially provides a CDH QuickStart Docker image on Docker Hub. Please check [this guide page](http://www.cloudera.com/documentation/enterprise/latest/topics/quickstart_docker_container.html#cloudera_docker_container) for more information.
You can import the Docker image by pulling it from Cloudera Docker Hub.
```
docker pull cloudera/quickstart:latest
```
### 2. Run docker
```
docker run -it \
-p 80:80 \
-p 4040:4040 \
-p 8020:8020 \
-p 8022:8022 \
-p 8030:8030 \
-p 8032:8032 \
-p 8033:8033 \
-p 8040:8040 \
-p 8042:8042 \
-p 8088:8088 \
-p 8480:8480 \
-p 8485:8485 \
-p 8888:8888 \
-p 9083:9083 \
-p 10020:10020 \
-p 10033:10033 \
-p 18088:18088 \
-p 19888:19888 \
-p 25000:25000 \
-p 25010:25010 \
-p 25020:25020 \
-p 50010:50010 \
-p 50020:50020 \
-p 50070:50070 \
-p 50075:50075 \
-h quickstart.cloudera --privileged=true \
cloudera/quickstart:latest /usr/bin/docker-quickstart;
```
### 3. Verify running CDH
To verify the application is running well, check the web UI for HDFS on `http://<hostname>:50070/` and YARN on `http://<hostname>:8088/cluster`.
### 4. Configure Spark interpreter in Zeppelin
Set the following configurations in `conf/zeppelin-env.sh`.
```
export MASTER=yarn-client
export HADOOP_CONF_DIR=[your_hadoop_conf_path]
export SPARK_HOME=[your_spark_home_path]
```
`HADOOP_CONF_DIR`(Hadoop configuration path) is defined in `/scripts/docker/spark-cluster-managers/cdh/hdfs_conf`.
Don't forget to set the Spark `master` to `yarn-client` on the Zeppelin **Interpreters** setting page, as shown below.
<img src="../assets/themes/zeppelin/img/docs-img/zeppelin_yarn_conf.png" />
### 5. Run Zeppelin with Spark interpreter
After running a single paragraph with Spark interpreter in Zeppelin,
<img src="../assets/themes/zeppelin/img/docs-img/zeppelin_with_cdh.png" />
<br/>
browse `http://<hostname>:8088/cluster/apps` to check whether the Zeppelin application is running well.
<img src="../assets/themes/zeppelin/img/docs-img/cdh_yarn_applications.png" />

View file

@ -1,7 +1,7 @@
---
layout: page
title: "Quick Start"
description: "This page will help you to get started and guide you through installation of Apache Zeppelin, running it in the command line and basic configuration options."
description: "This page will help you get started and will guide you through installing Apache Zeppelin, running it in the command line and configuring options."
group: install
---
<!--
@ -20,14 +20,14 @@ limitations under the License.
{% include JB/setup %}
# Quick Start
Welcome to your first trial to explore Apache Zeppelin!
This page will help you to get started and here is the list of topics covered.
<div id="toc"></div>
Welcome to Apache Zeppelin! On this page are instructions to help you get started.
## Installation
Apache Zeppelin officially supports and is tested on next environments.
Apache Zeppelin officially supports and is tested on the following environments:
<table class="table-configuration">
<tr>
@ -44,21 +44,22 @@ Apache Zeppelin officially supports and is tested on next environments.
</tr>
</table>
There are two options to install Apache Zeppelin on your machine. One is [downloading pre-built binary package](#downloading-binary-package) from the archive.
You can download not only the latest stable version but also the older one if you need.
The other option is [building from the source](#building-from-source).
Although it can be unstable somehow since it is on development status, you can explore newly added feature and change it as you want.
To install Apache Zeppelin, you have two options:
* You can [download pre-built binary packages](#downloading-binary-package) from the archive. This is usually easier than building from source, and you can download the latest stable version (or older versions, if necessary).
* You can also [build from source](#building-from-source). This gives you a development version of Zeppelin, which is more unstable but has new features.
### Downloading Binary Package
If you want to install Apache Zeppelin with a stable binary package, please visit [Apache Zeppelin download Page](http://zeppelin.apache.org/download.html).
Stable binary packages are available on the [Apache Zeppelin Download Page](http://zeppelin.apache.org/download.html). You can download a default package with all interpreters, or you can download the *net-install* package, which lets you choose which interpreters to install.
If you have downloaded `netinst` binary, [install additional interpreters](../manual/interpreterinstallation.html) before you start Zeppelin. Or simply run `./bin/install-interpreter.sh --all`.
If you downloaded the default package, just unpack it in a directory of your choice and you're ready to go. If you downloaded the *net-install* package, you should manually [install additional interpreters](../manual/interpreterinstallation.html) first. You can also install everything by running `./bin/install-interpreter.sh --all`.
After unpacking, jump to [Starting Apache Zeppelin with Command Line](#starting-apache-zeppelin-with-command-line) section.
After unpacking, jump to the [Starting Apache Zeppelin from the Command Line](#starting-apache-zeppelin-from-the-command-line) section.
### Building from Source
If you want to build from the source, the software below needs to be installed on your system.
If you want to build from source, you must first install the following dependencies:
<table class="table-configuration">
<tr>
@ -67,7 +68,7 @@ If you want to build from the source, the software below needs to be installed o
</tr>
<tr>
<td>Git</td>
<td></td>
<td>(Any Version)</td>
</tr>
<tr>
<td>Maven</td>
@ -75,22 +76,23 @@ If you want to build from the source, the software below needs to be installed o
</tr>
</table>
If you don't have it installed yet, please check [Before Build](https://github.com/apache/zeppelin/blob/master/README.md#before-build) section and follow step by step instructions from there.
If you haven't installed Git and Maven yet, check the [Before Build](https://github.com/apache/zeppelin/blob/master/README.md#before-build) section and follow the step by step instructions from there.
####1. Clone Apache Zeppelin repository
####1. Clone the Apache Zeppelin repository
```
git clone https://github.com/apache/zeppelin.git
```
####2. Build source with options
Each interpreters requires different build options. For the further information about options, please see [Build](https://github.com/apache/zeppelin#build) section.
Each interpreter requires different build options. For more information about build options, please see the [Build](https://github.com/apache/zeppelin#build) section.
```
mvn clean package -DskipTests [Options]
```
Here are some examples with several options
Here are some examples with several options:
```
# build with spark-2.0, scala-2.11
@ -110,24 +112,26 @@ mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pven
mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests
```
For the further information about building with source, please see [README.md](https://github.com/apache/zeppelin/blob/master/README.md) in Zeppelin repository.
For further information about building from source, please see [README.md](https://github.com/apache/zeppelin/blob/master/README.md) in the Zeppelin repository.
## Starting Apache Zeppelin with Command Line
#### Start Zeppelin
## Starting Apache Zeppelin from the Command Line
#### Starting Apache Zeppelin
On all platforms except for Windows:
```
bin/zeppelin-daemon.sh start
```
If you are using Windows
If you are using Windows:
```
bin\zeppelin.cmd
```
After successful start, visit [http://localhost:8080](http://localhost:8080) with your web browser.
After Zeppelin has started successfully, go to [http://localhost:8080](http://localhost:8080) with your web browser.
#### Stop Zeppelin
#### Stopping Zeppelin
```
bin/zeppelin-daemon.sh stop
@ -137,10 +141,10 @@ bin/zeppelin-daemon.sh stop
> **Note :** The below description was written based on Ubuntu Linux.
Apache Zeppelin can be auto started as a service with an init script, such as services managed by **upstart**.
Apache Zeppelin can be auto-started as a service with an init script, using a service manager like **upstart**.
The following is an example of upstart script to be saved as `/etc/init/zeppelin.conf`
This also allows the service to be managed with commands such as
This is an example upstart script saved as `/etc/init/zeppelin.conf`
This allows the service to be managed with commands such as
```
sudo service zeppelin start
@ -174,24 +178,25 @@ chdir /usr/share/zeppelin
exec bin/zeppelin-daemon.sh upstart
```
## What is the next?
Congratulation on your successful Apache Zeppelin installation! Here are two next steps you might need.
## Next Steps:
#### If you are new to Apache Zeppelin
* For an in-depth overview of Apache Zeppelin UI, head to [Explore Apache Zeppelin UI](../quickstart/explorezeppelinui.html).
* After getting familiar with Apache Zeppelin UI, have fun with a short walk-through [Tutorial](../quickstart/tutorial.html) that uses Apache Spark backend.
* If you need more configuration setting for Apache Zeppelin, jump to the next section: [Apache Zeppelin Configuration](#apache-zeppelin-configuration).
Congratulations, you have successfully installed Apache Zeppelin! Here are two next steps you might find useful:
#### If you are new to Apache Zeppelin...
* For an in-depth overview of the Apache Zeppelin UI, head to [Explore Apache Zeppelin UI](../quickstart/explorezeppelinui.html).
* After getting familiar with the Apache Zeppelin UI, have fun with a short walk-through [Tutorial](../quickstart/tutorial.html) that uses the Apache Spark backend.
* If you need more configuration for Apache Zeppelin, jump to the next section: [Apache Zeppelin Configuration](#apache-zeppelin-configuration).
#### If you need more information about Spark or JDBC interpreter setting
* Apache Zeppelin provides deep integration with [Apache Spark](http://spark.apache.org/). For the further informtation, see [Spark Interpreter for Apache Zeppelin](../interpreter/spark.html).
* Also, you can use generic JDBC connections in Apache Zeppelin. Go to [Generic JDBC Interpreter for Apache Zeppelin](../interpreter/jdbc.html).
#### If you need more information about Spark or JDBC interpreter settings...
* Apache Zeppelin provides deep integration with [Apache Spark](http://spark.apache.org/). For more information, see [Spark Interpreter for Apache Zeppelin](../interpreter/spark.html).
* You can also use generic JDBC connections in Apache Zeppelin. Go to [Generic JDBC Interpreter for Apache Zeppelin](../interpreter/jdbc.html).
#### If you are in multi-user environment
* You can set permissions for your notebooks and secure data resource in multi-user environment. Go to **More** -> **Security** section.
#### If you are in a multi-user environment...
* You can set permissions for your notebooks and secure data resources in a multi-user environment. Go to the **More** -> **Security** section.
## Apache Zeppelin Configuration
You can configure Apache Zeppelin with both **environment variables** in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows) and **Java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will take priority.
You can configure Apache Zeppelin with either **environment variables** in `conf/zeppelin-env.sh` (`conf\zeppelin-env.cmd` for Windows) or **Java properties** in `conf/zeppelin-site.xml`. If both are defined, then the **environment variables** will take priority.
<table class="table-configuration">
<tr>
@ -206,6 +211,12 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>8080</td>
<td>Zeppelin server port</td>
</tr>
<tr>
<td>ZEPPELIN_SSL_PORT</td>
<td>zeppelin.server.ssl.port</td>
<td>8443</td>
<td>Zeppelin server SSL port (used when the SSL environment variable/property is set to true)</td>
</tr>
<tr>
<td>ZEPPELIN_MEM</td>
<td>N/A</td>
@ -228,19 +239,19 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>ZEPPELIN_ALLOWED_ORIGINS</td>
<td>zeppelin.server.allowed.origins</td>
<td>*</td>
<td>Enables a way to specify a ',' separated list of allowed origins for rest and websockets. <br /> i.e. http://localhost:8080 </td>
<td>Enables a way to specify a ',' separated list of allowed origins for REST and websockets. <br /> i.e. http://localhost:8080 </td>
</tr>
<tr>
<td>N/A</td>
<td>zeppelin.anonymous.allowed</td>
<td>true</td>
<td>Anonymous user is allowed by default.</td>
<td>The anonymous user is allowed by default.</td>
</tr>
<tr>
<td>ZEPPELIN_SERVER_CONTEXT_PATH</td>
<td>zeppelin.server.context.path</td>
<td>/</td>
<td>A context path of the web application</td>
<td>Context path of the web application</td>
</tr>
<tr>
<td>ZEPPELIN_SSL</td>
@ -300,19 +311,19 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN</td>
<td>zeppelin.notebook.homescreen</td>
<td></td>
<td>A notebook id displayed in Apache Zeppelin homescreen <br />i.e. 2A94M5J1Z</td>
<td>ID of the notebook to display on the Apache Zeppelin homescreen <br />e.g. 2A94M5J1Z</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE</td>
<td>zeppelin.notebook.homescreen.hide</td>
<td>false</td>
<td>This value can be "true" when to hide the notebook id set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For the further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
<td>Hide the notebook ID set by <code>ZEPPELIN_NOTEBOOK_HOMESCREEN</code> on the Apache Zeppelin homescreen. <br />For further information, please read <a href="../manual/notebookashomepage.html">Customize your Zeppelin homepage</a>.</td>
</tr>
<tr>
<td>ZEPPELIN_WAR_TEMPDIR</td>
<td>zeppelin.war.tempdir</td>
<td>webapps</td>
<td>A location of jetty temporary directory</td>
<td>Location of the jetty temporary directory</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_DIR</td>
@ -330,7 +341,7 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>ZEPPELIN_NOTEBOOK_S3_USER</td>
<td>zeppelin.notebook.s3.user</td>
<td>user</td>
<td>A user name of S3 bucket<br />i.e. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
<td>User name of an S3 bucket<br />i.e. <code>bucket/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_S3_ENDPOINT</td>
@ -360,25 +371,25 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>ZEPPELIN_NOTEBOOK_AZURE_SHARE</td>
<td>zeppelin.notebook.azure.share</td>
<td>zeppelin</td>
<td>Share where the notebook files will be saved</td>
<td>Azure Share where the notebook files will be saved</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_AZURE_USER</td>
<td>zeppelin.notebook.azure.user</td>
<td>user</td>
<td>An optional user name of Azure file share<br />i.e. <code>share/user/notebook/2A94M5J1Z/note.json</code></td>
<td>Optional user name of an Azure file share<br />i.e. <code>share/user/notebook/2A94M5J1Z/note.json</code></td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_STORAGE</td>
<td>zeppelin.notebook.storage</td>
<td>org.apache.zeppelin.notebook.repo.VFSNotebookRepo</td>
<td>Comma separated list of notebook storage</td>
<td>Comma separated list of notebook storage locations</td>
</tr>
<tr>
<td>ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC</td>
<td>zeppelin.notebook.one.way.sync</td>
<td>false</td>
<td>If there are multiple notebook storages, should we treat the first one as the only source of truth?</td>
<td>If there are multiple notebook storage locations, should we treat the first one as the only source of truth?</td>
</tr>
<tr>
<td>ZEPPELIN_INTERPRETERS</td>
@ -389,7 +400,7 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
</td>
<td>
Comma separated interpreter configurations [Class] <br/>
<span style="font-style:italic">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0</span>
<span style="font-style:italic">NOTE: This property is deprecated since Zeppelin-0.6.0 and will not be supported from Zeppelin-0.7.0 on.</span>
</td>
</tr>
<tr>
@ -402,6 +413,6 @@ You can configure Apache Zeppelin with both **environment variables** in `conf/z
<td>ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE</td>
<td>zeppelin.websocket.max.text.message.size</td>
<td>1024000</td>
<td>Size in characters of the maximum text message to be received by websocket.</td>
<td>Size (in characters) of the maximum text message that can be received by websocket.</td>
</tr>
</table>

View file

@ -49,5 +49,6 @@ So, copying `notebook` and `conf` directory should be enough.
### Upgrading from Zeppelin 0.6 to 0.7
- From 0.7, we don't use `ZEPPELIN_JAVA_OPTS` as default value of `ZEPPELIN_INTP_JAVA_OPTS` and also the same for `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If user want to configure the jvm opts of interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly.
- From 0.7, we don't use `ZEPPELIN_JAVA_OPTS` as the default value of `ZEPPELIN_INTP_JAVA_OPTS`, and the same applies to `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If you want to configure the JVM options of the interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly. If you don't set `ZEPPELIN_INTP_MEM`, Zeppelin will set it to `-Xms1024m -Xmx1024m -XX:MaxPermSize=512m` by default.
- Mapping from `%jdbc(prefix)` to `%prefix` is no longer available. Instead, you can use `%[interpreter alias]` with multiple interpreter settings in the GUI.
- Usage of `ZEPPELIN_PORT` is not supported in ssl mode. Instead use `ZEPPELIN_SSL_PORT` to configure the ssl port. Value from `ZEPPELIN_PORT` is used only when `ZEPPELIN_SSL` is set to `false`.

124
docs/interpreter/beam.md Normal file
View file

@ -0,0 +1,124 @@
---
layout: page
title: Beam interpreter in Apache Zeppelin
description: Apache Beam is an open source, unified programming model that you can use to create a data processing pipeline.
group: interpreter
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
{% include JB/setup %}
# Beam interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[Apache Beam](http://beam.incubator.apache.org) is an open source unified platform for data processing pipelines. A pipeline can be built using one of the Beam SDKs.
The execution of the pipeline is done by different Runners. Currently, Beam supports Apache Flink Runner, Apache Spark Runner, and Google Dataflow Runner.
## How to use
Basically, you can write normal Beam Java code in which you choose the Runner. You should write the main method inside a class because the interpreter invokes this main method to execute the pipeline. Unlike the usual Zeppelin pattern, each paragraph is treated as a separate job; it has no relation to any other paragraph.
The following is a demonstration of a word count example with the data represented as an array of strings.
It can also read data from files by replacing `Create.of(SENTENCES).withCoder(StringUtf8Coder.of())` with `TextIO.Read.from("path/to/filename.txt")`.
```java
%beam
// most used imports
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.Create;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.*;
import org.apache.spark.SparkContext;
import org.apache.beam.runners.direct.*;
import org.apache.beam.sdk.runners.*;
import org.apache.beam.sdk.options.*;
import org.apache.beam.runners.spark.*;
import org.apache.beam.runners.spark.io.ConsoleIO;
import org.apache.beam.runners.flink.*;
import org.apache.beam.runners.flink.examples.WordCount.Options;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SimpleFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.options.PipelineOptions;
/**
 * Minimal word-count example for the Beam interpreter.
 *
 * <p>The pipeline is created from the in-memory {@link #SENTENCES}, split into words,
 * counted per distinct word, and every (word, count) pair is appended to {@link #s} as a
 * tab-separated row. After the pipeline has run, {@code main} prints a {@code %table}
 * header line followed by the collected rows.
 */
public class MinimalWordCount {
  /**
   * Result rows collected by the "FormatResults" step, one entry per distinct word.
   * NOTE(review): sharing results through a static list presumably relies on the runner
   * executing the DoFn in the same JVM as {@code main} - confirm for the chosen Runner.
   */
  static List<String> s = new ArrayList<>();

  /** Input text, one sentence per element. */
  static final String[] SENTENCES_ARRAY = {
    "Hadoop is the Elephant King!",
    "A yellow and elegant thing.",
    "He never forgets",
    "Useful data, or lets",
    "An extraneous element cling!",
    "A wonderful king is Hadoop.",
    "The elephant plays well with Sqoop.",
    "But what helps him to thrive",
    "Are Impala, and Hive,",
    "And HDFS in the group.",
    "Hadoop is an elegant fellow.",
    "An elephant gentle and mellow.",
    "He never gets mad,",
    "Or does anything bad,",
    "Because, at his core, he is yellow"
  };

  /** List view of {@link #SENTENCES_ARRAY}, used as the pipeline source. */
  static final List<String> SENTENCES = Arrays.asList(SENTENCES_ARRAY);

  /**
   * Builds and runs the word-count pipeline on the Flink runner, then prints the result.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Options pipelineOptions = PipelineOptionsFactory.create().as(Options.class);
    pipelineOptions.setRunner(FlinkRunner.class);
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    pipeline
        .apply(Create.of(SENTENCES).withCoder(StringUtf8Coder.of()))
        .apply("ExtractWords", ParDo.of(new DoFn<String, String>() {
          @Override
          public void processElement(ProcessContext context) {
            // Split on every run of characters that is neither a letter nor an apostrophe.
            String[] tokens = context.element().split("[^a-zA-Z']+");
            for (String token : tokens) {
              if (token.isEmpty()) {
                continue;
              }
              context.output(token);
            }
          }
        }))
        .apply(Count.<String> perElement())
        .apply("FormatResults", ParDo.of(new DoFn<KV<String, Long>, String>() {
          @Override
          public void processElement(DoFn<KV<String, Long>, String>.ProcessContext context)
              throws Exception {
            // Nothing is emitted downstream; the formatted row is only stored in the list.
            KV<String, Long> wordCount = context.element();
            s.add("\n" + wordCount.getKey() + "\t" + wordCount.getValue());
          }
        }));

    pipeline.run();

    // Header line first, then the rows; each row already starts with its own line break.
    System.out.println("%table word\tcount");
    for (int row = 0; row < s.size(); row++) {
      System.out.print(s.get(row));
    }
  }
}
```

View file

@ -25,63 +25,136 @@ limitations under the License.
## Overview
This interpreter lets you create a JDBC connection to any data source, by now it has been tested with:
The JDBC interpreter lets you create a JDBC connection to any data source seamlessly. So far, it has been tested with:
* Postgres
* MySql
* MariaDB
* Redshift
* Apache Hive
* Apache Phoenix
* Apache Drill (Details on using [Drill JDBC Driver](https://drill.apache.org/docs/using-the-jdbc-driver))
* Apache Tajo
<div class="row" style="margin: 30px auto;">
<div class="col-md-6">
<img src="../assets/themes/zeppelin/img/docs-img/tested_databases.png" width="300px"/>
</div>
<div class="col-md-6">
<li style="padding-bottom: 5px; list-style: circle">
<a href="http://www.postgresql.org/" target="_blank">Postgresql</a> -
<a href="https://jdbc.postgresql.org/" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://www.mysql.com/" target="_blank">Mysql</a> -
<a href="https://dev.mysql.com/downloads/connector/j/" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://mariadb.org/" target="_blank">MariaDB</a> -
<a href="https://mariadb.com/kb/en/mariadb/about-mariadb-connector-j/" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://aws.amazon.com/documentation/redshift/" target="_blank">Redshift</a> -
<a href="https://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://hive.apache.org/" target="_blank">Apache Hive</a> -
<a href="https://cwiki.apache.org/confluence/display/Hive/HiveClient#HiveClient-JDBC" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://phoenix.apache.org/" target="_blank">Apache Phoenix</a> itself is a JDBC driver
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="https://drill.apache.org/" target="_blank">Apache Drill</a> -
<a href="https://drill.apache.org/docs/using-the-jdbc-driver" target="_blank">JDBC Driver</a>
</li>
<li style="padding-bottom: 5px; list-style: circle">
<a href="http://tajo.apache.org/" target="_blank">Apache Tajo</a> -
<a href="https://tajo.apache.org/docs/current/jdbc_driver.html" target="_blank">JDBC Driver</a>
</li>
</div>
</div>
If someone else used another database please report how it works to improve functionality.
If you are using other databases not in the above list, please feel free to share your use case. It would be helpful to improve the functionality of JDBC interpreter.
## Create Interpreter
## Create a new JDBC Interpreter
When you create a interpreter by default use PostgreSQL with the next properties:
First, click `+ Create` button at the top-right corner in the interpreter setting page.
<img src="../assets/themes/zeppelin/img/docs-img/click_create_button.png" width="600px"/>
Fill the `Interpreter name` field with whatever you want to use as the alias (e.g. mysql, mysql2, hive, redshift, etc.). Please note that this alias will be used as `%interpreter_name` to call the interpreter in the paragraph.
Then select `jdbc` as an `Interpreter group`.
<img src="../assets/themes/zeppelin/img/docs-img/select_name_and_group.png" width="200px"/>
The default driver of JDBC interpreter is set as `PostgreSQL`. It means Zeppelin includes `PostgreSQL` driver jar in itself.
So you don't need to add any dependencies(e.g. the artifact name or path for `PostgreSQL` driver jar) for `PostgreSQL` connection.
The JDBC interpreter properties are defined by default like below.
<table class="table-configuration">
<tr>
<th>name</th>
<th>value</th>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>common.max_count</td>
<td>1000</td>
<td>The maximum number of SQL results to display</td>
</tr>
<tr>
<td>default.driver</td>
<td>org.postgresql.Driver</td>
<td>JDBC Driver Name</td>
</tr>
<tr>
<td>default.password</td>
<td>********</td>
<td></td>
<td>The JDBC user password</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:postgresql://localhost:5432/</td>
<td>The URL for JDBC</td>
</tr>
<tr>
<td>default.user</td>
<td>gpadmin</td>
</tr>
<td>The JDBC user name</td>
</tr>
</table>
It is not necessary to add driver jar to the classpath for PostgreSQL as it is included in Zeppelin.
If you want to connect other databases such as `Mysql`, `Redshift` and `Hive`, you need to edit the property values.
The below example is for `Mysql` connection.
### Simple connection
<img src="../assets/themes/zeppelin/img/docs-img/edit_properties.png" width="600px" />
Prior to creating the interpreter it is necessary to add maven coordinate or path of the JDBC driver to the Zeppelin classpath. To do this you must edit dependencies artifact(ex. `mysql:mysql-connector-java:5.1.38`) in interpreter menu as shown:
The last step is **Dependency Setting**. Since Zeppelin only includes `PostgreSQL` driver jar by default, you need to add each driver's maven coordinates or JDBC driver's jar file path for the other databases.
<div class="row">
<div class="col-md-11">
<img src="../assets/themes/zeppelin/img/docs-img/jdbc-simple-connection-setting.png" />
</div>
</div>
<img src="../assets/themes/zeppelin/img/docs-img/edit_dependencies.png" width="600px" />
To create the interpreter you need to specify connection parameters as shown in the table.
That's it. You can find more JDBC connection setting examples([Mysql](#mysql), [Apache Hive](#apache-hive), [Apache Phoenix](#apache-phoenix), and [Apache Tajo](#apache-tajo)) in [this section](#examples).
## More properties
There are more JDBC interpreter properties you can specify like below.
<table class="table-configuration">
<tr>
<th>Property Name</th>
<th>Description</th>
</tr>
<tr>
<td>common.max_result</td>
<td>Maximum number of SQL results to display, to prevent overloading the browser. This is a common property for all connections</td>
</tr>
<tr>
<td>zeppelin.jdbc.auth.type</td>
<td>The supported authentication methods are <code>SIMPLE</code> and <code>KERBEROS</code></td>
</tr>
<tr>
<td>zeppelin.jdbc.principal</td>
<td>The principal name to load from the keytab</td>
</tr>
<tr>
<td>zeppelin.jdbc.keytab.location</td>
<td>The path to the keytab file</td>
</tr>
</table>
You can also add more properties by using this [method](http://docs.oracle.com/javase/7/docs/api/java/sql/DriverManager.html#getConnection%28java.lang.String,%20java.util.Properties%29).
For example, if a connection needs a schema parameter, you would add the property as follows:
<table class="table-configuration">
<tr>
@ -89,280 +162,263 @@ To create the interpreter you need to specify connection parameters as shown in
<th>value</th>
</tr>
<tr>
<td>common.max_count</td>
<td>1000</td>
<td>default.schema</td>
<td>schema_name</td>
</tr>
<tr>
<td>default.driver</td>
<td>driver name</td>
</tr>
<tr>
<td>default.password</td>
<td>********</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc url</td>
</tr>
<tr>
<td>default.user</td>
<td>user name</td>
</tr>
</table>
### Multiple connections
## Binding JDBC interpreter to notebook
To bind the interpreters created in the interpreter setting page, click the gear icon at the top-right corner.
JDBC interpreter also allows connections to multiple data sources. It is necessary to set a prefix for each connection to reference it in the paragraph in the form of `%jdbc(prefix)`. Before you create the interpreter it is necessary to add each driver's maven coordinates or JDBC driver's jar file path to the Zeppelin classpath. To do this you must edit the dependencies of JDBC interpreter in interpreter menu as following:
<img src="../assets/themes/zeppelin/img/docs-img/click_interpreter_binding_button.png" width="600px" />
<div class="row">
<div class="col-md-11">
<img src="../assets/themes/zeppelin/img/docs-img/jdbc-multi-connection-setting.png" />
</div>
</div>
Select(blue) or deselect(white) the interpreter buttons depending on your use cases.
If you need to use more than one interpreter in the notebook, activate several buttons.
Don't forget to click `Save` button, or you will face `Interpreter *** is not found` error.
You can add all the jars you need to make multiple connections into the same JDBC interpreter. To create the interpreter you must specify the parameters. For example we will create two connections to MySQL and Redshift, the respective prefixes are `default` and `redshift`:
<img src="../assets/themes/zeppelin/img/docs-img/jdbc_interpreter_binding.png" width="550px" />
## How to use
### Run the paragraph with JDBC interpreter
To test whether your databases and Zeppelin are successfully connected or not, type `%jdbc_interpreter_name`(e.g. `%mysql`) at the top of the paragraph and run `show databases`.
```sql
%jdbc_interpreter_name
show databases
```
If the paragraph is `FINISHED` without any errors, a new paragraph will be automatically added after the previous one with `%jdbc_interpreter_name`.
So you don't need to type this prefix in every paragraph's header.
<img src="../assets/themes/zeppelin/img/docs-img/run_paragraph_with_jdbc.png" width="600px" />
### Apply Zeppelin Dynamic Forms
You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parametrization features.
```sql
%jdbc_interpreter_name
SELECT name, country, performer
FROM demo.performers
WHERE name='{{"{{performer=Sheryl Crow|Doof|Fanfarlo|Los Paranoia"}}}}'
```
## Examples
Here are some examples you can refer to. In addition to the connectors below, you can connect to any database as long as it can be configured with its JDBC driver.
### Mysql
<img src="../assets/themes/zeppelin/img/docs-img/mysql_setting.png" width="600px" />
##### Properties
<table class="table-configuration">
<tr>
<th>name</th>
<th>value</th>
</tr>
<tr>
<td>common.max_count</td>
<td>1000</td>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>default.driver</td>
<td>com.mysql.jdbc.Driver</td>
</tr>
<tr>
<td>default.password</td>
<td>********</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:mysql://localhost:3306/</td>
</tr>
<tr>
<td>default.user</td>
<td>mysql-user</td>
<td>mysql_user</td>
</tr>
<tr>
<td>redshift.driver</td>
<td>com.amazon.redshift.jdbc4.Driver</td>
<td>default.password</td>
<td>mysql_password</td>
</tr>
<tr>
<td>redshift.password</td>
<td>********</td>
</tr>
<tr>
<td>redshift.url</td>
<td>jdbc:redshift://examplecluster.abc123xyz789.us-west-2.redshift.amazonaws.com:5439</td>
</tr>
<tr>
<td>redshift.user</td>
<td>redshift-user</td>
</tr>
</table>
## Bind to Notebook
In the `Notebook` click on the `settings` icon at the top-right corner. Use select/deselect to specify the interpreters to be used in the `Notebook`.
## More Properties
You can modify the interpreter configuration in the `Interpreter` section. The most common properties are as follows, but you can specify other properties that need to be connected.
<table class="table-configuration">
<tr>
<th>Property Name</th>
<th>Description</th>
</tr>
<tr>
<td>{prefix}.url</td>
<td>JDBC URL to connect, the URL must include the name of the database </td>
</tr>
<tr>
<td>{prefix}.user</td>
<td>JDBC user name</td>
</tr>
<tr>
<td>{prefix}.password</td>
<td>JDBC password</td>
</tr>
<tr>
<td>{prefix}.driver</td>
<td>JDBC driver name.</td>
</tr>
<tr>
<td>common.max_result</td>
<td>Max number of SQL result to display to prevent the browser overload. This is common properties for all connections</td>
</tr>
<tr>
<td>zeppelin.jdbc.auth.type</td>
<td>Types of authentications' methods supported are SIMPLE, and KERBEROS</td>
</tr>
<tr>
<td>zeppelin.jdbc.principal</td>
<td>The principal name to load from the keytab</td>
</tr>
<tr>
<td>zeppelin.jdbc.keytab.location</td>
<td>The path to the keytab file</td>
</tr>
</table>
To develop this functionality use this [method](http://docs.oracle.com/javase/7/docs/api/java/sql/DriverManager.html#getConnection%28java.lang.String,%20java.util.Properties%29). For example if a connection needs a schema parameter, it would have to add the property as follows:
##### Dependencies
<table class="table-configuration">
<tr>
<th>name</th>
<th>value</th>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>{prefix}.schema</td>
<td>schema_name</td>
<td>mysql:mysql-connector-java:5.1.38</td>
<td></td>
</tr>
</table>
## Examples
### Apache Hive
### Hive
<img src="../assets/themes/zeppelin/img/docs-img/hive_setting.png" width="600px" />
#### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>hive.driver</td>
<td>org.apache.hive.jdbc.HiveDriver</td>
</tr>
<tr>
<td>hive.url</td>
<td>jdbc:hive2://localhost:10000</td>
</tr>
<tr>
<td>hive.user</td>
<td>hive_user</td>
</tr>
<tr>
<td>hive.password</td>
<td>hive_password</td>
</tr>
</table>
##### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>default.driver</td>
<td>org.apache.hive.jdbc.HiveDriver</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:hive2://localhost:10000</td>
</tr>
<tr>
<td>default.user</td>
<td>hive_user</td>
</tr>
<tr>
<td>default.password</td>
<td>hive_password</td>
</tr>
</table>
#### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.hive:hive-jdbc:0.14.0</td>
<td></td>
</tr>
<tr>
<td>org.apache.hadoop:hadoop-common:2.6.0</td>
<td></td>
</tr>
</table>
##### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.hive:hive-jdbc:0.14.0</td>
<td></td>
</tr>
<tr>
<td>org.apache.hadoop:hadoop-common:2.6.0</td>
<td></td>
</tr>
</table>
### Phoenix
#### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>phoenix.driver</td>
<td>org.apache.phoenix.jdbc.PhoenixDriver</td>
</tr>
<tr>
<td>phoenix.url</td>
<td>jdbc:phoenix:localhost:2181:/hbase-unsecure</td>
</tr>
<tr>
<td>phoenix.user</td>
<td>phoenix_user</td>
</tr>
<tr>
<td>phoenix.password</td>
<td>phoenix_password</td>
</tr>
</table>
#### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.phoenix:phoenix-core:4.4.0-HBase-1.0</td>
<td></td>
</tr>
</table>
### Apache Phoenix
### Tajo
#### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>tajo.driver</td>
<td>org.apache.tajo.jdbc.TajoDriver</td>
</tr>
<tr>
<td>tajo.url</td>
<td>jdbc:tajo://localhost:26002/default</td>
</tr>
</table>
Phoenix supports `thick` and `thin` connection types:
#### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.tajo:tajo-jdbc:0.11.0</td>
<td></td>
</tr>
</table>
- [Thick client](#thick-client-connection) is faster, but must connect directly to ZooKeeper and HBase RegionServers.
- [Thin client](#thin-client-connection) has fewer dependencies and connects through a [Phoenix Query Server](http://phoenix.apache.org/server.html) instance.
Use the appropriate `default.driver`, `default.url`, and the dependency artifact for your connection type.
#### Thick client connection
<img src="../assets/themes/zeppelin/img/docs-img/phoenix_thick_setting.png" width="600px" />
##### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>default.driver</td>
<td>org.apache.phoenix.jdbc.PhoenixDriver</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:phoenix:localhost:2181:/hbase-unsecure</td>
</tr>
<tr>
<td>default.user</td>
<td>phoenix_user</td>
</tr>
<tr>
<td>default.password</td>
<td>phoenix_password</td>
</tr>
</table>
##### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.phoenix:phoenix-core:4.4.0-HBase-1.0</td>
<td></td>
</tr>
</table>
#### Thin client connection
<img src="../assets/themes/zeppelin/img/docs-img/phoenix_thin_setting.png" width="600px" />
##### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>default.driver</td>
<td>org.apache.phoenix.queryserver.client.Driver</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF</td>
</tr>
<tr>
<td>default.user</td>
<td>phoenix_user</td>
</tr>
<tr>
<td>default.password</td>
<td>phoenix_password</td>
</tr>
</table>
##### Dependencies
## How to use
Before adding one of the dependencies below, check the Phoenix version first.
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
<th>Description</th>
</tr>
<tr>
<td>org.apache.phoenix:phoenix-server-client:4.7.0-HBase-1.1</td>
<td></td>
<td>For Phoenix <code>4.7</code></td>
</tr>
<tr>
<td>org.apache.phoenix:phoenix-queryserver-client:4.8.0-HBase-1.2</td>
<td></td>
<td>For Phoenix <code>4.8+</code></td>
</tr>
</table>
### Reference in paragraph
### Apache Tajo
Start the paragraphs with the `%jdbc`, this will use the `default` prefix for connection. If you want to use other connection you should specify the prefix of it as follows `%jdbc(prefix)`:
<img src="../assets/themes/zeppelin/img/docs-img/tajo_setting.png" width="600px" />
```sql
%jdbc
SELECT * FROM db_name;
##### Properties
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Value</th>
</tr>
<tr>
<td>default.driver</td>
<td>org.apache.tajo.jdbc.TajoDriver</td>
</tr>
<tr>
<td>default.url</td>
<td>jdbc:tajo://localhost:26002/default</td>
</tr>
</table>
```
##### Dependencies
<table class="table-configuration">
<tr>
<th>Artifact</th>
<th>Excludes</th>
</tr>
<tr>
<td>org.apache.tajo:tajo-jdbc:0.11.0</td>
<td></td>
</tr>
</table>
or
```sql
%jdbc(prefix)
SELECT * FROM db_name;
```
### Apply Zeppelin Dynamic Forms
You can leverage [Zeppelin Dynamic Form](../manual/dynamicform.html) inside your queries. You can use both the `text input` and `select form` parametrization features
```sql
%jdbc(prefix)
SELECT name, country, performer
FROM demo.performers
WHERE name='{{performer=Sheryl Crow|Doof|Fanfarlo|Los Paranoia}}'
```
## Bugs & Reporting
If you find a bug for this interpreter, please create a [JIRA]( https://issues.apache.org/jira/browse/ZEPPELIN-382?jql=project%20%3D%20ZEPPELIN) ticket.
## Bug reporting
If you find a bug using JDBC interpreter, please create a [JIRA](https://issues.apache.org/jira/browse/ZEPPELIN) ticket.

View file

@ -65,7 +65,12 @@ Example: `spark.master` to `livy.spark.master`
<td>1000</td>
<td>Max number of Spark SQL result to display.</td>
</tr>
<tr>
<tr>
<td>zeppelin.livy.displayAppInfo</td>
<td>false</td>
<td>Whether to display app info</td>
</tr>
<tr>
<td>livy.spark.driver.cores</td>
<td></td>
<td>Driver cores. ex) 1, 2.</td>

97
docs/interpreter/pig.md Normal file
View file

@ -0,0 +1,97 @@
---
layout: page
title: "Pig Interpreter for Apache Zeppelin"
description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
group: manual
---
{% include JB/setup %}
# Pig Interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.
## Supported interpreter type
- `%pig.script` (default)
All Pig scripts can run in this type of interpreter, and the display type is plain text.
- `%pig.query`
Almost the same as `%pig.script`. The only difference is that you don't need to add an alias in the last statement, and the display type is table.
## Supported runtime mode
- Local
- MapReduce
- Tez (Only Tez 0.7 is supported)
## How to use
### How to setup Pig
- Local Mode
Nothing needs to be done for local mode
- MapReduce Mode
HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
- Tez Mode
HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
### How to configure interpreter
At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default.
<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.pig.execType</td>
<td>mapreduce</td>
<td>Execution mode for pig runtime. local | mapreduce | tez </td>
</tr>
<tr>
<td>zeppelin.pig.includeJobStats</td>
<td>false</td>
<td>Whether to display jobStats info in <code>%pig.script</code></td>
</tr>
<tr>
<td>zeppelin.pig.maxResult</td>
<td>1000</td>
<td>Maximum number of rows displayed in <code>%pig.query</code></td>
</tr>
</table>
### Example
##### pig
```
%pig
raw_data = load 'dataset/sf_crime/train.csv' using PigStorage(',') as (Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y);
b = group raw_data all;
c = foreach b generate COUNT($1);
dump c;
```
##### pig.query
```
%pig.query
b = foreach raw_data generate Category;
c = group b by Category;
foreach c generate group as category, COUNT($1) as count;
```
Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, and do different kinds of query based on the data of `%pig`.

View file

@ -68,6 +68,7 @@ When your code requires external library, instead of doing download/copy/restart
<li> If you need to resolve dependencies from other than central maven repository or
local ~/.m2 repository, hit <i class="fa fa-plus"></i> icon next to repository lists. </li>
<li> Fill out the form and click 'Add' button, then you will be able to see that new repository is added. </li>
<li> Optionally, if you are behind a corporate firewall, you can also specify the proxy settings so that Zeppelin can download the dependencies using the given credentials.</li>
</ol>
</div>
</div>

View file

@ -82,3 +82,49 @@ interpreter.start()
The above code will start the interpreter thread inside your process. Once the interpreter is started, you can configure Zeppelin to connect to the RemoteInterpreter by checking the **Connect to existing process** checkbox and then providing the **Host** and **Port** on which the interpreter process is listening, as shown in the image below:
<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">
## (Experimental) Interpreter Execution Hooks
Zeppelin allows users to specify additional code to be executed by an interpreter before and after a paragraph's code is executed. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, you may use `z.registerHook()`. For example, enter the following into one paragraph:
```python
%pyspark
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
```
These calls will not take effect until the next time you run a paragraph. In another paragraph, enter:
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```
The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```
If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
```
```
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
Finally, the hook registry is internally shared by other interpreters in the same group. This would allow for hook code for one interpreter REPL to be set by another as follows:
```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, the interpreter REPL will be unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

View file

@ -0,0 +1,414 @@
---
layout: page
title: "Install Zeppelin with Flink and Spark in cluster mode"
description: "Tutorial is valid for Spark 1.6.x and Flink 1.1.2"
group: tutorial
---
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
This tutorial is extremely entry-level. It assumes no prior knowledge of Linux, git, or other tools. If you carefully type what I tell you when I tell you, you should be able to get Zeppelin running.
## Installing Zeppelin with Flink and Spark in cluster mode
This tutorial assumes the user has a machine (real or [virtual](https://www.virtualbox.org/wiki/Downloads)) with a fresh, minimal installation of [Ubuntu 14.04.3 Server](http://www.ubuntu.com/download/server).
**Note:** On the size requirements of the Virtual Machine, some users reported trouble when using the default virtual machine sizes, specifically that the hard drive needed to be at least 16GB- other users did not have this issue.
There are many good tutorials on how to install Ubuntu Server on a virtual box, [here is one of them](http://ilearnstack.com/2013/04/13/setting-ubuntu-vm-in-virtualbox/)
### Required Programs
Assuming the minimal install, there are several programs that we will need to install before Zeppelin, Flink, and Spark.
- git
- openssh-server
- OpenJDK 7
- Maven 3.1+
For git, openssh-server, and OpenJDK 7 we will be using the apt package manager.
##### git
From the command prompt:
```
sudo apt-get install git
```
##### openssh-server
```
sudo apt-get install openssh-server
```
##### OpenJDK 7
```
sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
```
*A note for those using Ubuntu 16.04*: To install `openjdk-7` on Ubuntu 16.04, one must add a repository. [Source](http://askubuntu.com/questions/761127/ubuntu-16-04-and-openjdk-7)
``` bash
sudo add-apt-repository ppa:openjdk-r/ppa
sudo apt-get update
sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
```
##### Maven 3.1+
Zeppelin requires maven version 3.x. The version available in the repositories at the time of writing is 2.x, so maven must be installed manually.
Purge any existing versions of maven.
```
sudo apt-get purge maven maven2
```
Download the maven 3.3.9 binary.
```
wget "http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz"
```
Unarchive the binary and move to the `/usr/local` directory.
```
tar -zxvf apache-maven-3.3.9-bin.tar.gz
sudo mv ./apache-maven-3.3.9 /usr/local
```
Create symbolic links in `/usr/bin`.
```
sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/bin/mvn
```
### Installing Zeppelin
This provides a quick overview of Zeppelin installation from source, however the reader is encouraged to review the [Zeppelin Installation Guide](../install/install.html)
From the command prompt:
Clone Zeppelin.
```
git clone https://github.com/apache/zeppelin.git
```
Enter the Zeppelin root directory.
```
cd zeppelin
```
Package Zeppelin.
```
mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.2
```
`-DskipTests` skips build tests- you're not developing (yet), so you don't need to do tests, the clone version *should* build.
`-Pspark-1.6` tells maven to build a Zeppelin with Spark 1.6. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.
`-Dflink.version=1.1.2` tells maven specifically to build Zeppelin with Flink version 1.1.2.
**Note:** You may wish to include additional build flags such as `-Ppyspark` or `-Psparkr`. See [the build section of github for more details](https://github.com/apache/zeppelin#build).
**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 1.6 was the most recent Spark version available.
**Note:** On build failures. Having installed Zeppelin close to 30 times now, I will tell you that sometimes the build fails for seemingly no reason.
As long as you didn't edit any code, it is unlikely the build is failing because of something you did. What does tend to happen, is some dependency that maven is trying to download is unreachable. If your build fails on this step here are some tips:
- Don't get discouraged.
- Scroll up and read through the logs. There will be clues there.
- Retry (that is, run the `mvn clean package -DskipTests -Pspark-1.6 -Dflink.version=1.1.2` command again)
- If there were clues that a dependency couldn't be downloaded wait a few hours or even days and retry again. Open source software when compiling is trying to download all of the dependencies it needs, if a server is off-line there is nothing you can do but wait for it to come back.
- Make sure you followed all of the steps carefully.
- Ask the community to help you. Go [here](http://zeppelin.apache.org/community.html) and join the user mailing list. People are there to help you. Make sure to copy and paste the build output (everything that happened in the console) and include that in your message.
Start the Zeppelin daemon.
```
bin/zeppelin-daemon.sh start
```
Use `ifconfig` to determine the host machine's IP address. If you are not familiar with how to do this, a fairly comprehensive post can be found [here](http://www.cyberciti.biz/faq/how-to-find-out-the-ip-address-assigned-to-eth0-and-display-ip-only/).
Open a web-browser on a machine connected to the same network as the host (or in the host operating system if using a virtual machine). Navigate to http://`yourip`:8080, where yourip is the IP address you found in `ifconfig`.
See the [Zeppelin tutorial](../tutorial/tutorial.md) for basic Zeppelin usage. It is also advised that you take a moment to check out the tutorial notebook that is included with each Zeppelin install, and to familiarize yourself with basic notebook functionality.
##### Flink Test
Create a new notebook named "Flink Test" and copy and paste the following code.
```scala
%flink // let Zeppelin know what interpreter to use.
val text = env.fromElements("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
"Butane in my veins and I'm out to cut the junkie",
"With the plastic eyeballs, spray paint the vegetables",
"Dog food stalls with the beefcake pantyhose",
"Kill the headlights and put it in neutral",
"Stock car flamin' with a loser in the cruise control",
"Baby's in Reno with the Vitamin D",
"Got a couple of couches, sleep on the love seat",
"Someone came in sayin' I'm insane to complain",
"About a shotgun wedding and a stain on my shirt",
"Don't believe everything that you breathe",
"You get a parking violation and a maggot on your sleeve",
"So shave your face with some mace in the dark",
"Savin' all your food stamps and burnin' down the trailer park",
"Yo, cut it")
/* The meat and potatoes:
this tells Flink to iterate through the elements, in this case strings,
transform the string to lower case and split the string at white space into individual words
then finally aggregate the occurrence of each word.
This creates the counts variable which is a list of tuples of the form (word, occurrences)
counts.collect().foreach(println(_)) // execute the script and print each element in the counts list
*/
val counts = text.flatMap{ _.toLowerCase.split("\\W+") }.map { (_,1) }.groupBy(0).sum(1)
counts.collect().foreach(println(_)) // execute the script and print each element in the counts list
```
Run the code to make sure the built-in Zeppelin Flink interpreter is working properly.
##### Spark Test
Create a new notebook named "Spark Test" and copy and paste the following code.
```scala
%spark // let Zeppelin know what interpreter to use.
val text = sc.parallelize(List("In the time of chimpanzees, I was a monkey", // some lines of text to analyze
"Butane in my veins and I'm out to cut the junkie",
"With the plastic eyeballs, spray paint the vegetables",
"Dog food stalls with the beefcake pantyhose",
"Kill the headlights and put it in neutral",
"Stock car flamin' with a loser in the cruise control",
"Baby's in Reno with the Vitamin D",
"Got a couple of couches, sleep on the love seat",
"Someone came in sayin' I'm insane to complain",
"About a shotgun wedding and a stain on my shirt",
"Don't believe everything that you breathe",
"You get a parking violation and a maggot on your sleeve",
"So shave your face with some mace in the dark",
"Savin' all your food stamps and burnin' down the trailer park",
"Yo, cut it"))
/* The meat and potatoes:
this tells spark to iterate through the elements, in this case strings,
transform the string to lower case and split the string at white space into individual words
then finally aggregate the occurrence of each word.
This creates the counts variable which is a list of tuples of the form (word, occurrences)
*/
val counts = text.flatMap { _.toLowerCase.split("\\W+") }
.map { (_,1) }
.reduceByKey(_ + _)
counts.collect().foreach(println(_)) // execute the script and print each element in the counts list
```
Run the code to make sure the built-in Zeppelin Spark interpreter is working properly.
Finally, stop the Zeppelin daemon. From the command prompt run:
```
bin/zeppelin-daemon.sh stop
```
### Installing Clusters
##### Flink Cluster
###### Download Binaries
Building from source is recommended where possible, for simplicity in this tutorial we will download Flink and Spark Binaries.
To download the Flink Binary use `wget`
```bash
wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.0.3/flink-1.0.3-bin-hadoop24-scala_2.10.tgz"
tar -xzvf flink-1.0.3-bin-hadoop24-scala_2.10.tgz
```
This will download Flink 1.0.3, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `24` to your appropriate version.
Start the Flink Cluster.
```bash
flink-1.0.3/bin/start-cluster.sh
```
###### Building From source
If you wish to build Flink from source, the following will be instructive. Note that if you have downloaded and used the binary version this should be skipped. The changing nature of build tools and versions across platforms makes this section somewhat precarious. For example, Java8 and Maven 3.0.3 are recommended for building Flink, which are not recommended for Zeppelin at the time of writing. If the user wishes to attempt to build from source, this section will provide some reference. If errors are encountered, please contact the Apache Flink community.
See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions.
Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.0, and build.
```
cd $HOME
git clone https://github.com/apache/flink.git
cd flink
git checkout release-1.0
mvn clean install -DskipTests
```
Start the Flink Cluster in stand-alone mode
```
build-target/bin/start-cluster.sh
```
###### Ensure the cluster is up
In a browser, navigate to http://`yourip`:8082 to see the Flink Web-UI. Click on 'Task Managers' in the left navigation bar. Ensure there is at least one Task Manager present.
<center>![alt text](../assets/themes/zeppelin/img/screenshots/flink-webui.png "The Flink Web-UI")</center>
If no task managers are present, restart the Flink cluster with the following commands:
(if binaries)
```
flink-1.0.3/bin/stop-cluster.sh
flink-1.0.3/bin/start-cluster.sh
```
(if built from source)
```
build-target/bin/stop-cluster.sh
build-target/bin/start-cluster.sh
```
##### Spark 1.6 Cluster
###### Download Binaries
Building from source is recommended where possible, for simplicity in this tutorial we will download Flink and Spark Binaries.
Using binaries is also
To download the Spark Binary use `wget`
```bash
wget "http://mirrors.koehn.com/apache/spark/spark-1.6.1/spark-1.6.1-bin-hadoop2.4.tgz"
tar -xzvf spark-1.6.1-bin-hadoop2.4.tgz
mv spark-1.6.1-bin-hadoop2.4 spark
```
This will download Spark 1.6.1, compatible with Hadoop 2.4. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `2.4` to your appropriate version.
###### Building From source
Spark is an extraordinarily large project, which takes considerable time to download and build. It is also prone to build failures for similar reasons listed in the Flink section. If the user wishes to attempt to build from source, this section will provide some reference. If errors are encountered, please contact the Apache Spark community.
See the [Spark Installation](https://github.com/apache/spark/blob/master/README.md) guide for more detailed instructions.
Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-1.6, and build.
**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at
the time of writing. You are free to check out another version, just make sure you build Zeppelin against the correct version of Spark.
```
cd $HOME
```
Clone, check out, and build Spark version 1.6.x.
```
git clone https://github.com/apache/spark.git
cd spark
git checkout branch-1.6
mvn clean package -DskipTests
```
###### Start the Spark cluster
Return to the `$HOME` directory.
```bash
cd $HOME
```
Start the Spark cluster in stand alone mode, specifying the webui-port as some port other than 8080 (the webui-port of Zeppelin).
```
spark/sbin/start-master.sh --webui-port 8082
```
**Note:** Why `--webui-port 8082`? There is a digression toward the end of this document that explains this.
Open a browser and navigate to http://`yourip`:8082 to ensure the Spark master is running.
<center>![alt text](../assets/themes/zeppelin/img/screenshots/spark-master-webui1.png "It should look like this...")</center>
Toward the top of the page there will be a *URL*: spark://`yourhost`:7077. Note this URL, the Spark Master URI, it will be needed in subsequent steps.
Start the slave using the URI from the Spark master WebUI:
```
spark/sbin/start-slave.sh spark://yourhostname:7077
```
Return to the root directory and start the Zeppelin daemon.
```
cd $HOME
zeppelin/bin/zeppelin-daemon.sh start
```
##### Configure Interpreters
Open a web browser and go to the Zeppelin web-ui at http://yourip:8080.
Now go back to the Zeppelin web-ui at http://`yourip`:8080 and this time click on *anonymous* at the top right, which will open a drop-down menu, select *Interpreters* to enter interpreter configuration.
In the Spark section, click the edit button in the top right corner to make the property values editable (looks like a pencil).
The only field that needs to be edited in the Spark interpreter is the master field. Change this value from `local[*]` to the URL you used to start the slave, mine was `spark://ubuntu:7077`.
Click *Save* to update the parameters, and click *OK* when it asks you about restarting the interpreter.
Now scroll down to the Flink section. Click the edit button and change the value of *host* from `local` to `localhost`. Click *Save* again.
Reopen the examples and execute them again (i.e. you need to click the play button at the top of the screen, or the button on the paragraph).
You should be able to check the Flink and Spark webuis (at something like http://`yourip`:8081, http://`yourip`:8082, http://`yourip`:8083) and see that jobs have been run against the clusters.
**Digression** Sorry to be vague and use terms such as 'something like', but exactly what web-ui is at what port is going to depend on what order you started things.
What is really going on here is you are pointing your browser at specific ports, namely 8081, 8082, and 8083. Flink and Spark all want to put their web-ui on port 8080, but are
well behaved and will take the next port available. Since Zeppelin started first, it will get port 8080. When Flink starts (assuming you started Flink first), it will try to bind to
port 8080, see that it is already taken, and go to the next one available, hopefully 8081. Spark has a webui for the master and the slave, so when they start they will try to bind to 8080
(already taken by Zeppelin), then 8081 (already taken by Flink's webui), then 8082. If everything goes smoothly and you followed the directions precisely, the webuis should be 8081 and 8082.
It *is* possible to specify the port you want the webui to bind to (at the command line by passing the `--webui-port <port>` flag when you start Flink and Spark, where `<port>` is the port
you want to see that webui on). You can also set the default webui port of Spark and Flink (and Zeppelin) in the configuration files, but this is a tutorial for novices and slightly out of scope.
### Next Steps
Check out the [tutorial](./tutorial.md) for more cool things you can do with your new toy!
[Join the community](http://zeppelin.apache.org/community.html), ask questions and contribute! Every little bit helps.

View file

@ -1,6 +1,6 @@
---
layout: nil
title : RSS Feed
title :
---
<?xml version="1.0" encoding="UTF-8" ?>

View file

@ -57,5 +57,5 @@ You have to store the password information for users.
## Please note
As a first step of data source authentication feature, [ZEPPELIN-828](https://issues.apache.org/jira/browse/ZEPPELIN-828) was proposed and implemented in Pull Request [#860](https://github.com/apache/zeppelin/pull/860).
Currently, only customized 3rd party interpreters can use this feature. We are planning to apply this mechanism to [the community interpreters](../manual/interpreterinstallation.md#available-community-managed-interpreters) in the near future.
Currently, only customized 3rd party interpreters can use this feature. We are planning to apply this mechanism to [the community managed interpreters](../manual/interpreterinstallation.html#available-community-managed-interpreters) in the near future.
Please keep track of [ZEPPELIN-1070](https://issues.apache.org/jira/browse/ZEPPELIN-1070).

View file

@ -1,6 +1,6 @@
---
# Remember to set production_url in your _config.yml file!
title : Sitemap
title :
---
{% for page in site.pages %}
{{site.production_url}}{{ page.url }}{% endfor %}

View file

@ -22,6 +22,7 @@ import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@ -35,7 +36,6 @@ import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.elasticsearch.action.delete.DeleteResponse;
@ -48,6 +48,8 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
@ -437,14 +439,37 @@ public class ElasticsearchInterpreter extends Interpreter {
resMsg = XContentHelper.toString((InternalSingleBucketAggregation) agg).toString();
}
else if (agg instanceof InternalMultiBucketAggregation) {
final StringBuffer buffer = new StringBuffer("key\tdoc_count");
final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
final InternalMultiBucketAggregation multiBucketAgg = (InternalMultiBucketAggregation) agg;
for (MultiBucketsAggregation.Bucket bucket : multiBucketAgg.getBuckets()) {
buffer.append("\n")
.append(bucket.getKeyAsString())
.append("\t")
.append(bucket.getDocCount());
try {
final XContentBuilder builder = XContentFactory.jsonBuilder();
bucket.toXContent(builder, null);
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(builder.string());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
catch (IOException e) {
logger.error("Processing bucket: " + e.getMessage(), e);
}
}
final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);
for (Map<String, Object> bucket : buckets) {
buffer.append("\n");
for (String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}
resType = InterpreterResult.Type.TABLE;

View file

@ -21,7 +21,12 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.zeppelin.interpreter.InterpreterResult;
@ -178,6 +183,11 @@ public class ElasticsearchInterpreterTest {
res = interpreter.interpret("search /logs { \"aggs\" : { \"status_count\" : " +
" { \"terms\" : { \"field\" : \"status\" } } } }", null);
assertEquals(Code.SUCCESS, res.code());
res = interpreter.interpret("search /logs { \"aggs\" : { " +
" \"length\" : { \"terms\": { \"field\": \"status\" }, " +
" \"aggs\" : { \"sum_length\" : { \"sum\" : { \"field\" : \"content_length\" } }, \"sum_status\" : { \"sum\" : { \"field\" : \"status\" } } } } } }", null);
assertEquals(Code.SUCCESS, res.code());
}
@Test

View file

@ -104,6 +104,12 @@
<version>1.0.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>2.0.1</version>
</dependency>
</dependencies>
<build>

View file

@ -19,15 +19,16 @@ import java.io.*;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.*;
import java.util.*;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.dbcp2.ConnectionFactory;
import org.apache.commons.dbcp2.DriverManagerConnectionFactory;
import org.apache.commons.dbcp2.PoolableConnectionFactory;
import org.apache.commons.dbcp2.PoolingDriver;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.pool2.ObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
@ -48,7 +49,7 @@ import com.google.common.collect.Sets.SetView;
/**
* JDBC interpreter for Zeppelin. This interpreter can also be used for accessing HAWQ,
* GreenplumDB, MariaDB, MySQL, Postgres and Redshit.
* GreenplumDB, MariaDB, MySQL, Postgres and Redshift.
*
* <ul>
* <li>{@code default.url} - JDBC URL to connect to.</li>
@ -99,14 +100,15 @@ public class JDBCInterpreter extends Interpreter {
static final String EMPTY_COLUMN_VALUE = "";
private final String CONCURRENT_EXECUTION_KEY = "zeppelin.jdbc.concurrent.use";
private final String CONCURRENT_EXECUTION_COUNT = "zeppelin.jdbc.concurrent.max_connection";
private final String DBCP_STRING = "jdbc:apache:commons:dbcp:";
private final HashMap<String, Properties> propertiesMap;
private final Map<String, Statement> paragraphIdStatementMap;
private final Map<String, ArrayList<Connection>> propertyKeyUnusedConnectionListMap;
private final Map<String, Connection> paragraphIdConnectionMap;
private final Map<String, PoolingDriver> poolingDriverMap;
private final Map<String, SqlCompleter> propertyKeySqlCompleterMap;
@ -122,9 +124,8 @@ public class JDBCInterpreter extends Interpreter {
public JDBCInterpreter(Properties property) {
super(property);
propertiesMap = new HashMap<>();
propertyKeyUnusedConnectionListMap = new HashMap<>();
paragraphIdStatementMap = new HashMap<>();
paragraphIdConnectionMap = new HashMap<>();
poolingDriverMap = new HashMap<>();
propertyKeySqlCompleterMap = new HashMap<>();
}
@ -193,22 +194,41 @@ public class JDBCInterpreter extends Interpreter {
return completer;
}
/**
 * Reports whether a pooling driver has already been registered under the given name.
 *
 * @param driverName key to look up in {@code poolingDriverMap}
 * @return {@code true} if {@code poolingDriverMap} contains an entry for {@code driverName}
 */
private boolean isConnectionInPool(String driverName) {
  return poolingDriverMap.containsKey(driverName);
}
/**
 * Builds a DBCP connection pool backed by {@link DriverManager} connections to {@code url}
 * and registers it under {@code propertyKey}.
 *
 * <p>The {@link PoolingDriver} used for the registration is remembered in
 * {@code poolingDriverMap} under the same key.
 *
 * @param url JDBC URL handed to the underlying connection factory
 * @param propertyKey name the pool is registered under
 * @param properties connection properties handed to the underlying connection factory
 */
private void createConnectionPool(String url, String propertyKey, Properties properties) {
  // Raw connections are obtained through DriverManager using the given URL and properties.
  ConnectionFactory rawFactory = new DriverManagerConnectionFactory(url, properties);
  // The second constructor argument is passed as null, exactly as before.
  PoolableConnectionFactory pooledFactory = new PoolableConnectionFactory(rawFactory, null);

  // The factory and the pool need to know about each other.
  ObjectPool pool = new GenericObjectPool(pooledFactory);
  pooledFactory.setPool(pool);

  PoolingDriver poolingDriver = new PoolingDriver();
  poolingDriver.registerPool(propertyKey, pool);
  poolingDriverMap.put(propertyKey, poolingDriver);
}
/**
 * Returns a connection from the pool registered under {@code propertyKey}, creating that
 * pool first if it is not registered yet.
 *
 * <p>NOTE: the pool is looked up by {@code propertyKey} only, so {@code url} and
 * {@code properties} are used solely when the pool is created; later calls with the same
 * key but a different URL or properties reuse the existing pool.
 *
 * @param url JDBC URL used if the pool has to be created
 * @param propertyKey name of the pool to borrow from
 * @param properties connection properties used if the pool has to be created
 * @return a connection obtained through {@code DBCP_STRING + propertyKey}
 * @throws SQLException if {@link DriverManager} cannot provide the connection
 */
private Connection getConnectionFromPool(String url, String propertyKey, Properties properties)
    throws SQLException {
  final boolean poolExists = isConnectionInPool(propertyKey);
  if (!poolExists) {
    createConnectionPool(url, propertyKey, properties);
  }
  final String pooledUrl = DBCP_STRING + propertyKey;
  return DriverManager.getConnection(pooledUrl);
}
public Connection getConnection(String propertyKey, String user)
throws ClassNotFoundException, SQLException, InterpreterException {
Connection connection = null;
if (propertyKey == null || propertiesMap.get(propertyKey) == null) {
return null;
}
if (propertyKeyUnusedConnectionListMap.containsKey(propertyKey)) {
ArrayList<Connection> connectionList = propertyKeyUnusedConnectionListMap.get(propertyKey);
if (0 != connectionList.size()) {
connection = propertyKeyUnusedConnectionListMap.get(propertyKey).remove(0);
if (null != connection && connection.isClosed()) {
connection.close();
connection = null;
}
}
}
if (null == connection) {
final Properties properties = (Properties) propertiesMap.get(propertyKey).clone();
logger.info(properties.getProperty(DRIVER_KEY));
@ -222,16 +242,16 @@ public class JDBCInterpreter extends Interpreter {
switch (authType) {
case KERBEROS:
if (user == null) {
connection = DriverManager.getConnection(url, properties);
connection = getConnectionFromPool(url, propertyKey, properties);
} else {
if ("hive".equalsIgnoreCase(propertyKey)) {
connection = DriverManager.getConnection(url + ";hive.server2.proxy.user=" + user,
properties);
connection = getConnectionFromPool(url + ";hive.server2.proxy.user=" + user,
propertyKey, properties);
} else {
UserGroupInformation ugi = null;
try {
ugi = UserGroupInformation.createProxyUser(user,
UserGroupInformation.getCurrentUser());
UserGroupInformation.getCurrentUser());
} catch (Exception e) {
logger.error("Error in createProxyUser", e);
StringBuilder stringBuilder = new StringBuilder();
@ -239,11 +259,13 @@ public class JDBCInterpreter extends Interpreter {
stringBuilder.append(e.getCause());
throw new InterpreterException(stringBuilder.toString());
}
final String poolKey = propertyKey;
try {
connection = ugi.doAs(new PrivilegedExceptionAction<Connection>() {
@Override
public Connection run() throws Exception {
return DriverManager.getConnection(url, properties);
return getConnectionFromPool(url, poolKey, properties);
}
});
} catch (Exception e) {
@ -258,7 +280,7 @@ public class JDBCInterpreter extends Interpreter {
break;
default:
connection = DriverManager.getConnection(url, properties);
connection = getConnectionFromPool(url, propertyKey, properties);
}
}
}
@ -266,75 +288,41 @@ public class JDBCInterpreter extends Interpreter {
return connection;
}
public Statement getStatement(String propertyKey, String paragraphId,
InterpreterContext interpreterContext)
throws SQLException, ClassNotFoundException, InterpreterException {
Connection connection;
if (paragraphIdConnectionMap.containsKey(paragraphId +
interpreterContext.getAuthenticationInfo().getUser())) {
connection = paragraphIdConnectionMap.get(paragraphId +
interpreterContext.getAuthenticationInfo().getUser());
} else {
connection = getConnection(propertyKey, interpreterContext.getAuthenticationInfo().getUser());
private void initStatementMap() {
for (Statement statement : paragraphIdStatementMap.values()) {
try {
statement.close();
} catch (Exception e) {
logger.error("Error while closing paragraphIdStatementMap statement...", e);
}
}
if (connection == null) {
return null;
}
Statement statement = connection.createStatement();
if (isStatementClosed(statement)) {
connection = getConnection(propertyKey, interpreterContext.getAuthenticationInfo().getUser());
statement = connection.createStatement();
}
paragraphIdConnectionMap.put(paragraphId + interpreterContext.getAuthenticationInfo().getUser(),
connection);
paragraphIdStatementMap.put(paragraphId + interpreterContext.getAuthenticationInfo().getUser(),
statement);
return statement;
paragraphIdStatementMap.clear();
}
private boolean isStatementClosed(Statement statement) {
try {
return statement.isClosed();
} catch (Throwable t) {
logger.debug("{} doesn't support isClosed method", statement);
return false;
private void initConnectionPoolMap() throws SQLException {
Iterator<String> it = poolingDriverMap.keySet().iterator();
while (it.hasNext()) {
String driverName = it.next();
poolingDriverMap.get(driverName).closePool(driverName);
it.remove();
}
poolingDriverMap.clear();
}
/**
 * Stores {@code statement} in {@code paragraphIdStatementMap} under {@code key} and then
 * caps the number of rows it may return at {@code getMaxResult()}.
 *
 * @param key map key for the statement (executeSql passes paragraph id + user name)
 * @param statement the statement that is about to be executed
 * @throws SQLException if the driver rejects the max-rows setting
 */
private void saveStatement(String key, Statement statement) throws SQLException {
// The statement is registered before setMaxRows is called, so it stays in the map
// even when setMaxRows throws; the caller is responsible for removing it.
paragraphIdStatementMap.put(key, statement);
statement.setMaxRows(getMaxResult());
}
/**
 * Removes the entry stored under {@code key} from {@code paragraphIdStatementMap}.
 * The statement itself is not closed here.
 *
 * @param key the key the statement was saved under
 */
private void removeStatement(String key) {
paragraphIdStatementMap.remove(key);
}
@Override
public void close() {
try {
for (List<Connection> connectionList : propertyKeyUnusedConnectionListMap.values()) {
for (Connection c : connectionList) {
try {
c.close();
} catch (Exception e) {
logger.error("Error while closing propertyKeyUnusedConnectionListMap connection...", e);
}
}
}
for (Statement statement : paragraphIdStatementMap.values()) {
try {
statement.close();
} catch (Exception e) {
logger.error("Error while closing paragraphIdStatementMap statement...", e);
}
}
paragraphIdStatementMap.clear();
for (Connection connection : paragraphIdConnectionMap.values()) {
try {
connection.close();
} catch (Exception e) {
logger.error("Error while closing paragraphIdConnectionMap connection...", e);
}
}
paragraphIdConnectionMap.clear();
initStatementMap();
initConnectionPoolMap();
} catch (Exception e) {
logger.error("Error while closing...", e);
}
@ -342,17 +330,21 @@ public class JDBCInterpreter extends Interpreter {
private InterpreterResult executeSql(String propertyKey, String sql,
InterpreterContext interpreterContext) {
String paragraphId = interpreterContext.getParagraphId();
Connection connection;
Statement statement;
ResultSet resultSet = null;
try {
connection = getConnection(propertyKey, interpreterContext.getAuthenticationInfo().getUser());
if (connection == null) {
return new InterpreterResult(Code.ERROR, "Prefix not found.");
}
Statement statement = getStatement(propertyKey, paragraphId, interpreterContext);
statement = connection.createStatement();
if (statement == null) {
return new InterpreterResult(Code.ERROR, "Prefix not found.");
}
statement.setMaxRows(getMaxResult());
StringBuilder msg = null;
boolean isTableType = false;
@ -364,8 +356,9 @@ public class JDBCInterpreter extends Interpreter {
isTableType = true;
}
ResultSet resultSet = null;
try {
saveStatement(paragraphId +
interpreterContext.getAuthenticationInfo().getUser(), statement);
boolean isResultSetAvailable = statement.execute(sql);
@ -408,16 +401,24 @@ public class JDBCInterpreter extends Interpreter {
msg.append(updateCount).append(NEWLINE);
}
} finally {
try {
if (resultSet != null) {
if (resultSet != null) {
try {
resultSet.close();
}
statement.close();
} finally {
statement = null;
} catch (SQLException e) { /*ignored*/ }
}
if (statement != null) {
try {
statement.close();
} catch (SQLException e) { /*ignored*/ }
}
if (connection != null) {
try {
connection.close();
} catch (SQLException e) { /*ignored*/ }
}
removeStatement(paragraphId +
interpreterContext.getAuthenticationInfo().getUser());
}
return new InterpreterResult(Code.SUCCESS, msg.toString());
} catch (Exception e) {
@ -452,7 +453,6 @@ public class JDBCInterpreter extends Interpreter {
cmd = cmd.trim();
logger.info("PropertyKey: {}, SQL command: '{}'", propertyKey, cmd);
return executeSql(propertyKey, cmd, contextInterpreter);
}

View file

@ -63,96 +63,6 @@
"propertyName": "zeppelin.jdbc.principal",
"defaultValue": "",
"description": "Kerberos principal"
},
"hive.url": {
"envName": null,
"propertyName": "hive.url",
"defaultValue": "jdbc:hive2://localhost:10000",
"description": ""
},
"hive.user": {
"envName": null,
"propertyName": "hive.user",
"defaultValue": "hive",
"description": ""
},
"hive.password": {
"envName": null,
"propertyName": "hive.password",
"defaultValue": "",
"description": ""
},
"hive.driver": {
"envName": null,
"propertyName": "hive.driver",
"defaultValue": "org.apache.hive.jdbc.HiveDriver",
"description": ""
},
"phoenix.url": {
"envName": null,
"propertyName": "phoenix.url",
"defaultValue": "jdbc:phoenix:localhost:2181:/hbase-unsecure",
"description": ""
},
"phoenix.user": {
"envName": null,
"propertyName": "phoenix.user",
"defaultValue": "phoenixuser",
"description": ""
},
"phoenix.password": {
"envName": null,
"propertyName": "phoenix.password",
"defaultValue": "",
"description": ""
},
"phoenix.driver": {
"envName": null,
"propertyName": "phoenix.driver",
"defaultValue": "org.apache.phoenix.jdbc.PhoenixDriver",
"description": ""
},
"phoenix.hbase.client.retries.number": {
"envName": null,
"propertyName": "phoenix.hbase.client.retries.number",
"defaultValue": "1",
"description": "Maximum retries. Used as maximum for all retryable operations such as the getting of a cell's value, starting a row update, etc."
},
"tajo.url": {
"envName": null,
"propertyName": "tajo.url",
"defaultValue": "jdbc:tajo://localhost:26002/default",
"description": ""
},
"tajo.driver": {
"envName": null,
"propertyName": "tajo.driver",
"defaultValue": "org.apache.tajo.jdbc.TajoDriver",
"description": ""
},
"psql.url": {
"envName": null,
"propertyName": "psql.url",
"defaultValue": "jdbc:postgresql://localhost:5432/",
"description": ""
},
"psql.user": {
"envName": null,
"propertyName": "psql.user",
"defaultValue": "phoenixuser",
"description": ""
},
"psql.password": {
"envName": null,
"propertyName": "psql.password",
"defaultValue": "",
"description": ""
},
"psql.driver": {
"envName": null,
"propertyName": "psql.driver",
"defaultValue": "org.postgresql.Driver",
"description": ""
}
},
"editor": {

View file

@ -56,24 +56,6 @@ public class KylinInterpreter extends Interpreter {
static final Pattern KYLIN_TABLE_FORMAT_REGEX_LABEL = Pattern.compile("\"label\":\"(.*?)\"");
static final Pattern KYLIN_TABLE_FORMAT_REGEX = Pattern.compile("\"results\":\\[\\[\"(.*?)\"]]");
static {
Interpreter.register(
"kylin",
"kylin",
KylinInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(KYLIN_USERNAME, "ADMIN", "username for kylin user")
.add(KYLIN_PASSWORD, "KYLIN", "password for kylin user")
.add(KYLIN_QUERY_API_URL, "http://<host>:<port>/kylin/api/query", "Kylin API.")
.add(KYLIN_QUERY_PROJECT, "default", "kylin project name")
.add(KYLIN_QUERY_OFFSET, "0", "kylin query offset")
.add(KYLIN_QUERY_LIMIT, "5000", "kylin query limit")
.add(KYLIN_QUERY_ACCEPT_PARTIAL, "true", "The kylin query partial flag").build());
}
public KylinInterpreter(Properties property) {
super(property);
}

View file

@ -0,0 +1,54 @@
[
{
"group": "kylin",
"name": "kylin",
"className": "org.apache.zeppelin.kylin.KylinInterpreter",
"properties": {
"kylin.api.url": {
"envName": null,
"propertyName": "kylin.api.url",
"defaultValue": "http://localhost:7070/kylin/api/query",
"description": "Kylin API"
},
"kylin.api.user": {
"envName": null,
"propertyName": "kylin.api.user",
"defaultValue": "ADMIN",
"description": "username for kylin user"
},
"kylin.api.password": {
"envName": null,
"propertyName": "kylin.api.password",
"defaultValue": "KYLIN",
"description": "password for kylin user"
},
"kylin.query.project": {
"envName": null,
"propertyName": "kylin.query.project",
"defaultValue": "default",
"description": "kylin project name"
},
"kylin.query.offset": {
"envName": null,
"propertyName": "kylin.query.offset",
"defaultValue": "0",
"description": "kylin query offset"
},
"kylin.query.limit": {
"envName": null,
"propertyName": "kylin.query.limit",
"defaultValue": "5000",
"description": "kylin query limit"
},
"kylin.query.ispartial": {
"envName": null,
"propertyName": "kylin.query.ispartial",
"defaultValue": "true",
"description": "The kylin query partial flag"
}
},
"editor": {
"language": "sql"
}
}
]

View file

@ -20,8 +20,7 @@ import org.apache.http.client.methods.HttpPost;
import org.apache.http.message.AbstractHttpMessage;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.kylin.KylinInterpreter;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.ByteArrayInputStream;
@ -33,19 +32,23 @@ import java.util.Properties;
import static org.junit.Assert.assertEquals;
public class KylinInterpreterTest {
@Before
public void setUp() throws Exception {
}
static final Properties kylinProperties = new Properties();
@After
public void tearDown() throws Exception {
/**
 * Fills the shared {@code kylinProperties} once for the whole test class with the
 * Kylin API endpoint, credentials and query settings handed to the interpreter under test.
 */
@BeforeClass
public static void setUpClass() {
kylinProperties.put("kylin.api.url", "http://localhost:7070/kylin/api/query");
kylinProperties.put("kylin.api.user", "ADMIN");
kylinProperties.put("kylin.api.password", "KYLIN");
kylinProperties.put("kylin.query.project", "default");
kylinProperties.put("kylin.query.offset", "0");
kylinProperties.put("kylin.query.limit", "5000");
kylinProperties.put("kylin.query.ispartial", "true");
}
@Test
public void test(){
KylinInterpreter t = new MockKylinInterpreter(new Properties());
KylinInterpreter t = new MockKylinInterpreter(kylinProperties);
InterpreterResult result = t.interpret(
"select a.date,sum(b.measure) as measure from kylin_fact_table a " +
"inner join kylin_lookup_table b on a.date=b.date group by a.date", null);
@ -198,4 +201,4 @@ class MockEntity implements HttpEntity{
public void consumeContent() throws IOException {
}
}
}

View file

@ -72,6 +72,16 @@
<groupId>org.apache.lens</groupId>
<artifactId>lens-client</artifactId>
<version>${lens.version}</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>

View file

@ -20,6 +20,7 @@ package org.apache.zeppelin.livy;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
@ -130,30 +131,26 @@ public class LivyHelper {
}
}
protected void initializeSpark(final InterpreterContext context,
final Map<String, Integer> userSessionMap) throws Exception {
interpret("val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n" +
"import sqlContext.implicits._", context, userSessionMap);
}
public InterpreterResult interpretInput(String stringLines,
final InterpreterContext context,
final Map<String, Integer> userSessionMap,
LivyOutputStream out) {
LivyOutputStream out,
String appId,
String webUI,
boolean displayAppInfo) {
try {
out.setInterpreterOutput(context.out);
context.out.clear();
String incomplete = "";
boolean inComment = false;
String[] lines = stringLines.split("\n");
String[] linesToRun = new String[lines.length + 1];
for (int i = 0; i < lines.length; i++) {
linesToRun[i] = lines[i];
}
linesToRun[lines.length] = "print(\"\")";
out.setInterpreterOutput(context.out);
context.out.clear();
Code r = null;
String incomplete = "";
boolean inComment = false;
StringBuilder outputBuilder = new StringBuilder();
for (int l = 0; l < linesToRun.length; l++) {
String s = linesToRun[l];
// check if next line starts with "." (but not ".." or "./") it is treated as an invocation
@ -202,7 +199,7 @@ public class LivyHelper {
} else if (r == Code.INCOMPLETE) {
incomplete += s + "\n";
} else {
out.write((res.message() + "\n").getBytes(Charset.forName("UTF-8")));
outputBuilder.append(res.message() + "\n");
incomplete = "";
}
}
@ -211,10 +208,20 @@ public class LivyHelper {
out.setInterpreterOutput(null);
return new InterpreterResult(r, "Incomplete expression");
} else {
if (displayAppInfo) {
out.write("%angular ");
out.write("<pre><code>");
out.write(outputBuilder.toString());
out.write("</code></pre>");
out.write("<hr/>");
out.write("Spark Application Id:" + appId + "<br/>");
out.write("Spark WebUI: <a href=" + webUI + ">" + webUI + "</a>");
} else {
out.write(outputBuilder.toString());
}
out.setInterpreterOutput(null);
return new InterpreterResult(Code.SUCCESS);
}
} catch (Exception e) {
LOGGER.error("error in interpretInput", e);
return new InterpreterResult(Code.ERROR, e.getMessage());
@ -225,16 +232,6 @@ public class LivyHelper {
final InterpreterContext context,
final Map<String, Integer> userSessionMap)
throws Exception {
stringLines = stringLines
//for "\n" present in string
.replaceAll("\\\\n", "\\\\\\\\n")
//for new line present in string
.replaceAll("\\n", "\\\\n")
// for \" present in string
.replaceAll("\\\\\"", "\\\\\\\\\"")
// for " present in string
.replaceAll("\"", "\\\\\"");
if (stringLines.trim().equals("")) {
return new InterpreterResult(Code.SUCCESS, "");
}
@ -301,7 +298,7 @@ public class LivyHelper {
+ userSessionMap.get(context.getAuthenticationInfo().getUser())
+ "/statements",
"POST",
"{\"code\": \"" + lines + "\" }",
"{\"code\": \"" + StringEscapeUtils.escapeJson(lines) + "\"}",
context.getParagraphId());
if (json.matches("^(\")?Session (\'[0-9]\' )?not found(.?\"?)$")) {
throw new Exception("Exception: Session not found, Livy server would have restarted, " +
@ -346,6 +343,7 @@ public class LivyHelper {
protected String executeHTTP(String targetURL, String method, String jsonData, String paragraphId)
throws Exception {
LOGGER.debug("Call rest api in {}, method: {}, jsonData: {}", targetURL, method, jsonData);
RestTemplate restTemplate = getRestTemplate();
HttpHeaders headers = new HttpHeaders();
headers.add("Content-Type", "application/json");

View file

@ -17,6 +17,8 @@
package org.apache.zeppelin.livy;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
@ -25,6 +27,8 @@ import java.io.OutputStream;
* InterpreterOutput can be attached / detached.
*/
public class LivyOutputStream extends OutputStream {
private static Logger LOGGER = LoggerFactory.getLogger(LivyOutputStream.class);
InterpreterOutput interpreterOutput;
public LivyOutputStream() {
@ -52,6 +56,11 @@ public class LivyOutputStream extends OutputStream {
}
}
/**
 * Writes {@code text} to this stream as UTF-8 encoded bytes, logging it at debug level first.
 *
 * @param text the text to write
 * @throws IOException if the underlying byte write fails
 */
public void write(String text) throws IOException {
  LOGGER.debug("livy output:" + text);
  // Use the charset constant instead of a by-name lookup: no string parsing and no
  // UnsupportedEncodingException path for a charset that is always available.
  write(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
@Override
public void write(byte[] b, int offset, int len) throws IOException {
if (interpreterOutput != null) {

View file

@ -38,13 +38,20 @@ public class LivySparkInterpreter extends Interpreter {
private LivyOutputStream out;
protected static Map<String, Integer> userSessionMap;
protected static Map<Integer, String> sessionId2AppIdMap;
protected static Map<Integer, String> sessionId2WebUIMap;
private LivyHelper livyHelper;
private boolean displayAppInfo;
public LivySparkInterpreter(Properties property) {
super(property);
userSessionMap = new HashMap<>();
sessionId2AppIdMap = new HashMap<>();
sessionId2WebUIMap = new HashMap<>();
livyHelper = new LivyHelper(property);
out = new LivyOutputStream();
this.displayAppInfo = Boolean.parseBoolean(getProperty("zeppelin.livy.displayAppInfo"));
}
protected static Map<String, Integer> getUserSessionMap() {
@ -67,25 +74,42 @@ public class LivySparkInterpreter extends Interpreter {
@Override
public InterpreterResult interpret(String line, InterpreterContext interpreterContext) {
try {
Integer sessionId = null;
if (userSessionMap.get(interpreterContext.getAuthenticationInfo().getUser()) == null) {
try {
userSessionMap.put(
interpreterContext.getAuthenticationInfo().getUser(),
livyHelper.createSession(
interpreterContext,
"spark")
);
livyHelper.initializeSpark(interpreterContext, userSessionMap);
sessionId = livyHelper.createSession(interpreterContext, "spark");
userSessionMap.put(interpreterContext.getAuthenticationInfo().getUser(), sessionId);
if (displayAppInfo) {
String appId = extractStatementResult(
livyHelper.interpret("sc.applicationId", interpreterContext, userSessionMap)
.message());
livyHelper.interpret(
"val webui=sc.getClass.getMethod(\"ui\").invoke(sc).asInstanceOf[Some[_]].get",
interpreterContext, userSessionMap);
String webUI = extractStatementResult(
livyHelper.interpret(
"webui.getClass.getMethod(\"appUIAddress\").invoke(webui)",
interpreterContext, userSessionMap).message());
sessionId2AppIdMap.put(sessionId, appId);
sessionId2WebUIMap.put(sessionId, webUI);
LOGGER.info("Create livy session with sessionId: {}, appId: {}, webUI: {}",
sessionId, appId, webUI);
} else {
LOGGER.info("Create livy session with sessionId: {}", sessionId);
}
} catch (Exception e) {
LOGGER.error("Exception in LivySparkInterpreter while interpret ", e);
return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());
}
} else {
sessionId = userSessionMap.get(interpreterContext.getAuthenticationInfo().getUser());
}
if (line == null || line.trim().length() == 0) {
return new InterpreterResult(InterpreterResult.Code.SUCCESS, "");
}
return livyHelper.interpretInput(line, interpreterContext, userSessionMap, out);
return livyHelper.interpretInput(line, interpreterContext, userSessionMap, out,
sessionId2AppIdMap.get(sessionId), sessionId2WebUIMap.get(sessionId), displayAppInfo);
} catch (Exception e) {
LOGGER.error("Exception in LivySparkInterpreter while interpret ", e);
return new InterpreterResult(InterpreterResult.Code.ERROR,
@ -93,6 +117,23 @@ public class LivySparkInterpreter extends Interpreter {
}
}
/**
 * Extracts the evaluated value from a line of spark shell output by returning everything
 * after the first {@code '='}, trimmed. For example
 * {@code res0: String = application_1473129941656_0048} yields
 * {@code application_1473129941656_0048}.
 *
 * @param result the raw statement output
 * @return the trimmed text that follows the first {@code '='}
 * @throws RuntimeException if {@code result} contains no {@code '='}
 */
private static String extractStatementResult(String result) {
  final int separator = result.indexOf("=");
  if (separator < 0) {
    throw new RuntimeException("No result can be extracted from '" + result + "', " +
        "something must be wrong");
  }
  final String value = result.substring(separator + 1);
  return value.trim();
}
@Override
public void cancel(InterpreterContext context) {
livyHelper.cancelHTTP(context.getParagraphId());

View file

@ -65,7 +65,6 @@ public class LivySparkSQLInterpreter extends Interpreter {
interpreterContext,
"spark")
);
livyHelper.initializeSpark(interpreterContext, userSessionMap);
} catch (Exception e) {
LOGGER.error("Exception in LivySparkSQLInterpreter while interpret ", e);
return new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage());

View file

@ -86,6 +86,11 @@
"propertyName": "livy.spark.jars.packages",
"defaultValue": "",
"description": "Adding extra libraries to livy interpreter"
},
"livy.spark.displayAppInfo": {
"propertyName": "zeppelin.livy.displayAppInfo",
"defaultValue": "false",
"description": "Whether display app info"
}
},
"editor": {

View file

@ -34,8 +34,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** MarkdownInterpreter interpreter for Zeppelin. */
public class MarkdownInterpreter extends Interpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(MarkdownInterpreter.class);
public class Markdown extends Interpreter {
private static final Logger LOGGER = LoggerFactory.getLogger(Markdown.class);
private MarkdownParser parser;
@ -60,7 +60,7 @@ public class MarkdownInterpreter extends Interpreter {
public static final String PARSER_TYPE_PEGDOWN = "pegdown";
public static final String PARSER_TYPE_MARKDOWN4J = "markdown4j";
public MarkdownInterpreter(Properties property) {
public Markdown(Properties property) {
super(property);
}
@ -114,7 +114,7 @@ public class MarkdownInterpreter extends Interpreter {
@Override
public Scheduler getScheduler() {
return SchedulerFactory.singleton()
.createOrGetParallelScheduler(MarkdownInterpreter.class.getName() + this.hashCode(), 5);
.createOrGetParallelScheduler(Markdown.class.getName() + this.hashCode(), 5);
}
@Override

View file

@ -2,7 +2,7 @@
{
"group": "md",
"name": "md",
"className": "org.apache.zeppelin.markdown.MarkdownInterpreter",
"className": "org.apache.zeppelin.markdown.Markdown",
"properties": {
"markdown.parser.type": {
"envName": "MARKDOWN_PARSER_TYPE",

View file

@ -28,13 +28,13 @@ import static org.junit.Assert.assertEquals;
public class Markdown4jParserTest {
MarkdownInterpreter md;
Markdown md;
@Before
public void setUp() throws Exception {
Properties props = new Properties();
props.put(MarkdownInterpreter.MARKDOWN_PARSER_TYPE, MarkdownInterpreter.PARSER_TYPE_MARKDOWN4J);
md = new MarkdownInterpreter(props);
props.put(Markdown.MARKDOWN_PARSER_TYPE, Markdown.PARSER_TYPE_MARKDOWN4J);
md = new Markdown(props);
md.open();
}

View file

@ -29,13 +29,13 @@ import org.junit.Test;
public class PegdownParserTest {
MarkdownInterpreter md;
Markdown md;
@Before
public void setUp() throws Exception {
Properties props = new Properties();
props.put(MarkdownInterpreter.MARKDOWN_PARSER_TYPE, MarkdownInterpreter.PARSER_TYPE_PEGDOWN);
md = new MarkdownInterpreter(props);
props.put(Markdown.MARKDOWN_PARSER_TYPE, Markdown.PARSER_TYPE_PEGDOWN);
md = new Markdown(props);
md.open();
}

184
pig/pom.xml Normal file
View file

@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>zeppelin</artifactId>
<groupId>org.apache.zeppelin</groupId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-pig</artifactId>
<packaging>jar</packaging>
<version>0.7.0-SNAPSHOT</version>
<name>Zeppelin: Apache Pig Interpreter</name>
<description>Zeppelin interpreter for Apache Pig</description>
<url>http://zeppelin.apache.org</url>
<properties>
<pig.version>0.16.0</pig.version>
<hadoop.version>2.6.0</hadoop.version>
<tez.version>0.7.0</tez.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<classifier>h2</classifier>
<version>${pig.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-api</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-common</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-dag</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-runtime-library</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-runtime-internals</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-mapreduce</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tez</groupId>
<artifactId>tez-yarn-timeline-history-with-acls</artifactId>
<version>${tez.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
<executions>
<execution>
<id>enforce</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/pig
</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
</configuration>
</execution>
<execution>
<id>copy-artifact</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/../../interpreter/pig
</outputDirectory>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
<includeScope>runtime</includeScope>
<artifactItems>
<artifactItem>
<groupId>${project.groupId}</groupId>
<artifactId>${project.artifactId}</artifactId>
<version>${project.version}</version>
<type>${project.packaging}</type>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.PigServer;
import org.apache.pig.backend.BackendException;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
import org.apache.pig.backend.hadoop.executionengine.Launcher;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.scheduler.Scheduler;
import org.apache.zeppelin.scheduler.SchedulerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.reflect.Field;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Common base class for the Pig interpreters.
 *
 * <p>It looks up the {@link PigScriptListener} registered for a paragraph in
 * {@link #listenerMap} (keyed by paragraph id) to report progress and to kill the jobs
 * that paragraph has started. This class only reads the map; populating it is left to
 * subclasses.
 */
public abstract class BasePigInterpreter extends Interpreter {
  private static final Logger LOGGER = LoggerFactory.getLogger(BasePigInterpreter.class);

  /** Script listeners of running paragraphs, keyed by paragraph id. */
  protected ConcurrentHashMap<String, PigScriptListener> listenerMap = new ConcurrentHashMap<>();

  public BasePigInterpreter(Properties property) {
    super(property);
  }

  /**
   * Kills every job recorded by the listener of the given paragraph.
   *
   * <p>Nothing is killed when no listener is registered for the paragraph or when the
   * listener has not seen any job id yet. A failure to kill one job is logged and does
   * not stop the remaining jobs from being killed.
   *
   * @param context context of the paragraph to cancel
   */
  @Override
  public void cancel(InterpreterContext context) {
    LOGGER.info("Cancel paragraph:" + context.getParagraphId());
    PigScriptListener listener = listenerMap.get(context.getParagraphId());
    if (listener == null) {
      LOGGER.warn("No PigScriptListener found, can not cancel paragraph:"
          + context.getParagraphId());
      return;
    }

    Set<String> jobIds = listener.getJobIds();
    if (jobIds.isEmpty()) {
      LOGGER.info("No job is started, so can not cancel paragraph:" + context.getParagraphId());
      return;
    }

    // The launcher does not depend on the job id, so resolve it once instead of repeating
    // the reflective lookup (and its failure log) for every job.
    Launcher launcher;
    try {
      launcher = lookupLauncher();
    } catch (NoSuchFieldException | IllegalAccessException e) {
      LOGGER.error("Fail to cancel paragraph:" + context.getParagraphId(), e);
      return;
    }

    for (String jobId : jobIds) {
      LOGGER.info("Kill jobId:" + jobId);
      try {
        // It doesn't work for Tez Engine due to PIG-5035
        launcher.killJob(jobId, new Configuration());
      } catch (BackendException e) {
        LOGGER.error("Fail to cancel paragraph:" + context.getParagraphId(), e);
      }
    }
  }

  /**
   * Reads the {@code launcher} field declared on {@link HExecutionEngine} from the execution
   * engine of the current {@link PigServer}. The field is made accessible via reflection.
   *
   * @return the value of the engine's {@code launcher} field
   * @throws NoSuchFieldException if {@link HExecutionEngine} declares no {@code launcher} field
   * @throws IllegalAccessException if the field cannot be read
   */
  private Launcher lookupLauncher() throws NoSuchFieldException, IllegalAccessException {
    HExecutionEngine engine =
        (HExecutionEngine) getPigServer().getPigContext().getExecutionEngine();
    Field launcherField = HExecutionEngine.class.getDeclaredField("launcher");
    launcherField.setAccessible(true);
    return (Launcher) launcherField.get(engine);
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  /**
   * Returns the progress reported by the paragraph's script listener, or 0 when no
   * listener is registered for the paragraph.
   */
  @Override
  public int getProgress(InterpreterContext context) {
    PigScriptListener listener = listenerMap.get(context.getParagraphId());
    if (listener != null) {
      return listener.getProgress();
    }
    return 0;
  }

  /**
   * Returns a FIFO scheduler whose name is built from the {@code PigInterpreter} class name
   * and this instance's hash code.
   */
  @Override
  public Scheduler getScheduler() {
    return SchedulerFactory.singleton().createOrGetFIFOScheduler(
        PigInterpreter.class.getName() + this.hashCode());
  }

  /** Returns the {@link PigServer} this interpreter runs its scripts on. */
  public abstract PigServer getPigServer();
}

View file

@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigServer;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.tools.pigstats.*;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.*;
/**
* Pig interpreter for Zeppelin.
*/
public class PigInterpreter extends BasePigInterpreter {
private static Logger LOGGER = LoggerFactory.getLogger(PigInterpreter.class);
private PigServer pigServer;
private boolean includeJobStats = false;
/**
 * Creates the interpreter with the given properties. Only the properties are passed to
 * the superclass here; no {@code PigServer} is created by the constructor.
 *
 * @param property interpreter properties
 */
public PigInterpreter(Properties property) {
super(property);
}
/**
 * Creates the {@code PigServer} for this interpreter.
 *
 * <p>The execution type is read from {@code zeppelin.pig.execType} and falls back to
 * {@code mapreduce} when the property is not set. If {@code zeppelin.pig.includeJobStats}
 * is set, it is parsed as a boolean into the {@code includeJobStats} field.
 *
 * @throws RuntimeException if the {@code PigServer} cannot be created
 */
@Override
public void open() {
  final String configuredExecType = getProperty("zeppelin.pig.execType");
  final String engine = configuredExecType != null ? configuredExecType : "mapreduce";

  final String jobStatsFlag = getProperty("zeppelin.pig.includeJobStats");
  if (jobStatsFlag != null) {
    includeJobStats = Boolean.parseBoolean(jobStatsFlag);
  }

  try {
    pigServer = new PigServer(engine);
  } catch (IOException e) {
    final String failure = "Fail to initialize PigServer";
    LOGGER.error(failure, e);
    throw new RuntimeException(failure, e);
  }
}
@Override
public void close() {
pigServer = null;
}
@Override
public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) {
// remember the origial stdout, because we will redirect stdout to capture
// the pig dump output.
PrintStream originalStdOut = System.out;
ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream();
File tmpFile = null;
try {
tmpFile = PigUtils.createTempPigScript(cmd);
System.setOut(new PrintStream(bytesOutput));
// each thread should its own ScriptState & PigStats
ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
// reset PigStats, otherwise you may get the PigStats of last job in the same thread
// because PigStats is ThreadLocal variable
PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
PigScriptListener scriptListener = new PigScriptListener();
ScriptState.get().registerListener(scriptListener);
listenerMap.put(contextInterpreter.getParagraphId(), scriptListener);
pigServer.registerScript(tmpFile.getAbsolutePath());
} catch (IOException e) {
if (e instanceof FrontendException) {
FrontendException fe = (FrontendException) e;
if (!fe.getMessage().contains("Backend error :")) {
// If the error message contains "Backend error :", that means the exception is from
// backend.
LOGGER.error("Fail to run pig script.", e);
return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
}
}
PigStats stats = PigStats.get();
if (stats != null) {
String errorMsg = PigUtils.extactJobStats(stats);
if (errorMsg != null) {
LOGGER.error("Fail to run pig script, " + errorMsg);
return new InterpreterResult(Code.ERROR, errorMsg);
}
}
LOGGER.error("Fail to run pig script.", e);
return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
} finally {
System.setOut(originalStdOut);
listenerMap.remove(contextInterpreter.getParagraphId());
if (tmpFile != null) {
tmpFile.delete();
}
}
StringBuilder outputBuilder = new StringBuilder();
PigStats stats = PigStats.get();
if (stats != null && includeJobStats) {
String jobStats = PigUtils.extactJobStats(stats);
if (jobStats != null) {
outputBuilder.append(jobStats);
}
}
outputBuilder.append(bytesOutput.toString());
return new InterpreterResult(Code.SUCCESS, outputBuilder.toString());
}
public PigServer getPigServer() {
return pigServer;
}
}

View file

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.ScriptState;
import org.apache.zeppelin.interpreter.*;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
/**
 * Zeppelin interpreter that runs a Pig query and renders the relation produced by its last
 * statement as a {@code %table} result.
 *
 * <p>It shares the {@link PigServer} of the {@link PigInterpreter} living in the same session,
 * so relations defined by earlier script paragraphs can be queried here.
 */
public class PigQueryInterpreter extends BasePigInterpreter {
  private static Logger LOGGER = LoggerFactory.getLogger(PigQueryInterpreter.class);

  /** Row limit used when {@code zeppelin.pig.maxResult} is not set. */
  private static final int DEFAULT_MAX_RESULT = 1000;

  private PigServer pigServer;
  private int maxResult;

  public PigQueryInterpreter(Properties properties) {
    super(properties);
  }

  /**
   * Borrows the PigServer from the sibling {@link PigInterpreter} and reads the row limit.
   *
   * @throws NumberFormatException if {@code zeppelin.pig.maxResult} is set but not an integer
   */
  @Override
  public void open() {
    pigServer = getPigInterpreter().getPigServer();
    String configuredMaxResult = getProperty("zeppelin.pig.maxResult");
    // Fall back to a default instead of failing with NumberFormatException on a missing value.
    maxResult = (configuredMaxResult == null)
        ? DEFAULT_MAX_RESULT
        : Integer.parseInt(configuredMaxResult);
  }

  @Override
  public void close() {
  }

  /**
   * Runs the query and returns at most {@code maxResult} rows as a tab separated table.
   *
   * <p>The last line of {@code st} is assigned to a paragraph specific alias, the whole text is
   * registered as a script, and the alias is then iterated to build the table.
   *
   * @param st the query text; its last line must be an expression that can be assigned to an
   *           alias
   * @param context paragraph context; its paragraph id names the alias and keys the listener
   * @return a SUCCESS result holding the table, or an ERROR result describing the failure
   */
  @Override
  public InterpreterResult interpret(String st, InterpreterContext context) {
    // '-' is invalid for pig alias
    String alias = "paragraph_" + context.getParagraphId().replace("-", "_");
    String[] lines = st.split("\n");
    List<String> queries = new ArrayList<String>();
    for (int i = 0; i < lines.length; ++i) {
      if (i == lines.length - 1) {
        lines[i] = alias + " = " + lines[i];
      }
      queries.add(lines[i]);
    }

    StringBuilder resultBuilder = new StringBuilder("%table ");
    File tmpScriptFile = null;
    try {
      tmpScriptFile = PigUtils.createTempPigScript(queries);
      // each thread should have its own ScriptState & PigStats
      ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState());
      // reset PigStats, otherwise you may get the PigStats of last job in the same thread
      // because PigStats is ThreadLocal variable
      PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats());
      PigScriptListener scriptListener = new PigScriptListener();
      ScriptState.get().registerListener(scriptListener);
      listenerMap.put(context.getParagraphId(), scriptListener);
      pigServer.registerScript(tmpScriptFile.getAbsolutePath());

      Schema schema = pigServer.dumpSchema(alias);
      boolean schemaKnown = (schema != null);
      if (schemaKnown) {
        for (int i = 0; i < schema.size(); ++i) {
          Schema.FieldSchema field = schema.getField(i);
          resultBuilder.append(field.alias);
          if (i != schema.size() - 1) {
            resultBuilder.append("\t");
          }
        }
        resultBuilder.append("\n");
      }

      Iterator<Tuple> iter = pigServer.openIterator(alias);
      // Without a schema the header is synthesized from the width of the first row.
      boolean headerPending = !schemaKnown;
      int index = 0;
      // Strictly less than: the limit is the number of rows emitted, not the last row index.
      while (iter.hasNext() && index < maxResult) {
        index++;
        Tuple tuple = iter.next();
        if (headerPending) {
          appendGeneratedHeader(resultBuilder, tuple.size());
          headerPending = false;
        }
        resultBuilder.append(StringUtils.join(tuple, "\t"));
        resultBuilder.append("\n");
      }
      if (index >= maxResult && iter.hasNext()) {
        resultBuilder.append("\n<font color=red>Results are limited by ")
            .append(maxResult)
            .append(".</font>");
      }
    } catch (IOException e) {
      return toErrorResult(e);
    } finally {
      listenerMap.remove(context.getParagraphId());
      // The script has been consumed by now; do not leave it behind in the temp directory.
      if (tmpScriptFile != null) {
        tmpScriptFile.delete();
      }
    }
    return new InterpreterResult(Code.SUCCESS, resultBuilder.toString());
  }

  @Override
  public PigServer getPigServer() {
    return this.pigServer;
  }

  /** Appends the header row {@code c_0 ... c_(n-1)}, tab separated and without a trailing tab. */
  private static void appendGeneratedHeader(StringBuilder builder, int columnCount) {
    for (int i = 0; i < columnCount; ++i) {
      if (i > 0) {
        builder.append("\t");
      }
      builder.append("c_").append(i);
    }
    builder.append("\n");
  }

  /**
   * Builds the ERROR result for a failed query. Errors are extracted in the following order:
   * 1. FrontendException without "Backend error :" in its message (query compilation phase),
   * 2. PigStats (execution error),
   * 3. the stack trace of any other error.
   */
  private InterpreterResult toErrorResult(IOException e) {
    boolean frontendFailure = e instanceof FrontendException
        && !e.getMessage().contains("Backend error :");
    if (!frontendFailure) {
      PigStats stats = PigStats.get();
      if (stats != null) {
        String errorMsg = PigUtils.extactJobStats(stats);
        if (errorMsg != null) {
          LOGGER.error("Fail to run pig query, " + errorMsg);
          return new InterpreterResult(Code.ERROR, errorMsg);
        }
      }
    }
    LOGGER.error("Fail to run pig query.", e);
    return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e));
  }

  /**
   * Looks up the {@link PigInterpreter} of the same session, unwrapping any
   * {@link WrappedInterpreter} layers, and opens the {@link LazyOpenInterpreter} wrapper if one
   * was found on the way.
   */
  private PigInterpreter getPigInterpreter() {
    LazyOpenInterpreter lazy = null;
    Interpreter p = getInterpreterInTheSameSessionByClassName(PigInterpreter.class.getName());
    while (p instanceof WrappedInterpreter) {
      if (p instanceof LazyOpenInterpreter) {
        lazy = (LazyOpenInterpreter) p;
      }
      p = ((WrappedInterpreter) p).getInnerInterpreter();
    }
    PigInterpreter pig = (PigInterpreter) p;
    if (lazy != null) {
      lazy.open();
    }
    return pig;
  }
}

View file

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.pig.impl.plan.OperatorPlan;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigProgressNotificationListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashSet;
import java.util.Set;
/**
 * Progress listener that remembers the ids of the jobs a Pig script has started and the most
 * recent progress value reported for it. All other notifications are ignored.
 */
public class PigScriptListener implements PigProgressNotificationListener {
  private static final Logger LOGGER = LoggerFactory.getLogger(PigScriptListener.class);

  private final Set<String> jobIds = new HashSet<String>();
  private int progress;

  @Override
  public void initialPlanNotification(String scriptId, OperatorPlan<?> plan) {
  }

  @Override
  public void launchStartedNotification(String scriptId, int numJobsToLaunch) {
  }

  @Override
  public void jobsSubmittedNotification(String scriptId, int numJobsSubmitted) {
  }

  /** Records the id assigned to a job that has just been started. */
  @Override
  public void jobStartedNotification(String scriptId, String assignedJobId) {
    this.jobIds.add(assignedJobId);
  }

  @Override
  public void jobFinishedNotification(String scriptId, JobStats jobStats) {
  }

  @Override
  public void jobFailedNotification(String scriptId, JobStats jobStats) {
  }

  @Override
  public void outputCompletedNotification(String scriptId, OutputStats outputStats) {
  }

  /** Stores the latest progress value reported for the script. */
  @Override
  public void progressUpdatedNotification(String scriptId, int progress) {
    LOGGER.debug("scriptId:{}, progress:{}", scriptId, progress);
    this.progress = progress;
  }

  @Override
  public void launchCompletedNotification(String scriptId, int numJobsSucceeded) {
  }

  /**
   * @return the live set of job ids recorded so far (not a copy)
   */
  public Set<String> getJobIds() {
    return jobIds;
  }

  /**
   * @return the last progress value received, or 0 if none has been reported yet
   */
  public int getProgress() {
    return progress;
  }
}

View file

@ -0,0 +1,292 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pig.PigRunner;
import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType;
import org.apache.pig.tools.pigstats.InputStats;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.OutputStats;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.pig.tools.pigstats.mapreduce.MRJobStats;
import org.apache.pig.tools.pigstats.mapreduce.SimplePigStats;
import org.apache.pig.tools.pigstats.tez.TezDAGStats;
import org.apache.pig.tools.pigstats.tez.TezPigScriptStats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
 * Static helpers shared by the Pig interpreters: writing a script to a temporary file, rendering
 * {@link PigStats} as text, and collecting the job ids recorded in them.
 *
 * <p>Several values are only held in non-public fields of the Pig statistics classes and are
 * therefore read through reflection.
 */
public class PigUtils {
  private static Logger LOGGER = LoggerFactory.getLogger(PigUtils.class);

  protected static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";

  /**
   * Writes {@code content} to a newly created temporary file.
   *
   * @param content the script text
   * @return the absolute form of the temporary file; the caller is responsible for deleting it
   * @throws IOException if the file cannot be created or written; a partially written file is
   *         removed before the exception is propagated
   */
  public static File createTempPigScript(String content) throws IOException {
    File tmpFile = File.createTempFile("zeppelin", "pig");
    LOGGER.debug("Create pig script file:" + tmpFile.getAbsolutePath());
    // try-with-resources closes the writer even when the write itself fails
    try (FileWriter writer = new FileWriter(tmpFile)) {
      IOUtils.write(content, writer);
    } catch (IOException e) {
      // The caller never receives the file in this case, so clean it up here.
      tmpFile.delete();
      throw e;
    }
    return tmpFile.getAbsoluteFile();
  }

  /**
   * Joins {@code lines} with newlines and writes them to a temporary script file.
   *
   * @see #createTempPigScript(String)
   */
  public static File createTempPigScript(List<String> lines) throws IOException {
    return createTempPigScript(StringUtils.join(lines, "\n"));
  }

  /**
   * Renders the statistics of a script run as text.
   *
   * @param stats statistics of type {@link SimplePigStats} or {@link TezPigScriptStats}
   * @return the rendered statistics; may be {@code null} for {@link SimplePigStats} whose return
   *         code is unknown or which carry no pig context
   * @throws RuntimeException if {@code stats} is of any other type
   */
  public static String extactJobStats(PigStats stats) {
    if (stats instanceof SimplePigStats) {
      return extractFromSimplePigStats((SimplePigStats) stats);
    } else if (stats instanceof TezPigScriptStats) {
      return extractFromTezPigStats((TezPigScriptStats) stats);
    } else {
      throw new RuntimeException("Unrecognized stats type:" + stats.getClass().getSimpleName());
    }
  }

  /**
   * Renders {@link SimplePigStats} as text.
   *
   * @return the rendered statistics, {@code null} when the return code is unknown or the pig
   *         context is missing, or an error description when the statistics cannot be read
   */
  public static String extractFromSimplePigStats(SimplePigStats stats) {
    try {
      String userId = (String) readPrivateField(PigStats.class, "userId", stats);
      long startTime = (Long) readPrivateField(PigStats.class, "startTime", stats);
      long endTime = (Long) readPrivateField(PigStats.class, "endTime", stats);

      if (stats.getReturnCode() == PigRunner.ReturnCode.UNKNOWN) {
        LOGGER.warn("unknown return code, can't display the results");
        return null;
      }
      if (stats.getPigContext() == null) {
        LOGGER.warn("unknown exec type, don't display the results");
        return null;
      }

      SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
      StringBuilder sb = new StringBuilder();
      sb.append("\nHadoopVersion\tPigVersion\tUserId\tStartedAt\tFinishedAt\tFeatures\n");
      sb.append(stats.getHadoopVersion()).append("\t").append(stats.getPigVersion()).append("\t")
          .append(userId).append("\t")
          .append(sdf.format(new Date(startTime))).append("\t")
          .append(sdf.format(new Date(endTime))).append("\t")
          .append(stats.getFeatures()).append("\n");
      sb.append("\n");

      if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS) {
        sb.append("Success!\n");
      } else if (stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
        sb.append("Some jobs have failed! Stop running all dependent jobs\n");
      } else {
        sb.append("Failed!\n");
      }
      sb.append("\n");

      PigStats.JobGraph jobPlan =
          (PigStats.JobGraph) readPrivateField(PigStats.class, "jobPlan", stats);

      if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS
          || stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
        sb.append("Job Stats (time in seconds):\n");
        sb.append(MRJobStats.SUCCESS_HEADER).append("\n");
        for (JobStats js : jobPlan.getSuccessfulJobs()) {
          sb.append(js.getDisplayString());
        }
        sb.append("\n");
      }
      if (stats.getReturnCode() == PigRunner.ReturnCode.FAILURE
          || stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
        sb.append("Failed Jobs:\n");
        sb.append(MRJobStats.FAILURE_HEADER).append("\n");
        for (JobStats js : jobPlan.getFailedJobs()) {
          sb.append(js.getDisplayString());
        }
        sb.append("\n");
      }

      sb.append("Input(s):\n");
      for (InputStats is : stats.getInputStats()) {
        sb.append(is.getDisplayString());
      }
      sb.append("\n");
      sb.append("Output(s):\n");
      for (OutputStats ds : stats.getOutputStats()) {
        sb.append(ds.getDisplayString());
      }

      sb.append("\nCounters:\n");
      sb.append("Total records written : " + stats.getRecordWritten()).append("\n");
      sb.append("Total bytes written : " + stats.getBytesWritten()).append("\n");
      sb.append("Spillable Memory Manager spill count : "
          + stats.getSMMSpillCount()).append("\n");
      sb.append("Total bags proactively spilled: "
          + stats.getProactiveSpillCountObjects()).append("\n");
      sb.append("Total records proactively spilled: "
          + stats.getProactiveSpillCountRecords()).append("\n");
      sb.append("\nJob DAG:\n").append(jobPlan.toString());

      return "Script Statistics: \n" + sb.toString();
    } catch (Exception e) {
      LOGGER.error("Can not extract message from SimplePigStats", e);
      return "Can not extract message from SimplePigStats," + ExceptionUtils.getStackTrace(e);
    }
  }

  /**
   * Renders {@link TezPigScriptStats} as text, including the error message on failure and the
   * display string of every recorded DAG.
   *
   * @return the rendered statistics, or an error description when they cannot be read
   */
  private static String extractFromTezPigStats(TezPigScriptStats stats) {
    try {
      String userId = (String) readPrivateField(PigStats.class, "userId", stats);
      long startTime = (Long) readPrivateField(PigStats.class, "startTime", stats);
      long endTime = (Long) readPrivateField(PigStats.class, "endTime", stats);

      // Right aligned label, left aligned value, one property per line.
      final String rowFormat = "%1$20s: %2$-100s%n";
      SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
      StringBuilder sb = new StringBuilder();
      sb.append("\n");
      sb.append(String.format(rowFormat, "HadoopVersion", stats.getHadoopVersion()));
      sb.append(String.format(rowFormat, "PigVersion", stats.getPigVersion()));
      sb.append(String.format(rowFormat, "TezVersion", TezExecType.getTezVersion()));
      sb.append(String.format(rowFormat, "UserId", userId));
      sb.append(String.format(rowFormat, "FileName", stats.getFileName()));
      sb.append(String.format(rowFormat, "StartedAt", sdf.format(new Date(startTime))));
      sb.append(String.format(rowFormat, "FinishedAt", sdf.format(new Date(endTime))));
      sb.append(String.format(rowFormat, "Features", stats.getFeatures()));
      sb.append("\n");

      if (stats.getReturnCode() == PigRunner.ReturnCode.SUCCESS) {
        sb.append("Success!\n");
      } else if (stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
        sb.append("Some tasks have failed! Stop running all dependent tasks\n");
      } else {
        sb.append("Failed!\n");
      }
      sb.append("\n");

      // Print diagnostic info in case of failure
      if (stats.getReturnCode() == PigRunner.ReturnCode.FAILURE
          || stats.getReturnCode() == PigRunner.ReturnCode.PARTIAL_FAILURE) {
        if (stats.getErrorMessage() != null) {
          String[] lines = stats.getErrorMessage().split("\n");
          for (int i = 0; i < lines.length; i++) {
            String s = lines[i].trim();
            // The first line always carries the label; later blank lines are dropped.
            if (i == 0 || !StringUtils.isEmpty(s)) {
              sb.append(String.format(rowFormat, i == 0 ? "ErrorMessage" : "", s));
            }
          }
          sb.append("\n");
        }
      }

      int count = 0;
      for (TezDAGStats dagStats : readTezDagStats(stats).values()) {
        sb.append("\n");
        sb.append("DAG " + count++ + ":\n");
        sb.append(dagStats.getDisplayString());
        sb.append("\n");
      }

      sb.append("Input(s):\n");
      for (InputStats is : stats.getInputStats()) {
        sb.append(is.getDisplayString().trim()).append("\n");
      }
      sb.append("\n");
      sb.append("Output(s):\n");
      for (OutputStats os : stats.getOutputStats()) {
        sb.append(os.getDisplayString().trim()).append("\n");
      }
      return "Script Statistics:\n" + sb.toString();
    } catch (Exception e) {
      LOGGER.error("Can not extract message from TezPigScriptStats", e);
      return "Can not extract message from TezPigScriptStats," + ExceptionUtils.getStackTrace(e);
    }
  }

  /**
   * Collects the job ids recorded in the statistics of a script run.
   *
   * @param stat statistics of type {@link SimplePigStats} or {@link TezPigScriptStats}
   * @throws RuntimeException if {@code stat} is of any other type or cannot be read
   */
  public static List<String> extractJobIds(PigStats stat) {
    if (stat instanceof SimplePigStats) {
      return extractJobIdsFromSimplePigStats((SimplePigStats) stat);
    } else if (stat instanceof TezPigScriptStats) {
      return extractJobIdsFromTezPigStats((TezPigScriptStats) stat);
    } else {
      throw new RuntimeException("Unrecognized stats type:" + stat.getClass().getSimpleName());
    }
  }

  /**
   * Collects the id of every job in the job plan of {@code stat}.
   *
   * @throws RuntimeException if the job plan cannot be read
   */
  public static List<String> extractJobIdsFromSimplePigStats(SimplePigStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
      PigStats.JobGraph jobPlan =
          (PigStats.JobGraph) readPrivateField(PigStats.class, "jobPlan", stat);
      for (JobStats js : jobPlan.getJobList()) {
        jobIds.add(js.getJobId());
      }
      return jobIds;
    } catch (Exception e) {
      LOGGER.error("Can not extract jobIds from SimplePigStats", e);
      throw new RuntimeException("Can not extract jobIds from SimplePigStats", e);
    }
  }

  /**
   * Collects the job id of every DAG recorded in {@code stat}.
   *
   * @throws RuntimeException if the DAG statistics cannot be read
   */
  public static List<String> extractJobIdsFromTezPigStats(TezPigScriptStats stat) {
    List<String> jobIds = new ArrayList<>();
    try {
      for (TezDAGStats dagStats : readTezDagStats(stat).values()) {
        LOGGER.debug("Tez JobId:" + dagStats.getJobId());
        jobIds.add(dagStats.getJobId());
      }
      return jobIds;
    } catch (Exception e) {
      LOGGER.error("Can not extract jobIds from TezPigScriptStats", e);
      throw new RuntimeException("Can not extract jobIds from TezPigScriptStats", e);
    }
  }

  /** Reads the field {@code fieldName} declared by {@code owner} from {@code target}. */
  private static Object readPrivateField(Class<?> owner, String fieldName, Object target)
      throws NoSuchFieldException, IllegalAccessException {
    Field field = owner.getDeclaredField(fieldName);
    field.setAccessible(true);
    return field.get(target);
  }

  /** Reads the {@code tezDAGStatsMap} field of {@code stats}. */
  @SuppressWarnings("unchecked")
  private static Map<String, TezDAGStats> readTezDagStats(TezPigScriptStats stats)
      throws NoSuchFieldException, IllegalAccessException {
    // Unchecked: the cast relies on the declared type of the reflected field.
    return (Map<String, TezDAGStats>)
        readPrivateField(TezPigScriptStats.class, "tezDAGStatsMap", stats);
  }
}

View file

@ -0,0 +1,46 @@
[
{
"group": "pig",
"name": "script",
"className": "org.apache.zeppelin.pig.PigInterpreter",
"properties": {
"zeppelin.pig.execType": {
"envName": null,
"propertyName": "zeppelin.pig.execType",
"defaultValue": "mapreduce",
"description": "local | mapreduce | tez"
},
"zeppelin.pig.includeJobStats": {
"envName": null,
"propertyName": "zeppelin.pig.includeJobStats",
"defaultValue": "false",
"description": "flag to include job stats in output"
}
},
"editor": {
"language": "pig"
}
},
{
"group": "pig",
"name": "query",
"className": "org.apache.zeppelin.pig.PigQueryInterpreter",
"properties": {
"zeppelin.pig.execType": {
"envName": null,
"propertyName": "zeppelin.pig.execType",
"defaultValue": "mapreduce",
"description": "local | mapreduce | tez"
},
"zeppelin.pig.maxResult": {
"envName": null,
"propertyName": "zeppelin.pig.maxResult",
"defaultValue": "1000",
"description": "max row number for %pig.query"
}
},
"editor": {
"language": "pig"
}
}
]

View file

@ -0,0 +1,155 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link PigInterpreter} running in local exec mode.
 */
public class PigInterpreterTest {
  private PigInterpreter pigInterpreter;
  private InterpreterContext context;

  @Before
  public void setUp() {
    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "local");
    pigInterpreter = new PigInterpreter(properties);
    pigInterpreter.open();
    context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null,
        null, null);
  }

  @After
  public void tearDown() {
    pigInterpreter.close();
  }

  /** Covers dump, describe, a compilation error and an execution error without job stats. */
  @Test
  public void testBasics() throws IOException {
    String content = "1\tandy\n"
        + "2\tpeter\n";
    File tmpFile = createTempDataFile(content);

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Syntax error, unexpected symbol at or near 'a'"));

    // execution error
    pigscript = "a = load 'invalid_path';"
        + "dump a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Input path does not exist"));
  }

  /** Same scenarios as {@link #testBasics()}, with {@code includeJobStats} switched on. */
  @Test
  public void testIncludeJobStats() throws IOException {
    // Release the interpreter created by setUp() before replacing it.
    pigInterpreter.close();
    Properties properties = new Properties();
    properties.put("zeppelin.pig.execType", "local");
    properties.put("zeppelin.pig.includeJobStats", "true");
    pigInterpreter = new PigInterpreter(properties);
    pigInterpreter.open();

    String content = "1\tandy\n"
        + "2\tpeter\n";
    File tmpFile = createTempDataFile(content);

    // simple pig script using dump
    String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';"
        + "dump a;";
    InterpreterResult result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    assertTrue(result.message().contains("Counters:"));
    assertTrue(result.message().contains("(1,andy)\n(2,peter)"));

    // describe
    pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.SUCCESS, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().contains("Counters:"));
    assertTrue(result.message().contains("a: {id: int,name: bytearray}"));

    // syntax error (compilation error)
    pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';"
        + "describe a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    // no job is launched, so no jobStats
    assertTrue(!result.message().contains("Counters:"));
    assertTrue(result.message().contains("Syntax error, unexpected symbol at or near 'a'"));

    // execution error
    pigscript = "a = load 'invalid_path';"
        + "dump a;";
    result = pigInterpreter.interpret(pigscript, context);
    assertEquals(Type.TEXT, result.type());
    assertEquals(Code.ERROR, result.code());
    assertTrue(result.message().contains("Counters:"));
    assertTrue(result.message().contains("Input path does not exist"));
  }

  /**
   * Writes {@code content} to a temporary file that is removed when the JVM exits, so test runs
   * do not pile up files in the temp directory.
   */
  private static File createTempDataFile(String content) throws IOException {
    File tmpFile = File.createTempFile("zeppelin", "test");
    tmpFile.deleteOnExit();
    FileWriter writer = new FileWriter(tmpFile);
    try {
      IOUtils.write(content, writer);
    } finally {
      writer.close();
    }
    return tmpFile;
  }
}

View file

@ -0,0 +1,153 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.pig;
import org.apache.commons.io.IOUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
*
*/
public class PigQueryInterpreterTest {
private PigInterpreter pigInterpreter;
private PigQueryInterpreter pigQueryInterpreter;
private InterpreterContext context;
@Before
public void setUp() {
Properties properties = new Properties();
properties.put("zeppelin.pig.execType", "local");
properties.put("zeppelin.pig.maxResult", "20");
pigInterpreter = new PigInterpreter(properties);
pigQueryInterpreter = new PigQueryInterpreter(properties);
List<Interpreter> interpreters = new ArrayList();
interpreters.add(pigInterpreter);
interpreters.add(pigQueryInterpreter);
InterpreterGroup group = new InterpreterGroup();
group.put("note_id", interpreters);
pigInterpreter.setInterpreterGroup(group);
pigQueryInterpreter.setInterpreterGroup(group);
pigInterpreter.open();
pigQueryInterpreter.open();
context = new InterpreterContext(null, "paragraph_id", null, null, null, null, null, null, null,
null, null);
}
@After
public void tearDown() {
pigInterpreter.close();
pigQueryInterpreter.close();
}
/**
 * Runs a Pig script through {@code pigInterpreter}, then queries the aliases it
 * defined through {@code pigQueryInterpreter}, covering the success path, a
 * schema error and an execution error (alias {@code a2} loads a missing path).
 *
 * @throws IOException if the temporary input file cannot be created or written
 */
@Test
public void testBasics() throws IOException {
  String content = "andy\tmale\t10\n"
      + "peter\tmale\t20\n"
      + "amy\tfemale\t14\n";
  File tmpFile = File.createTempFile("zeppelin", "test");
  // the fixture is read again by the later queries, so only remove it at JVM exit
  tmpFile.deleteOnExit();
  // try-with-resources closes the writer even when the write fails
  try (FileWriter writer = new FileWriter(tmpFile)) {
    IOUtils.write(content, writer);
  }

  // run script in PigInterpreter
  String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (name, gender, age);\n"
      + "a2 = load 'invalid_path' as (name, gender, age);\n"
      + "dump a;";
  InterpreterResult result = pigInterpreter.interpret(pigscript, context);
  assertEquals(InterpreterResult.Type.TEXT, result.type());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertTrue(result.message().contains("(andy,male,10)\n(peter,male,20)\n(amy,female,14)"));

  // run single line query in PigQueryInterpreter
  String query = "foreach a generate name, age;";
  result = pigQueryInterpreter.interpret(query, context);
  assertEquals(InterpreterResult.Type.TABLE, result.type());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals("name\tage\nandy\t10\npeter\t20\namy\t14\n", result.message());

  // run multiple line query in PigQueryInterpreter
  query = "b = group a by gender;\nforeach b generate group as gender, COUNT($1) as count;";
  result = pigQueryInterpreter.interpret(query, context);
  assertEquals(InterpreterResult.Type.TABLE, result.type());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertEquals("gender\tcount\nmale\t2\nfemale\t1\n", result.message());

  // syntax error in PigQueryInterpreter
  query = "b = group a by invalid_column;\nforeach b generate group as gender, COUNT($1) as count;";
  result = pigQueryInterpreter.interpret(query, context);
  assertEquals(InterpreterResult.Type.TEXT, result.type());
  assertEquals(InterpreterResult.Code.ERROR, result.code());
  assertTrue(result.message().contains("Projected field [invalid_column] does not exist in schema"));

  // execution error in PigQueryInterpreter
  query = "foreach a2 generate name, age;";
  result = pigQueryInterpreter.interpret(query, context);
  assertEquals(InterpreterResult.Type.TEXT, result.type());
  assertEquals(InterpreterResult.Code.ERROR, result.code());
  assertTrue(result.message().contains("Input path does not exist"));
}
/**
 * Loads a 30-row file and checks that the query interpreter returns a table
 * whose message reports that results were limited to 20.
 *
 * @throws IOException if the temporary input file cannot be created or written
 */
@Test
public void testMaxResult() throws IOException {
  StringBuilder content = new StringBuilder();
  for (int i = 0; i < 30; ++i) {
    content.append(i).append("\tname_").append(i).append("\n");
  }
  File tmpFile = File.createTempFile("zeppelin", "test");
  // the fixture is read again by the later query, so only remove it at JVM exit
  tmpFile.deleteOnExit();
  // try-with-resources closes the writer even when the write fails
  try (FileWriter writer = new FileWriter(tmpFile)) {
    IOUtils.write(content, writer);
  }

  // run script in PigInterpreter
  String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id, name);";
  InterpreterResult result = pigInterpreter.interpret(pigscript, context);
  assertEquals(InterpreterResult.Type.TEXT, result.type());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  // empty output
  assertTrue(result.message().isEmpty());

  // run single line query in PigQueryInterpreter
  String query = "foreach a generate id;";
  result = pigQueryInterpreter.interpret(query, context);
  assertEquals(InterpreterResult.Type.TABLE, result.type());
  assertEquals(InterpreterResult.Code.SUCCESS, result.code());
  assertTrue(result.message().contains("id\n0\n1\n2"));
  assertTrue(result.message().contains("Results are limited by 20"));
}
}

View file

@ -0,0 +1,22 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Root logger: INFO level, sent to the single appender named "stdout".
log4j.rootLogger = INFO, stdout
# "stdout" writes to the console using a pattern layout.
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
# Pattern fields: level (%p, width 5), date (%d), thread (%t), file (%F),
# method (%M), line (%L), message (%m), newline (%n).
# NOTE(review): %F/%M/%L need caller location lookup, which log4j documents as
# slow - presumably acceptable here; confirm this config is test-only.
log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n

View file

@ -62,6 +62,7 @@
<module>shell</module>
<module>livy</module>
<module>hbase</module>
<module>pig</module>
<module>postgresql</module>
<module>jdbc</module>
<module>file</module>
@ -579,6 +580,13 @@
</modules>
</profile>
<profile>
<id>beam</id>
<modules>
<module>beam</module>
</modules>
</profile>
<profile>
<id>examples</id>
<modules>
@ -746,6 +754,7 @@
<exclude>.spark-dist/**</exclude>
<exclude>**/interpreter-setting.json</exclude>
<exclude>**/constants.json</exclude>
<exclude>scripts/**</exclude>
<!-- bundled from bootstrap -->
<exclude>docs/assets/themes/zeppelin/bootstrap/**</exclude>

View file

@ -0,0 +1,6 @@
<!-- Sets the default filesystem URI to HDFS at 0.0.0.0, port 8020. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://0.0.0.0:8020</value>
</property>
</configuration>

View file

@ -0,0 +1,64 @@
<!--
  HDFS settings: a replication factor of 1, a fixed data directory, permission
  checks switched off, hostname-based datanode addressing, and explicit
  datanode listen addresses bound to 0.0.0.0.
-->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- NOTE(review): Hadoop 2.x appears to list dfs.data.dir as a deprecated
     name for dfs.datanode.data.dir - confirm against the targeted version. -->
<property>
<name>dfs.data.dir</name>
<value>/data/hdfs</value>
<final>true</final>
</property>
<!-- NOTE(review): Hadoop 2.x appears to list dfs.permissions as a deprecated
     name for dfs.permissions.enabled - confirm against the targeted version. -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
<description>Whether clients should use datanode hostnames when
connecting to datanodes.
</description>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
<description>Whether datanodes should use datanode hostnames when
connecting to other datanodes for data transfer.
</description>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
<description>
The address where the datanode server will listen to.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
<description>
The datanode http server address and port.
If the port is 0 then the server will start on a free port.
</description>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:50020</value>
<description>
The datanode ipc server address and port.
If the port is 0 then the server will start on a free port.
</description>
</property>
</configuration>

View file

@ -0,0 +1,6 @@
<!-- Selects "yarn" as the MapReduce framework. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>

View file

@ -0,0 +1,26 @@
<!--
  YARN settings: ResourceManager endpoints (scheduler 8030, resource-tracker 8031,
  client 8032, admin 8033, web UI 8088), all bound to 0.0.0.0, plus the
  application classpath rooted at /usr/local/hadoop.
-->
<configuration>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>0.0.0.0:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>0.0.0.0:8032</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>0.0.0.0:8088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>0.0.0.0:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>0.0.0.0:8033</value>
</property>
<!-- Comma-separated classpath entries; the last one points at spark jars. -->
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*, /usr/local/hadoop/share/spark/*</value>
</property>
</configuration>

View file

@ -525,9 +525,9 @@
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.1</py4j.version>
<py4j.version>0.10.3</py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>

View file

@ -37,7 +37,7 @@
<jsoup.version>1.8.2</jsoup.version>
<mockito.version>1.10.19</mockito.version>
<powermock.version>1.6.4</powermock.version>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
</properties>
<dependencies>
@ -519,9 +519,9 @@
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.0.0</spark.version>
<spark.version>2.0.1</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.1</py4j.version>
<py4j.version>0.10.3</py4j.version>
<scala.version>2.11.8</scala.version>
</properties>
</profile>

View file

@ -49,6 +49,7 @@ import org.apache.spark.ui.jobs.JobProgressListener;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import org.apache.zeppelin.interpreter.InterpreterProperty;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
@ -101,6 +102,7 @@ public class SparkInterpreter extends Interpreter {
private SparkConf conf;
private static SparkContext sc;
private static SQLContext sqlc;
private static InterpreterHookRegistry hooks;
private static SparkEnv env;
private static Object sparkSession; // spark 2.x
private static JobProgressListener sparkListener;
@ -384,6 +386,7 @@ public class SparkInterpreter extends Interpreter {
}
String classServerUri = null;
String replClassOutputDirectory = null;
try { // in case of spark 1.1x, spark 1.2x
Method classServer = intp.getClass().getMethod("classServer");
@ -407,6 +410,16 @@ public class SparkInterpreter extends Interpreter {
}
}
if (classServerUri == null) {
try { // for RcpEnv
Method getClassOutputDirectory = intp.getClass().getMethod("getClassOutputDirectory");
File classOutputDirectory = (File) getClassOutputDirectory.invoke(intp);
replClassOutputDirectory = classOutputDirectory.getAbsolutePath();
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
| IllegalArgumentException | InvocationTargetException e) {
// continue
}
}
if (Utils.isScala2_11()) {
classServer = createHttpServer(outputDir);
@ -421,6 +434,10 @@ public class SparkInterpreter extends Interpreter {
conf.set("spark.repl.class.uri", classServerUri);
}
if (replClassOutputDirectory != null) {
conf.set("spark.repl.class.outputDir", replClassOutputDirectory);
}
if (jars.length > 0) {
conf.setJars(jars);
}
@ -464,7 +481,7 @@ public class SparkInterpreter extends Interpreter {
//Only one of py4j-0.9-src.zip and py4j-0.8.2.1-src.zip should exist
String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.9-src.zip", "py4j-0.8.2.1-src.zip",
"py4j-0.10.1-src.zip"};
"py4j-0.10.1-src.zip", "py4j-0.10.3-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib : pythonLibs) {
File libFile = new File(pysparkPath, lib);
@ -584,6 +601,24 @@ public class SparkInterpreter extends Interpreter {
argList.add(arg);
}
DepInterpreter depInterpreter = getDepInterpreter();
String depInterpreterClasspath = "";
if (depInterpreter != null) {
SparkDependencyContext depc = depInterpreter.getDependencyContext();
if (depc != null) {
List<File> files = depc.getFiles();
if (files != null) {
for (File f : files) {
if (depInterpreterClasspath.length() > 0) {
depInterpreterClasspath += File.pathSeparator;
}
depInterpreterClasspath += f.getAbsolutePath();
}
}
}
}
if (Utils.isScala2_10()) {
scala.collection.immutable.List<String> list =
JavaConversions.asScalaBuffer(argList).toList();
@ -611,10 +646,22 @@ public class SparkInterpreter extends Interpreter {
argList.add("-Yrepl-class-based");
argList.add("-Yrepl-outdir");
argList.add(outputDir.getAbsolutePath());
String classpath = "";
if (conf.contains("spark.jars")) {
String jars = StringUtils.join(conf.get("spark.jars").split(","), File.separator);
classpath = StringUtils.join(conf.get("spark.jars").split(","), File.separator);
}
if (!depInterpreterClasspath.isEmpty()) {
if (!classpath.isEmpty()) {
classpath += File.separator;
}
classpath += depInterpreterClasspath;
}
if (!classpath.isEmpty()) {
argList.add("-classpath");
argList.add(jars);
argList.add(classpath);
}
scala.collection.immutable.List<String> list =
@ -626,6 +673,7 @@ public class SparkInterpreter extends Interpreter {
// set classpath for scala compiler
PathSetting pathSettings = settings.classpath();
String classpath = "";
List<File> paths = currentClassPath();
for (File f : paths) {
if (classpath.length() > 0) {
@ -644,21 +692,10 @@ public class SparkInterpreter extends Interpreter {
}
// add dependency from DepInterpreter
DepInterpreter depInterpreter = getDepInterpreter();
if (depInterpreter != null) {
SparkDependencyContext depc = depInterpreter.getDependencyContext();
if (depc != null) {
List<File> files = depc.getFiles();
if (files != null) {
for (File f : files) {
if (classpath.length() > 0) {
classpath += File.pathSeparator;
}
classpath += f.getAbsolutePath();
}
}
}
if (classpath.length() > 0) {
classpath += File.pathSeparator;
}
classpath += depInterpreterClasspath;
// add dependency from local repo
String localRepo = getProperty("zeppelin.interpreter.localRepo");
@ -778,8 +815,10 @@ public class SparkInterpreter extends Interpreter {
sqlc = getSQLContext();
dep = getDependencyResolver();
hooks = getInterpreterGroup().getInterpreterHookRegistry();
z = new ZeppelinContext(sc, sqlc, null, dep,
z = new ZeppelinContext(sc, sqlc, null, dep, hooks,
Integer.parseInt(getProperty("zeppelin.spark.maxResult")));
interpret("@transient val _binder = new java.util.HashMap[String, Object]()");

View file

@ -24,14 +24,18 @@ import static scala.collection.JavaConversions.collectionAsScalaIterable;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.catalyst.expressions.Attribute;
import org.apache.zeppelin.annotation.ZeppelinApi;
import org.apache.zeppelin.annotation.Experimental;
import org.apache.zeppelin.display.AngularObject;
import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.AngularObjectWatcher;
@ -40,6 +44,7 @@ import org.apache.zeppelin.display.Input.ParamOption;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry;
import org.apache.zeppelin.spark.dep.SparkDependencyResolver;
import org.apache.zeppelin.resource.Resource;
import org.apache.zeppelin.resource.ResourcePool;
@ -52,19 +57,53 @@ import scala.Unit;
* Spark context for zeppelin.
*/
public class ZeppelinContext {
// Maps the interpreter name given as replName in a paragraph to the
// interpreter class name that is passed to the hook registry.
private static final Map<String, String> interpreterClassMap;
static {
// populated once at class-load time; entries are never changed in this block
interpreterClassMap = new HashMap<String, String>();
interpreterClassMap.put("spark", "org.apache.zeppelin.spark.SparkInterpreter");
interpreterClassMap.put("sql", "org.apache.zeppelin.spark.SparkSqlInterpreter");
interpreterClassMap.put("dep", "org.apache.zeppelin.spark.DepInterpreter");
interpreterClassMap.put("pyspark", "org.apache.zeppelin.spark.PySparkInterpreter");
}
private SparkDependencyResolver dep;
private InterpreterContext interpreterContext;
private int maxResult;
private List<Class> supportedClasses;
private InterpreterHookRegistry hooks;
public ZeppelinContext(SparkContext sc, SQLContext sql,
InterpreterContext interpreterContext,
SparkDependencyResolver dep,
InterpreterHookRegistry hooks,
int maxResult) {
this.sc = sc;
this.sqlContext = sql;
this.interpreterContext = interpreterContext;
this.dep = dep;
this.hooks = hooks;
this.maxResult = maxResult;
this.supportedClasses = new ArrayList<>();
try {
supportedClasses.add(this.getClass().forName("org.apache.spark.sql.Dataset"));
} catch (ClassNotFoundException e) {
}
try {
supportedClasses.add(this.getClass().forName("org.apache.spark.sql.DataFrame"));
} catch (ClassNotFoundException e) {
}
try {
supportedClasses.add(this.getClass().forName("org.apache.spark.sql.SchemaRDD"));
} catch (ClassNotFoundException e) {
}
if (supportedClasses.isEmpty()) {
throw new InterpreterException("Can not road Dataset/DataFrame/SchemaRDD class");
}
}
public SparkContext sc;
@ -161,33 +200,8 @@ public class ZeppelinContext {
@ZeppelinApi
public void show(Object o, int maxResult) {
Class cls = null;
try {
cls = this.getClass().forName("org.apache.spark.sql.Dataset");
} catch (ClassNotFoundException e) {
}
if (cls == null) {
try {
cls = this.getClass().forName("org.apache.spark.sql.DataFrame");
} catch (ClassNotFoundException e) {
}
}
if (cls == null) {
try {
cls = this.getClass().forName("org.apache.spark.sql.SchemaRDD");
} catch (ClassNotFoundException e) {
}
}
if (cls == null) {
throw new InterpreterException("Can not road Dataset/DataFrame/SchemaRDD class");
}
try {
if (cls.isInstance(o)) {
if (supportedClasses.contains(o.getClass())) {
interpreterContext.out.write(showDF(sc, interpreterContext, o, maxResult));
} else {
interpreterContext.out.write(o.toString());
@ -210,6 +224,12 @@ public class ZeppelinContext {
sc.setJobGroup(jobGroup, "Zeppelin", false);
try {
// convert it to DataFrame if it is Dataset, as we will iterate all the records
// and assume it is type Row.
if (df.getClass().getCanonicalName().equals("org.apache.spark.sql.Dataset")) {
Method convertToDFMethod = df.getClass().getMethod("toDF");
df = convertToDFMethod.invoke(df);
}
take = df.getClass().getMethod("take", int.class);
rows = (Object[]) take.invoke(df, maxResult + 1);
} catch (NoSuchMethodException | SecurityException | IllegalAccessException
@ -695,6 +715,90 @@ public class ZeppelinContext {
registry.remove(name, noteId, null);
}
/**
 * Resolves the interpreter class name for a name entered in a paragraph.
 *
 * @param replName short interpreter name, optionally containing "spark.",
 *     or an interpreter class name that is already registered in the map
 * @return {@code replName} itself when it is one of the mapped class names,
 *     otherwise the class name mapped to the name with "spark." removed
 *     ({@code null} when there is no mapping)
 */
public String getClassNameFromReplName(String replName) {
  // a registered class name is returned untouched
  if (interpreterClassMap.containsValue(replName)) {
    return replName;
  }
  // replace() is a no-op when "spark." does not occur
  String shortName = replName.replace("spark.", "");
  return interpreterClassMap.get(shortName);
}
/**
 * Registers code in the hook registry for the current note and the
 * interpreter resolved from {@code replName}.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param cmd The code to be executed by the interpreter on given event
 * @param replName Name of the interpreter
 */
@Experimental
public void registerHook(String event, String cmd, String replName) {
  hooks.register(
      interpreterContext.getNoteId(),
      getClassNameFromReplName(replName),
      event,
      cmd);
}
/**
 * Registers code for the current repl by delegating to
 * {@code registerHook(event, cmd, replName)} with the class name taken from
 * the interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param cmd The code to be executed by the interpreter on given event
 */
@Experimental
public void registerHook(String event, String cmd) {
  registerHook(event, cmd, interpreterContext.getClassName());
}
/**
 * Looks up the hook code stored for the current note and the interpreter
 * resolved from {@code replName}.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param replName Name of the interpreter
 * @return whatever the hook registry returns for that note, class and event
 */
@Experimental
public String getHook(String event, String replName) {
  return hooks.get(
      interpreterContext.getNoteId(),
      getClassNameFromReplName(replName),
      event);
}
/**
 * Looks up the hook code for the current repl by delegating to
 * {@code getHook(event, replName)} with the class name taken from the
 * interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @return the value returned by {@code getHook(event, replName)}
 */
@Experimental
public String getHook(String event) {
  return getHook(event, interpreterContext.getClassName());
}
/**
 * Unbinds code from the given hook event for the current note and the
 * interpreter resolved from {@code replName}.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 * @param replName Name of the interpreter
 */
@Experimental
public void unregisterHook(String event, String replName) {
  hooks.unregister(
      interpreterContext.getNoteId(),
      getClassNameFromReplName(replName),
      event);
}
/**
 * Unbinds code from the given hook event for the current repl by delegating
 * to {@code unregisterHook(event, replName)} with the class name taken from
 * the interpreter context.
 *
 * @param event The type of event to hook to (pre_exec, post_exec)
 */
@Experimental
public void unregisterHook(String event) {
  unregisterHook(event, interpreterContext.getClassName());
}
/**
* Add object into resource pool

View file

@ -141,6 +141,9 @@ public class ZeppelinR implements ExecuteResultHandler {
cmd.addArgument(Integer.toString(port));
cmd.addArgument(libPath);
cmd.addArgument(Integer.toString(sparkVersion.toNumber()));
// dump out the R command to facilitate manually running it, e.g. for fault diagnosis purposes
logger.debug(cmd.toString());
executor = new DefaultExecutor();
outputStream = new SparkOutputStream(logger);

View file

@ -80,16 +80,16 @@ class PyZeppelinContext(dict):
def get(self, key):
return self.__getitem__(key)
def input(self, name, defaultValue = ""):
def input(self, name, defaultValue=""):
return self.z.input(name, defaultValue)
def select(self, name, options, defaultValue = ""):
def select(self, name, options, defaultValue=""):
# auto_convert to ArrayList doesn't match the method signature on JVM side
tuples = list(map(lambda items: self.__tupleToScalaTuple2(items), options))
iterables = gateway.jvm.scala.collection.JavaConversions.collectionAsScalaIterable(tuples)
return self.z.select(name, defaultValue, iterables)
def checkbox(self, name, options, defaultChecked = None):
def checkbox(self, name, options, defaultChecked=None):
if defaultChecked is None:
defaultChecked = list(map(lambda items: items[0], options))
optionTuples = list(map(lambda items: self.__tupleToScalaTuple2(items), options))
@ -99,6 +99,23 @@ class PyZeppelinContext(dict):
checkedIterables = self.z.checkbox(name, defaultCheckedIterables, optionIterables)
return gateway.jvm.scala.collection.JavaConversions.asJavaCollection(checkedIterables)
# Register `cmd` to run on `event` by forwarding to the wrapped context self.z.
# With replName left as None the two-argument form is called; otherwise the
# interpreter name is passed along as the third argument.
def registerHook(self, event, cmd, replName=None):
if replName is None:
self.z.registerHook(event, cmd)
else:
self.z.registerHook(event, cmd, replName)
# Remove the code bound to `event` by forwarding to the wrapped context self.z.
# With replName left as None the one-argument form is called; otherwise the
# interpreter name is passed along as the second argument.
def unregisterHook(self, event, replName=None):
if replName is None:
self.z.unregisterHook(event)
else:
self.z.unregisterHook(event, replName)
# Return the result of self.z.getHook for `event`; replName is only passed
# through when the caller supplied one.
def getHook(self, event, replName=None):
if replName is None:
return self.z.getHook(event)
return self.z.getHook(event, replName)
def __tupleToScalaTuple2(self, tuple):
if (len(tuple) == 2):
return gateway.jvm.scala.Tuple2(tuple[0], tuple[1])
@ -219,14 +236,12 @@ jconf = intp.getSparkConf()
conf = SparkConf(_jvm = gateway.jvm, _jconf = jconf)
sc = SparkContext(jsc=jsc, gateway=gateway, conf=conf)
if sparkVersion.isSpark2():
sqlc = SQLContext(sparkContext=sc, jsqlContext=intp.getSQLContext())
spark = SparkSession(sc, intp.getSparkSession())
sqlc = spark._wrapped
else:
sqlc = SQLContext(sparkContext=sc, sqlContext=intp.getSQLContext())
sqlContext = sqlc
if sparkVersion.isSpark2():
spark = SparkSession(sc, intp.getSparkSession())
completion = PySparkCompletion(intp)
z = PyZeppelinContext(intp.getZeppelinContext())

View file

@ -178,20 +178,29 @@ public class SparkInterpreterTest {
@Test
public void testCreateDataFrame() {
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
repl.interpret("people.toDF.count", context);
assertEquals(new Long(4), context.getResourcePool().get(
context.getNoteId(),
context.getParagraphId(),
WellKnownResourceName.ZeppelinReplResult.toString()).get());
if (getSparkVersionNumber() >= 13) {
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
repl.interpret("people.toDF.count", context);
assertEquals(new Long(4), context.getResourcePool().get(
context.getNoteId(),
context.getParagraphId(),
WellKnownResourceName.ZeppelinReplResult.toString()).get());
}
}
@Test
public void testZShow() {
String code = "";
repl.interpret("case class Person(name:String, age:Int)\n", context);
repl.interpret("val people = sc.parallelize(Seq(Person(\"moon\", 33), Person(\"jobs\", 51), Person(\"gates\", 51), Person(\"park\", 34)))\n", context);
assertEquals(Code.SUCCESS, repl.interpret("z.show(people.toDF)", context).code());
if (getSparkVersionNumber() < 13) {
repl.interpret("people.registerTempTable(\"people\")", context);
code = "z.show(sqlc.sql(\"select * from people\"))";
} else {
code = "z.show(people.toDF)";
}
assertEquals(Code.SUCCESS, repl.interpret(code, context).code());
}
@Test
@ -203,14 +212,15 @@ public class SparkInterpreterTest {
if (getSparkVersionNumber() <= 11) { // spark 1.2 or later does not allow create multiple SparkContext in the same jvm by default.
// create new interpreter
Properties p = new Properties();
SparkInterpreter repl2 = new SparkInterpreter(p);
SparkInterpreter repl2 = new SparkInterpreter(getSparkTestProperties());
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
repl2.open();
repl.interpret("case class Man(name:String, age:Int)", context);
repl.interpret("val man = sc.parallelize(Seq(Man(\"moon\", 33), Man(\"jobs\", 51), Man(\"gates\", 51), Man(\"park\", 34)))", context);
assertEquals(Code.SUCCESS, repl.interpret("man.take(3)", context).code());
repl2.getSparkContext().stop();
repl2.interpret("case class Man(name:String, age:Int)", context);
repl2.interpret("val man = sc.parallelize(Seq(Man(\"moon\", 33), Man(\"jobs\", 51), Man(\"gates\", 51), Man(\"park\", 34)))", context);
assertEquals(Code.SUCCESS, repl2.interpret("man.take(3)", context).code());
repl2.close();
}
}
@ -253,33 +263,37 @@ public class SparkInterpreterTest {
@Test
public void testEnableImplicitImport() {
// Set option of importing implicits to "true", and initialize new Spark repl
Properties p = getSparkTestProperties();
p.setProperty("zeppelin.spark.importImplicit", "true");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
if (getSparkVersionNumber() >= 13) {
// Set option of importing implicits to "true", and initialize new Spark repl
Properties p = getSparkTestProperties();
p.setProperty("zeppelin.spark.importImplicit", "true");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
repl2.open();
String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
assertEquals(Code.SUCCESS, repl2.interpret(ddl, context).code());
repl2.close();
repl2.open();
String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
assertEquals(Code.SUCCESS, repl2.interpret(ddl, context).code());
repl2.close();
}
}
@Test
public void testDisableImplicitImport() {
// Set option of importing implicits to "false", and initialize new Spark repl
// this test should return error status when creating DataFrame from sequence
Properties p = getSparkTestProperties();
p.setProperty("zeppelin.spark.importImplicit", "false");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
if (getSparkVersionNumber() >= 13) {
// Set option of importing implicits to "false", and initialize new Spark repl
// this test should return error status when creating DataFrame from sequence
Properties p = getSparkTestProperties();
p.setProperty("zeppelin.spark.importImplicit", "false");
SparkInterpreter repl2 = new SparkInterpreter(p);
repl2.setInterpreterGroup(intpGroup);
intpGroup.get("note").add(repl2);
repl2.open();
String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
assertEquals(Code.ERROR, repl2.interpret(ddl, context).code());
repl2.close();
repl2.open();
String ddl = "val df = Seq((1, true), (2, false)).toDF(\"num\", \"bool\")";
assertEquals(Code.ERROR, repl2.interpret(ddl, context).code());
repl2.close();
}
}
@Test

View file

@ -1,67 +0,0 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts a standalone Spark master and one worker from the Spark distribution
# unpacked under the Zeppelin home directory.
#
# Arguments:
#   $1  spark version  (e.g. 1.3.1)
#   $2  hadoop version (e.g. 2.6)
if [[ "$#" -ne 2 ]]; then
    echo "usage) $0 [spark version] [hadoop version]"
    echo " eg) $0 1.3.1 2.6"
    exit 1
fi

SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"

# Classify the version; dots are escaped so they only match a literal '.'.
if [[ "${SPARK_VERSION}" =~ ^1\.[12]\.[0-9] ]]; then
    SPARK_VER_RANGE="<=1.2"
elif [[ "${SPARK_VERSION}" =~ ^1\.3\.[0-9] ]]; then
    SPARK_VER_RANGE="<=1.3"
else
    SPARK_VER_RANGE=">1.3"
fi

set -xe

FWDIR="$(dirname "${BASH_SOURCE-$0}")"
# '&&' so that a failed cd is not masked by a successful pwd
ZEPPELIN_HOME="$(cd "${FWDIR}/.." && pwd)"

SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"

# create PID dir. test case detect pid file so they can select active spark home dir for test
export SPARK_PID_DIR="${SPARK_HOME}/run"
mkdir -p "${SPARK_PID_DIR}"

# start
export SPARK_MASTER_PORT=7071
export SPARK_MASTER_WEBUI_PORT=7072
export SPARK_WORKER_WEBUI_PORT=8082
"${SPARK_HOME}/sbin/start-master.sh"

if [[ "${SPARK_VER_RANGE}" == "<=1.3" || "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
    # spark 1.3 or prior
    "${SPARK_HOME}/sbin/start-slave.sh" 1 "$(hostname):${SPARK_MASTER_PORT}"
else
    # use the exported port instead of repeating the literal 7071
    "${SPARK_HOME}/sbin/start-slave.sh" "spark://$(hostname):${SPARK_MASTER_PORT}"
fi

set +xe

View file

@ -42,10 +42,12 @@ The following components are provided under Apache License.
(Apache 2.0) Apache Kylin (http://kylin.apache.org/)
(Apache 2.0) Apache Lens (http://lens.apache.org/)
(Apache 2.0) Apache Flink (http://flink.apache.org/)
(Apache 2.0) Apache Beam (http://beam.apache.org/)
(Apache 2.0) Apache Thrift (http://thrift.apache.org/)
(Apache 2.0) Apache Lucene (https://lucene.apache.org/)
(Apache 2.0) Apache Zookeeper (org.apache.zookeeper:zookeeper:jar:3.4.5 - http://zookeeper.apache.org/)
(Apache 2.0) Chill (com.twitter:chill-java:jar:0.8.0 - https://github.com/twitter/chill/)
(Apache 2.0) QDox (com.thoughtworks.qdox:qdox:jar:2.0-M3 - https://github.com/paul-hammant/qdox/)
(Apache 2.0) Codehaus Plexus (org.codehaus.plexus:plexus:jar:1.5.6 - https://codehaus-plexus.github.io/)
(Apache 2.0) findbugs jsr305 (com.google.code.findbugs:jsr305:jar:1.3.9 - http://findbugs.sourceforge.net/)
(Apache 2.0) Google Guava (com.google.guava:guava:15.0 - https://code.google.com/p/guava-libraries/)
@ -114,10 +116,53 @@ The following components are provided under Apache License.
(Apache 2.0) Utility classes for Jetty (org.mortbay.jetty:jetty-util:6.1.26 - http://javadox.com/org.mortbay.jetty/jetty/6.1.26/overview-tree.html)
(Apache 2.0) Servlet API (org.mortbay.jetty:servlet-api:2.5-20081211 - https://en.wikipedia.org/wiki/Jetty_(web_server))
(Apache 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client-jackson2:1.21.0 - https://github.com/google/google-http-java-client/tree/dev/google-http-client-jackson2)
(Apache 2.0) angular-esri-map (https://github.com/Esri/angular-esri-map)
(Apache 2.0) pegdown (org.pegdown:pegdown:1.6.0 - https://github.com/sirthias/pegdown)
(Apache 2.0) parboiled-java (org.parboiled:parboiled-java:1.1.7 - https://github.com/sirthias/parboiled)
(Apache 2.0) parboiled-core (org.parboiled:parboiled-core:1.1.7 - https://github.com/sirthias/parboiled)
(Apache 2.0) ZkClient (com.101tec:zkclient:0.7 - https://github.com/sgroschupf/zkclient)
(Apache 2.0) jackson-module-scala (com.fasterxml.jackson.module:jackson-module-scala_2.10:2.4.4 - http://wiki.fasterxml.com/JacksonModuleScala)
(Apache 2.0) BigQuery API v2-rev295-1.22.0 (com.google.apis:google-api-services-bigquery:v2-rev295-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-bigquery)
(Apache 2.0) Google Cloud Debugger API v2-rev8-1.22.0 (com.google.apis:google-api-services-clouddebugger:v2-rev8-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-clouddebugger)
(Apache 2.0) Google Dataflow API v1b3-rev30-1.22.0 (com.google.apis:google-api-services-dataflow:v1b3-rev30-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-dataflow)
(Apache 2.0) Google Cloud Pub/Sub API v1-rev10-1.22.0 (com.google.apis:google-api-services-pubsub:v1-rev10-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-pubsub)
(Apache 2.0) Cloud Storage JSON API v1-rev71-1.22.0 (com.google.apis:google-api-services-storage:v1-rev71-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-storage)
(Apache 2.0) gcsio.jar (com.google.cloud.bigdataoss:gcsio:1.4.5 - https://github.com/GoogleCloudPlatform/BigData-interop/gcsio/)
(Apache 2.0) util (com.google.cloud.bigdataoss:util:1.4.5 - https://github.com/GoogleCloudPlatform/BigData-interop/util/)
(Apache 2.0) Google Guice - Core Library (com.google.inject:guice:3.0 - http://code.google.com/p/google-guice/guice/)
(Apache 2.0) OkHttp (com.squareup.okhttp:okhttp:2.5.0 - https://github.com/square/okhttp/okhttp)
(Apache 2.0) Okio (com.squareup.okio:okio:1.6.0 - https://github.com/square/okio/okio)
(Apache 2.0) config (com.typesafe:config:1.2.1 - https://github.com/typesafehub/config)
(Apache 2.0) akka-actor (com.typesafe.akka:akka-actor_2.10:2.3.7 - http://akka.io/)
(Apache 2.0) akka-remote (com.typesafe.akka:akka-remote_2.10:2.3.7 - http://akka.io/)
(Apache 2.0) akka-slf4j (com.typesafe.akka:akka-slf4j_2.10:2.3.7 - http://akka.io/)
(Apache 2.0) Metrics Core Library (com.yammer.metrics:metrics-core:2.2.0 - http://metrics.codahale.com/metrics-core/)
(Apache 2.0) Commons BeanUtils Bean Collections (commons-beanutils:commons-beanutils-bean-collections:1.8.3 - http://commons.apache.org/beanutils/)
(Apache 2.0) Metrics Core (io.dropwizard.metrics:metrics-core:3.1.0 - http://metrics.codahale.com/metrics-core/)
(Apache 2.0) Graphite Integration for Metrics (io.dropwizard.metrics:metrics-graphite:3.1.0 - http://metrics.codahale.com/metrics-graphite/)
(Apache 2.0) Jackson Integration for Metrics (io.dropwizard.metrics:metrics-json:3.1.0 - http://metrics.codahale.com/metrics-json/)
(Apache 2.0) JVM Integration for Metrics (io.dropwizard.metrics:metrics-jvm:3.1.0 - http://metrics.codahale.com/metrics-jvm/)
(Apache 2.0) Apache Log4j (log4j:log4j:1.2.17 - http://logging.apache.org/log4j/1.2/)
(Apache 2.0) Apache Avro IPC (org.apache.avro:avro-ipc:1.8.1 - http://avro.apache.org)
(Apache 2.0) Apache Avro Mapred API (org.apache.avro:avro-mapred:1.8.1 - http://avro.apache.org/avro-mapred)
(Apache 2.0) Apache Ivy (org.apache.ivy:ivy:2.4.0 - http://ant.apache.org/ivy/)
(Apache 2.0) Apache Kafka (org.apache.kafka:kafka-clients:0.8.2.2 - http://kafka.apache.org)
(Apache 2.0) Apache Kafka (org.apache.kafka:kafka_2.10:0.8.2.2 - http://kafka.apache.org)
(Apache 2.0) mesos (org.apache.mesos:mesos:0.21.1 - http://mesos.apache.org)
(Apache 2.0) Apache Sling JSON Library (org.apache.sling:org.apache.sling.commons.json:2.0.6 - http://sling.apache.org/org.apache.sling.commons.json)
(Apache 2.0) Apache Velocity (org.apache.velocity:velocity:1.7 - http://velocity.apache.org/engine/devel/)
(Apache 2.0) jasper-compiler (tomcat:jasper-compiler:5.5.23 - http://tomcat.apache.org/jasper-compiler)
(Apache 2.0) jasper-runtime (tomcat:jasper-runtime:5.5.23 - http://tomcat.apache.org/jasper-runtime)
(Apache 2.0) Tachyon Project Core (org.tachyonproject:tachyon:0.6.4 - http://tachyonproject.org/tachyon/)
(Apache 2.0) Tachyon Project Client (org.tachyonproject:tachyon-client:0.6.4 - http://tachyonproject.org/tachyon-client/)
(Apache 2.0) javax.inject (javax.inject:javax.inject:1 - http://code.google.com/p/atinject/)
(Apache 2.0) Apache Pig (org.apache.pig:pig:0.16.0 - http://pig.apache.org)
(Apache 2.0) tez-api (org.apache.tez:tez-api:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-common (org.apache.tez:tez-common:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-dag (org.apache.tez:tez-dag:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-runtime-library (org.apache.tez:tez-runtime-library:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-runtime-internals (org.apache.tez:tez-runtime-internals:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-mapreduce (org.apache.tez:tez-mapreduce:0.7.0 - http://tez.apache.org)
(Apache 2.0) tez-yarn-timeline-history-with-acls (org.apache.tez:tez-yarn-timeline-history-with-acls:0.7.0 - http://tez.apache.org)
========================================================================
MIT licenses
@ -155,7 +200,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(The MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs) - https://github.com/bryanbraun/anchorjs/blob/master/README.md#license
(The MIT License) moment-duration-format v1.3.0 (https://github.com/jsmreese/moment-duration-format) - https://github.com/jsmreese/moment-duration-format/blob/master/LICENSE
(The MIT License) github-markdown-css 2.4.0 (https://github.com/sindresorhus/github-markdown-css) - https://github.com/sindresorhus/github-markdown-css/blob/gh-pages/license
(The MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
The following components are provided under the MIT License.
(The MIT License) Objenesis (org.objenesis:objenesis:2.1 - https://github.com/easymock/objenesis) - Copyright (c) 2006-2015 the original author and authors
@ -180,9 +225,22 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(BSD 3 Clause) highlightjs v9.4.0 (https://highlightjs.org/) - https://github.com/isagalaev/highlight.js/blob/9.4.0/LICENSE
(BSD 3 Clause) hamcrest v1.3 (http://hamcrest.org/JavaHamcrest/) - http://opensource.org/licenses/BSD-3-Clause
(BSD Style) JLine v2.12.1 (https://github.com/jline/jline2) - https://github.com/jline/jline2/blob/master/LICENSE.txt
(BSD New license) Google Auth Library for Java - Credentials (com.google.auth:google-auth-library-credentials:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-credentials)
(BSD New license) Google Auth Library for Java - OAuth2 HTTP (com.google.auth:google-auth-library-oauth2-http:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-oauth2-http)
(New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java-util:3.0.0-beta-2 - https://developers.google.com/protocol-buffers/)
(New BSD license) Protocol Buffer JavaNano API (com.google.protobuf.nano:protobuf-javanano:3.0.0-alpha-5 - https://developers.google.com/protocol-buffers/)
(BSD) JSch (com.jcraft:jsch:0.1.42 - http://www.jcraft.com/jsch/)
(BSD 3-Clause) io.grpc:grpc-all (io.grpc:grpc-all:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-auth (io.grpc:grpc-auth:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-core (io.grpc:grpc-core:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-netty (io.grpc:grpc-netty:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-okhttp (io.grpc:grpc-okhttp:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-protobuf (io.grpc:grpc-protobuf:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-protobuf-lite (io.grpc:grpc-protobuf-lite:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-protobuf-nano (io.grpc:grpc-protobuf-nano:0.14.1 - https://github.com/grpc/grpc-java)
(BSD 3-Clause) io.grpc:grpc-stub (io.grpc:grpc-stub:0.14.1 - https://github.com/grpc/grpc-java)
The following components are provided under the BSD-style License.
(New BSD License) JGit (org.eclipse.jgit:org.eclipse.jgit:jar:4.1.1.201511131810-r - https://eclipse.org/jgit/)
@ -201,7 +259,8 @@ The following components are provided under the BSD-style License.
(BSD-like) ASM asm-utils (org.ow2.asm:asm-utils:5.0.3 - http://asm.ow2.org/) - Copyright (c) 2000-2011 INRIA, France Telecom
(New BSD License) Markdown4j (org.commonjava.googlecode.markdown4j:markdown4j:jar:2.2-cj-1.0 - https://code.google.com/p/markdown4j/)
(New BSD License) Py4J (net.sf.py4j:py4j:0.9 - http://py4j.sourceforge.net/)
(New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/) - https://github.com/bartdag/py4j/blob/0.10.1/LICENSE.txt
(New BSD License) Py4J (net.sf.py4j:py4j:0.10.3 - http://py4j.sourceforge.net/) - https://github.com/bartdag/py4j/blob/0.10.3/LICENSE.txt
(New BSD License) Markdown4j (org.commonjava.googlecode.markdown4j:markdown4j:jar:2.2-cj-1.0 - https://code.google.com/p/markdown4j/)
(BSD 3 Clause) Paranamer (com.thoughtworks.paranamer:paranamer:jar:2.6) - https://github.com/paul-hammant/paranamer/blob/paranamer-parent-2.6/LICENSE.txt
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
@ -225,8 +284,15 @@ The following components are provided under the CDDL License.
(CDDL 1.1) Jersey (com.sun.jersey:jersey:jar:1.9 - https://jersey.java.net/)
(CDDL 1.1) jersey-core (org.glassfish.jersey.core:jersey-core:2.22.2 - https://jersey.java.net/)
(CDDL 1.1) hk2 (org.glassfish.hk2 - https://hk2.java.net/2.5.0-b03/)
(CDDL 1.1) jersey-core (com.sun.jersey:jersey-core:1.9 - https://jersey.java.net/jersey-core/)
(CDDL 1.1) jersey-json (com.sun.jersey:jersey-json:1.9 - https://jersey.java.net/jersey-json/)
(CDDL 1.1) jersey-server (com.sun.jersey:jersey-server:1.9 - https://jersey.java.net/jersey-server/)
(CDDL 1.1) jersey-guice (com.sun.jersey.contribs:jersey-guice:1.9 - https://jersey.java.net/jersey-contribs/jersey-guice/)
(CDDL 1.1) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/)
(CDDL 1.0) Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net)
(CDDL 1.1) (GPL2 w/ CPE) JAXB API bundle for GlassFish V3 (javax.xml.bind:jaxb-api:2.2.2 - https://jaxb.dev.java.net/)
(CDDL 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined)
========================================================================
EPL license
@ -271,3 +337,5 @@ Creative Commons CC0 (http://creativecommons.org/publicdomain/zero/1.0/)
(CC0 1.0 Universal) JSR166e (com.twitter:jsr166e:1.1.0 - http://github.com/twitter/jsr166e)
(Public Domain, per Creative Commons CC0) HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.6 - http://hdrhistogram.github.io/HdrHistogram/)
(Public Domain) XZ for Java (org.tukaani:xz:1.0 - http://tukaani.org/xz/java.html)
(Public Domain) AOP alliance (aopalliance:aopalliance:1.0 - http://aopalliance.sourceforge.net)

View file

@ -72,7 +72,7 @@ public abstract class AbstractDependencyResolver {
}
}
public void addRepo(String id, String url, boolean snapshot, Authentication auth) {
public void addRepo(String id, String url, boolean snapshot, Authentication auth, Proxy proxy) {
synchronized (repos) {
delRepo(id);
RemoteRepository rr = new RemoteRepository(id, "default", url);
@ -81,6 +81,7 @@ public abstract class AbstractDependencyResolver {
RepositoryPolicy.UPDATE_POLICY_DAILY,
RepositoryPolicy.CHECKSUM_POLICY_WARN));
rr.setAuthentication(auth);
rr.setProxy(proxy);
repos.add(rr);
}
}

View file

@ -33,6 +33,7 @@ import org.slf4j.LoggerFactory;
import org.sonatype.aether.RepositoryException;
import org.sonatype.aether.artifact.Artifact;
import org.sonatype.aether.collection.CollectRequest;
import org.sonatype.aether.collection.DependencyCollectionException;
import org.sonatype.aether.graph.Dependency;
import org.sonatype.aether.graph.DependencyFilter;
import org.sonatype.aether.repository.RemoteRepository;
@ -42,6 +43,7 @@ import org.sonatype.aether.util.artifact.DefaultArtifact;
import org.sonatype.aether.util.artifact.JavaScopes;
import org.sonatype.aether.util.filter.DependencyFilterUtils;
import org.sonatype.aether.util.filter.PatternExclusionsDependencyFilter;
import org.sonatype.aether.util.graph.DefaultDependencyNode;
/**
@ -104,6 +106,21 @@ public class DependencyResolver extends AbstractDependencyResolver {
return libs;
}
/**
 * Copies a local dependency file into the given destination directory.
 *
 * <p>Nothing happens when {@code srcPath} is blank. The file is copied only
 * when no file with the same name exists in {@code destPath} yet, or when the
 * existing file's content differs from the source.
 *
 * @param srcPath path of the local file to copy; blank values are ignored
 * @param destPath directory that receives the copy (the source file name is kept)
 * @throws IOException if comparing or copying the files fails
 */
public synchronized void copyLocalDependency(String srcPath, File destPath)
    throws IOException {
  if (!StringUtils.isBlank(srcPath)) {
    File source = new File(srcPath);
    File target = new File(destPath, source.getName());

    // Content is compared only when the target already exists.
    boolean alreadyCopied = target.exists() && FileUtils.contentEquals(source, target);
    if (alreadyCopied) {
      return;
    }

    FileUtils.copyFile(source, target);
    logger.info("copy {} to {}", source.getAbsolutePath(), destPath);
  }
}
private List<File> loadFromMvn(String artifact, Collection<String> excludes)
throws RepositoryException {
Collection<String> allExclusions = new LinkedList<String>();
@ -142,11 +159,11 @@ public class DependencyResolver extends AbstractDependencyResolver {
*/
@Override
public List<ArtifactResult> getArtifactsWithDep(String dependency,
Collection<String> excludes) throws RepositoryException {
Collection<String> excludes) throws RepositoryException {
Artifact artifact = new DefaultArtifact(dependency);
DependencyFilter classpathFilter = DependencyFilterUtils.classpathFilter(JavaScopes.COMPILE);
PatternExclusionsDependencyFilter exclusionFilter =
new PatternExclusionsDependencyFilter(excludes);
new PatternExclusionsDependencyFilter(excludes);
CollectRequest collectRequest = new CollectRequest();
collectRequest.setRoot(new Dependency(artifact, JavaScopes.COMPILE));
@ -157,7 +174,11 @@ public class DependencyResolver extends AbstractDependencyResolver {
}
}
DependencyRequest dependencyRequest = new DependencyRequest(collectRequest,
DependencyFilterUtils.andFilter(exclusionFilter, classpathFilter));
return system.resolveDependencies(session, dependencyRequest).getArtifactResults();
DependencyFilterUtils.andFilter(exclusionFilter, classpathFilter));
try {
return system.resolveDependencies(session, dependencyRequest).getArtifactResults();
} catch (NullPointerException ex) {
throw new RepositoryException(String.format("Cannot fetch dependencies for %s", dependency));
}
}
}

Some files were not shown because too many files have changed in this diff Show more