mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
Incorporated feedback
This commit is contained in:
parent
764385c9bf
commit
569757f6fd
7 changed files with 115 additions and 35 deletions
93
bigquery/README.md
Normal file
93
bigquery/README.md
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# Overview
|
||||
BigQuery interpreter for Apache Zeppelin
|
||||
|
||||
# Prerequisites
|
||||
You can follow the instructions at [Apache Zeppelin on Dataproc](https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/apache-zeppelin/README.MD) to bring up Zeppelin on Google Cloud Dataproc.
|
||||
Alternatively, you can install and run Zeppelin on Google Compute Engine.
|
||||
|
||||
# Interpreter Configuration
|
||||
|
||||
Configure the following properties during Interpreter creation.
|
||||
|
||||
<table class="table-configuration">
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Default Value</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>zeppelin.bigquery.project_id</td>
|
||||
<td> </td>
|
||||
<td>Google Project Id</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>zeppelin.bigquery.wait_time</td>
|
||||
<td>5000</td>
|
||||
<td>Query Timeout in Milliseconds</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>zeppelin.bigquery.max_no_of_rows</td>
|
||||
<td>100000</td>
|
||||
<td>Maximum number of rows in the result set</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
# Connection
|
||||
The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables.
|
||||
|
||||
# Google BigQuery API Javadoc
|
||||
[API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
|
||||
|
||||
# Enabling the BigQuery Interpreter
|
||||
|
||||
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
|
||||
|
||||
# Using the BigQuery Interpreter
|
||||
|
||||
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
|
||||
Refer to the [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) when writing your own queries.
|
||||
|
||||
For example, the following query finds the 10 airports with the most departure delays, using the public flights dataset:
|
||||
|
||||
```sql
|
||||
%bigquery.sql
|
||||
SELECT departure_airport,sum(case when departure_delay>0 then 1 else 0 end) as no_of_delays
|
||||
FROM [bigquery-samples:airline_ontime_data.flights]
|
||||
group by departure_airport
|
||||
order by 2 desc
|
||||
limit 10
|
||||
```
|
||||
|
||||
As another example, the following query finds the most commonly used Java packages in the GitHub data hosted in BigQuery:
|
||||
|
||||
```sql
|
||||
%bigquery.sql
|
||||
SELECT
|
||||
package,
|
||||
COUNT(*) count
|
||||
FROM (
|
||||
SELECT
|
||||
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
|
||||
id
|
||||
FROM (
|
||||
SELECT
|
||||
SPLIT(content, '\n') line,
|
||||
id
|
||||
FROM
|
||||
[bigquery-public-data:github_repos.sample_contents]
|
||||
WHERE
|
||||
content CONTAINS 'import'
|
||||
AND sample_path LIKE '%.java'
|
||||
HAVING
|
||||
LEFT(line, 6)='import' )
|
||||
GROUP BY
|
||||
package,
|
||||
id )
|
||||
GROUP BY
|
||||
1
|
||||
ORDER BY
|
||||
count DESC
|
||||
LIMIT
|
||||
40
|
||||
```
|
||||
|
||||
|
|
@ -84,12 +84,6 @@
|
|||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>jline</groupId>
|
||||
<artifactId>jline</artifactId>
|
||||
<version>2.12.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
|
|
@ -178,7 +172,7 @@
|
|||
<archive>
|
||||
<manifest>
|
||||
<mainClass>
|
||||
org.apache.zeppelin.bigquery.bigQueryInterpreter
|
||||
org.apache.zeppelin.bigquery.BigQueryInterpreter
|
||||
</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
|
|
|
|||
|
|
@ -97,9 +97,9 @@ import java.util.NoSuchElementException;
|
|||
*/
|
||||
|
||||
|
||||
public class bigQueryInterpreter extends Interpreter {
|
||||
public class BigQueryInterpreter extends Interpreter {
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(bigQueryInterpreter.class);
|
||||
private Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class);
|
||||
private static final char NEWLINE = '\n';
|
||||
private static final char TAB = '\t';
|
||||
private static Bigquery service = null;
|
||||
|
|
@ -123,7 +123,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
}
|
||||
};
|
||||
|
||||
public bigQueryInterpreter(Properties property) {
|
||||
public BigQueryInterpreter(Properties property) {
|
||||
super(property);
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
service = createAuthorizedClient();
|
||||
exceptionOnConnect = null;
|
||||
logger.info("Opened BigQuery SQL Connection");
|
||||
} catch (IOException e) {
|
||||
} catch (IOException e) {
|
||||
logger.error("Cannot open connection", e);
|
||||
exceptionOnConnect = e;
|
||||
close();
|
||||
|
|
@ -181,8 +181,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
msg.append(NEWLINE);
|
||||
}
|
||||
return msg.toString();
|
||||
}
|
||||
catch ( NullPointerException ex ) {
|
||||
} catch ( NullPointerException ex ) {
|
||||
throw new NullPointerException("SQL Execution returned an error!");
|
||||
}
|
||||
}
|
||||
|
|
@ -221,14 +220,11 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
T response = request.execute();
|
||||
if (response.containsKey("pageToken")) {
|
||||
request = request.set("pageToken", response.get("pageToken"));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
hasNext = false;
|
||||
}
|
||||
return response;
|
||||
}
|
||||
catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -255,8 +251,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
finalmessage.append(printRows(pages.next()));
|
||||
}
|
||||
return new InterpreterResult(Code.SUCCESS, finalmessage.toString());
|
||||
}
|
||||
catch ( NullPointerException ex ) {
|
||||
} catch ( NullPointerException ex ) {
|
||||
return new InterpreterResult(Code.ERROR, ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
|
@ -275,9 +270,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
projectId,
|
||||
jobId);
|
||||
return getPages(getRequest);
|
||||
}
|
||||
catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException ex) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -299,7 +292,7 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
@Override
|
||||
public Scheduler getScheduler() {
|
||||
return SchedulerFactory.singleton().createOrGetFIFOScheduler(
|
||||
bigQueryInterpreter.class.getName() + this.hashCode());
|
||||
BigQueryInterpreter.class.getName() + this.hashCode());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -323,12 +316,10 @@ public class bigQueryInterpreter extends Interpreter {
|
|||
JobCancelResponse response = request.execute();
|
||||
jobId = null;
|
||||
logger.info("Query Execution cancelled");
|
||||
}
|
||||
catch (IOException ex) {
|
||||
} catch (IOException ex) {
|
||||
logger.error("Could not cancel the SQL execution");
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
logger.info("Query Execution was already cancelled");
|
||||
}
|
||||
}
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
{
|
||||
"group": "bigquery",
|
||||
"name": "sql",
|
||||
"className": "org.apache.zeppelin.bigquery.bigQueryInterpreter",
|
||||
"className": "org.apache.zeppelin.bigquery.BigQueryInterpreter",
|
||||
"properties": {
|
||||
"zeppelin.bigquery.project_id": {
|
||||
"envName": null,
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@ import org.apache.zeppelin.user.AuthenticationInfo;
|
|||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.cloud.bigquery.testing.RemoteBigQueryHelper;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonIOException;
|
||||
import com.google.gson.JsonSyntaxException;
|
||||
|
|
@ -46,7 +48,7 @@ import java.io.FileNotFoundException;
|
|||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
public class bigQueryInterpreterTest {
|
||||
public class BigQueryInterpreterTest {
|
||||
|
||||
protected static class Constants {
|
||||
private String projectId;
|
||||
|
|
@ -70,7 +72,7 @@ public class bigQueryInterpreterTest {
|
|||
@SuppressWarnings("checkstyle:abbreviationaswordinname")
|
||||
protected static Constants CONSTANTS = null;
|
||||
|
||||
public bigQueryInterpreterTest()
|
||||
public BigQueryInterpreterTest()
|
||||
throws JsonSyntaxException, JsonIOException, FileNotFoundException {
|
||||
if (CONSTANTS == null) {
|
||||
InputStream is = this.getClass().getResourceAsStream("/constants.json");
|
||||
|
|
@ -79,7 +81,7 @@ public class bigQueryInterpreterTest {
|
|||
}
|
||||
|
||||
private InterpreterGroup intpGroup;
|
||||
private bigQueryInterpreter bqInterpreter;
|
||||
private BigQueryInterpreter bqInterpreter;
|
||||
|
||||
private InterpreterContext context;
|
||||
|
||||
|
|
@ -92,7 +94,7 @@ public class bigQueryInterpreterTest {
|
|||
|
||||
intpGroup = new InterpreterGroup();
|
||||
|
||||
bqInterpreter = new bigQueryInterpreter(p);
|
||||
bqInterpreter = new BigQueryInterpreter(p);
|
||||
bqInterpreter.setInterpreterGroup(intpGroup);
|
||||
bqInterpreter.open();
|
||||
|
||||
|
|
@ -48,6 +48,7 @@
|
|||
<li role="separator" class="divider"></li>
|
||||
<li class="title"><span><b>Available Interpreters</b></span></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
|
||||
|
|
@ -66,7 +67,6 @@
|
|||
<li><a href="{{BASE_PATH}}/interpreter/scalding.html">Scalding</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/shell.html">Shell</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
|
||||
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ group: interpreter
|
|||
|
||||
|
||||
## BigQuery API
|
||||
Zeppelin is built against BigQuery API version v2-rev265-1.21.0.
|
||||
Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
|
||||
|
||||
## Enabling the BigQuery Interpreter
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue