Incorporated feedback

This commit is contained in:
Babu Prasad Elumalai 2016-07-15 21:36:10 +00:00
parent 764385c9bf
commit 569757f6fd
7 changed files with 115 additions and 35 deletions

93
bigquery/README.md Normal file
View file

@ -0,0 +1,93 @@
# Overview
BigQuery interpreter for Apache Zeppelin
# Prerequisites
You can follow the instructions at [Apache Zeppelin on Dataproc](https://github.com/GoogleCloudPlatform/dataproc-initialization-actions/blob/master/apache-zeppelin/README.MD) to bring up Zeppelin on Google Cloud Dataproc.
You could also install and bring up Zeppelin on Google Compute Engine.
# Interpreter Configuration
Configure the following properties during Interpreter creation.
<table class="table-configuration">
<tr>
<th>Name</th>
<th>Default Value</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.bigquery.project_id</td>
<td> </td>
<td>Google Project Id</td>
</tr>
<tr>
<td>zeppelin.bigquery.wait_time</td>
<td>5000</td>
<td>Query Timeout in Milliseconds</td>
</tr>
<tr>
<td>zeppelin.bigquery.max_no_of_rows</td>
<td>100000</td>
<td>Max result set size</td>
</tr>
</table>
# Connection
The Interpreter opens a connection with the BigQuery Service using the supplied Google project ID and the compute environment variables.
# Google BigQuery API Javadoc
[API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
# Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
# Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For example, the following SQL finds the 10 airports with the most departure delays, using the public flights dataset:
```bash
%bigquery.sql
# Top 10 departure airports ranked by number of delayed departures.
# SUM is used rather than COUNT: COUNT counts every non-NULL value, so the
# "ELSE 0" rows would be counted as delays too.
SELECT departure_airport, SUM(CASE WHEN departure_delay > 0 THEN 1 ELSE 0 END) AS no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
GROUP BY departure_airport
ORDER BY no_of_delays DESC
LIMIT 10
```
Another example: the following SQL finds the most commonly used Java packages in the GitHub data hosted in BigQuery:
```bash
%bigquery.sql
# The 40 most frequently imported Java packages in the public GitHub sample dataset.
SELECT
  package,
  COUNT(*) AS count
FROM (
  # Extract the package part of each import line; grouping by (package, id)
  # leaves one row per package per content id.
  SELECT
    REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') AS package,
    id
  FROM (
    # Split the content of .java files into lines and keep those starting with 'import'.
    SELECT
      SPLIT(content, '\n') AS line,
      id
    FROM
      [bigquery-public-data:github_repos.sample_contents]
    WHERE
      content CONTAINS 'import'
      AND sample_path LIKE '%.java'
    HAVING
      LEFT(line, 6)='import' )
  GROUP BY
    package,
    id )
GROUP BY
  1
ORDER BY
  count DESC
LIMIT
  40
```

View file

@ -84,12 +84,6 @@
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>jline</groupId>
<artifactId>jline</artifactId>
<version>2.12.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
@ -178,7 +172,7 @@
<archive>
<manifest>
<mainClass>
org.apache.zeppelin.bigquery.bigQueryInterpreter
org.apache.zeppelin.bigquery.BigQueryInterpreter
</mainClass>
</manifest>
</archive>

View file

@ -97,9 +97,9 @@ import java.util.NoSuchElementException;
*/
public class bigQueryInterpreter extends Interpreter {
public class BigQueryInterpreter extends Interpreter {
private Logger logger = LoggerFactory.getLogger(bigQueryInterpreter.class);
private Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class);
private static final char NEWLINE = '\n';
private static final char TAB = '\t';
private static Bigquery service = null;
@ -123,7 +123,7 @@ public class bigQueryInterpreter extends Interpreter {
}
};
public bigQueryInterpreter(Properties property) {
public BigQueryInterpreter(Properties property) {
super(property);
}
@ -138,7 +138,7 @@ public class bigQueryInterpreter extends Interpreter {
service = createAuthorizedClient();
exceptionOnConnect = null;
logger.info("Opened BigQuery SQL Connection");
} catch (IOException e) {
} catch (IOException e) {
logger.error("Cannot open connection", e);
exceptionOnConnect = e;
close();
@ -181,8 +181,7 @@ public class bigQueryInterpreter extends Interpreter {
msg.append(NEWLINE);
}
return msg.toString();
}
catch ( NullPointerException ex ) {
} catch ( NullPointerException ex ) {
throw new NullPointerException("SQL Execution returned an error!");
}
}
@ -221,14 +220,11 @@ public class bigQueryInterpreter extends Interpreter {
T response = request.execute();
if (response.containsKey("pageToken")) {
request = request.set("pageToken", response.get("pageToken"));
}
else {
} else {
hasNext = false;
}
return response;
}
catch (IOException e) {
e.printStackTrace();
} catch (IOException e) {
return null;
}
}
@ -255,8 +251,7 @@ public class bigQueryInterpreter extends Interpreter {
finalmessage.append(printRows(pages.next()));
}
return new InterpreterResult(Code.SUCCESS, finalmessage.toString());
}
catch ( NullPointerException ex ) {
} catch ( NullPointerException ex ) {
return new InterpreterResult(Code.ERROR, ex.getMessage());
}
}
@ -275,9 +270,7 @@ public class bigQueryInterpreter extends Interpreter {
projectId,
jobId);
return getPages(getRequest);
}
catch (IOException e) {
e.printStackTrace();
} catch (IOException ex) {
return null;
}
}
@ -299,7 +292,7 @@ public class bigQueryInterpreter extends Interpreter {
@Override
public Scheduler getScheduler() {
return SchedulerFactory.singleton().createOrGetFIFOScheduler(
bigQueryInterpreter.class.getName() + this.hashCode());
BigQueryInterpreter.class.getName() + this.hashCode());
}
@Override
@ -323,12 +316,10 @@ public class bigQueryInterpreter extends Interpreter {
JobCancelResponse response = request.execute();
jobId = null;
logger.info("Query Execution cancelled");
}
catch (IOException ex) {
} catch (IOException ex) {
logger.error("Could not cancel the SQL execution");
}
}
else {
} else {
logger.info("Query Execution was already cancelled");
}
}

View file

@ -2,7 +2,7 @@
{
"group": "bigquery",
"name": "sql",
"className": "org.apache.zeppelin.bigquery.bigQueryInterpreter",
"className": "org.apache.zeppelin.bigquery.BigQueryInterpreter",
"properties": {
"zeppelin.bigquery.project_id": {
"envName": null,

View file

@ -38,6 +38,8 @@ import org.apache.zeppelin.user.AuthenticationInfo;
import org.junit.Before;
import org.junit.Test;
import com.google.cloud.bigquery.testing.RemoteBigQueryHelper;
import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
@ -46,7 +48,7 @@ import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
public class bigQueryInterpreterTest {
public class BigQueryInterpreterTest {
protected static class Constants {
private String projectId;
@ -70,7 +72,7 @@ public class bigQueryInterpreterTest {
@SuppressWarnings("checkstyle:abbreviationaswordinname")
protected static Constants CONSTANTS = null;
public bigQueryInterpreterTest()
public BigQueryInterpreterTest()
throws JsonSyntaxException, JsonIOException, FileNotFoundException {
if (CONSTANTS == null) {
InputStream is = this.getClass().getResourceAsStream("/constants.json");
@ -79,7 +81,7 @@ public class bigQueryInterpreterTest {
}
private InterpreterGroup intpGroup;
private bigQueryInterpreter bqInterpreter;
private BigQueryInterpreter bqInterpreter;
private InterpreterContext context;
@ -92,7 +94,7 @@ public class bigQueryInterpreterTest {
intpGroup = new InterpreterGroup();
bqInterpreter = new bigQueryInterpreter(p);
bqInterpreter = new BigQueryInterpreter(p);
bqInterpreter.setInterpreterGroup(intpGroup);
bqInterpreter.open();

View file

@ -48,6 +48,7 @@
<li role="separator" class="divider"></li>
<li class="title"><span><b>Available Interpreters</b></span></li>
<li><a href="{{BASE_PATH}}/interpreter/alluxio.html">Alluxio</a></li>
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
<li><a href="{{BASE_PATH}}/interpreter/cassandra.html">Cassandra</a></li>
<li><a href="{{BASE_PATH}}/interpreter/elasticsearch.html">Elasticsearch</a></li>
<li><a href="{{BASE_PATH}}/interpreter/flink.html">Flink</a></li>
@ -66,7 +67,6 @@
<li><a href="{{BASE_PATH}}/interpreter/scalding.html">Scalding</a></li>
<li><a href="{{BASE_PATH}}/interpreter/shell.html">Shell</a></li>
<li><a href="{{BASE_PATH}}/interpreter/spark.html">Spark</a></li>
<li><a href="{{BASE_PATH}}/interpreter/bigquery.html">BigQuery</a></li>
</ul>
</li>
<li>

View file

@ -38,7 +38,7 @@ group: interpreter
## BigQuery API
Zeppelin is built against BigQuery API version v2-rev265-1.21.0.
Zeppelin is built against BigQuery API version v2-rev265-1.21.0 - [API Javadocs](https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/)
## Enabling the BigQuery Interpreter