ZEPPELIN-1153 comments committed

This commit is contained in:
Babu Prasad Elumalai 2016-07-15 03:00:38 +00:00
parent 8fa647b975
commit 17f6d893c7
4 changed files with 219 additions and 0 deletions

View file

@ -0,0 +1,5 @@
{
"projectId": "google.com:babupe-df-test",
"oneQuery": "select 1",
"wrongQuery": "select bad syntax"
}

View file

@ -0,0 +1,27 @@
[
{
"group": "bigquery",
"name": "sql",
"className": "org.apache.zeppelin.bigquery.bigQueryInterpreter",
"properties": {
"zeppelin.bigquery.project_id": {
"envName": null,
"propertyName": "zeppelin.bigquery.project_id",
"defaultValue": " ",
"description": "Google Project ID"
},
"zeppelin.bigquery.wait_time": {
"envName": null,
"propertyName": "zeppelin.bigquery.wait_time",
"defaultValue": "5000",
"description": "Query timeout in Milliseconds"
},
"zeppelin.bigquery.max_no_of_rows": {
"envName": null,
"propertyName": "zeppelin.bigquery.max_no_of_rows",
"defaultValue": "100000",
"description": "Maximum number of rows to fetch from BigQuery"
}
}
}
]

View file

@ -0,0 +1,117 @@
/**
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.bigquery;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Properties;

import com.google.gson.Gson;
import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;

import org.apache.zeppelin.display.AngularObjectRegistry;
import org.apache.zeppelin.display.GUI;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterOutput;
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
import org.apache.zeppelin.user.AuthenticationInfo;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@code bigQueryInterpreter}.
 *
 * <p>The project id and the queries under test are read from the {@code /constants.json}
 * classpath resource. Each test runs against an interpreter that {@link #setUp()} has opened
 * with that project id; this presumably requires Google Cloud credentials and network access
 * (not verifiable from this file).
 */
public class bigQueryInterpreterTest {

  /** Fixture values that Gson populates from the keys of {@code /constants.json}. */
  protected static class Constants {
    private String projectId;
    private String oneQuery;
    private String wrongQuery;

    /** @return the value of the {@code projectId} key */
    public String getProjectId() {
      return projectId;
    }

    /** @return the value of the {@code oneQuery} key (the query expected to succeed) */
    public String getOne() {
      return oneQuery;
    }

    /** @return the value of the {@code wrongQuery} key (the query expected to fail) */
    public String getWrong() {
      return wrongQuery;
    }
  }

  /** Shared fixture, loaded once by the first test instance that gets constructed. */
  @SuppressWarnings("checkstyle:abbreviationaswordinname")
  protected static Constants CONSTANTS = null;

  /**
   * Loads {@link #CONSTANTS} from {@code /constants.json} if it has not been loaded yet.
   *
   * @throws JsonSyntaxException if the resource is not valid JSON for {@link Constants}
   * @throws JsonIOException if the resource cannot be read or closed
   * @throws FileNotFoundException if the resource is not on the classpath
   */
  public bigQueryInterpreterTest()
      throws JsonSyntaxException, JsonIOException, FileNotFoundException {
    if (CONSTANTS == null) {
      CONSTANTS = loadConstants();
    }
  }

  /** Reads and deserializes the {@code /constants.json} classpath resource. */
  private static Constants loadConstants() throws FileNotFoundException {
    InputStream is = bigQueryInterpreterTest.class.getResourceAsStream("/constants.json");
    if (is == null) {
      // getResourceAsStream signals "missing" with null; fail with a clear message instead of
      // the NullPointerException that InputStreamReader would raise.
      throw new FileNotFoundException("Test resource /constants.json not found on the classpath");
    }
    // Explicit charset so the fixture is decoded the same way on every platform; the reader
    // (and the underlying stream) is closed even when parsing fails.
    try (InputStreamReader reader = new InputStreamReader(is, StandardCharsets.UTF_8)) {
      return new Gson().fromJson(reader, Constants.class);
    } catch (IOException e) {
      // Only close() can raise this here; report it through Gson's own I/O exception so the
      // constructor's declared exceptions stay unchanged.
      throw new JsonIOException(e);
    }
  }

  private InterpreterGroup intpGroup;
  private bigQueryInterpreter bqInterpreter;
  // NOTE(review): this field is never assigned in this class, so interpret() below receives a
  // null context. Confirm that bigQueryInterpreter tolerates that, or build a real context here.
  private InterpreterContext context;

  /** Creates and opens an interpreter configured with the fixture's project id. */
  @Before
  public void setUp() throws Exception {
    Properties p = new Properties();
    p.setProperty("zeppelin.bigquery.project_id", CONSTANTS.getProjectId());
    p.setProperty("zeppelin.bigquery.wait_time", "5000");
    p.setProperty("zeppelin.bigquery.max_no_of_rows", "100");

    intpGroup = new InterpreterGroup();
    bqInterpreter = new bigQueryInterpreter(p);
    bqInterpreter.setInterpreterGroup(intpGroup);
    bqInterpreter.open();
  }

  /** Closes the interpreter opened by {@link #setUp()} so tests do not leak it. */
  @After
  public void tearDown() throws Exception {
    if (bqInterpreter != null) {
      bqInterpreter.close();
    }
  }

  /** The fixture's valid query must succeed and produce a table result. */
  @Test
  public void sqlSuccess() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getOne(), context);
    assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
    // JUnit's contract is assertEquals(expected, actual); the order matters for failure messages.
    assertEquals(InterpreterResult.Type.TABLE, ret.type());
  }

  /** The fixture's malformed query must be reported as an error. */
  @Test
  public void badSqlSyntaxFails() {
    InterpreterResult ret = bqInterpreter.interpret(CONSTANTS.getWrong(), context);
    assertEquals(InterpreterResult.Code.ERROR, ret.code());
  }
}

View file

@ -0,0 +1,70 @@
---
layout: page
title: "BigQuery Interpreter"
description: ""
group: interpreter
---
# BigQuery Interpreter for Apache Zeppelin
<div id="toc"></div>
## Overview
[BigQuery](https://cloud.google.com/bigquery/what-is-bigquery) is a highly scalable no-ops data warehouse in the Google Cloud Platform. Querying massive datasets can be time-consuming and expensive without the right hardware and infrastructure. Google BigQuery solves this problem by enabling super-fast SQL queries against append-only tables using the processing power of Google's infrastructure. Simply move your data into BigQuery and let it handle the hard work. You can control access to both the project and your data based on your business needs, such as giving others the ability to view or query your data.
## BigQuery API
Zeppelin is built against BigQuery API version v2-rev265-1.21.0.
## Enabling the BigQuery Interpreter
In a notebook, to enable the **BigQuery** interpreter, click the **Gear** icon and select **bigquery**.
## Using the BigQuery Interpreter
In a paragraph, use `%bigquery.sql` to select the **BigQuery** interpreter and then input SQL statements against your datasets stored in BigQuery.
You can use the [BigQuery SQL Reference](https://cloud.google.com/bigquery/query-reference) to build your own SQL.
For example, the following SQL finds the top 10 airports by number of departure delays, using the public flights dataset:
```sql
%bigquery.sql
-- Top 10 departure airports by number of delayed departures.
-- SUM is required here: COUNT counts every non-NULL value, including the 0
-- produced by the ELSE branch, and would return the total number of flights.
SELECT departure_airport, SUM(CASE WHEN departure_delay > 0 THEN 1 ELSE 0 END) AS no_of_delays
FROM [bigquery-samples:airline_ontime_data.flights]
GROUP BY departure_airport
ORDER BY 2 DESC
LIMIT 10
```
Another example: the following SQL finds the most commonly used Java packages in the GitHub data hosted in BigQuery:
```sql
%bigquery.sql
-- Lists the 40 packages that appear with the most distinct ids, most frequent first.
SELECT
package,
COUNT(*) count
FROM (
-- Collapses to one row per (package, id), so the outer COUNT(*) counts each id once per package.
SELECT
-- Captures the dotted name that follows a space, up to (not including) its last dot.
REGEXP_EXTRACT(line, r' ([a-z0-9\._]*)\.') package,
id
FROM (
-- Splits the content of each matching .java sample into lines.
SELECT
SPLIT(content, '\n') line,
id
FROM
[bigquery-public-data:github_repos.sample_contents]
WHERE
content CONTAINS 'import'
AND sample_path LIKE '%.java'
-- Keeps only the lines that begin with 'import'.
HAVING
LEFT(line, 6)='import' )
GROUP BY
package,
id )
GROUP BY
1
ORDER BY
count DESC
LIMIT
40
```