mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
Add scio to beam group
This commit is contained in:
parent
cd79fc8360
commit
49cf0eb482
5 changed files with 38 additions and 35 deletions
|
|
@ -102,7 +102,13 @@
|
|||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.zeppelin</groupId>
|
||||
<artifactId>zeppelin-scio_${scala.binary.version}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
|
|
|
|||
|
|
@ -9,5 +9,27 @@
|
|||
"editor": {
|
||||
"editOnDblClick": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"group": "beam",
|
||||
"name": "scio",
|
||||
"className": "org.apache.zeppelin.scio.ScioInterpreter",
|
||||
"properties": {
|
||||
"zeppelin.scio.argz": {
|
||||
"envName": "ZEPPELIN_SCIO_ARGZ",
|
||||
"propertyName": "zeppelin.scio.argz",
|
||||
"defaultValue": "--runner=InProcessPipelineRunner",
|
||||
"description": "Scio interpreter wide arguments"
|
||||
},
|
||||
"zeppelin.scio.maxResult": {
|
||||
"envName": "ZEPPELIN_SCIO_MAXRESULT",
|
||||
"propertyName": "zeppelin.scio.maxResult",
|
||||
"defaultValue": "1000",
|
||||
"description": "Max number of SCollection results to display."
|
||||
}
|
||||
},
|
||||
"editor": {
|
||||
"language": "scala"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -48,14 +48,14 @@ Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPl
|
|||
|
||||
## Enabling the Scio Interpreter
|
||||
|
||||
In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **scio**.
|
||||
In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **beam** (**beam.scio**).
|
||||
|
||||
## Using the Scio Interpreter
|
||||
|
||||
In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`.
|
||||
In a paragraph, use `$beam.scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`.
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
val (sc, args) = ContextAndArgs(argz)
|
||||
```
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ Use `sc` context the way you would in regular pipeline/REPL.
|
|||
Example:
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
val (sc, args) = ContextAndArgs(argz)
|
||||
sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay()
|
||||
```
|
||||
|
|
@ -108,7 +108,7 @@ There are different helper methods for different objects. You can easily display
|
|||
#### BigQuery example:
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
|
||||
|FROM [bigquery-samples:airline_ontime_data.flights]
|
||||
|group by departure_airport
|
||||
|
|
@ -122,7 +122,7 @@ sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema)
|
|||
#### BigQuery typed example:
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays
|
||||
|FROM [bigquery-samples:airline_ontime_data.flights]
|
||||
|group by departure_airport
|
||||
|
|
@ -136,7 +136,7 @@ sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay()
|
|||
#### Avro example:
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
import com.spotify.data.ExampleAvro
|
||||
|
||||
val (sc, args) = ContextAndArgs(argz)
|
||||
|
|
@ -146,7 +146,7 @@ sc.avroFile[ExampleAvro]("gs://<bucket>/tmp/my.avro").take(10).closeAndDisplay()
|
|||
#### Avro example with a view schema:
|
||||
|
||||
```scala
|
||||
%scio
|
||||
$beam.scio
|
||||
import com.spotify.data.ExampleAvro
|
||||
import org.apache.avro.Schema
|
||||
|
||||
|
|
|
|||
|
|
@ -1,25 +0,0 @@
|
|||
[
|
||||
{
|
||||
"group": "scio",
|
||||
"name": "scio",
|
||||
"className": "org.apache.zeppelin.scio.ScioInterpreter",
|
||||
"defaultInterpreter": true,
|
||||
"properties": {
|
||||
"zeppelin.scio.argz": {
|
||||
"envName": "ZEPPELIN_SCIO_ARGZ",
|
||||
"propertyName": "zeppelin.scio.argz",
|
||||
"defaultValue": "--runner=InProcessPipelineRunner",
|
||||
"description": "Scio interpreter wide arguments"
|
||||
},
|
||||
"zeppelin.scio.maxResult": {
|
||||
"envName": "ZEPPELIN_SCIO_MAXRESULT",
|
||||
"propertyName": "zeppelin.scio.maxResult",
|
||||
"defaultValue": "1000",
|
||||
"description": "Max number of SCollection results to display."
|
||||
}
|
||||
},
|
||||
"editor": {
|
||||
"language": "scala"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -45,7 +45,7 @@ import scala.tools.nsc.util.ClassPath
|
|||
* <p>
|
||||
* How to use: <br/>
|
||||
* {@code
|
||||
* %scio
|
||||
* $beam.scio
|
||||
* val (sc, args) = ContextAndArgs(argz)
|
||||
* sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay()
|
||||
* }
|
||||
|
|
|
|||
Loading…
Reference in a new issue