Address comments

2026-05-24 09:38:26 +00:00 · 2017-06-25 10:08:12 +08:00 · 2017-06-25 10:08:12 +08:00 · d4e9a6db42
commit d4e9a6db42
parent 4b4e3dbd0d
6 changed files with 35 additions and 30 deletions
--- a/7
+++ b/7
@ -276,3 +276,10 @@ The following components are provided under the BSD 2-Clause license.  See file

  (BSD 2 Clause) portions of SQLLine (http://sqlline.sourceforge.net/) - http://sqlline.sourceforge.net/#license
   jdbc/src/main/java/org/apache/zeppelin/jdbc/SqlCompleter.java
+
+========================================================================
+Jython Software License
+========================================================================
+The following components are provided under the Jython Software License.  See file headers and project links for details.
+
+  (Jython Software License) jython-standalone - http://www.jython.org/
--- a/docs/interpreter/pig.md
+++ b/docs/interpreter/pig.md
@ -29,47 +29,39 @@ which in turns enables them to handle very large data sets.
    - No pig alias in the last statement in `%pig.query` (read the examples below).
    - The last statement must be in single line in `%pig.query`
    
-## Supported runtime mode
-  - Local
-  - MapReduce
-  - Tez_Local (Only Tez 0.7 is supported)
-  - Tez  (Only Tez 0.7 is supported)
-  - Spark_Local (Only Spark 1.6.x is supported, by default it is Spark 1.6.3)
-  - Spark (Only Spark 1.6.x is supported, by default it is Spark 1.6.3)

 ## How to use

-### How to setup Pig
+### How to set up Pig execution modes

 - Local Mode

-    Nothing needs to be done for local mode
+    Set `zeppelin.pig.execType` as `local`.

 - MapReduce Mode

-    HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
+    Set `zeppelin.pig.execType` as `mapreduce`. HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.

 - Tez Local Mode
    
-    Nothing needs to be done for tez local mode
+    Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez_local`.
    
 - Tez Mode

-    HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
+    Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez`. HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.

 - Spark Local Mode
    
-    Nothing needs to be done for spark local mode
+    Only Spark 1.6.x is supported; the default is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark_local`.
    
 - Spark Mode
    
-    For now, only yarn-client mode is supported.  To enable it, you need to set property SPARK_MASTER to yarn-client
-    and set SPARK_JAR to the spark assembly file uploaded to hdfs.
+    Only Spark 1.6.x is supported; the default is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark`. For now, only yarn-client mode is supported. To enable it, set the property `SPARK_MASTER` to `yarn-client` and set `SPARK_JAR` to the Spark assembly jar.
        
 ### How to choose custom Spark Version

-By default, Pig Interpreter would use Spark 1.6.3, if you want to use another Spark Version, 
-you need to rebuild the zeppelin by specifying the custom spark version via -Dpig.spark.version=<custom_spark_version> in the maven build command.
+By default, the Pig interpreter uses Spark 1.6.3 built with Scala 2.10. If you want to use another Spark version or Scala version,
+you need to rebuild Zeppelin, specifying the custom Spark version via `-Dpig.spark.version=<custom_spark_version>` and the Scala version via `-Dpig.scala.version=<scala_version>` in the maven build command.

 ### How to configure interpreter

@ -117,7 +109,8 @@ So you can use that to find app running in YARN RM UI.
    <tr>
        <td>SPARK_JAR</td>
        <td></td>
-        <td>The spark assembly jar you uploaded to hdfs</td>
+        <td>The spark assembly jar. Both a local jar and a jar on hdfs are supported; putting it on hdfs may
+        have a performance benefit</td>
    </tr>
 </table>  

--- a/pig/pom.xml
+++ b/pig/pom.xml
@ -40,7 +40,7 @@
        <hadoop.version>2.6.0</hadoop.version>
        <tez.version>0.7.0</tez.version>
        <pig.spark.version>1.6.3</pig.spark.version>
-        <pig.scala.version>2.11</pig.scala.version>
+        <pig.scala.version>2.10</pig.scala.version>
    </properties>

    <dependencies>
--- a/pig/src/main/java/org/apache/zeppelin/pig/PigInterpreter.java
+++ b/pig/src/main/java/org/apache/zeppelin/pig/PigInterpreter.java
@ -99,6 +99,10 @@ public class PigInterpreter extends BasePigInterpreter {
      listenerMap.put(contextInterpreter.getParagraphId(), scriptListener);
      pigServer.registerScript(tmpFile.getAbsolutePath());
    } catch (IOException e) {
+      // 1. catch FrontendException, FrontendException happens in the query compilation phase.
+      // 2. catch ParseException for syntax error
+      // 3. PigStats, This is execution error
+      // 4. Other errors.
      if (e instanceof FrontendException) {
        FrontendException fe = (FrontendException) e;
        if (!fe.getMessage().contains("Backend error :")) {
@ -109,7 +113,7 @@ public class PigInterpreter extends BasePigInterpreter {
        }
      }
      if (e.getCause() instanceof ParseException) {
-        return new InterpreterResult(Code.ERROR, e.getMessage());
+        return new InterpreterResult(Code.ERROR, e.getCause().getMessage());
      }
      PigStats stats = PigStats.get();
      if (stats != null) {
--- a/pig/src/main/java/org/apache/zeppelin/pig/PigQueryInterpreter.java
+++ b/pig/src/main/java/org/apache/zeppelin/pig/PigQueryInterpreter.java
@ -126,8 +126,9 @@ public class PigQueryInterpreter extends BasePigInterpreter {
    } catch (IOException e) {
      // Extract error in the following order
      // 1. catch FrontendException, FrontendException happens in the query compilation phase.
-      // 2. PigStats, This is execution error
-      // 3. Other errors.
+      // 2. catch ParseException for syntax error
+      // 3. PigStats, This is execution error
+      // 4. Other errors.
      if (e instanceof FrontendException) {
        FrontendException fe = (FrontendException) e;
        if (!fe.getMessage().contains("Backend error :")) {
--- a/pig/src/main/resources/interpreter-setting.json
+++ b/pig/src/main/resources/interpreter-setting.json
@ -8,7 +8,7 @@
        "envName": null,
        "propertyName": "zeppelin.pig.execType",
        "defaultValue": "mapreduce",
-        "description": "local | mapreduce | tez"
+        "description": "local | mapreduce | tez_local | tez | spark_local | spark"
      },
      "zeppelin.pig.includeJobStats": {
        "envName": null,
@ -16,11 +16,17 @@
        "defaultValue": "false",
        "description": "flag to include job stats in output"
      },
-      "zeppelin.pig.execType": {
+      "SPARK_MASTER": {
        "envName": "SPARK_MASTER",
        "propertyName": "SPARK_MASTER",
        "defaultValue": "local",
        "description": "local | yarn-client"
+      },
+      "SPARK_JAR": {
+        "envName": "SPARK_JAR",
+        "propertyName": "SPARK_JAR",
+        "defaultValue": "",
+        "description": "spark assembly jar uploaded in hdfs"
      }
    },
    "editor": {
@ -33,12 +39,6 @@
    "name": "query",
    "className": "org.apache.zeppelin.pig.PigQueryInterpreter",
    "properties": {
-      "zeppelin.pig.execType": {
-        "envName": null,
-        "propertyName": "zeppelin.pig.execType",
-        "defaultValue": "mapreduce",
-        "description": "local | mapreduce | tez"
-      },
      "zeppelin.pig.maxResult": {
        "envName": null,
        "propertyName": "zeppelin.pig.maxResult",