[ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node

- rebasing
This commit is contained in:
Jongyoul Lee 2015-06-25 14:33:15 +09:00
parent 3f87f448a4
commit 32fd9e1376

View file

@ -48,6 +48,8 @@
<akka.group>org.spark-project.akka</akka.group>
<akka.version>2.3.4-spark</akka.version>
<spark.download.url>http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
</properties>
<repositories>
@ -912,6 +914,51 @@
</executions>
</plugin>
<!-- pyspark support: download the Spark distribution and package its Python sources -->
<plugin>
  <groupId>com.googlecode.maven-download-plugin</groupId>
  <artifactId>download-maven-plugin</artifactId>
  <version>1.2.1</version>
  <executions>
    <!--
      Fetches the archive named by the spark.download.url property and
      unpacks it under the build directory in "spark-dist".
      Bound to prepare-package, i.e. one phase ahead of the antrun
      execution that reads from that same "spark-dist" directory.
    -->
    <execution>
      <id>download-pyspark-files</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>wget</goal>
      </goals>
      <configuration>
        <!-- Where the unpacked archive contents end up. -->
        <outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
        <!-- Source archive; the URL is defined once in the POM properties. -->
        <url>${spark.download.url}</url>
        <!-- Extract the archive after downloading instead of keeping it as a single file. -->
        <unpack>true</unpack>
      </configuration>
    </execution>
  </executions>
</plugin>
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-antrun-plugin</artifactId>
  <version>1.7</version>
  <executions>
    <!--
      Builds the Python bundle from the Spark distribution unpacked in
      "spark-dist" under the build directory. Output goes to the "python"
      directory located two levels above the build directory:
        * pyspark.zip           : the pyspark package, zipped
        * py4j-0.8.2.1-src.zip  : copied unchanged from the distribution
    -->
    <execution>
      <id>download-and-zip-pyspark-files</id>
      <phase>package</phase>
      <goals>
        <goal>run</goal>
      </goals>
      <configuration>
        <target>
          <!-- Start from an empty output directory so stale archives never survive a rebuild. -->
          <delete dir="${project.build.directory}/../../python"/>
          <mkdir dir="${project.build.directory}/../../python"/>
          <!--
            Zip relative to the parent "python" directory and select only the
            pyspark subtree, so every entry keeps its "pyspark/" prefix
            (pyspark/__init__.py, ...). Zipping with the package directory
            itself as basedir would put __init__.py at the archive root, and
            "import pyspark" could not be resolved from the zip.
          -->
          <zip destfile="${project.build.directory}/../../python/pyspark.zip"
               basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
               includes="pyspark/**"/>
          <!--
            NOTE(review): the py4j file name is hard-coded and must match the
            file shipped in python/lib of the downloaded Spark version; update
            it together with spark.version.
          -->
          <copy file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-0.8.2.1-src.zip"
                todir="${project.build.directory}/../../python"/>
        </target>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Plugin to compile Scala code -->
<plugin>
<groupId>org.scala-tools</groupId>