mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
[ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node
- rebasing
This commit is contained in:
parent
0a2d90eb4f
commit
64b819582f
2 changed files with 74 additions and 61 deletions
132
spark/pom.xml
132
spark/pom.xml
|
|
@ -726,6 +726,77 @@
|
|||
</dependencies>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>yarn-pyspark</id>
|
||||
<properties>
|
||||
<!-- Spark source tarball that the download-pyspark-files execution fetches and unpacks. Kept on one line so the URL carries no embedded whitespace, and fetched over HTTPS because its contents are unpacked and repackaged by this build. -->
<spark.download.url>https://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>com.googlecode.maven-download-plugin</groupId>
|
||||
<artifactId>download-maven-plugin</artifactId>
|
||||
<version>1.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-pyspark-files</id>
|
||||
<phase>validate</phase>
|
||||
<goals>
|
||||
<goal>wget</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<url>${spark.download.url}</url>
|
||||
<unpack>true</unpack>
|
||||
<outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<configuration>
|
||||
<filesets>
|
||||
<fileset>
|
||||
<directory>${basedir}/../python/build</directory>
|
||||
</fileset>
|
||||
<fileset>
|
||||
<!-- Remove the Spark distribution unpacked by the download-pyspark-files execution. -->
<directory>${project.build.directory}/spark-dist</directory>
|
||||
</fileset>
|
||||
</filesets>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-antrun-plugin</artifactId>
|
||||
<version>1.7</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-and-zip-pyspark-files</id>
|
||||
<phase>generate-resources</phase>
|
||||
<goals>
|
||||
<goal>run</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<target>
|
||||
<delete dir="../python"/>
|
||||
<copy todir="../python">
|
||||
<fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
|
||||
</copy>
|
||||
<unzip src="../python/lib/py4j-0.8.2.1-src.zip"
|
||||
dest="../python/build"/>
|
||||
<zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
|
||||
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
|
||||
includes="pyspark/*.py,pyspark/**/*.py"/>
|
||||
</target>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<!-- Build without Hadoop dependencies that are included in some runtime environments. -->
|
||||
<profile>
|
||||
<id>hadoop-provided</id>
|
||||
|
|
@ -907,67 +978,6 @@
|
|||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- for pyspark -->
|
||||
<plugin>
|
||||
<groupId>com.googlecode.maven-download-plugin</groupId>
|
||||
<artifactId>download-maven-plugin</artifactId>
|
||||
<version>1.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-pyspark-files</id>
|
||||
<phase>validate</phase>
|
||||
<goals>
|
||||
<goal>wget</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<url>${spark.download.url}</url>
|
||||
<unpack>true</unpack>
|
||||
<outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
<configuration>
|
||||
<filesets>
|
||||
<fileset>
|
||||
<directory>${basedir}/../python/build</directory>
|
||||
</fileset>
|
||||
<fileset>
|
||||
<!-- Remove the Spark distribution unpacked by the download-pyspark-files execution. -->
<directory>${project.build.directory}/spark-dist</directory>
|
||||
</fileset>
|
||||
</filesets>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-antrun-plugin</artifactId>
|
||||
<version>1.7</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-and-zip-pyspark-files</id>
|
||||
<phase>generate-resources</phase>
|
||||
<goals>
|
||||
<goal>run</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<target>
|
||||
<delete dir="../python" />
|
||||
<copy todir="../python">
|
||||
<fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
|
||||
</copy>
|
||||
<unzip src="../python/lib/py4j-0.8.2.1-src.zip"
|
||||
dest="../python/build"/>
|
||||
<zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
|
||||
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
|
||||
includes="pyspark/*.py,pyspark/**/*.py"/>
|
||||
</target>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- Plugin to compile Scala code -->
|
||||
<plugin>
|
||||
<groupId>org.scala-tools</groupId>
|
||||
|
|
|
|||
|
|
@ -73,6 +73,9 @@
|
|||
<fileSet>
|
||||
<directory>../notebook</directory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>../python</directory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
<!--<fileSet>
|
||||
<directory>zeppelin-cli/target</directory>
|
||||
|
|
|
|||
Loading…
Reference in a new issue