[ZEPPELIN-116] Made add_mahout.py script more resilient

This commit is contained in:
rawkintrevo 2016-11-04 16:34:48 -05:00
parent 7e83832537
commit e7d4e120da
2 changed files with 18 additions and 8 deletions

View file

@ -185,7 +185,7 @@
"result": {
"code": "SUCCESS",
"type": "HTML",
"msg": "\u003cp\u003eAfter the interpreters are created you will need to \u0027bind\u0027 them by clicking on the little gear in the top right corner, scrolling to the top, and clicking on \u003ccode\u003emahoutFlink\u003c/code\u003e and \u003ccode\u003emahoutSpark\u003c/code\u003e so that they are highlighted in blue.\u003c/p\u003e\n\u003ch4\u003eRunning Mahout code\u003c/h4\u003e\n\u003cp\u003eYou will need to import certain libraries, and declare the \u003cem\u003eMahout Distributed Context\u003c/em\u003e when you first start your notebook using the interpreters.\u003c/p\u003e\n\u003cp\u003eIf using Apache Flink the code you need to run is:\u003c/p\u003e\n\u003cpre\u003e\u003ccode class\u003d\"scala\"\u003e%flinkMahout\n\nimport org.apache.flink.api.scala._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.flinkbindings._\nimport org.apache.mahout.math._\nimport scalabindings._\nimport RLikeOps._\n\n\nimplicit val ctx \u003d new FlinkDistributedContext(benv)\n\u003c/code\u003e\u003c/pre\u003e\n\u003cp\u003eIf using Apache Spark the code you need to run is\u003c/p\u003e\n\u003cpre\u003e\u003ccode class\u003d\"scala\"\u003e%sparkMahout\n\nimport org.apache.mahout.math._\nimport org.apache.mahout.math.scalabindings._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.scalabindings.RLikeOps._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.sparkbindings._\n\nimplicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext \u003d sc2sdc(sc)\n\u003c/code\u003e\u003c/pre\u003e\n\u003cp\u003e\u003cstrong\u003eNote: For Apache Mahout on Apache Spark you must be running Spark 1.5.x or 1.6.x. 
We are working hard on supporting Spark 2.0\u003c/strong\u003e\n\u003cbr /\u003eIn the meantime, feel free to play with Mahout on Flink and then simply \u003cem\u003ecopy and paste your Mahout code to Spark once it is supported!\u003c/em\u003e\u003c/p\u003e\n\u003ch3\u003eA Side by Side Example\u003c/h3\u003e\n"
"msg": "\u003cp\u003eAfter the interpreters are created you will need to \u0027bind\u0027 them by clicking on the little gear in the top right corner, scrolling to the top, and clicking on \u003ccode\u003emahoutFlink\u003c/code\u003e and \u003ccode\u003emahoutSpark\u003c/code\u003e so that they are highlighted in blue.\u003c/p\u003e\n\u003ch4\u003eRunning Mahout code\u003c/h4\u003e\n\u003cp\u003eYou will need to import certain libraries, and declare the \u003cem\u003eMahout Distributed Context\u003c/em\u003e when you first start your notebook using the interpreters.\u003c/p\u003e\n\u003cp\u003eIf using Apache Flink the code you need to run is:\u003c/p\u003e\n\u003cpre\u003e\u003ccode class\u003d\"scala\"\u003e%flinkMahout\n\nimport org.apache.flink.api.scala._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.flinkbindings._\nimport org.apache.mahout.math._\nimport scalabindings._\nimport RLikeOps._\n\n\n@transient implicit val ctx \u003d new FlinkDistributedContext(benv)\n\u003c/code\u003e\u003c/pre\u003e\n\u003cp\u003eIf using Apache Spark the code you need to run is\u003c/p\u003e\n\u003cpre\u003e\u003ccode class\u003d\"scala\"\u003e%sparkMahout\n\nimport org.apache.mahout.math._\nimport org.apache.mahout.math.scalabindings._\nimport org.apache.mahout.math.drm._\nimport org.apache.mahout.math.scalabindings.RLikeOps._\nimport org.apache.mahout.math.drm.RLikeDrmOps._\nimport org.apache.mahout.sparkbindings._\n\nimplicit val sdc: org.apache.mahout.sparkbindings.SparkDistributedContext \u003d sc2sdc(sc)\n\u003c/code\u003e\u003c/pre\u003e\n\u003cp\u003e\u003cstrong\u003eNote: For Apache Mahout on Apache Spark you must be running Spark 1.5.x or 1.6.x. 
We are working hard on supporting Spark 2.0\u003c/strong\u003e\n\u003cbr /\u003eIn the meantime, feel free to play with Mahout on Flink and then simply \u003cem\u003ecopy and paste your Mahout code to Spark once it is supported!\u003c/em\u003e\u003c/p\u003e\n\u003ch3\u003eA Side by Side Example\u003c/h3\u003e\n"
},
"dateCreated": "Sep 27, 2016 4:18:50 AM",
"dateStarted": "Sep 28, 2016 10:17:05 AM",

View file

@ -22,7 +22,7 @@ import json
from os.path import isfile
from os import getcwd
from subprocess import call
from subprocess import call, check_call
#######################################################################################################################
@ -57,17 +57,26 @@ class ZeppelinTerpWrangler:
return terp_id
def _terpExists(self, terpName):
    """Report whether an interpreter named `terpName` can be resolved to an ID.

    The lookup is delegated to self._getTerpID(terpName); a result of None
    is treated as "no such interpreter".

    :param terpName: interpreter name to look up.
    :return: True if self._getTerpID returned something other than None,
        False otherwise.
    """
    # Identity test against the None singleton (PEP 8) instead of `== None`.
    return self._getTerpID(terpName) is not None
def createTerp(self, original_terp_name, new_terp_name):
    """Create interpreter `new_terp_name` as a deep copy of `original_terp_name`.

    The copy is stored in self.interpreter_json['interpreterSettings'] under
    the key `new_terp_name`, and its 'name' and 'id' fields are both set to
    that value. If an interpreter named `new_terp_name` already exists it is
    replaced, so the call can safely be repeated.

    :param original_terp_name: name of the interpreter to use as template.
    :param new_terp_name: name (also used as the id) of the new interpreter.
    :raises KeyError: if `original_terp_name` cannot be resolved to an
        existing interpreter setting.
    """
    from copy import deepcopy

    settings = self.interpreter_json['interpreterSettings']

    orig_terp_id = self._getTerpID(original_terp_name)
    if orig_terp_id is None:
        # Fail with a readable message instead of an opaque KeyError(None).
        raise KeyError("interpreter '%s' not found" % original_terp_name)

    # Take the copy BEFORE removing anything, so a bad template name can
    # never leave the configuration with the old interpreter deleted.
    new_settings = deepcopy(settings[orig_terp_id])

    new_terp_id = new_terp_name
    if self._terpExists(new_terp_name):
        del settings[self._getTerpID(new_terp_name)]

    new_settings['name'] = new_terp_name
    new_settings['id'] = new_terp_id
    settings[new_terp_id] = new_settings

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("created new interpreter '%s' from interpreter '%s'" % (new_terp_name, original_terp_name))
def _readTerpJson(self):
with open(self.interpreter_json_path) as f:
@ -146,7 +155,6 @@ class ZeppelinTerpWrangler:
for t in terpDeps:
self._addTerpDep(terpName, t)
#######################################################################################################################
# We need to know where the Zeppelin top-level directory is so we can edit its conf files
#
@ -212,8 +220,10 @@ def download_mahout():
return True
# When download_mahout() returns a truthy value, fetch the Mahout binary
# tarball into the Zeppelin home directory and unpack it there.
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# download or extraction stops the script instead of being silently ignored;
# each command is run exactly once.
if download_mahout():
    check_call(['wget', mahout_bin_url], cwd=zeppelin_home)
    check_call(['tar', 'xzf', tar_name], cwd=zeppelin_home)
if args.mahout_home:
mahout_home = args.mahout_home
@ -260,7 +270,7 @@ else:
#######################################################################################################################
# Restart Zeppelin so it picks up the new interpreters, unless the user asked
# to postpone the restart. check_call raises CalledProcessError if the daemon
# script exits non-zero, and the restart is issued exactly once.
# Single-argument parenthesised print behaves the same on Python 2 and 3.
if not args.restart_later:
    print("restarting Apache Zeppelin to load new interpreters...")
    check_call(["bin/zeppelin-daemon.sh", 'restart'], cwd=zeppelin_home)
else:
    print("--restart_later flag detected: remember to restart Zeppelin to see new Mahout interpreters!!")