Add new matplotlib backend for python/pyspark interpreters

This commit is contained in:
Alex Goodman 2016-10-05 15:26:14 -07:00
parent dd20e7bf8b
commit edf750af31
8 changed files with 389 additions and 5 deletions

1
.gitignore vendored
View file

@ -1,4 +1,5 @@
*.class
*.pyc
# Package Files #
*.jar

View file

@ -0,0 +1,216 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file provides a static (non-interactive) matplotlib plotting backend
# for zeppelin notebooks for use with the python/pyspark interpreters
from __future__ import print_function
import base64
from io import BytesIO
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
import mpl_config
import matplotlib
from matplotlib._pylab_helpers import Gcf
from matplotlib.backends.backend_agg import new_figure_manager, FigureCanvasAgg
from matplotlib.backend_bases import ShowBase, FigureManagerBase
from matplotlib.figure import Figure
########################################################################
#
# The following functions and classes are for pylab and implement
# window/figure managers, etc...
#
########################################################################
class Show(ShowBase):
'''
A callable object that displays the figures to the screen. Valid kwargs
include figure width and height (in units supported by the div tag), block
(allows users to override blocking behavior regardless of whether or not
interactive mode is enabled, currently unused) and close (Implicitly call
matplotlib.pyplot.close('all') with each call to show()).
'''
def __call__(self, close=None, block=None, **kwargs):
if close is None:
close = mpl_config.get('close')
try:
managers = Gcf.get_all_fig_managers()
if not managers:
return
# Tell zeppelin that the output will be html using the %html magic
# We want to do this only once to avoid seeing "%html" printed
# directly to the outout when multiple figures are displayed from
# one paragraph.
print("%html")
# Show all open figures
for manager in managers:
manager.show(**kwargs)
finally:
# This closes all the figures if close is set to True.
if close and Gcf.get_all_fig_managers():
Gcf.destroy_all()
class FigureCanvasZInline(FigureCanvasAgg):
"""
The canvas the figure renders into. Calls the draw and print fig
methods, creates the renderers, etc...
"""
def draw_idle(self, *args, **kwargs):
"""
Called when the figure gets updated (eg through a plotting command).
This is overriden to allow open figures to be reshown after they
are updated when mpl_config.get('close') is False.
"""
if not self._is_idle_drawing:
with self._idle_draw_cntx():
self.draw(*args, **kwargs)
draw_if_interactive()
class FigureManagerZInline(FigureManagerBase):
"""
Wrap everything up into a window for the pylab interface
"""
def __init__(self, canvas, num):
FigureManagerBase.__init__(self, canvas, num)
self._shown = False
def show(self, **kwargs):
if not self._shown:
zdisplay(self.canvas.figure, **kwargs)
else:
self.canvas.draw_idle()
self._shown = True
def draw_if_interactive():
"""
If interactive mode is on, this allows for updating properties of
the figure when each new plotting command is called.
"""
manager = Gcf.get_active()
# Don't bother continuing if we aren't in interactive mode
# or if there are no active figures
if not matplotlib.is_interactive() or manager is None:
return
# Allow for figure to be reshown if close is false since
# this function call implies that it has been updated
if not mpl_config.get('close'):
manager._shown = False
def new_figure_manager(num, *args, **kwargs):
"""
Create a new figure manager instance
"""
# if a main-level app must be created, this (and
# new_figure_manager_given_figure) is the usual place to
# do it -- see backend_wx, backend_wxagg and backend_tkagg for
# examples. Not all GUIs require explicit instantiation of a
# main-level app (egg backend_gtk, backend_gtkagg) for pylab
FigureClass = kwargs.pop('FigureClass', Figure)
thisFig = FigureClass(*args, **kwargs)
return new_figure_manager_given_figure(num, thisFig)
def new_figure_manager_given_figure(num, figure):
"""
Create a new figure manager instance for the given figure.
"""
canvas = FigureCanvasZInline(figure)
manager = FigureManagerZInline(canvas, num)
return manager
########################################################################
#
# Backend specific functions
#
########################################################################
def zdisplay(fig, **kwargs):
"""
Publishes a matplotlib figure to the notebook paragraph output.
"""
# kwargs can be width or height (in units supported by div tag)
width = kwargs.pop('width', 'auto')
height = kwargs.pop('height', 'auto')
fmt = kwargs.get('format', mpl_config.get('format'))
# Check if format is supported
supported_formats = mpl_config.get('supported_formats')
if fmt not in supported_formats:
raise ValueError("Unsupported format %s" %fmt)
# For SVG the data string has to be unicode, not bytes
if fmt == 'svg':
buf = StringIO()
fig.canvas.print_figure(buf, **kwargs)
img_str = buf.getvalue()
# This is needed to ensure the SVG image is the correct size.
# We should find a better way to do this...
width = '{}px'.format(mpl_config.get('width'))
height = '{}px'.format(mpl_config.get('height'))
else:
# Express the image as bytes
buf = BytesIO()
fig.canvas.print_figure(buf, **kwargs)
img_str = b"data:image/%s;base64," %fmt
img_str += base64.b64encode(buf.getvalue())
img_tag = "<img src={img} style='width={width};height:{height}'>"
# Python3 forces all strings to default to unicode, but for raster image
# formats (eg png, jpg), we want to work with bytes. Thus this step is
# needed to ensure compatability for all python versions.
img_str = img_str.decode("ascii")
img_str = img_tag.format(img=img_str, width=width, height=height)
# Print the image to the notebook paragraph via the %html magic
html = "<div style='width:{width};height:{height}'>{img}<div>"
print(html.format(width=width, height=height, img=img_str))
buf.close()
def displayhook():
"""
Called post paragraph execution if interactive mode is on
"""
if matplotlib.is_interactive():
show()
########################################################################
#
# Now just provide the standard names that backend.__init__ is expecting
#
########################################################################
# Create a reference to the show function we are using. This is what actually
# gets called by matplotlib.pyplot.show().
show = Show()
# Default FigureCanvas and FigureManager classes to use from the backend
FigureCanvas = FigureCanvasZInline
FigureManager = FigureManagerZInline

92
lib/python/mpl_config.py Normal file
View file

@ -0,0 +1,92 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This module provides utitlites for users to configure the inline plotting
# backend through a PyZeppelinContext instance (eg, through z.configure_mpl())
import matplotlib
def configure(**kwargs):
"""
Generic configure function.
Usage: configure(prop1='foo', prop2='bar', ...)
Currently supported zeppelin-specific properties are:
close - If true, close all figures once shown.
width, height - Default width / height of the figure in pixels.
fontsize - Font size.
dpi - dpi of the figure.
fmt - Figure format
supported_formats - Supported Figure formats ()
interactive - If true show all figures without explicit call to show()
via a post-execute hook.
"""
_config.update(**kwargs)
# Broadcast relevant changes to matplotlib RC
_on_config_change()
def get(key):
"""
Get the configuration info given a key
"""
return _config[key]
def _on_config_change():
# dpi
dpi = _config['dpi']
matplotlib.rcParams['savefig.dpi'] = dpi
matplotlib.rcParams['figure.dpi'] = dpi
# Width and height
width = float(_config['width']) / dpi
height = float(_config['height']) / dpi
matplotlib.rcParams['figure.figsize'] = (width, height)
# Font size
fontsize = _config['fontsize']
matplotlib.rcParams['font.size'] = fontsize
# Default Figure Format
fmt = _config['format']
supported_formats = _config['supported_formats']
if fmt not in supported_formats:
raise ValueError("Unsupported format %s" %fmt)
matplotlib.rcParams['savefig.format'] = fmt
# Interactive mode
interactive = _config['interactive']
matplotlib.interactive(interactive)
def _init_config():
dpi = matplotlib.rcParams['savefig.dpi']
fmt = matplotlib.rcParams['savefig.format']
width, height = matplotlib.rcParams['figure.figsize']
fontsize = matplotlib.rcParams['font.size']
_config['dpi'] = dpi
_config['format'] = fmt
_config['width'] = width*dpi
_config['height'] = height*dpi
_config['fontsize'] = fontsize
_config['close'] = True
_config['interactive'] = matplotlib.is_interactive()
_config['supported_formats'] = ['png', 'jpg', 'svg']
_config = {}
_init_config()

View file

@ -32,6 +32,7 @@ import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.scheduler.Scheduler;
@ -68,6 +69,9 @@ public class PythonInterpreter extends Interpreter {
@Override
public void open() {
// Add matplotlib display hook
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
LOG.info("Starting Python interpreter ---->");
LOG.info("Python path is set to:" + property.getProperty(ZEPPELIN_PYTHON));

View file

@ -16,7 +16,7 @@
# PYTHON 2 / 3 compatibility :
# bootstrap.py must be runnable with Python 2 or 3
# Remove interactive mode displayhook
import os
import sys
import signal
import base64
@ -117,6 +117,7 @@ class PyZeppelinContext(object):
def __init__(self):
self.max_result = 1000
self._displayhook = lambda *args: None
def input(self, name, defaultValue=""):
print(self.errorMsg)
@ -164,7 +165,7 @@ class PyZeppelinContext(object):
#)
body_buf.close(); header_buf.close()
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
**kwargs):
"""Matplotlib show function
"""
@ -187,6 +188,37 @@ class PyZeppelinContext(object):
html = "%html <div style='width:{width};height:{height}'>{img}<div>"
print(html.format(width=width, height=height, img=img_str))
img.close()
def configure_mpl(self, **kwargs):
import mpl_config
mpl_config.configure(**kwargs)
def _setup_matplotlib(self):
# If we don't have matplotlib installed don't bother continuing
try:
import matplotlib
except ImportError:
pass
# Make sure custom backends are available in the PYTHONPATH
cwd = os.getcwd()
mpl_path = os.path.join(cwd, 'lib', 'python')
if mpl_path not in sys.path:
sys.path.append(mpl_path)
# Finally check if backend exists, and if so configure as appropriate
try:
matplotlib.use('module://backend_zinline')
import backend_zinline
# Everything looks good so make config assuming that we are using
# an inline backend
self._displayhook = backend_zinline.displayhook
self.configure_mpl(width=600, height=400, dpi=72,
fontsize=10, interactive=True, format='png')
except ImportError:
# Fall back to Agg if no custom backend installed
matplotlib.use('Agg')
z = PyZeppelinContext()
z._setup_matplotlib()

View file

@ -49,6 +49,7 @@ import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
import org.apache.zeppelin.interpreter.WrappedInterpreter;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
@ -111,6 +112,8 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
@Override
public void open() {
// Add matplotlib display hook
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
DepInterpreter depInterpreter = getDepInterpreter();
// load libraries from Dependency Interpreter

View file

@ -15,7 +15,7 @@
# limitations under the License.
#
import sys, getopt, traceback, json, re
import os, sys, getopt, traceback, json, re
from py4j.java_gateway import java_import, JavaGateway, GatewayClient
from py4j.protocol import Py4JJavaError
@ -50,6 +50,7 @@ class Logger(object):
class PyZeppelinContext(dict):
def __init__(self, zc):
self.z = zc
self._displayhook = lambda *args: None
def show(self, obj):
from pyspark.sql import DataFrame
@ -116,6 +117,38 @@ class PyZeppelinContext(dict):
return self.z.getHook(event)
return self.z.getHook(event, replName)
def _setup_matplotlib(self):
# If we don't have matplotlib installed don't bother continuing
try:
import matplotlib
except ImportError:
return
# Make sure custom backends are available in the PYTHONPATH
cwd = os.getcwd()
mpl_path = os.path.join(cwd, 'lib', 'python')
if mpl_path not in sys.path:
sys.path.append(mpl_path)
# Finally check if backend exists, and if so configure as appropriate
try:
matplotlib.use('module://backend_zinline')
import backend_zinline
# Everything looks good so make config assuming that we are using
# an inline backend
self._displayhook = backend_zinline.displayhook
self.configure_mpl(width=600, height=400, dpi=72,
fontsize=10, interactive=True, format='png')
except ImportError:
# Fall back to Agg if no custom backend installed
matplotlib.use('Agg')
return
def configure_mpl(self, **kwargs):
import mpl_config
mpl_config.configure(**kwargs)
def __tupleToScalaTuple2(self, tuple):
if (len(tuple) == 2):
return gateway.jvm.scala.Tuple2(tuple[0], tuple[1])
@ -244,6 +277,7 @@ sqlContext = sqlc
completion = PySparkCompletion(intp)
z = PyZeppelinContext(intp.getZeppelinContext())
z._setup_matplotlib()
while True :
req = intp.getStatements()

View file

@ -29,7 +29,7 @@
<dependencySets>
<dependencySet>
<!-- Enable access to all projects in the current multimodule build!
<!-- Enable access to all projects in the current multimodule build!
<useAllReactorProjects>true</useAllReactorProjects> -->
<!-- Now, select which projects to include in this module-set. -->
<includes>
@ -62,6 +62,9 @@
<directoryMode>0755</directoryMode>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../lib</directory>
</fileSet>
<fileSet>
<directory>../licenses</directory>
</fileSet>
@ -92,4 +95,3 @@
</fileSets>-->
</assembly>