mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
Add new matplotlib backend for python/pyspark interpreters
This commit is contained in:
parent
dd20e7bf8b
commit
edf750af31
8 changed files with 389 additions and 5 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
|||
*.class
|
||||
*.pyc
|
||||
|
||||
# Package Files #
|
||||
*.jar
|
||||
|
|
|
|||
216
lib/python/backend_zinline.py
Normal file
216
lib/python/backend_zinline.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This file provides a static (non-interactive) matplotlib plotting backend
|
||||
# for zeppelin notebooks for use with the python/pyspark interpreters
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import base64
|
||||
from io import BytesIO
|
||||
try:
|
||||
from StringIO import StringIO
|
||||
except ImportError:
|
||||
from io import StringIO
|
||||
|
||||
import mpl_config
|
||||
import matplotlib
|
||||
from matplotlib._pylab_helpers import Gcf
|
||||
from matplotlib.backends.backend_agg import new_figure_manager, FigureCanvasAgg
|
||||
from matplotlib.backend_bases import ShowBase, FigureManagerBase
|
||||
from matplotlib.figure import Figure
|
||||
|
||||
########################################################################
|
||||
#
|
||||
# The following functions and classes are for pylab and implement
|
||||
# window/figure managers, etc...
|
||||
#
|
||||
########################################################################
|
||||
|
||||
class Show(ShowBase):
    """
    Callable that prints every open figure to the paragraph output.

    Keyword arguments are handed to each figure manager's show(); they may
    include the figure width and height (in units supported by the div tag).
    close controls whether all figures are destroyed once shown and falls
    back to mpl_config.get('close') when it is None. block is accepted but
    currently unused.
    """
    def __call__(self, close=None, block=None, **kwargs):
        should_close = mpl_config.get('close') if close is None else close
        try:
            open_managers = Gcf.get_all_fig_managers()
            if open_managers:
                # Emit the %html magic exactly once so that it is not
                # printed again for every figure of the same paragraph.
                print("%html")

                for fig_manager in open_managers:
                    fig_manager.show(**kwargs)
        finally:
            # Destroy the figures even if showing one of them failed.
            if should_close and Gcf.get_all_fig_managers():
                Gcf.destroy_all()
|
||||
|
||||
|
||||
class FigureCanvasZInline(FigureCanvasAgg):
|
||||
"""
|
||||
The canvas the figure renders into. Calls the draw and print fig
|
||||
methods, creates the renderers, etc...
|
||||
"""
|
||||
def draw_idle(self, *args, **kwargs):
|
||||
"""
|
||||
Called when the figure gets updated (e.g. through a plotting command).
|
||||
This is overridden to allow open figures to be reshown after they
|
||||
are updated when mpl_config.get('close') is False.
|
||||
"""
|
||||
if not self._is_idle_drawing:
|
||||
with self._idle_draw_cntx():
|
||||
self.draw(*args, **kwargs)
|
||||
draw_if_interactive()
|
||||
|
||||
|
||||
class FigureManagerZInline(FigureManagerBase):
    """
    Figure manager for the pylab interface that remembers whether its
    figure has already been printed to the paragraph output.
    """
    def __init__(self, canvas, num):
        FigureManagerBase.__init__(self, canvas, num)
        # False until show() has published the figure at least once
        self._shown = False

    def show(self, **kwargs):
        """
        Publish the figure through zdisplay() the first time; afterwards
        only ask the canvas for an idle redraw.
        """
        if self._shown:
            self.canvas.draw_idle()
        else:
            zdisplay(self.canvas.figure, **kwargs)

        self._shown = True
|
||||
|
||||
|
||||
def draw_if_interactive():
    """
    Mark the active figure as needing to be shown again.

    Only has an effect when matplotlib is in interactive mode, a figure is
    active and mpl_config.get('close') is false.
    """
    active_manager = Gcf.get_active()

    if matplotlib.is_interactive() and active_manager is not None:
        # Figures are kept open when close is false, so an update has to
        # clear the flag for the figure to be published again.
        if not mpl_config.get('close'):
            active_manager._shown = False
|
||||
|
||||
|
||||
def new_figure_manager(num, *args, **kwargs):
    """
    Build a figure and return a new figure manager for it.

    The figure class is taken from the FigureClass keyword argument
    (Figure by default); all other arguments are passed to that class.
    """
    # If a main-level app must be created, this (and
    # new_figure_manager_given_figure) is the usual place to do it -- see
    # backend_wx, backend_wxagg and backend_tkagg for examples. Not all
    # GUIs require explicit instantiation of a main-level app
    # (e.g. backend_gtk, backend_gtkagg) for pylab.
    figure_cls = kwargs.pop('FigureClass', Figure)
    new_figure = figure_cls(*args, **kwargs)
    return new_figure_manager_given_figure(num, new_figure)
|
||||
|
||||
|
||||
def new_figure_manager_given_figure(num, figure):
    """
    Wrap the given figure in a FigureCanvasZInline and return the
    FigureManagerZInline that manages it under the number num.
    """
    return FigureManagerZInline(FigureCanvasZInline(figure), num)
|
||||
|
||||
|
||||
########################################################################
|
||||
#
|
||||
# Backend specific functions
|
||||
#
|
||||
########################################################################
|
||||
|
||||
def zdisplay(fig, **kwargs):
    """
    Publishes a matplotlib figure to the notebook paragraph output.

    The figure is rendered with fig.canvas.print_figure() and printed as an
    HTML div. SVG output is embedded as markup; every other format is
    embedded as a base64 data URI inside an img tag.

    Keyword arguments:
        width, height - Size of the enclosing div (in units supported by
                        the div tag), 'auto' by default. For SVG they are
                        replaced by the pixel sizes stored in mpl_config.
        format        - Image format, mpl_config.get('format') by default.
    All remaining keyword arguments are forwarded to print_figure().

    Raises ValueError if the format is not listed in
    mpl_config.get('supported_formats').
    """
    # kwargs can be width or height (in units supported by div tag)
    width = kwargs.pop('width', 'auto')
    height = kwargs.pop('height', 'auto')
    fmt = kwargs.get('format', mpl_config.get('format'))

    # Check if format is supported
    supported_formats = mpl_config.get('supported_formats')
    if fmt not in supported_formats:
        raise ValueError("Unsupported format %s" % fmt)

    # Render in exactly the format that is advertised in the output below
    kwargs['format'] = fmt

    # For SVG the data string has to be unicode, not bytes
    buf = StringIO() if fmt == 'svg' else BytesIO()
    try:
        fig.canvas.print_figure(buf, **kwargs)

        if fmt == 'svg':
            img_str = buf.getvalue()

            # This is needed to ensure the SVG image is the correct size.
            # We should find a better way to do this...
            width = '{}px'.format(mpl_config.get('width'))
            height = '{}px'.format(mpl_config.get('height'))
        else:
            # b64encode returns bytes on Python 3; decode it so the tag can
            # be assembled as text on every Python version.
            encoded = base64.b64encode(buf.getvalue()).decode("ascii")
            img_tag = ("<img src='data:image/{fmt};base64,{data}' "
                       "style='width:{width};height:{height}'>")
            img_str = img_tag.format(fmt=fmt, data=encoded,
                                     width=width, height=height)
    finally:
        # Release the buffer even if rendering failed
        buf.close()

    # Print the image to the notebook paragraph via the %html magic
    html = "<div style='width:{width};height:{height}'>{img}</div>"
    print(html.format(width=width, height=height, img=img_str))
|
||||
|
||||
def displayhook():
    """
    Post-execution hook: show all open figures, but only when matplotlib
    is in interactive mode.
    """
    if not matplotlib.is_interactive():
        return
    show()
|
||||
|
||||
########################################################################
|
||||
#
|
||||
# Now just provide the standard names that backend.__init__ is expecting
|
||||
#
|
||||
########################################################################
|
||||
|
||||
# Create a reference to the show function we are using. This is what actually
|
||||
# gets called by matplotlib.pyplot.show().
|
||||
show = Show()
|
||||
|
||||
# Default FigureCanvas and FigureManager classes to use from the backend
|
||||
FigureCanvas = FigureCanvasZInline
|
||||
FigureManager = FigureManagerZInline
|
||||
92
lib/python/mpl_config.py
Normal file
92
lib/python/mpl_config.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This module provides utilities for users to configure the inline plotting
|
||||
# backend through a PyZeppelinContext instance (eg, through z.configure_mpl())
|
||||
|
||||
import matplotlib
|
||||
|
||||
def configure(**kwargs):
    """
    Generic configure function.
    Usage: configure(prop1='foo', prop2='bar', ...)
    Currently supported zeppelin-specific properties are:
        close - If true, close all figures once shown.
        width, height - Default width / height of the figure in pixels.
        fontsize - Font size.
        dpi - dpi of the figure.
        format - Figure format; must be one of supported_formats.
        supported_formats - List of figure formats that may be used.
        interactive - If true show all figures without explicit call to
                      show() via a post-execute hook.

    If the new settings cannot be applied (for example an unsupported
    format), the previous configuration is restored and the error is
    re-raised.
    """
    previous = dict(_config)
    _config.update(**kwargs)

    try:
        # Broadcast relevant changes to matplotlib RC
        _on_config_change()
    except Exception:
        # Do not keep a rejected setting around: it would make every later
        # configure() call fail as well. Roll back and re-sync matplotlib.
        _config.clear()
        _config.update(previous)
        _on_config_change()
        raise
|
||||
|
||||
|
||||
def get(key):
    """
    Return the value stored in the backend configuration under key.
    A key that is not configured raises KeyError.
    """
    return _config[key]
|
||||
|
||||
|
||||
def _on_config_change():
    """
    Push the current configuration into matplotlib: dpi, figure size,
    font size, default save format and interactive mode.
    Raises ValueError if the configured format is not a supported one.
    """
    rc = matplotlib.rcParams

    # dpi
    dots_per_inch = _config['dpi']
    rc['savefig.dpi'] = dots_per_inch
    rc['figure.dpi'] = dots_per_inch

    # Width and height are configured in pixels, matplotlib wants inches
    width_in = float(_config['width']) / dots_per_inch
    height_in = float(_config['height']) / dots_per_inch
    rc['figure.figsize'] = (width_in, height_in)

    # Font size
    rc['font.size'] = _config['fontsize']

    # Default Figure Format
    image_format = _config['format']
    if image_format not in _config['supported_formats']:
        raise ValueError("Unsupported format %s" %image_format)
    rc['savefig.format'] = image_format

    # Interactive mode
    matplotlib.interactive(_config['interactive'])
|
||||
|
||||
|
||||
def _init_config():
    """
    Fill the configuration with defaults taken from matplotlib's rcParams.

    Width and height are stored in pixels (figure size in inches times
    dpi). close defaults to True and the supported formats are png, jpg
    and svg.
    """
    rc = matplotlib.rcParams

    dpi = rc['savefig.dpi']
    if dpi == 'figure':
        # savefig.dpi may be the string 'figure', meaning "use the dpi of
        # the figure"; resolve it so the pixel sizes below can be computed.
        dpi = rc['figure.dpi']

    fmt = rc['savefig.format']
    width, height = rc['figure.figsize']
    fontsize = rc['font.size']

    _config['dpi'] = dpi
    _config['format'] = fmt
    _config['width'] = width*dpi
    _config['height'] = height*dpi
    _config['fontsize'] = fontsize
    _config['close'] = True
    _config['interactive'] = matplotlib.is_interactive()
    _config['supported_formats'] = ['png', 'jpg', 'svg']
|
||||
|
||||
|
||||
_config = {}
|
||||
_init_config()
|
||||
|
|
@ -32,6 +32,7 @@ import org.apache.zeppelin.interpreter.Interpreter;
|
|||
import org.apache.zeppelin.interpreter.InterpreterContext;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
|
||||
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
|
||||
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
|
||||
import org.apache.zeppelin.scheduler.Job;
|
||||
import org.apache.zeppelin.scheduler.Scheduler;
|
||||
|
|
@ -68,6 +69,9 @@ public class PythonInterpreter extends Interpreter {
|
|||
|
||||
@Override
|
||||
public void open() {
|
||||
// Add matplotlib display hook
|
||||
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
|
||||
|
||||
LOG.info("Starting Python interpreter ---->");
|
||||
LOG.info("Python path is set to:" + property.getProperty(ZEPPELIN_PYTHON));
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
# PYTHON 2 / 3 compatibility :
|
||||
# bootstrap.py must be runnable with Python 2 or 3
|
||||
|
||||
# Remove interactive mode displayhook
|
||||
import os
|
||||
import sys
|
||||
import signal
|
||||
import base64
|
||||
|
|
@ -117,6 +117,7 @@ class PyZeppelinContext(object):
|
|||
|
||||
def __init__(self):
|
||||
self.max_result = 1000
|
||||
self._displayhook = lambda *args: None
|
||||
|
||||
def input(self, name, defaultValue=""):
|
||||
print(self.errorMsg)
|
||||
|
|
@ -164,7 +165,7 @@ class PyZeppelinContext(object):
|
|||
#)
|
||||
body_buf.close(); header_buf.close()
|
||||
|
||||
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
|
||||
def show_matplotlib(self, p, fmt="png", width="auto", height="auto",
|
||||
**kwargs):
|
||||
"""Matplotlib show function
|
||||
"""
|
||||
|
|
@ -187,6 +188,37 @@ class PyZeppelinContext(object):
|
|||
html = "%html <div style='width:{width};height:{height}'>{img}<div>"
|
||||
print(html.format(width=width, height=height, img=img_str))
|
||||
img.close()
|
||||
|
||||
def configure_mpl(self, **kwargs):
    """Forward the given keyword settings to mpl_config.configure()."""
    # Imported lazily: the module is only importable once the backend
    # directory has been put on sys.path.
    import mpl_config
    mpl_config.configure(**kwargs)
|
||||
|
||||
def _setup_matplotlib(self):
    """
    Select the zeppelin inline matplotlib backend if it is available.

    Does nothing when matplotlib is not installed. Otherwise adds
    <cwd>/lib/python to sys.path, switches matplotlib to the
    backend_zinline module, installs its displayhook and applies the
    default inline configuration. Falls back to the Agg backend when the
    custom backend cannot be imported.
    """
    # If we don't have matplotlib installed don't bother continuing
    try:
        import matplotlib
    except ImportError:
        return

    # Make sure custom backends are available in the PYTHONPATH
    cwd = os.getcwd()
    mpl_path = os.path.join(cwd, 'lib', 'python')
    if mpl_path not in sys.path:
        sys.path.append(mpl_path)

    # Finally check if backend exists, and if so configure as appropriate
    try:
        matplotlib.use('module://backend_zinline')
        import backend_zinline

        # Everything looks good so make config assuming that we are using
        # an inline backend
        self._displayhook = backend_zinline.displayhook
        self.configure_mpl(width=600, height=400, dpi=72,
                           fontsize=10, interactive=True, format='png')
    except ImportError:
        # Fall back to Agg if no custom backend installed
        matplotlib.use('Agg')
|
||||
|
||||
|
||||
z = PyZeppelinContext()
|
||||
z._setup_matplotlib()
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ import org.apache.zeppelin.interpreter.Interpreter;
|
|||
import org.apache.zeppelin.interpreter.InterpreterContext;
|
||||
import org.apache.zeppelin.interpreter.InterpreterException;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult;
|
||||
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
|
||||
import org.apache.zeppelin.interpreter.LazyOpenInterpreter;
|
||||
import org.apache.zeppelin.interpreter.WrappedInterpreter;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
|
||||
|
|
@ -111,6 +112,8 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand
|
|||
|
||||
@Override
|
||||
public void open() {
|
||||
// Add matplotlib display hook
|
||||
registerHook(HookType.POST_EXEC_DEV, "z._displayhook()");
|
||||
DepInterpreter depInterpreter = getDepInterpreter();
|
||||
|
||||
// load libraries from Dependency Interpreter
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
import sys, getopt, traceback, json, re
|
||||
import os, sys, getopt, traceback, json, re
|
||||
|
||||
from py4j.java_gateway import java_import, JavaGateway, GatewayClient
|
||||
from py4j.protocol import Py4JJavaError
|
||||
|
|
@ -50,6 +50,7 @@ class Logger(object):
|
|||
class PyZeppelinContext(dict):
|
||||
def __init__(self, zc):
|
||||
self.z = zc
|
||||
self._displayhook = lambda *args: None
|
||||
|
||||
def show(self, obj):
|
||||
from pyspark.sql import DataFrame
|
||||
|
|
@ -116,6 +117,38 @@ class PyZeppelinContext(dict):
|
|||
return self.z.getHook(event)
|
||||
return self.z.getHook(event, replName)
|
||||
|
||||
def _setup_matplotlib(self):
    """
    Select the zeppelin inline matplotlib backend if it is available.

    Returns immediately when matplotlib is not installed. Otherwise makes
    <cwd>/lib/python importable, switches matplotlib to backend_zinline,
    installs its displayhook and applies the default inline configuration;
    the Agg backend is used when the custom backend cannot be imported.
    """
    # Nothing to set up without matplotlib
    try:
        import matplotlib
    except ImportError:
        return

    # The custom backend lives in lib/python below the working directory
    backend_dir = os.path.join(os.getcwd(), 'lib', 'python')
    if backend_dir not in sys.path:
        sys.path.append(backend_dir)

    try:
        matplotlib.use('module://backend_zinline')
        import backend_zinline

        # The inline backend is usable: hook it up and apply the defaults
        self._displayhook = backend_zinline.displayhook
        self.configure_mpl(width=600, height=400, dpi=72,
                           fontsize=10, interactive=True, format='png')
    except ImportError:
        # No custom backend installed, so render with Agg instead
        matplotlib.use('Agg')
|
||||
|
||||
def configure_mpl(self, **kwargs):
    """Forward the given keyword settings to mpl_config.configure()."""
    # Imported lazily: the module is only importable once the backend
    # directory has been put on sys.path.
    import mpl_config
    mpl_config.configure(**kwargs)
|
||||
|
||||
def __tupleToScalaTuple2(self, tuple):
|
||||
if (len(tuple) == 2):
|
||||
return gateway.jvm.scala.Tuple2(tuple[0], tuple[1])
|
||||
|
|
@ -244,6 +277,7 @@ sqlContext = sqlc
|
|||
|
||||
completion = PySparkCompletion(intp)
|
||||
z = PyZeppelinContext(intp.getZeppelinContext())
|
||||
z._setup_matplotlib()
|
||||
|
||||
while True :
|
||||
req = intp.getStatements()
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<!-- Enable access to all projects in the current multimodule build!
|
||||
<!-- Enable access to all projects in the current multimodule build!
|
||||
<useAllReactorProjects>true</useAllReactorProjects> -->
|
||||
<!-- Now, select which projects to include in this module-set. -->
|
||||
<includes>
|
||||
|
|
@ -62,6 +62,9 @@
|
|||
<directoryMode>0755</directoryMode>
|
||||
<fileMode>0755</fileMode>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>../lib</directory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>../licenses</directory>
|
||||
</fileSet>
|
||||
|
|
@ -92,4 +95,3 @@
|
|||
</fileSets>-->
|
||||
|
||||
</assembly>
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue