zeppelin/python/src/main/resources/bootstrap.py
Alex Goodman 582981677c ZEPPELIN-1328 - z.show in python interpreter does not display PNG images in python 3
### What is this PR for?
Support for plotting PNG images via matplotlib inline for the python interpreter was recently added (#1329). However, these changes did not work for python3 since it handles strings differently. This PR aims to make the inline plotting compatible with both python 2 and 3.

### What type of PR is it?
Bug Fix

### What is the Jira issue?
* [ZEPPELIN-1328](https://issues.apache.org/jira/browse/ZEPPELIN-1328)

### How should this be tested?
In a python interpreter cell, make sure the following produces an image:
```python
%python
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(5)
plt.plot(x)
z.show(plt, fmt='png') # Repeat for fmt='svg'
```
This should be tested for both python2 and 3 interpreters (via the interpreter settings page).

### Questions:
* Do the license files need an update? No
* Are there breaking changes for older versions? No
* Does this need documentation? No

Author: Alex Goodman <agoodm@users.noreply.github.com>

Closes #1343 from agoodm/ZEPPELIN-1328 and squashes the following commits:

772313f [Alex Goodman] Redo io import structure to make z.show() work for both matplotlib plots and pandas dataframes in python2/3
6a8f3ab [Alex Goodman] Add python3 support for matplotlib inline plotting in python interpreter
2016-08-22 22:37:43 +09:00

194 lines
6.5 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# PYTHON 2 / 3 compatibility :
# bootstrap.py must be runnable with Python 2 or 3
# Remove interactive mode displayhook
import sys
import signal
import base64
from io import BytesIO
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
def intHandler(signum, frame):
    """Signal handler: announce the interruption, then abort the paragraph.

    Both arguments are the standard signal-handler parameters and are
    not used.  Always raises KeyboardInterrupt.
    """
    print("Paragraph interrupted")
    raise KeyboardInterrupt
# Route SIGINT through intHandler, which prints a notice and raises
# KeyboardInterrupt.
signal.signal(signal.SIGINT, intHandler)
# Set the prompt to an empty string so that the Java side does not need to
# remove the prompt.
sys.ps1=""
def help():
    """Print the Python interpreter help page as a Zeppelin ``%html`` block.

    The text is static.  Compared with the previous version, two broken
    code samples are repaired: a stray ``')`` after the checkbox example
    is removed, and the unterminated ``z.show(plt,width='50px`` example
    is closed.
    """
    print("""%html
<h2>Python Interpreter help</h2>
<h3>Python 2 & 3 compatibility</h3>
<p>The interpreter is compatible with Python 2 & 3.<br/>
To change Python version,
change in the interpreter configuration the python to the
desired version (example : python=/usr/bin/python3)</p>
<h3>Python modules</h3>
<p>The interpreter can use all modules already installed
(with pip, easy_install, etc)</p>
<h3>Forms</h3>
You must install py4j in order to use
the form feature (pip install py4j)
<h4>Input form</h4>
<pre>print (z.input("f1","defaultValue"))</pre>
<h4>Selection form</h4>
<pre>print(z.select("f2", [("o1","1"), ("o2","2")],2))</pre>
<h4>Checkbox form</h4>
<pre> print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))</pre>
<h3>Matplotlib graph</h3>
<div>The interpreter can display matplotlib graph with
the function z.show()</div>
<div> You need to already have matplotlib module installed
to use this functionality !</div><br/>
<pre>import matplotlib.pyplot as plt
plt.figure()
(.. ..)
z.show(plt)
plt.close()
</pre>
<div><br/> z.show function can take optional parameters
to adapt graph dimensions (width and height) and format (png or svg)</div>
<div><b>example </b>:
<pre>z.show(plt,width='50px')
z.show(plt,height='150px', fmt='svg') </pre></div>
<h3>Pandas DataFrame</h3>
<div> You need to have Pandas module installed
to use this functionality (pip install pandas) !</div><br/>
<div>The interpreter can visualize Pandas DataFrame
with the function z.show()
<pre>
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
z.show(df)
</pre></div>
<h3>SQL over Pandas DataFrame</h3>
<div> You need to have Pandas&Pandasql modules installed
to use this functionality (pip install pandas pandasql) !</div><br/>
<div>Python interpreter group includes %sql interpreter that can query
Pandas DataFrames using SQL and visualize results using Zeppelin Table Display System
<pre>
%python
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
</pre>
<br />
<pre>
%python.sql
%sql
SELECT * from df LIMIT 5
</pre>
</div>
""")
class PyZeppelinContext(object):
    """Zeppelin context for the plain Python interpreter.

    If py4j is detected, this class is overridden by the implementation
    in bootstrap_input.py.  In this version the dynamic-form methods only
    print an installation hint; ``show`` renders matplotlib figures and
    pandas DataFrames.
    """
    # Message printed by every dynamic-form method of this class.
    errorMsg = "You must install py4j Python module " \
               "(pip install py4j) to use Zeppelin dynamic forms features"

    def __init__(self):
        # Maximum number of DataFrame rows printed by show_dataframe.
        self.max_result = 1000
        # True when running under Python 3 (base64 output must be decoded).
        self.py3 = bool(sys.version_info >= (3,))

    def input(self, name, defaultValue=""):
        """Print the py4j installation hint; arguments are ignored."""
        print(self.errorMsg)

    def select(self, name, options, defaultValue=""):
        """Print the py4j installation hint; arguments are ignored."""
        print(self.errorMsg)

    # The list default is kept for interface compatibility; it is never
    # mutated here.
    def checkbox(self, name, options, defaultChecked=[]):
        """Print the py4j installation hint; arguments are ignored."""
        print(self.errorMsg)

    def show(self, p, **kwargs):
        """Display ``p`` according to its kind.

        * the ``matplotlib.pyplot`` module -> ``show_matplotlib``
        * an object whose type is named ``DataFrame`` -> ``show_dataframe``
        * any other callable -> it is called with no arguments

        Anything else is silently ignored.  Keyword arguments are passed
        on to the selected show function.
        """
        if getattr(p, '__name__', None) == "matplotlib.pyplot":
            self.show_matplotlib(p, **kwargs)
        elif type(p).__name__ == "DataFrame":  # does not play well with sub-classes
            # `isinstance(p, DataFrame)` would require importing pandas
            # and so add a dependency on it
            self.show_dataframe(p, **kwargs)
        elif hasattr(p, '__call__'):
            p()  # error reporting

    def show_dataframe(self, df, **kwargs):
        """Pretty print a DataFrame using the Zeppelin Table Display System.

        Prints ``%table`` followed by a tab-separated header line and one
        tab-separated line per row.  At most ``self.max_result`` rows are
        printed.  A frame without columns or rows is handled without
        raising.
        """
        limit = len(df) > self.max_result
        rows = df.head(self.max_result).values if limit else df.values

        # str.join copes with zero columns, where indexing [0] would raise.
        lines = ["\t".join(str(col) for col in df.columns)]
        lines.extend("\t".join(str(cell) for cell in row) for row in rows)

        # TODO(bzz): fix it, so it shows red notice, as in Spark
        # ("\n<font color=red>Results are limited by {}.</font>"
        #  .format(self.max_result) if limit else "")
        print("%table " + "\n".join(lines) + "\n")

    def show_matplotlib(self, p, width="100%", height="100%",
                        fmt='png', **kwargs):
        """Render the current matplotlib figure inline as ``%html`` output.

        ``p`` is the ``matplotlib.pyplot`` module (anything providing
        ``savefig(buffer, format=...)``).  ``width`` and ``height`` are
        inserted verbatim into the generated markup.  ``fmt`` selects a
        base64-embedded ``png`` image or inline ``svg`` markup.

        Raises ValueError if ``fmt`` is neither 'png' nor 'svg'.
        """
        if fmt == 'png':
            img = BytesIO()
            html = "%html <img src='{img}' width='{width}' height='{height}'>"
        elif fmt == 'svg':
            img = StringIO()
            html = "%html <div style='width:{width};height:{height}'>{img}</div>"
        else:
            raise ValueError("fmt must be 'png' or 'svg'")

        # try/finally rather than `with`: the Python 2 StringIO.StringIO
        # class is not a context manager.
        try:
            p.savefig(img, format=fmt)
            if fmt == 'png':
                # Encode the raw bytes untouched; stripping binary data
                # could drop real image bytes.
                img_str = b"data:image/png;base64," + \
                    base64.b64encode(img.getvalue())
                # Need to do this for python3 compatibility
                if self.py3:
                    img_str = img_str.decode('ascii')
            else:
                img_str = img.getvalue()
        finally:
            img.close()
        print(html.format(width=width, height=height, img=img_str))
# Module-level context instance; the help text refers to it as `z`
# (z.show, z.input, z.select, z.checkbox).
z = PyZeppelinContext()