# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# PYTHON 2 / 3 compatibility :
# bootstrap.py must be runnable with Python 2 or 3
# Remove interactive mode displayhook
import sys
import signal
import base64
from io import BytesIO
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
def intHandler(signum, frame):
    """Signal handler: report the interruption and abort the running code.

    Prints a short notice and then raises KeyboardInterrupt. The two
    arguments are the standard signal-handler parameters and are not used.
    """
    print("Paragraph interrupted")
    raise KeyboardInterrupt
# Install intHandler as the SIGINT handler.
signal.signal(signal.SIGINT, intHandler)
# Set the prompt to an empty string so that the Java side doesn't need to remove the prompt.
sys.ps1=""
def help():
    """Print the Python interpreter help text.

    The text starts with the ``%html`` display directive and covers
    Python 2 & 3 compatibility, module usage, dynamic forms, matplotlib
    and Pandas DataFrame display, and SQL over Pandas DataFrames.

    Note that this module-level function shadows the built-in ``help``.
    """
    print("""%html
Python Interpreter help
Python 2 & 3 compatibility
The interpreter is compatible with Python 2 & 3.
To change Python version,
change in the interpreter configuration the python to the
desired version (example : python=/usr/bin/python3)
Python modules
The interpreter can use all modules already installed
(with pip, easy_install, etc)
Forms
You must install py4j in order to use
the form feature (pip install py4j)
Input form
print (z.input("f1","defaultValue"))
Selection form
print(z.select("f2", [("o1","1"), ("o2","2")],2))
Checkbox form
print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))
Matplotlib graph
The interpreter can display matplotlib graph with
the function z.show()
You need to already have matplotlib module installed
to use this functionality !
import matplotlib.pyplot as plt
plt.figure()
(.. ..)
z.show(plt)
plt.close()
z.show function can take optional parameters
to adapt graph dimensions (width and height) and format (png or svg)
example :
z.show(plt,width='50px')
z.show(plt,height='150px', fmt='svg')
Pandas DataFrame
You need to have Pandas module installed
to use this functionality (pip install pandas) !
The interpreter can visualize Pandas DataFrame
with the function z.show()
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
z.show(df)
SQL over Pandas DataFrame
You need to have Pandas&Pandasql modules installed
to use this functionality (pip install pandas pandasql) !
Python interpreter group includes %sql interpreter that can query
Pandas DataFrames using SQL and visualize results using Zeppelin Table Display System
%python
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
%python.sql
%sql
SELECT * from df LIMIT 5
""")
class PyZeppelinContext(object):
    """Fallback Zeppelin context used when py4j is not available.

    If py4j is detected, this class is overridden by the implementation
    in bootstrap_input.py. The form methods defined here only print a
    hint explaining how to enable dynamic forms; the ``show*`` methods
    print matplotlib figures and DataFrames using the ``%html`` and
    ``%table`` display directives.
    """

    errorMsg = ("You must install py4j Python module "
                "(pip install py4j) to use Zeppelin dynamic forms features")

    def __init__(self):
        # Maximum number of DataFrame rows printed by show_dataframe().
        self.max_result = 1000
        # True on Python 3, where base64 output is bytes and must be
        # decoded before it can be formatted into a str template.
        self.py3 = sys.version_info >= (3,)

    def input(self, name, defaultValue=""):
        """Form stub: print the py4j installation hint."""
        print(self.errorMsg)

    def select(self, name, options, defaultValue=""):
        """Form stub: print the py4j installation hint."""
        print(self.errorMsg)

    def checkbox(self, name, options, defaultChecked=None):
        """Form stub: print the py4j installation hint.

        ``defaultChecked`` defaults to None rather than a shared mutable
        list; the stub never reads it.
        """
        print(self.errorMsg)

    def show(self, p, **kwargs):
        """Display ``p`` according to what it is.

        * the ``matplotlib.pyplot`` module -> show_matplotlib()
        * an object whose type is named ``DataFrame`` -> show_dataframe()
        * any other callable -> called with no arguments

        Anything else is silently ignored. Extra keyword arguments are
        forwarded to the selected ``show_*`` method.
        """
        if hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot":
            self.show_matplotlib(p, **kwargs)
        elif type(p).__name__ == "DataFrame":
            # Name comparison does not play well with sub-classes, but
            # `isinstance(p, DataFrame)` would require importing pandas
            # and so add a hard dependency on it.
            self.show_dataframe(p, **kwargs)
        elif hasattr(p, '__call__'):
            p()  # error reporting

    def show_dataframe(self, df, **kwargs):
        """Pretty print a DataFrame using the Table Display System.

        Prints ``%table`` followed by a tab-separated header line and one
        tab-separated line per row. At most ``self.max_result`` rows are
        printed. A frame without columns yields an empty header instead of
        raising IndexError.
        """
        truncated = len(df) > self.max_result
        rows = df.head(self.max_result).values if truncated else df.values
        header = "\t".join(str(col) for col in df.columns)
        body = "".join(
            "\t".join(str(cell) for cell in row) + "\n" for row in rows)
        # TODO(bzz): show a red "results are limited" notice, as in Spark
        print("%table " + header + "\n" + body)

    def show_matplotlib(self, p, width="100%", height="100%",
                        fmt='png', **kwargs):
        """Render the current matplotlib figure as ``%html`` output.

        Args:
            p: object providing ``savefig(file, format=...)``, normally
                the ``matplotlib.pyplot`` module.
            width: CSS width of the wrapping element.
            height: CSS height of the wrapping element.
            fmt: ``'png'`` (embedded as a base64 data URI) or ``'svg'``
                (embedded inline).

        Raises:
            ValueError: if ``fmt`` is neither ``'png'`` nor ``'svg'``.
        """
        if fmt == 'png':
            img = BytesIO()
            html = ("%html <div style='width:{width};height:{height}'>"
                    "<img src='{img}' width='100%' height='100%'></div>")
        elif fmt == 'svg':
            img = StringIO()
            html = ("%html <div style='width:{width};height:{height}'>"
                    "{img}</div>")
        else:
            raise ValueError("fmt must be 'png' or 'svg'")

        # try/finally instead of `with`: the Python 2 StringIO class is
        # not a context manager.
        try:
            p.savefig(img, format=fmt)
            img_str = img.getvalue()
        finally:
            img.close()

        if fmt == 'png':
            # The raw bytes are encoded as-is; stripping whitespace from
            # binary image data could only corrupt it.
            img_str = b"data:image/png;base64," + base64.b64encode(img_str)
            # Need to do this for python3 compatibility
            if self.py3:
                img_str = img_str.decode('ascii')
        print(html.format(width=width, height=height, img=img_str))
# Module-level context instance; the help() examples call it as `z` (z.input, z.show, ...).
z = PyZeppelinContext()