# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# PYTHON 2 / 3 compatibility :
# bootstrap.py must be runnable with Python 2 or 3
# (so: no f-strings, no annotations, no keyword-only arguments below)

# Remove interactive mode displayhook
import sys
import signal
import base64

from io import BytesIO
try:
    # Python 2: the byte-oriented StringIO module.
    from StringIO import StringIO
except ImportError:
    # Python 3: text buffer from io.
    from io import StringIO


def intHandler(signum, frame):
    """SIGINT handler: announce the interruption and raise KeyboardInterrupt.

    The arguments are the standard signal-handler arguments (signal number
    and current stack frame); neither is used.
    """
    print ("Paragraph interrupted")
    raise KeyboardInterrupt()


# Set the signal handler
signal.signal(signal.SIGINT, intHandler)
# set prompt as empty string so that java side don't need to remove the prompt.
sys.ps1 = ""


def help():
    """Print the interpreter usage guide as a ``%html`` paragraph.

    The text is emitted behind the ``%html`` display directive, so it carries
    explicit markup: headings for the sections and ``<pre>`` for the code
    samples (without markup the line breaks would collapse when rendered).
    Code samples start at column 0 so they can be pasted into a paragraph
    as they are.
    """
    print("""%html
<h2>Python Interpreter help</h2>

<h3>Python 2 & 3 compatibility</h3>
<p>The interpreter is compatible with Python 2 & 3.<br/>
To change Python version, change in the interpreter configuration the python to the desired version (example : python=/usr/bin/python3)</p>

<h3>Python modules</h3>
<p>The interpreter can use all modules already installed (with pip, easy_install, etc)</p>

<h3>Forms</h3>
<p>You must install py4j in order to use the form feature (pip install py4j)</p>

<h4>Input form</h4>
<pre>print (z.input("f1","defaultValue"))</pre>

<h4>Selection form</h4>
<pre>print(z.select("f2", [("o1","1"), ("o2","2")],2))</pre>

<h4>Checkbox form</h4>
<pre>print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))</pre>

<h3>Matplotlib graph</h3>
<p>The interpreter can display matplotlib graph with the function z.show()<br/>
You need to already have matplotlib module installed to use this functionality !</p>
<pre>import matplotlib.pyplot as plt
plt.figure()
(.. ..)
z.show(plt)
plt.close()
</pre>
<p>z.show function can take optional parameters to adapt graph dimensions (width and height) and format (png or svg)<br/>
example :</p>
<pre>z.show(plt,width='50px')
z.show(plt,height='150px', fmt='svg')</pre>

<h3>Pandas DataFrame</h3>
<p>You need to have Pandas module installed to use this functionality (pip install pandas) !</p>
<p>The interpreter can visualize Pandas DataFrame with the function z.show()</p>
<pre>import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
z.show(df)
</pre>

<h3>SQL over Pandas DataFrame</h3>
<p>You need to have Pandas&Pandasql modules installed to use this functionality (pip install pandas pandasql) !</p>
<p>Python interpreter group includes %sql interpreter that can query Pandas DataFrames using SQL and visualize results using Zeppelin Table Display System</p>
<pre>%python
import pandas as pd
df = pd.read_csv("bank.csv", sep=";")
</pre>
<pre>%python.sql
%sql
SELECT * from df LIMIT 5
</pre>
""")


class PyZeppelinContext(object):
    """Fallback Zeppelin context used when py4j is not available.

    If py4j is detected, this class will be overridden with the
    implementation in bootstrap_input.py.  In this fallback the dynamic
    form methods only print ``errorMsg``; ``show`` still works for
    matplotlib and pandas objects.
    """

    errorMsg = "You must install py4j Python module " \
               "(pip install py4j) to use Zeppelin dynamic forms features"

    def __init__(self):
        # Maximum number of DataFrame rows printed by show_dataframe().
        self.max_result = 1000
        # True when running under Python 3 (base64 output must be decoded).
        self.py3 = bool(sys.version_info >= (3,))

    def input(self, name, defaultValue=""):
        """Dynamic input form; unavailable without py4j, prints errorMsg."""
        print(self.errorMsg)

    def select(self, name, options, defaultValue=""):
        """Dynamic select form; unavailable without py4j, prints errorMsg."""
        print(self.errorMsg)

    def checkbox(self, name, options, defaultChecked=None):
        """Dynamic checkbox form; unavailable without py4j, prints errorMsg.

        ``defaultChecked`` defaults to None rather than a shared mutable
        list; the argument is not used by this fallback.
        """
        print(self.errorMsg)

    def show(self, p, **kwargs):
        """Display ``p`` according to what it is.

        * the ``matplotlib.pyplot`` module -> show_matplotlib()
        * an object whose type is named ``DataFrame`` -> show_dataframe()
        * any other callable -> it is simply called
        Anything else is silently ignored.  Extra keyword arguments are
        forwarded to the matplotlib / DataFrame helpers.
        """
        if hasattr(p, '__name__') and p.__name__ == "matplotlib.pyplot":
            self.show_matplotlib(p, **kwargs)
        elif type(p).__name__ == "DataFrame":  # does not play well with sub-classes
            # `isinstance(p, DataFrame)` would req `import pandas.core.frame.DataFrame`
            # and so a dependency on pandas
            self.show_dataframe(p, **kwargs)
        elif hasattr(p, '__call__'):
            p()  # error reporting

    def show_dataframe(self, df, **kwargs):
        """Pretty prints DF using Table Display System.

        Prints ``%table`` followed by a tab-separated header line and one
        tab-separated line per row.  At most ``self.max_result`` rows are
        printed.  A frame without columns yields an empty header line
        instead of raising IndexError.
        """
        truncated = len(df) > self.max_result
        rows = df.head(self.max_result).values if truncated else df.values

        header = "\t".join(str(col) for col in df.columns)
        body = "".join(
            "\t".join(str(cell) for cell in row) + "\n" for row in rows)

        # TODO(bzz): fix it, so it shows red notice, as in Spark, i.e. append
        # "Results are limited by <max_result>." when the output is truncated.
        print("%table " + header + "\n" + body)

    def show_matplotlib(self, p, width="100%", height="100%",
                        fmt='png', **kwargs):
        """Matplotlib show function.

        Saves the current figure of ``p`` (object exposing ``savefig``) and
        prints it as a ``%html`` paragraph inside a container sized by
        ``width`` / ``height`` (CSS lengths).

        ``fmt`` selects the output: 'png' embeds a base64 data URI in an
        <img> tag, 'svg' inlines the SVG markup.  Any other value raises
        ValueError.  Extra keyword arguments are accepted and ignored.
        """
        if fmt not in ('png', 'svg'):
            raise ValueError("fmt must be 'png' or 'svg'")

        img = BytesIO() if fmt == 'png' else StringIO()
        try:
            p.savefig(img, format=fmt)
            if fmt == 'png':
                # src is quoted: base64 padding contains '=' characters.
                html = ("%html <div style='width:{width};height:{height}'>"
                        "<img src='{img}' width='100%' height='100%'></div>")
                img_str = b"data:image/png;base64,"
                img_str += base64.b64encode(img.getvalue())
                # Need to do this for python3 compatibility
                if self.py3:
                    img_str = img_str.decode('ascii')
            else:
                html = ("%html <div style='width:{width};height:{height}'>"
                        "{img}</div>")
                img_str = img.getvalue()
            print(html.format(width=width, height=height, img=img_str))
        finally:
            # Release the buffer even if savefig() or print() fails.
            img.close()


z = PyZeppelinContext()