mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
Add draft implementation of %python.sql for DataFrames
This commit is contained in:
parent
bd714c2b96
commit
11ba4902f6
4 changed files with 245 additions and 1 deletions
|
|
@ -35,7 +35,10 @@
|
|||
|
||||
<properties>
|
||||
<py4j.version>0.9.2</py4j.version>
|
||||
<python.test.exclude>**/PythonInterpreterWithPythonInstalledTest.java</python.test.exclude>
|
||||
<python.test.exclude>
|
||||
**/PythonInterpreterWithPythonInstalledTest.java,
|
||||
**/PythonPandasSqlInterpreterTest.java
|
||||
</python.test.exclude>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.zeppelin.python;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.zeppelin.interpreter.Interpreter;
|
||||
import org.apache.zeppelin.interpreter.InterpreterContext;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* SQL over Pandas DataFrame interpreter for %python group
|
||||
*
|
||||
* Match experience of %sparpk.sql over Spark DataFrame
|
||||
*/
|
||||
public class PythonPandasSqlInterpreter extends Interpreter {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(PythonPandasSqlInterpreter.class);
|
||||
|
||||
private String SQL_BOOTSTRAP_FILE_PY = "/bootstrap_sql.py";
|
||||
|
||||
public PythonPandasSqlInterpreter(Properties property) {
|
||||
super(property);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() {
|
||||
LOG.info("Open Python SQL interpreter instance: {}", this.toString());
|
||||
|
||||
//TODO(bzz): check by importing and catching ImportError
|
||||
//if (pandasAndNumpyAndPandasqlAreInstalled) {
|
||||
try {
|
||||
LOG.info("Bootstrap {} interpreter with {}", this.toString(), SQL_BOOTSTRAP_FILE_PY);
|
||||
PythonInterpreter python = (PythonInterpreter) this.getInterpreterInTheSameSessionByClassName(
|
||||
PythonInterpreter.class.getName());
|
||||
python.bootStrapInterpreter(SQL_BOOTSTRAP_FILE_PY);
|
||||
} catch (IOException e) {
|
||||
LOG.error("Can't execute " + SQL_BOOTSTRAP_FILE_PY + " to import SQL dependencies", e);
|
||||
}
|
||||
//}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
LOG.info("Close Python SQL interpreter instance: {}", this.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public InterpreterResult interpret(String st, InterpreterContext context) {
|
||||
LOG.info("Running SQL query: '{}' over Pandas DataFrame", st);
|
||||
Interpreter python = this.getInterpreterInTheSameSessionByClassName(
|
||||
PythonInterpreter.class.getName());
|
||||
return python.interpret("print pysqldf('" + st + "')", context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cancel(InterpreterContext context) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public FormType getFormType() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getProgress(InterpreterContext context) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
27
python/src/main/resources/bootstrap_sql.py
Normal file
27
python/src/main/resources/bootstrap_sql.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is for org.apache.zeppelin.python.PythonPandasSqlInterpreterTest
|
||||
# It requires next dependencies to be installed:
|
||||
# - numpy
|
||||
# - pandas
|
||||
# - pandasql
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandasql import sqldf
|
||||
|
||||
pysqldf = lambda q: sqldf(q, globals())
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.zeppelin.python;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.zeppelin.display.AngularObjectRegistry;
|
||||
import org.apache.zeppelin.display.GUI;
|
||||
import org.apache.zeppelin.interpreter.InterpreterContext;
|
||||
import org.apache.zeppelin.interpreter.InterpreterContextRunner;
|
||||
import org.apache.zeppelin.interpreter.InterpreterGroup;
|
||||
import org.apache.zeppelin.interpreter.InterpreterOutput;
|
||||
import org.apache.zeppelin.interpreter.InterpreterOutputListener;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult;
|
||||
import org.apache.zeppelin.interpreter.InterpreterResult.Type;
|
||||
import org.apache.zeppelin.user.AuthenticationInfo;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* In order for this test to work, test env must have installed:
|
||||
* <ol>
|
||||
* - <li>Python</li>
|
||||
* - <li>NumPy</li>
|
||||
* - <li>Pandas DataFrame</li>
|
||||
* <ol>
|
||||
*
|
||||
* To run manually on such environment, use:
|
||||
* <code>
|
||||
* mvn "-Dtest=org.apache.zeppelin.python.PythonPandasSqlInterpreterTest" test -pl python
|
||||
* </code>
|
||||
*/
|
||||
public class PythonPandasSqlInterpreterTest {
|
||||
|
||||
private InterpreterGroup intpGroup;
|
||||
private PythonPandasSqlInterpreter sql;
|
||||
private PythonInterpreter python;
|
||||
|
||||
private InterpreterContext context;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
Properties p = new Properties();
|
||||
p.setProperty("zeppelin.python", "python");
|
||||
p.setProperty("zeppelin.python.maxResult", "100");
|
||||
|
||||
intpGroup = new InterpreterGroup();
|
||||
|
||||
python = new PythonInterpreter(p);
|
||||
python.setInterpreterGroup(intpGroup);
|
||||
python.open();
|
||||
|
||||
sql = new PythonPandasSqlInterpreter(p);
|
||||
sql.setInterpreterGroup(intpGroup);
|
||||
|
||||
intpGroup.put("note", Arrays.asList(python, sql));
|
||||
|
||||
context = new InterpreterContext("note", "id", "title", "text", new AuthenticationInfo(),
|
||||
new HashMap<String, Object>(), new GUI(),
|
||||
new AngularObjectRegistry(intpGroup.getId(), null), null,
|
||||
new LinkedList<InterpreterContextRunner>(), new InterpreterOutput(
|
||||
new InterpreterOutputListener() {
|
||||
@Override public void onAppend(InterpreterOutput out, byte[] line) {}
|
||||
@Override public void onUpdate(InterpreterOutput out, byte[] output) {}
|
||||
}));
|
||||
|
||||
//important to be last step
|
||||
sql.open();
|
||||
//it depends on python interpreter presence in the same group
|
||||
}
|
||||
|
||||
//@Test
|
||||
public void sqlOverTestDataPrintsTable() {
|
||||
//given
|
||||
// `import pandas as pd` and `import numpy as np` done
|
||||
// DataFrame \w test data
|
||||
python.interpret("df2 = pd.DataFrame({ 'age' : np.array([33, 51, 51, 34]), "+
|
||||
"'name' : pd.Categorical(['moon','jobs','gates','park'])})", context);
|
||||
|
||||
|
||||
//when
|
||||
InterpreterResult ret = sql.interpret("select name, age from test where age < 40", context);
|
||||
assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
|
||||
assertEquals(Type.TABLE, ret.type());
|
||||
assertEquals("name\tage\nmoon\t33\npark\t34\n", ret.message());
|
||||
|
||||
assertEquals(InterpreterResult.Code.SUCCESS, sql.interpret("select case when name==\"aa\" then name else name end from test", context).code());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void badSqlSyntaxFails() {
|
||||
//when
|
||||
InterpreterResult ret = sql.interpret("select wrong syntax", context);
|
||||
|
||||
//then
|
||||
assertNotNull("Interpreter returned 'null'", ret);
|
||||
//System.out.println("\nInterpreter response: \n" + ret.message());
|
||||
assertEquals(InterpreterResult.Code.ERROR, ret.code());
|
||||
assertTrue(ret.message().length() > 0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in a new issue