mirror of
https://github.com/apache/zeppelin
synced 2026-05-24 09:38:26 +00:00
ZEPPELIN-501: refactoring + handling index Create\Delete
This commit is contained in:
parent
b13d5fbc4d
commit
825b266daa
5 changed files with 157 additions and 105 deletions
|
|
@ -401,9 +401,9 @@ public class NotebookRestApi {
|
|||
*/
|
||||
@GET
|
||||
@Path("search")
|
||||
public Response search(@QueryParam("q") String query) {
|
||||
LOG.info("Searching notebooks for {}", query);
|
||||
List<Map<String, String>> notebooksFound = notebookIndex.search(query);
|
||||
public Response search(@QueryParam("q") String queryTerm) {
|
||||
LOG.info("Searching notebooks for {}", queryTerm);
|
||||
List<Map<String, String>> notebooksFound = notebookIndex.query(queryTerm);
|
||||
LOG.info("Notbooks {} found", notebooksFound.size());
|
||||
return new JsonResponse<>(Status.OK, notebooksFound).build();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -373,8 +373,6 @@ public class Note implements Serializable, JobListener {
|
|||
}
|
||||
|
||||
public void persist() throws IOException {
|
||||
//TODO(bzz): update index
|
||||
//notebookIndex.
|
||||
snapshotAngularObjectRegistry();
|
||||
repo.save(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ public class Notebook {
|
|||
if (this.notebookIndex != null) {
|
||||
long start = System.nanoTime();
|
||||
logger.info("Notebook indexing started...");
|
||||
notebookIndex.index(notes.values());
|
||||
notebookIndex.addIndexDocs(notes.values());
|
||||
// System.nanoTime() only grows, so the elapsed time is (now - start);
// subtracting the other way round reports a negative duration.
logger.info("Notebook indexing finished: {} indexed in {}s", notes.size(),
    TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start));
|
||||
}
|
||||
|
|
@ -115,11 +115,14 @@ public class Notebook {
|
|||
* @throws IOException
|
||||
*/
|
||||
public Note createNote() throws IOException {
|
||||
Note note;
|
||||
if (conf.getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_AUTO_INTERPRETER_BINDING)) {
|
||||
return createNote(replFactory.getDefaultInterpreterSettingList());
|
||||
note = createNote(replFactory.getDefaultInterpreterSettingList());
|
||||
} else {
|
||||
return createNote(null);
|
||||
note = createNote(null);
|
||||
}
|
||||
notebookIndex.addIndexDoc(note);
|
||||
return note;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -139,6 +142,7 @@ public class Notebook {
|
|||
bindInterpretersToNote(note.id(), interpreterIds);
|
||||
}
|
||||
|
||||
notebookIndex.addIndexDoc(note);
|
||||
note.persist();
|
||||
return note;
|
||||
}
|
||||
|
|
@ -169,6 +173,8 @@ public class Notebook {
|
|||
for (Paragraph p : paragraphs) {
|
||||
newNote.addCloneParagraph(p);
|
||||
}
|
||||
|
||||
notebookIndex.addIndexDoc(newNote);
|
||||
newNote.persist();
|
||||
return newNote;
|
||||
}
|
||||
|
|
@ -208,9 +214,11 @@ public class Notebook {
|
|||
|
||||
public void removeNote(String id) {
|
||||
Note note;
|
||||
|
||||
synchronized (notes) {
|
||||
note = notes.remove(id);
|
||||
}
|
||||
notebookIndex.deleteIndexDocs(note);
|
||||
|
||||
// remove from all interpreter instance's angular object registry
|
||||
for (InterpreterSetting settings : replFactory.get()) {
|
||||
|
|
@ -369,12 +377,9 @@ public class Notebook {
|
|||
}
|
||||
synchronized (notes) {
|
||||
List<Note> noteList = new ArrayList<Note>(notes.values());
|
||||
Collections.sort(noteList, new Comparator() {
|
||||
Collections.sort(noteList, new Comparator<Note>() {
|
||||
@Override
|
||||
public int compare(Object one, Object two) {
|
||||
Note note1 = (Note) one;
|
||||
Note note2 = (Note) two;
|
||||
|
||||
public int compare(Note note1, Note note2) {
|
||||
String name1 = note1.id();
|
||||
if (note1.getName() != null) {
|
||||
name1 = note1.getName();
|
||||
|
|
@ -383,7 +388,6 @@ public class Notebook {
|
|||
if (note2.getName() != null) {
|
||||
name2 = note2.getName();
|
||||
}
|
||||
((Note) one).getName();
|
||||
return name1.compareTo(name2);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
|
|
@ -55,11 +56,12 @@ import org.apache.zeppelin.notebook.Paragraph;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Service for search (indexing and query) the notebooks
|
||||
* Service for searching the notebooks (covers both indexing and querying)
|
||||
*
|
||||
* TODO(bzz): document thread-safety
|
||||
*/
|
||||
|
|
@ -91,7 +93,7 @@ public class SearchService {
|
|||
* @param queryStr a query
|
||||
* @return A list of matching paragraphs (id, text, snippet w/ highlight)
|
||||
*/
|
||||
public List<Map<String, String>> search(String queryStr) {
|
||||
public List<Map<String, String>> query(String queryStr) {
|
||||
if (null == ramDirectory) {
|
||||
throw new IllegalStateException(
|
||||
"Something went wrong on instance creation time, index dir is null");
|
||||
|
|
@ -163,18 +165,38 @@ public class SearchService {
|
|||
return matchingParagraphs;
|
||||
}
|
||||
|
||||
public void updateIndexDoc(Note note) throws IOException {
|
||||
updateDoc(note.getId(), note.getName(), null);
|
||||
}
|
||||
|
||||
void updateIndexDoc(Note note, Paragraph p) throws IOException {
|
||||
updateDoc(note.getId(), note.getName(), p);
|
||||
}
|
||||
|
||||
private void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
|
||||
String id = formatId(noteId, p);
|
||||
Document doc = newDocument(id, noteName, p);
|
||||
try {
|
||||
writer.updateDocument(new Term(ID_FIELD, id), doc);
|
||||
writer.commit();
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to updaet index of notebook {}", noteId, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Indexes full collection of notes: all the paragraphs + Note names
|
||||
*
|
||||
* @param collection of Notes
|
||||
*/
|
||||
public void index(Collection<Note> collection) {
|
||||
public void addIndexDocs(Collection<Note> collection) {
|
||||
int docsIndexed = 0;
|
||||
long start = System.nanoTime();
|
||||
try {
|
||||
indexDocs(writer, collection);
|
||||
long end = System.nanoTime();
|
||||
LOG.info("Indexing {} notebooks took {}ms",
|
||||
collection.size(), TimeUnit.NANOSECONDS.toMillis(end - start));
|
||||
for (Note note : collection) {
|
||||
addIndexDoc(note);
|
||||
docsIndexed++;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to index all Notebooks", e);
|
||||
} finally {
|
||||
|
|
@ -183,19 +205,42 @@ public class SearchService {
|
|||
} catch (IOException e) {
|
||||
LOG.error("Failed to save index", e);
|
||||
}
|
||||
long end = System.nanoTime();
|
||||
LOG.info("Indexing {} notebooks took {}ms",
|
||||
docsIndexed, TimeUnit.NANOSECONDS.toMillis(end - start));
|
||||
}
|
||||
}
|
||||
|
||||
public void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
|
||||
Document doc = newDocument(noteId, noteName, p);
|
||||
try {
|
||||
writer.updateDocument(new Term(ID_FIELD, formatId(noteId, p.getId())), doc);
|
||||
writer.commit();
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to index all Notebooks", e);
|
||||
/**
|
||||
* Indexes the given notebook
|
||||
*
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public void addIndexDoc(Note note) throws IOException {
|
||||
indexNoteName(writer, note.getId(), note.getName());
|
||||
for (Paragraph doc : note.getParagraphs()) {
|
||||
if (doc.getText() == null) {
|
||||
LOG.debug("Skipping empty paragraph");
|
||||
continue;
|
||||
}
|
||||
indexDoc(writer, note.getId(), note.getName(), doc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes all docs no given Note from index
|
||||
*/
|
||||
public void deleteIndexDocs(Note note) {
|
||||
LOG.debug("Deleting note {}, out of: {}", note.getId(), writer.numDocs());
|
||||
try {
|
||||
writer.deleteDocuments(new WildcardQuery(new Term(ID_FIELD, note.getId() + "*")));
|
||||
writer.commit();
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to delete a notebook {} from index", note, e);
|
||||
}
|
||||
LOG.debug("Done, index contains {} docs now" + writer.numDocs());
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees the resources used by the Lucene index
|
||||
*/
|
||||
|
|
@ -207,31 +252,6 @@ public class SearchService {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Indexes the given list of notebooks
|
||||
*
|
||||
* @param writer
|
||||
* Writer to the index where the given file/dir info will be stored
|
||||
* @param path
|
||||
* The file to index, or the directory to recurse into to find files
|
||||
* to index
|
||||
* @throws IOException
|
||||
* If there is a low-level I/O error
|
||||
*/
|
||||
void indexDocs(final IndexWriter writer, Collection<Note> notes) throws IOException {
|
||||
for (Note note : notes) {
|
||||
indexNoteName(writer, note.getId(), note.getName());
|
||||
for (Paragraph doc : note.getParagraphs()) {
|
||||
if (doc.getText() == null) {
|
||||
LOG.debug("Skipping empty paragraph");
|
||||
continue;
|
||||
}
|
||||
indexParagraph(writer, note.getId(), note.getName(), doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Indexes a notebook name
|
||||
* @throws IOException
|
||||
|
|
@ -242,58 +262,58 @@ public class SearchService {
|
|||
LOG.debug("Skipping empty notebook name");
|
||||
return;
|
||||
}
|
||||
Document doc = newDocument(noteId, noteName);
|
||||
w.addDocument(doc);
|
||||
indexDoc(w, noteId, noteName, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Indexes a single paragraph = document
|
||||
* Indexes a single document
|
||||
* - code of the paragraph (if non-null)
|
||||
* - or just note name
|
||||
*/
|
||||
void indexParagraph(IndexWriter w, String noteId, String noteName, Paragraph p)
|
||||
private void indexDoc(IndexWriter w, String noteId, String noteName, Paragraph p)
|
||||
throws IOException {
|
||||
Document doc = newDocument(noteId, noteName, p);
|
||||
String id = formatId(noteId, p);
|
||||
Document doc = newDocument(id, noteName, p);
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
private Document newDocument(String noteId, String noteName, Paragraph p) {
|
||||
Document doc = new Document();
|
||||
|
||||
String id = formatId(noteId, p.getId());
|
||||
Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
|
||||
doc.add(pathField);
|
||||
|
||||
doc.add(new StringField("title", noteName, Field.Store.YES));
|
||||
|
||||
Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
|
||||
doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
|
||||
doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
|
||||
return doc;
|
||||
}
|
||||
|
||||
//TODO(bzz): refactor and re-use code from above
|
||||
private Document newDocument(String noteId, String noteName) {
|
||||
Document doc = new Document();
|
||||
|
||||
Field pathField = new StringField(ID_FIELD, noteId, Field.Store.YES);
|
||||
doc.add(pathField);
|
||||
|
||||
doc.add(new StringField("title", noteName, Field.Store.YES));
|
||||
|
||||
//doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
|
||||
doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* ID looks like '<note-id>/paragraph/<paragraph-id>'
|
||||
*
|
||||
* @param noteId If of the Note
|
||||
* @param paragraphId Id of the paragraph
|
||||
* If paragraph is not null, indexes code in the paragraph,
|
||||
* otherwise indexes the notebook name.
|
||||
*
|
||||
* @param id id of the document, different for Note name and paragraph
|
||||
* @param noteName name of the note
|
||||
* @param p paragraph
|
||||
* @return
|
||||
*/
|
||||
private String formatId(String noteId, String paragraphId) {
|
||||
return String.format("%s/paragraph/%s", noteId, paragraphId);
|
||||
private Document newDocument(String id, String noteName, Paragraph p) {
|
||||
Document doc = new Document();
|
||||
|
||||
Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
|
||||
doc.add(pathField);
|
||||
doc.add(new StringField("title", noteName, Field.Store.YES));
|
||||
|
||||
if (null != p) {
|
||||
doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
|
||||
Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
|
||||
doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
|
||||
} else {
|
||||
doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* If paragraph is not null, id is <noteId>/paragraphs/<paragraphId>,
|
||||
* otherwise it's just <noteId>.
|
||||
*/
|
||||
static String formatId(String noteId, Paragraph p) {
|
||||
String id = noteId;
|
||||
if (null != p) {
|
||||
id = Joiner.on('/').join(id, "paragraphs", p.getId());
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.zeppelin.search;
|
||||
|
||||
import static com.google.common.truth.Truth.assertThat;
|
||||
import static org.apache.zeppelin.search.SearchService.formatId;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -25,6 +26,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.zeppelin.notebook.Note;
|
||||
import org.apache.zeppelin.notebook.Paragraph;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
|
|
@ -37,6 +39,11 @@ public class SearchServiceTest {
|
|||
notebookIndex = new SearchService();
|
||||
}
|
||||
|
||||
@After
|
||||
public void shutDown() {
|
||||
notebookIndex.close();
|
||||
}
|
||||
|
||||
@Test public void canIndexNotebook() {
|
||||
//give
|
||||
Note note1 = newNoteWithParapgraph("Notebook1", "test");
|
||||
|
|
@ -44,33 +51,33 @@ public class SearchServiceTest {
|
|||
List<Note> notebook = Arrays.asList(note1, note2);
|
||||
|
||||
//when
|
||||
notebookIndex.index(notebook);
|
||||
notebookIndex.addIndexDocs(notebook);
|
||||
}
|
||||
|
||||
@Test public void canIndexAndQuery() {
|
||||
//given
|
||||
Note note1 = newNoteWithParapgraph("Notebook1", "test");
|
||||
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
|
||||
notebookIndex.index(Arrays.asList(note1, note2));
|
||||
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
|
||||
|
||||
//when
|
||||
List<Map<String, String>> results = notebookIndex.search("all");
|
||||
List<Map<String, String>> results = notebookIndex.query("all");
|
||||
|
||||
//then
|
||||
assertThat(results).isNotEmpty();
|
||||
assertThat(results.size()).isEqualTo(1);
|
||||
assertThat(results.get(0)).containsEntry("id",
|
||||
String.format("%s/paragraph/%s", note2.getId(), note2.getLastParagraph().getId()));
|
||||
assertThat(results.get(0))
|
||||
.containsEntry("id", formatId(note2.getId(), note2.getLastParagraph()));
|
||||
}
|
||||
|
||||
@Test public void canIndexAndQueryByNotebookName() {
|
||||
//given
|
||||
Note note1 = newNoteWithParapgraph("Notebook1", "test");
|
||||
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
|
||||
notebookIndex.index(Arrays.asList(note1, note2));
|
||||
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
|
||||
|
||||
//when
|
||||
List<Map<String, String>> results = notebookIndex.search("Notebook1");
|
||||
List<Map<String, String>> results = notebookIndex.query("Notebook1");
|
||||
|
||||
//then
|
||||
assertThat(results).isNotEmpty();
|
||||
|
|
@ -83,7 +90,7 @@ public class SearchServiceTest {
|
|||
public void canNotSearchBeforeIndexing() {
|
||||
//given NO notebookIndex.index() was called
|
||||
//when
|
||||
List<Map<String, String>> result = notebookIndex.search("anything");
|
||||
List<Map<String, String>> result = notebookIndex.query("anything");
|
||||
//then
|
||||
assertThat(result).isEmpty();
|
||||
//assert logs were printed
|
||||
|
|
@ -94,21 +101,44 @@ public class SearchServiceTest {
|
|||
//given
|
||||
Note note1 = newNoteWithParapgraph("Notebook1", "test");
|
||||
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
|
||||
notebookIndex.index(Arrays.asList(note1, note2));
|
||||
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
|
||||
|
||||
//when
|
||||
Paragraph p2 = note2.getLastParagraph();
|
||||
p2.setText("test indeed");
|
||||
notebookIndex.updateDoc(note2.getId(), note2.getName(), p2);
|
||||
notebookIndex.updateIndexDoc(note2, p2);
|
||||
|
||||
//then
|
||||
List<Map<String, String>> results = notebookIndex.search("all");
|
||||
List<Map<String, String>> results = notebookIndex.query("all");
|
||||
assertThat(results).isEmpty();
|
||||
|
||||
results = notebookIndex.search("indeed");
|
||||
results = notebookIndex.query("indeed");
|
||||
assertThat(results).isNotEmpty();
|
||||
}
|
||||
|
||||
@Test public void canDeleteFromIndex() throws IOException {
|
||||
//given
|
||||
Note note1 = newNoteWithParapgraph("Notebook1", "test");
|
||||
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
|
||||
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
|
||||
assertThat(resultForQuery("Notebook2")).isNotEmpty();
|
||||
|
||||
//when
|
||||
notebookIndex.deleteIndexDocs(note2);
|
||||
|
||||
//then
|
||||
assertThat(notebookIndex.query("all")).isEmpty();
|
||||
assertThat(resultForQuery("Notebook2")).isEmpty();
|
||||
|
||||
List<Map<String, String>> results = resultForQuery("test");
|
||||
assertThat(results).isNotEmpty();
|
||||
assertThat(results.size()).isEqualTo(1);
|
||||
}
|
||||
|
||||
private List<Map<String, String>> resultForQuery(String q) {
|
||||
return notebookIndex.query(q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Note \w given name,
|
||||
* adds a new paragraph \w given text
|
||||
|
|
|
|||
Loading…
Reference in a new issue