ZEPPELIN-501: refactoring + handling index Create\Delete

This commit is contained in:
Alexander Bezzubov 2015-12-18 15:31:07 +09:00
parent b13d5fbc4d
commit 825b266daa
5 changed files with 157 additions and 105 deletions

View file

@ -401,9 +401,9 @@ public class NotebookRestApi {
*/
@GET
@Path("search")
public Response search(@QueryParam("q") String query) {
LOG.info("Searching notebooks for {}", query);
List<Map<String, String>> notebooksFound = notebookIndex.search(query);
public Response search(@QueryParam("q") String queryTerm) {
LOG.info("Searching notebooks for {}", queryTerm);
List<Map<String, String>> notebooksFound = notebookIndex.query(queryTerm);
LOG.info("Notbooks {} found", notebooksFound.size());
return new JsonResponse<>(Status.OK, notebooksFound).build();
}

View file

@ -373,8 +373,6 @@ public class Note implements Serializable, JobListener {
}
public void persist() throws IOException {
//TODO(bzz): update index
//notebookIndex.
snapshotAngularObjectRegistry();
repo.save(this);
}

View file

@ -101,7 +101,7 @@ public class Notebook {
if (this.notebookIndex != null) {
long start = System.nanoTime();
logger.info("Notebook indexing started...");
notebookIndex.index(notes.values());
notebookIndex.addIndexDocs(notes.values());
logger.info("Notebook indexing finished: {} indexed in {}s", notes.size(),
TimeUnit.NANOSECONDS.toSeconds(start - System.nanoTime()));
}
@ -115,11 +115,14 @@ public class Notebook {
* @throws IOException
*/
public Note createNote() throws IOException {
Note note;
if (conf.getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_AUTO_INTERPRETER_BINDING)) {
return createNote(replFactory.getDefaultInterpreterSettingList());
note = createNote(replFactory.getDefaultInterpreterSettingList());
} else {
return createNote(null);
note = createNote(null);
}
notebookIndex.addIndexDoc(note);
return note;
}
/**
@ -139,6 +142,7 @@ public class Notebook {
bindInterpretersToNote(note.id(), interpreterIds);
}
notebookIndex.addIndexDoc(note);
note.persist();
return note;
}
@ -169,6 +173,8 @@ public class Notebook {
for (Paragraph p : paragraphs) {
newNote.addCloneParagraph(p);
}
notebookIndex.addIndexDoc(newNote);
newNote.persist();
return newNote;
}
@ -208,9 +214,11 @@ public class Notebook {
public void removeNote(String id) {
Note note;
synchronized (notes) {
note = notes.remove(id);
}
notebookIndex.deleteIndexDocs(note);
// remove from all interpreter instance's angular object registry
for (InterpreterSetting settings : replFactory.get()) {
@ -369,12 +377,9 @@ public class Notebook {
}
synchronized (notes) {
List<Note> noteList = new ArrayList<Note>(notes.values());
Collections.sort(noteList, new Comparator() {
Collections.sort(noteList, new Comparator<Note>() {
@Override
public int compare(Object one, Object two) {
Note note1 = (Note) one;
Note note2 = (Note) two;
public int compare(Note note1, Note note2) {
String name1 = note1.id();
if (note1.getName() != null) {
name1 = note1.getName();
@ -383,7 +388,6 @@ public class Notebook {
if (note2.getName() != null) {
name2 = note2.getName();
}
((Note) one).getName();
return name1.compareTo(name2);
}
});

View file

@ -42,6 +42,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
@ -55,11 +56,12 @@ import org.apache.zeppelin.notebook.Paragraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
/**
* Service for search (indexing and query) the notebooks
* Service for searching the notebooks (both indexing and querying)
*
* TODO(bzz): document thread-safety
*/
@ -91,7 +93,7 @@ public class SearchService {
* @param queryStr a query
* @return A list of matching paragraphs (id, text, snippet w/ highlight)
*/
public List<Map<String, String>> search(String queryStr) {
public List<Map<String, String>> query(String queryStr) {
if (null == ramDirectory) {
throw new IllegalStateException(
"Something went wrong on instance creation time, index dir is null");
@ -163,18 +165,38 @@ public class SearchService {
return matchingParagraphs;
}
/**
 * Updates the index entry of the given note itself (no paragraph involved).
 *
 * @param note note whose id and name are handed to the index update
 * @throws IOException declared for callers; see {@code updateDoc}
 */
public void updateIndexDoc(Note note) throws IOException {
  final Paragraph noParagraph = null;
  updateDoc(note.getId(), note.getName(), noParagraph);
}
/**
 * Updates the index entry of one paragraph of the given note.
 *
 * @param note note the paragraph belongs to; supplies id and name
 * @param p paragraph to update in the index
 * @throws IOException declared for callers; see {@code updateDoc}
 */
void updateIndexDoc(Note note, Paragraph p) throws IOException {
  String noteId = note.getId();
  String noteName = note.getName();
  updateDoc(noteId, noteName, p);
}
/**
 * Replaces the index document identified by the formatted id with a freshly
 * built one and commits the change.
 *
 * An {@link IOException} raised by the writer is logged here and not rethrown.
 *
 * @param noteId id of the note
 * @param noteName name of the note, stored with the document
 * @param p paragraph to index, or null to index the note name only
 * @throws IOException kept in the signature for callers
 */
private void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
  String id = formatId(noteId, p);
  Document doc = newDocument(id, noteName, p);
  try {
    // The same id is used as the lookup term and as the id of the new document.
    writer.updateDocument(new Term(ID_FIELD, id), doc);
    writer.commit();
  } catch (IOException e) {
    LOG.error("Failed to update index of notebook {}", noteId, e);
  }
}
/**
* Indexes full collection of notes: all the paragraphs + Note names
*
* @param collection of Notes
*/
public void index(Collection<Note> collection) {
public void addIndexDocs(Collection<Note> collection) {
int docsIndexed = 0;
long start = System.nanoTime();
try {
indexDocs(writer, collection);
long end = System.nanoTime();
LOG.info("Indexing {} notebooks took {}ms",
collection.size(), TimeUnit.NANOSECONDS.toMillis(end - start));
for (Note note : collection) {
addIndexDoc(note);
docsIndexed++;
}
} catch (IOException e) {
LOG.error("Failed to index all Notebooks", e);
} finally {
@ -183,19 +205,42 @@ public class SearchService {
} catch (IOException e) {
LOG.error("Failed to save index", e);
}
long end = System.nanoTime();
LOG.info("Indexing {} notebooks took {}ms",
docsIndexed, TimeUnit.NANOSECONDS.toMillis(end - start));
}
}
public void updateDoc(String noteId, String noteName, Paragraph p) throws IOException {
Document doc = newDocument(noteId, noteName, p);
try {
writer.updateDocument(new Term(ID_FIELD, formatId(noteId, p.getId())), doc);
writer.commit();
} catch (Exception e) {
LOG.error("Failed to index all Notebooks", e);
/**
 * Adds the given notebook to the index: first its name, then every
 * paragraph that has text. Paragraphs without text are skipped.
 *
 * @param note notebook to index
 * @throws IOException If there is a low-level I/O error
 */
public void addIndexDoc(Note note) throws IOException {
  indexNoteName(writer, note.getId(), note.getName());
  for (Paragraph paragraph : note.getParagraphs()) {
    if (paragraph.getText() != null) {
      indexDoc(writer, note.getId(), note.getName(), paragraph);
    } else {
      LOG.debug("Skipping empty paragraph");
    }
  }
}
/**
 * Deletes all docs of the given Note from the index.
 *
 * Every document whose id starts with the note id is removed, then the
 * change is committed. An {@link IOException} from the writer is logged and
 * not rethrown.
 *
 * @param note note to remove from the index; a null note is ignored
 */
public void deleteIndexDocs(Note note) {
  if (note == null) {
    // Callers may pass the result of a map removal, which is null for unknown ids.
    LOG.debug("No note given, nothing to delete from index");
    return;
  }
  String noteId = note.getId();
  LOG.debug("Deleting note {}, out of: {}", noteId, writer.numDocs());
  try {
    writer.deleteDocuments(new WildcardQuery(new Term(ID_FIELD, noteId + "*")));
    writer.commit();
  } catch (IOException e) {
    LOG.error("Failed to delete a notebook {} from index", noteId, e);
  }
  // Pass the count as a log argument so the {} placeholder is substituted.
  LOG.debug("Done, index contains {} docs now", writer.numDocs());
}
/**
* Frees the resources used by the Lucene index
*/
@ -207,31 +252,6 @@ public class SearchService {
}
}
/**
* Indexes the given list of notebooks
*
* @param writer
* Writer to the index where the given file/dir info will be stored
* @param path
* The file to index, or the directory to recurse into to find files
* to index
* @throws IOException
* If there is a low-level I/O error
*/
void indexDocs(final IndexWriter writer, Collection<Note> notes) throws IOException {
for (Note note : notes) {
indexNoteName(writer, note.getId(), note.getName());
for (Paragraph doc : note.getParagraphs()) {
if (doc.getText() == null) {
LOG.debug("Skipping empty paragraph");
continue;
}
indexParagraph(writer, note.getId(), note.getName(), doc);
}
}
}
/**
* Indexes a notebook name
* @throws IOException
@ -242,58 +262,58 @@ public class SearchService {
LOG.debug("Skipping empty notebook name");
return;
}
Document doc = newDocument(noteId, noteName);
w.addDocument(doc);
indexDoc(w, noteId, noteName, null);
}
/**
* Indexes a single paragraph = document
* Indexes a single document
* - code of the paragraph (if non-null)
* - or just note name
*/
void indexParagraph(IndexWriter w, String noteId, String noteName, Paragraph p)
private void indexDoc(IndexWriter w, String noteId, String noteName, Paragraph p)
throws IOException {
Document doc = newDocument(noteId, noteName, p);
String id = formatId(noteId, p);
Document doc = newDocument(id, noteName, p);
w.addDocument(doc);
}
private Document newDocument(String noteId, String noteName, Paragraph p) {
Document doc = new Document();
String id = formatId(noteId, p.getId());
Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
doc.add(pathField);
doc.add(new StringField("title", noteName, Field.Store.YES));
Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
return doc;
}
//TODO(bzz): refactor and re-use code from above
private Document newDocument(String noteId, String noteName) {
Document doc = new Document();
Field pathField = new StringField(ID_FIELD, noteId, Field.Store.YES);
doc.add(pathField);
doc.add(new StringField("title", noteName, Field.Store.YES));
//doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
return doc;
}
/**
* ID looks like '<note-id>/paragraph/<paragraph-id>'
*
* @param noteId If of the Note
* @param paragraphId Id of the paragraph
* If paragraph is not null, indexes code in the paragraph,
* otherwise indexes the notebook name.
*
* @param id id of the document, different for Note name and paragraph
* @param noteName name of the note
* @param p paragraph
* @return
*/
private String formatId(String noteId, String paragraphId) {
return String.format("%s/paragraph/%s", noteId, paragraphId);
// Builds the index document for either a paragraph (p != null) or a note name (p == null).
// Every document carries the id and the note name as "title"; a paragraph document
// additionally carries its text and a "modified" timestamp, while a note-name document
// uses the name itself as the searchable text.
// NOTE(review): noteName is passed to the field constructors unchecked -- TODO confirm
// that a null name cannot reach this method.
private Document newDocument(String id, String noteName, Paragraph p) {
  Document doc = new Document();
  doc.add(new StringField(ID_FIELD, id, Field.Store.YES));
  doc.add(new StringField("title", noteName, Field.Store.YES));

  if (p == null) {
    doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
    return doc;
  }

  doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
  // Prefer the start date; fall back to the creation date when it was never started.
  Date modified = p.getDateStarted();
  if (modified == null) {
    modified = p.getDateCreated();
  }
  doc.add(new LongField("modified", modified.getTime(), Field.Store.NO));
  return doc;
}
/**
 * Builds the id of an index document.
 *
 * With a paragraph the id is {@code <noteId>/paragraphs/<paragraphId>};
 * without one (null) it is the bare {@code <noteId>}.
 */
static String formatId(String noteId, Paragraph p) {
  if (null == p) {
    return noteId;
  }
  return Joiner.on('/').join(noteId, "paragraphs", p.getId());
}
}

View file

@ -17,6 +17,7 @@
package org.apache.zeppelin.search;
import static com.google.common.truth.Truth.assertThat;
import static org.apache.zeppelin.search.SearchService.formatId;
import java.io.IOException;
import java.util.Arrays;
@ -25,6 +26,7 @@ import java.util.Map;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.Paragraph;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@ -37,6 +39,11 @@ public class SearchServiceTest {
notebookIndex = new SearchService();
}
/** Closes the index under test once a test method has finished. */
@After
public void shutDown() {
notebookIndex.close();
}
@Test public void canIndexNotebook() {
//given
Note note1 = newNoteWithParapgraph("Notebook1", "test");
@ -44,33 +51,33 @@ public class SearchServiceTest {
List<Note> notebook = Arrays.asList(note1, note2);
//when
notebookIndex.index(notebook);
notebookIndex.addIndexDocs(notebook);
}
@Test public void canIndexAndQuery() {
//given
Note note1 = newNoteWithParapgraph("Notebook1", "test");
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
notebookIndex.index(Arrays.asList(note1, note2));
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
//when
List<Map<String, String>> results = notebookIndex.search("all");
List<Map<String, String>> results = notebookIndex.query("all");
//then
assertThat(results).isNotEmpty();
assertThat(results.size()).isEqualTo(1);
assertThat(results.get(0)).containsEntry("id",
String.format("%s/paragraph/%s", note2.getId(), note2.getLastParagraph().getId()));
assertThat(results.get(0))
.containsEntry("id", formatId(note2.getId(), note2.getLastParagraph()));
}
@Test public void canIndexAndQueryByNotebookName() {
//given
Note note1 = newNoteWithParapgraph("Notebook1", "test");
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
notebookIndex.index(Arrays.asList(note1, note2));
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
//when
List<Map<String, String>> results = notebookIndex.search("Notebook1");
List<Map<String, String>> results = notebookIndex.query("Notebook1");
//then
assertThat(results).isNotEmpty();
@ -83,7 +90,7 @@ public class SearchServiceTest {
public void canNotSearchBeforeIndexing() {
//given NO notebookIndex.addIndexDocs() was called
//when
List<Map<String, String>> result = notebookIndex.search("anything");
List<Map<String, String>> result = notebookIndex.query("anything");
//then
assertThat(result).isEmpty();
//assert logs were printed
@ -94,21 +101,44 @@ public class SearchServiceTest {
//given
Note note1 = newNoteWithParapgraph("Notebook1", "test");
Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
notebookIndex.index(Arrays.asList(note1, note2));
notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
//when
Paragraph p2 = note2.getLastParagraph();
p2.setText("test indeed");
notebookIndex.updateDoc(note2.getId(), note2.getName(), p2);
notebookIndex.updateIndexDoc(note2, p2);
//then
List<Map<String, String>> results = notebookIndex.search("all");
List<Map<String, String>> results = notebookIndex.query("all");
assertThat(results).isEmpty();
results = notebookIndex.search("indeed");
results = notebookIndex.query("indeed");
assertThat(results).isNotEmpty();
}
// Indexes two notes, deletes the second one and checks that only the first stays searchable.
@Test public void canDeleteFromIndex() throws IOException {
  //given
  Note kept = newNoteWithParapgraph("Notebook1", "test");
  Note removed = newNoteWithParapgraphs("Notebook2", "not test", "not test at all");
  List<Note> indexed = Arrays.asList(kept, removed);
  notebookIndex.addIndexDocs(indexed);
  assertThat(resultForQuery("Notebook2")).isNotEmpty();

  //when
  notebookIndex.deleteIndexDocs(removed);

  //then
  assertThat(notebookIndex.query("all")).isEmpty();
  assertThat(resultForQuery("Notebook2")).isEmpty();

  List<Map<String, String>> remaining = resultForQuery("test");
  assertThat(remaining).isNotEmpty();
  assertThat(remaining.size()).isEqualTo(1);
}
/** Shorthand for running the query {@code q} against the index under test. */
private List<Map<String, String>> resultForQuery(String q) {
  List<Map<String, String>> found = notebookIndex.query(q);
  return found;
}
/**
* Creates a new Note \w given name,
* adds a new paragraph \w given text