caio.co/de/cerberus

Drop support for `findSimilar(long, int)`

While I'm not particularly bothered by the index size increase that
this feature requires, it's simpler and a lot more efficient to
just pre-compute the similarities and persist them in the db.
Id
7d15a9afc51b67060f294d0b91242b907e2e64ed
Author
Caio
Commit time
2019-04-13T20:47:03+02:00

Modified src/main/java/co/caio/cerberus/search/Indexer.java

@@ -1,8 +1,7
package co.caio.cerberus.search;

import static co.caio.cerberus.search.IndexField.*;

-import co.caio.cerberus.lucene.TextFieldWithVectors;
import co.caio.cerberus.model.Recipe;
import java.io.IOException;
import java.nio.file.Path;
@@ -140,9 +139,9
recipe.instructions().forEach(i -> doc.add(new TextField(INSTRUCTIONS, i, Store.NO)));
recipe.ingredients().forEach(i -> doc.add(new TextField(INGREDIENTS, i, Store.NO)));

- doc.add(new TextFieldWithVectors(FULL_RECIPE, recipe.name()));
- recipe.instructions().forEach(i -> doc.add(new TextFieldWithVectors(FULL_RECIPE, i)));
- recipe.ingredients().forEach(i -> doc.add(new TextFieldWithVectors(FULL_RECIPE, i)));
+ doc.add(new TextField(FULL_RECIPE, recipe.name(), Store.NO));
+ recipe.instructions().forEach(i -> doc.add(new TextField(FULL_RECIPE, i, Store.NO)));
+ recipe.ingredients().forEach(i -> doc.add(new TextField(FULL_RECIPE, i, Store.NO)));

recipe
.diets()

Modified src/main/java/co/caio/cerberus/search/Searcher.java

@@ -16,8 +16,6

SearchResult findSimilar(String recipeText, int maxResults);

- SearchResult findSimilar(long recipeId, int maxResults);
-
int numDocs();

class Builder {

Modified src/main/java/co/caio/cerberus/search/SearcherImpl.java

@@ -35,9 +35,6
private static final Sort sortTotalTime = integerSorterWithDefault(TOTAL_TIME);
private static final Sort sortCalories = integerSorterWithDefault(CALORIES);

- private static final SearchResult EMPTY_SEARCH_RESULT =
- new SearchResult.Builder().totalHits(0).build();
-
private final IndexSearcher indexSearcher;
private final TaxonomyReader taxonomyReader;
private final IndexConfiguration indexConfiguration;
@@ -89,48 +86,6
} catch (IOException wrapped) {
throw new SearcherException(wrapped);
}
- }
-
- @Override
- public SearchResult findSimilar(long recipeId, int maxResults) {
- try {
- var docId = findDocId(recipeId);
-
- if (docId.isEmpty()) {
- return EMPTY_SEARCH_RESULT;
- }
-
- // We use `maxResults + 1` because we'll filter out the
- // given recipeId from the results
- var result = indexSearcher.search(moreLikeThis.like(docId.getAsInt()), maxResults + 1);
-
- var builder = new SearchResult.Builder();
-
- int totalHits = 0;
- for (int i = 0; i < result.scoreDocs.length && totalHits <= maxResults; i++) {
- Document doc = indexSearcher.doc(result.scoreDocs[i].doc);
- long foundRecipeId = doc.getField(RECIPE_ID).numericValue().longValue();
-
- if (foundRecipeId != recipeId) {
- builder.addRecipe(foundRecipeId);
- totalHits++;
- }
- }
-
- return builder.totalHits(totalHits).build();
- } catch (IOException wrapped) {
- throw new SearcherException(wrapped);
- }
- }
-
- OptionalInt findDocId(long recipeId) throws IOException {
- var result = indexSearcher.search(LongPoint.newExactQuery(RECIPE_ID, recipeId), 1);
-
- if (result.scoreDocs.length == 0) {
- return OptionalInt.empty();
- }
-
- return OptionalInt.of(result.scoreDocs[0].doc);
}

public int numDocs() {

Modified src/test/java/co/caio/cerberus/search/SearcherTest.java

@@ -383,17 +383,4
// }
});
}
-
- @Test
- void similarToRecipeIdNeverIncludesSelf() {
- final int maxResults = 100;
- Util.getSampleRecipes()
- .forEach(
- sampleRecipe -> {
- var similar = searcher.findSimilar(sampleRecipe.recipeId(), maxResults);
-
- assertTrue(similar.totalHits() > 0 && similar.totalHits() <= maxResults);
- assertEquals(-1, similar.recipeIds().indexOf(sampleRecipe.recipeId()));
- });
- }
}

Deleted src/main/java/co/caio/cerberus/lucene/TextFieldWithVectors.java

@@ -1,22 +1,0
-package co.caio.cerberus.lucene;
-
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
-
-public class TextFieldWithVectors extends Field {
- // Tokenized, with vectors, not stored
- public static final FieldType TYPE;
-
- static {
- TYPE = new FieldType();
- TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- TYPE.setTokenized(true);
- TYPE.setStoreTermVectors(true);
- TYPE.freeze();
- }
-
- public TextFieldWithVectors(String name, String value) {
- super(name, value, TYPE);
- }
-}

Deleted src/test/java/co/caio/cerberus/search/SearcherImplTest.java

@@ -1,26 +1,0
-package co.caio.cerberus.search;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import co.caio.cerberus.Util;
-import java.io.IOException;
-import org.junit.jupiter.api.Test;
-
-class SearcherImplTest {
-
- @Test
- void canFindEveryIndexedRecipe() {
- var searcher = (SearcherImpl) Util.getTestIndexer().buildSearcher();
-
- Util.getSampleRecipes()
- .forEach(
- sampleRecipe -> {
- try {
- var maybeDocId = searcher.findDocId(sampleRecipe.recipeId());
- assertTrue(maybeDocId.isPresent());
- } catch (IOException wrapped) {
- throw new RuntimeException(wrapped);
- }
- });
- }
-}