caio.co/de/cerberus

Get rid of Indexer.Builder

Same reasoning as in 6935ad40bbce0da7484ef239ad93b5b9d1379e05
Id
f6c468acab47b3e767bb6a384be1c6b6a9c96cea
Author
Caio
Commit time
2019-05-28T22:59:17+02:00

Modified src/test/java/co/caio/cerberus/Util.java

@@ -80,25 +80,22
throw new RuntimeException(rethrown);
}

- indexer =
- new Indexer.Builder()
- .dataDirectory(testDataDir)
- .categoryExtractor(
- new CategoryExtractor.Builder()
- .addCategory(
- "diet",
- true,
- recipe ->
- recipe
- .diets()
- .entrySet()
- .stream()
- .filter(es -> es.getValue() == 1f)
- .map(Entry::getKey)
- .collect(Collectors.toSet()))
- .build())
- .createMode()
+ var extractor =
+ new CategoryExtractor.Builder()
+ .addCategory(
+ "diet",
+ true,
+ recipe ->
+ recipe
+ .diets()
+ .entrySet()
+ .stream()
+ .filter(es -> es.getValue() == 1f)
+ .map(Entry::getKey)
+ .collect(Collectors.toSet()))
.build();
+
+ indexer = Indexer.Factory.open(testDataDir, extractor);

getSampleRecipes()
.forEach(

Modified src/main/java/co/caio/cerberus/search/IndexConfiguration.java

@@ -25,10 +25,6
private final FacetsConfig facetsConfig;
private final Analyzer analyzer;

- Path getBaseDirectory() {
- return baseDirectory;
- }
-
private final Path baseDirectory;

IndexConfiguration(Path baseDirectory, Set<String> multiValuedDimensions) {

Modified src/main/java/co/caio/cerberus/search/Indexer.java

@@ -1,20 +1,8
package co.caio.cerberus.search;

-import static co.caio.cerberus.search.IndexField.*;
-
import co.caio.cerberus.model.Recipe;
import java.io.IOException;
import java.nio.file.Path;
-import java.util.Map;
-import java.util.Set;
-import java.util.function.Function;
-import org.apache.lucene.document.*;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.facet.FacetField;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;

public interface Indexer {
void addRecipe(Recipe recipe) throws IOException;
@@ -27,225 +15,19

void mergeSegments() throws IOException;

- Searcher buildSearcher();
-
- class Builder {
- private Directory indexDirectory;
- private Directory taxonomyDirectory;
- private IndexConfiguration indexConfiguration;
- private IndexWriterConfig writerConfig;
- private IndexWriterConfig.OpenMode openMode;
- private Path dataDirectory;
- private CategoryExtractor categoryExtractor;
-
- Builder reset() {
- dataDirectory = null;
- indexDirectory = null;
- taxonomyDirectory = null;
- writerConfig = null;
- openMode = null;
- indexConfiguration = null;
- categoryExtractor = null;
- return this;
- }
-
- public Builder categoryExtractor(CategoryExtractor extractor) {
- categoryExtractor = extractor;
- return this;
- }
-
- public Builder dataDirectory(Path dir) {
- if (!dir.toFile().isDirectory()) {
- throw new IndexBuilderException(String.format("'%s' is not a directory", dir));
- }
-
- dataDirectory = dir;
-
- return this;
- }
-
- private Builder openMode(IndexWriterConfig.OpenMode mode) {
- openMode = mode;
- return this;
- }
-
- public Builder createMode() {
- return openMode(IndexWriterConfig.OpenMode.CREATE);
- }
-
- public Builder appendMode() {
- return openMode(IndexWriterConfig.OpenMode.APPEND);
- }
-
- public Indexer build() {
- if (openMode == null) {
- throw new IndexBuilderException("Missing `openMode`");
- }
-
- if (dataDirectory == null) {
- throw new IndexBuilderException("dataDirectory() not set");
- }
-
- if (categoryExtractor == null) {
- categoryExtractor = CategoryExtractor.NOOP;
- }
-
- indexConfiguration =
- new IndexConfiguration(dataDirectory, categoryExtractor.multiValuedCategories());
- indexConfiguration.save();
-
- indexDirectory = indexConfiguration.openIndexDirectory();
- taxonomyDirectory = indexConfiguration.openTaxonomyDirectory();
-
- writerConfig = new IndexWriterConfig(indexConfiguration.getAnalyzer());
- writerConfig.setOpenMode(openMode);
-
+ class Factory {
+ public static Indexer open(Path dir, CategoryExtractor extractor) {
try {
- return new IndexerImpl(
- new IndexWriter(indexDirectory, writerConfig),
- new DirectoryTaxonomyWriter(taxonomyDirectory, openMode),
- indexConfiguration,
- categoryExtractor.categoryToExtractor());
- } catch (IOException e) {
- throw new IndexBuilderException(String.format("Failure creating index writer: %s", e));
- }
- }
-
- private static final class IndexerImpl implements Indexer {
- private final IndexWriter indexWriter;
- private final DirectoryTaxonomyWriter taxonomyWriter;
- private final IndexConfiguration indexConfiguration;
- private final Map<String, Function<Recipe, Set<String>>> categoryExtractors;
-
- private IndexerImpl(
- IndexWriter writer,
- DirectoryTaxonomyWriter taxWriter,
- IndexConfiguration conf,
- Map<String, Function<Recipe, Set<String>>> categoryExtractors) {
- this.indexWriter = writer;
- this.taxonomyWriter = taxWriter;
- this.indexConfiguration = conf;
- this.categoryExtractors = categoryExtractors;
- }
-
- @Override
- public void addRecipe(Recipe recipe) throws IOException {
- var doc = new Document();
-
- doc.add(new StoredField(RECIPE_ID, recipe.recipeId()));
- doc.add(new LongPoint(RECIPE_ID, recipe.recipeId()));
-
- doc.add(new TextField(FULL_RECIPE, recipe.name(), Store.NO));
- recipe.instructions().forEach(i -> doc.add(new TextField(FULL_RECIPE, i, Store.NO)));
- recipe.ingredients().forEach(i -> doc.add(new TextField(FULL_RECIPE, i, Store.NO)));
-
- recipe
- .diets()
- .forEach(
- (diet, score) -> {
- if (score > 0) {
- doc.add(new FloatPoint(getFieldNameForDiet(diet), score));
- }
- });
-
- var numIngredients = recipe.ingredients().size();
- doc.add(new IntPoint(NUM_INGREDIENTS, numIngredients));
- doc.add(new NumericDocValuesField(NUM_INGREDIENTS, numIngredients));
-
- // Timing
-
- recipe
- .prepTime()
- .ifPresent(
- value -> {
- doc.add(new IntPoint(PREP_TIME, value));
- // For sorting
- doc.add(new NumericDocValuesField(PREP_TIME, value));
- });
-
- recipe
- .cookTime()
- .ifPresent(
- value -> {
- doc.add(new IntPoint(COOK_TIME, value));
- // For sorting
- doc.add(new NumericDocValuesField(COOK_TIME, value));
- });
-
- recipe
- .totalTime()
- .ifPresent(
- value -> {
- doc.add(new IntPoint(TOTAL_TIME, value));
- // For sorting
- doc.add(new NumericDocValuesField(TOTAL_TIME, value));
- });
-
- // Nutrition
-
- recipe
- .calories()
- .ifPresent(
- value -> {
- doc.add(new IntPoint(CALORIES, value));
- // For sorting
- doc.add(new NumericDocValuesField(CALORIES, value));
- });
-
- recipe.fatContent().ifPresent(value -> doc.add(new FloatPoint(FAT_CONTENT, (float) value)));
-
- recipe
- .proteinContent()
- .ifPresent(value -> doc.add(new FloatPoint(PROTEIN_CONTENT, (float) value)));
-
- recipe
- .carbohydrateContent()
- .ifPresent(value -> doc.add(new FloatPoint(CARBOHYDRATE_CONTENT, (float) value)));
-
- categoryExtractors.forEach(
- (dimension, getLabels) ->
- getLabels
- .apply(recipe)
- .forEach(
- label -> {
- doc.add(new FacetField(dimension, label));
- }));
-
- indexWriter.addDocument(indexConfiguration.getFacetsConfig().build(taxonomyWriter, doc));
- }
-
- @Override
- public int numDocs() {
- return indexWriter.getDocStats().numDocs;
- }
-
- @Override
- public void mergeSegments() throws IOException {
- indexWriter.forceMerge(1, true);
- }
-
- @Override
- public void close() throws IOException {
- indexWriter.close();
- taxonomyWriter.close();
- }
-
- @Override
- public void commit() throws IOException {
- indexWriter.commit();
- taxonomyWriter.commit();
- }
-
- @Override
- public Searcher buildSearcher() {
- return Searcher.Factory.open(indexConfiguration.getBaseDirectory());
+ return new IndexerImpl(dir, extractor);
+ } catch (Exception wrapped) {
+ throw new IndexerException(wrapped);
}
}
}

- class IndexBuilderException extends RuntimeException {
- IndexBuilderException(String message) {
- super(message);
+ class IndexerException extends RuntimeException {
+ IndexerException(Exception e) {
+ super(e);
}
}
}

Modified src/test/java/co/caio/cerberus/search/CategoryExtractorTest.java

@@ -50,8 +50,7
})
.build();

- var indexer =
- new Indexer.Builder().dataDirectory(dataDir).categoryExtractor(ce).createMode().build();
+ var indexer = Indexer.Factory.open(dataDir, ce);

var categoryToWantedPerLabel =
Map.of(
@@ -68,8 +67,9
indexer.addRecipe(fakeRecipe(6, 22, 500));

indexer.commit();
+ indexer.close();

- var searcher = indexer.buildSearcher();
+ var searcher = Searcher.Factory.open(dataDir);
var facets =
searcher.search(new SearchQuery.Builder().fulltext("*").maxFacets(3).build()).facets();

Modified src/test/java/co/caio/cerberus/search/IndexerTest.java

@@ -9,32 +9,18
import org.junit.jupiter.api.io.TempDir;

class IndexerTest {
- @Test
- void badUsage() {
- var exc = Indexer.IndexBuilderException.class;
- var builder = new Indexer.Builder();
- assertThrows(exc, builder::build);
- assertThrows(exc, () -> builder.reset().createMode().build());
- assertThrows(
- exc,
- () -> builder.reset().dataDirectory(Path.of("/this/doesnt/exist")).createMode().build());
- }

@Test
void simpleLocalIndexer(@TempDir Path tempDir) throws IOException {
- var index = new Indexer.Builder().dataDirectory(tempDir).createMode().build();
+ var index = Indexer.Factory.open(tempDir, CategoryExtractor.NOOP);
assertEquals(0, index.numDocs());
index.addRecipe(Util.getBasicRecipe());
assertEquals(1, index.numDocs());
index.close();

// Reopening it should still allow us to read its documents
- var newIndexSameDir = new Indexer.Builder().dataDirectory(tempDir).appendMode().build();
+ var newIndexSameDir = Indexer.Factory.open(tempDir, CategoryExtractor.NOOP);
assertEquals(1, newIndexSameDir.numDocs());
newIndexSameDir.close();
-
- // But opening should erase the old data
- var destructiveIndex = new Indexer.Builder().dataDirectory(tempDir).createMode().build();
- assertEquals(0, destructiveIndex.numDocs());
}
}

Modified src/test/java/co/caio/cerberus/search/SearcherTest.java

@@ -30,7 +30,7

@BeforeAll
static void prepare() {
- searcher = Util.getTestIndexer().buildSearcher();
+ searcher = Searcher.Factory.open(Util.getTestDataDir());
assertEquals(Util.expectedIndexSize(), searcher.numDocs());
}

@@ -106,7 +106,7

@Test
void dietThreshold(@TempDir Path tmpDir) throws Exception {
- var indexer = new Indexer.Builder().dataDirectory(tmpDir).createMode().build();
+ var indexer = Indexer.Factory.open(tmpDir, CategoryExtractor.NOOP);

var recipeBuilder =
new Recipe.Builder()
@@ -121,8 +121,9
indexer.addRecipe(recipeBuilder.putDiets("keto", 0.6F).build());
indexer.addRecipe(recipeBuilder.putDiets("keto", 1F).build());
indexer.commit();
+ indexer.close();

- var searcher = indexer.buildSearcher();
+ var searcher = Searcher.Factory.open(tmpDir);
var sqb = new SearchQuery.Builder();

assertEquals(1, searcher.search(sqb.diet("keto").build()).totalHits());

Created src/main/java/co/caio/cerberus/search/IndexerImpl.java

@@ -1,0 +1,160
+package co.caio.cerberus.search;
+
+import static co.caio.cerberus.search.IndexField.CALORIES;
+import static co.caio.cerberus.search.IndexField.CARBOHYDRATE_CONTENT;
+import static co.caio.cerberus.search.IndexField.COOK_TIME;
+import static co.caio.cerberus.search.IndexField.FAT_CONTENT;
+import static co.caio.cerberus.search.IndexField.FULL_RECIPE;
+import static co.caio.cerberus.search.IndexField.NUM_INGREDIENTS;
+import static co.caio.cerberus.search.IndexField.PREP_TIME;
+import static co.caio.cerberus.search.IndexField.PROTEIN_CONTENT;
+import static co.caio.cerberus.search.IndexField.RECIPE_ID;
+import static co.caio.cerberus.search.IndexField.TOTAL_TIME;
+import static co.caio.cerberus.search.IndexField.getFieldNameForDiet;
+import static org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND;
+
+import co.caio.cerberus.model.Recipe;
+import java.io.IOException;
+import java.nio.file.Path;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.FacetField;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+
+public final class IndexerImpl implements Indexer {
+ private final IndexWriter indexWriter;
+ private final DirectoryTaxonomyWriter taxonomyWriter;
+ private final IndexConfiguration indexConfiguration;
+ private final CategoryExtractor categoryExtractor;
+
+ /**
+  * Opens the index rooted at {@code dir}, using {@code CREATE_OR_APPEND} for both writers.
+  *
+  * <p>The index configuration is saved first; the document writer is opened next and the
+  * taxonomy writer last. If the taxonomy writer cannot be opened, the document writer that was
+  * already opened is rolled back so it is not left open behind a half-built instance.
+  *
+  * @param dir base directory handed to {@link IndexConfiguration}
+  * @param extractor supplies the multi-valued category names used by the configuration and the
+  *     per-category label functions used when indexing
+  * @throws IOException if either writer cannot be opened
+  */
+ IndexerImpl(Path dir, CategoryExtractor extractor) throws IOException {
+   categoryExtractor = extractor;
+   indexConfiguration = new IndexConfiguration(dir, categoryExtractor.multiValuedCategories());
+   indexConfiguration.save();
+
+   var writerConfig = new IndexWriterConfig(indexConfiguration.getAnalyzer());
+   writerConfig.setOpenMode(CREATE_OR_APPEND);
+
+   indexWriter = new IndexWriter(indexConfiguration.openIndexDirectory(), writerConfig);
+   try {
+     taxonomyWriter =
+         new DirectoryTaxonomyWriter(indexConfiguration.openTaxonomyDirectory(), CREATE_OR_APPEND);
+   } catch (IOException | RuntimeException failure) {
+     // Release the already-opened index writer; nothing has been added through it yet, so
+     // rollback() (close without committing) is the appropriate way to let go of it.
+     try {
+       indexWriter.rollback();
+     } catch (IOException | RuntimeException onRollback) {
+       failure.addSuppressed(onRollback);
+     }
+     throw failure;
+   }
+ }
+
+ /**
+  * Builds a Lucene document out of {@code recipe} and adds it to the index.
+  *
+  * <p>The document carries, in this order: the recipe id (stored and as a point); the name,
+  * instructions and ingredients as unstored text under the single full-text field; one float
+  * point per diet whose score is positive; the ingredient count; whichever timing and nutrition
+  * values are present; and one facet field per label produced by the category extractor.
+  *
+  * @param recipe the recipe to index
+  * @throws IOException propagated from the facets configuration or the index writer
+  */
+ @Override
+ public void addRecipe(Recipe recipe) throws IOException {
+   var document = new Document();
+
+   var id = recipe.recipeId();
+   document.add(new StoredField(RECIPE_ID, id));
+   document.add(new LongPoint(RECIPE_ID, id));
+
+   addFullText(document, recipe.name());
+   recipe.instructions().forEach(step -> addFullText(document, step));
+   recipe.ingredients().forEach(item -> addFullText(document, item));
+
+   // Only diets with a positive score are indexed.
+   for (var dietScore : recipe.diets().entrySet()) {
+     if (dietScore.getValue() > 0) {
+       document.add(
+           new FloatPoint(getFieldNameForDiet(dietScore.getKey()), dietScore.getValue()));
+     }
+   }
+
+   addSortableInt(document, NUM_INGREDIENTS, recipe.ingredients().size());
+
+   // Timing
+   recipe.prepTime().ifPresent(time -> addSortableInt(document, PREP_TIME, time));
+   recipe.cookTime().ifPresent(time -> addSortableInt(document, COOK_TIME, time));
+   recipe.totalTime().ifPresent(time -> addSortableInt(document, TOTAL_TIME, time));
+
+   // Nutrition
+   recipe.calories().ifPresent(amount -> addSortableInt(document, CALORIES, amount));
+   recipe
+       .fatContent()
+       .ifPresent(amount -> document.add(new FloatPoint(FAT_CONTENT, (float) amount)));
+   recipe
+       .proteinContent()
+       .ifPresent(amount -> document.add(new FloatPoint(PROTEIN_CONTENT, (float) amount)));
+   recipe
+       .carbohydrateContent()
+       .ifPresent(amount -> document.add(new FloatPoint(CARBOHYDRATE_CONTENT, (float) amount)));
+
+   // One facet field per (dimension, label) pair the extractor yields for this recipe.
+   for (var category : categoryExtractor.categoryToExtractor().entrySet()) {
+     var dimension = category.getKey();
+     for (var label : category.getValue().apply(recipe)) {
+       document.add(new FacetField(dimension, label));
+     }
+   }
+
+   var facetedDocument = indexConfiguration.getFacetsConfig().build(taxonomyWriter, document);
+   indexWriter.addDocument(facetedDocument);
+ }
+
+ /** Adds {@code text} to {@code document} as an unstored text field under the full-text name. */
+ private static void addFullText(Document document, String text) {
+   document.add(new TextField(FULL_RECIPE, text, Store.NO));
+ }
+
+ /** Adds {@code value} under {@code field} twice: as an int point and as numeric doc values. */
+ private static void addSortableInt(Document document, String field, int value) {
+   document.add(new IntPoint(field, value));
+   // For sorting
+   document.add(new NumericDocValuesField(field, value));
+ }
+
+ @Override
+ public int numDocs() {
+ return indexWriter.getDocStats().numDocs;
+ }
+
+ /**
+  * Asks the index writer to force-merge the index down to a single segment.
+  *
+  * @throws IOException propagated from the index writer
+  */
+ @Override
+ public void mergeSegments() throws IOException {
+   final int maxSegments = 1;
+   // Second argument of IndexWriter#forceMerge(int, boolean).
+   final boolean doWait = true;
+   indexWriter.forceMerge(maxSegments, doWait);
+ }
+
+ /**
+  * Closes the index writer and then the taxonomy writer.
+  *
+  * <p>The taxonomy writer is closed even when closing the index writer throws; should both
+  * fail, the taxonomy writer's exception is attached to the first one as suppressed.
+  *
+  * @throws IOException if either writer fails to close
+  */
+ @Override
+ public void close() throws IOException {
+   // try-with-resources on the final field guarantees the second close always runs.
+   try (taxonomyWriter) {
+     indexWriter.close();
+   }
+ }
+
+ @Override
+ public void commit() throws IOException { // Commits both writers, index writer first.
+ indexWriter.commit(); // An IOException here propagates before the taxonomy writer is committed.
+ taxonomyWriter.commit(); // NOTE(review): Lucene facet guidance reportedly prefers taxonomy-before-index commits; confirm this order.
+ }
+}