caio.co/de/cerberus

Merge branch 'sdb'

Id
31097c0db40790490088c0cc1ddc9e0a4a289993
Author
Caio
Commit time
2019-05-21T13:23:39+02:00

Modified README.md

@@ -1,11 +1,10
# Cerberus

Cerberus is the Search and Metadata Retrieval library for [gula.recipes][gula]
-and makes use of [Lucene][lucene] for searching and [Chronicle-Map][cm] for
-metadata persistence / memory-mapping. Metadata is stored as [FlatBuffers][].
+and makes use of [Lucene][lucene] for searching and a plain binary file filled
+with [FlatBuffers][] to persist and access metadata.

[gula]: https://gula.recipes
-[cm]: https://github.com/OpenHFT/Chronicle-Map/
[lucene]: http://lucene.apache.org/core/
[FlatBuffers]: https://google.github.io/flatbuffers/

Modified pom.xml

@@ -22,7 +22,6
<flatbuffers.version>1.10.0.2</flatbuffers.version>
<flatbuffers.sources>${basedir}/src/main/flatbuffers</flatbuffers.sources>
<flatbuffers.generated.sources>${project.build.directory}/generated-sources/java</flatbuffers.generated.sources>
- <chronicle.version>3.17.2</chronicle.version>
</properties>

<dependencies>
@@ -81,18 +80,6
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
- </dependency>
-
- <dependency>
- <groupId>net.openhft</groupId>
- <artifactId>chronicle-map</artifactId>
- <version>${chronicle.version}</version>
- <exclusions>
- <exclusion>
- <groupId>com.sun.java</groupId>
- <artifactId>tools</artifactId>
- </exclusion>
- </exclusions>
</dependency>

<dependency>

Modified src/test/java/co/caio/cerberus/Util.java

@@ -7,6 +7,7
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import java.io.BufferedReader;
+import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -122,6 +123,22
assertionNumbers.load(Util.class.getResource("/assertions.properties").openStream());
} catch (Exception rethrown) {
throw new RuntimeException(rethrown);
+ }
+
+ deleteOnExit(testDataDir);
+ }
+
+ private static void deleteOnExit(Path path) {
+ path.toFile().deleteOnExit();
+
+ if (!path.toFile().isDirectory()) {
+ return;
+ }
+
+ try (var items = Files.list(path)) {
+ items.forEach(Util::deleteOnExit);
+ } catch (IOException wrapped) {
+ throw new RuntimeException(wrapped);
}
}

Modified src/main/java/co/caio/cerberus/db/RecipeMetadataDatabase.java

@@ -7,8 +7,6

Optional<RecipeMetadata> findById(long recipeId);

- List<RecipeMetadata> findAllById(List<Long> recipeIds);
-
void saveAll(List<RecipeMetadata> recipes);

default void close() {}

Modified src/test/java/co/caio/cerberus/db/HashMapRecipeMetadataDatabase.java

@@ -1,6 +1,5
package co.caio.cerberus.db;

-import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -17,13 +16,6
@Override
public Optional<RecipeMetadata> findById(long recipeId) {
return Optional.ofNullable(map.get(recipeId));
- }
-
- @Override
- public List<RecipeMetadata> findAllById(List<Long> recipeIds) {
- var result = new ArrayList<RecipeMetadata>();
- recipeIds.forEach(id -> findById(id).ifPresent(result::add));
- return result;
}

@Override

Modified src/test/java/co/caio/cerberus/lucene/FloatAssociationsThresholdCountTest.java

@@ -1,9 +1,9
package co.caio.cerberus.lucene;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
-import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetsCollector;
@@ -21,6 +21,7
import org.apache.lucene.store.FSDirectory;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

class FloatAssociationsThresholdCountTest {

@@ -84,9 +85,9
}

@BeforeAll
- static void setUp() throws IOException {
- final var indexDir = FSDirectory.open(Files.createTempDirectory("lucene-test"));
- final var taxoDir = FSDirectory.open(Files.createTempDirectory("lucene-test-taxo"));
+ static void setUp(@TempDir Path tmpDir) throws IOException {
+ final var indexDir = FSDirectory.open(tmpDir.resolve("index"));
+ final var taxoDir = FSDirectory.open(tmpDir.resolve("taxo"));

config = new FacetsConfig();
config.setMultiValued("score", true);

Modified src/test/java/co/caio/cerberus/search/CategoryExtractorTest.java

@@ -5,18 +5,19
import co.caio.cerberus.model.Recipe;
import co.caio.cerberus.model.SearchQuery;
import java.io.IOException;
-import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

class CategoryExtractorTest {

@Test
- void basicFunctionality() throws IOException {
+ void basicFunctionality(@TempDir Path dataDir) throws IOException {

var ce =
new CategoryExtractor.Builder()
@@ -49,7 +50,6
})
.build();

- var dataDir = Files.createTempDirectory("extractor-");
var indexer =
new Indexer.Builder().dataDirectory(dataDir).categoryExtractor(ce).createMode().build();

Modified src/test/java/co/caio/cerberus/search/IndexConfigurationTest.java

@@ -1,28 +1,28
package co.caio.cerberus.search;

import static org.junit.jupiter.api.Assertions.*;

import co.caio.cerberus.search.IndexConfiguration.IndexConfigurationException;
import java.io.IOException;
-import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.facet.FacetsConfig;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

class IndexConfigurationTest {

@Test
- void canCreateEmpty() {
- assertDoesNotThrow(
- () -> new IndexConfiguration(Files.createTempDirectory("indexconfig-"), Set.of()));
+ void canCreateEmpty(@TempDir Path tempDir) {
+ assertDoesNotThrow(() -> new IndexConfiguration(tempDir, Set.of()));
}

@Test
- void multiValuedDimensionsConfiguresFacetConfig() throws IOException {
+ void multiValuedDimensionsConfiguresFacetConfig(@TempDir Path tempDir) throws IOException {
var originalMv = Set.of("a", "b", "c");
- var config = new IndexConfiguration(Files.createTempDirectory("indexconfig-"), originalMv);
+ var config = new IndexConfiguration(tempDir, originalMv);

var configuredMv = extractMultiValued(config.getFacetsConfig());

@@ -30,15 +30,13
}

@Test
- void cantLoadFromConfigIfItDoesNotExist() {
+ void cantLoadFromConfigIfItDoesNotExist(@TempDir Path tempDir) {
assertThrows(
- IndexConfigurationException.class,
- () -> IndexConfiguration.fromBaseDirectory(Files.createTempDirectory("indexconfig-")));
+ IndexConfigurationException.class, () -> IndexConfiguration.fromBaseDirectory(tempDir));
}

@Test
- void loadFromConfigWorks() throws IOException {
- var base = Files.createTempDirectory("indexconfig-");
+ void loadFromConfigWorks(@TempDir Path base) {
var multiValued = Set.of("a", "b", "c", "d");
var config = new IndexConfiguration(base, multiValued);

Modified src/test/java/co/caio/cerberus/search/IndexerTest.java

@@ -1,12 +1,12
package co.caio.cerberus.search;

import static org.junit.jupiter.api.Assertions.*;

import co.caio.cerberus.Util;
import java.io.IOException;
-import java.nio.file.Files;
import java.nio.file.Path;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

class IndexerTest {
@Test
@@ -21,8 +21,7
}

@Test
- void simpleLocalIndexer() throws IOException {
- var tempDir = Files.createTempDirectory("cerberus-test");
+ void simpleLocalIndexer(@TempDir Path tempDir) throws IOException {
var index = new Indexer.Builder().dataDirectory(tempDir).createMode().build();
assertEquals(0, index.numDocs());
index.addRecipe(Util.getBasicRecipe());

Modified src/test/java/co/caio/cerberus/search/SearcherTest.java

@@ -16,7 +16,6
import co.caio.cerberus.model.SearchQuery.SortOrder;
import co.caio.cerberus.search.IndexConfiguration.IndexConfigurationException;
import co.caio.cerberus.search.Searcher.Builder.SearcherBuilderException;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.OptionalInt;
@@ -25,6 +24,7
import org.apache.lucene.search.MatchNoDocsQuery;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

class SearcherTest {
private static Searcher searcher;
@@ -109,12 +109,8
}

@Test
- void dietThreshold() throws Exception {
- var indexer =
- new Indexer.Builder()
- .dataDirectory(Files.createTempDirectory("threshold-test"))
- .createMode()
- .build();
+ void dietThreshold(@TempDir Path tmpDir) throws Exception {
+ var indexer = new Indexer.Builder().dataDirectory(tmpDir).createMode().build();

var recipeBuilder =
new Recipe.Builder()

Renamed src/main/java/co/caio/cerberus/db/ChronicleRecipeMetadataDatabase.java to src/test/java/co/caio/cerberus/db/SimpleRecipeMetadataDatabaseTest.java

@@ -1,99 +1,74
package co.caio.cerberus.db;

-import java.io.IOException;
-import java.nio.ByteBuffer;
+import static org.junit.jupiter.api.Assertions.*;
+
+import co.caio.cerberus.Util;
+import co.caio.cerberus.db.RecipeMetadataDatabase.RecipeMetadataDbException;
import java.nio.file.Path;
-import java.util.ArrayList;
import java.util.List;
-import java.util.Optional;
-import net.openhft.chronicle.map.ChronicleMap;
-import net.openhft.chronicle.map.ChronicleMapBuilder;
+import java.util.stream.Collectors;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

-public class ChronicleRecipeMetadataDatabase implements RecipeMetadataDatabase {
+class SimpleRecipeMetadataDatabaseTest {

- private static final String DATABASE_NAME = "recipe-metadata";
+ @Test
+ void canSaveAndReadSamples(@TempDir Path dbPath) {
+ var numSamples = 10;

- final ChronicleMap<Long, ByteBuffer> backingMap;
+ var writer = new SimpleRecipeMetadataDatabase.Writer(dbPath);

- private ChronicleRecipeMetadataDatabase(ChronicleMap<Long, ByteBuffer> backingMap) {
- this.backingMap = backingMap;
- }
+ var samples =
+ Util.getSampleRecipes()
+ .map(RecipeMetadata::fromRecipe)
+ .limit(numSamples)
+ .peek(writer::addRecipe)
+ .collect(Collectors.toList());

- public static RecipeMetadataDatabase open(Path databasePath) {
- try {
- var map =
- ChronicleMapBuilder.of(Long.class, ByteBuffer.class)
- .name(DATABASE_NAME)
- .createPersistedTo(databasePath.toFile());
- return new ChronicleRecipeMetadataDatabase(map);
- } catch (IOException rethrown) {
- throw new RecipeMetadataDbException(rethrown);
- }
- }
+ writer.close();

- public static RecipeMetadataDatabase create(
- Path databasePath, double averageValueBytesSize, long numberOfEntries) {
- try {
- var map =
- ChronicleMapBuilder.of(Long.class, ByteBuffer.class)
- .name(DATABASE_NAME)
- .constantKeySizeBySample(1L)
- .averageValueSize(averageValueBytesSize)
- .entries(numberOfEntries)
- .createPersistedTo(databasePath.toFile());
- return new WriteableChronicleRecipeMetadataDatabase(map);
- } catch (IOException rethrown) {
- throw new RecipeMetadataDbException(rethrown);
- }
- }
+ var db = new SimpleRecipeMetadataDatabase(dbPath);

- private RecipeMetadata get(long recipeId) {
- var buffer = backingMap.get(recipeId);
- if (buffer != null) {
- return RecipeMetadata.fromFlatRecipe(FlatBufferSerializer.INSTANCE.readRecipe(buffer));
- } else {
- return null;
- }
- }
-
- @Override
- public void close() {
- backingMap.close();
- }
-
- @Override
- public Optional<RecipeMetadata> findById(long recipeId) {
- return Optional.ofNullable(get(recipeId));
- }
-
- @Override
- public List<RecipeMetadata> findAllById(List<Long> recipeIds) {
- var result = new ArrayList<RecipeMetadata>(recipeIds.size());
- recipeIds.forEach(
- id -> {
- var recipe = get(id);
- if (recipe != null) {
- result.add(recipe);
- }
+ samples.forEach(
+ r -> {
+ var dbRecipe = db.findById(r.getRecipeId());
+ assertTrue(dbRecipe.isPresent());
+ assertEquals(r.getRecipeId(), dbRecipe.get().getRecipeId());
});
- return result;
+
+ assertEquals(numSamples, db.size());
}

- @Override
- public void saveAll(List<RecipeMetadata> recipes) {
- throw new RecipeMetadataDbException("Database is open as read-only");
+ @Test
+ void canCreateEmptyDb(@TempDir Path dbPath) {
+ new SimpleRecipeMetadataDatabase.Writer(dbPath).close();
+ assertDoesNotThrow(() -> new SimpleRecipeMetadataDatabase(dbPath));
+ assertEquals(0, new SimpleRecipeMetadataDatabase(dbPath).size());
}

- static class WriteableChronicleRecipeMetadataDatabase extends ChronicleRecipeMetadataDatabase {
+ @Test
+ void cannotOpenInvalidDir() {
+ assertThrows(
+ RecipeMetadataDbException.class,
+ () -> new SimpleRecipeMetadataDatabase(Path.of("/does/not/exist")));
+ }

- WriteableChronicleRecipeMetadataDatabase(ChronicleMap<Long, ByteBuffer> backingMap) {
- super(backingMap);
- }
+ @Test
+ void cannotWriteToExistingDb(@TempDir Path dbPath) {

- @Override
- public void saveAll(List<RecipeMetadata> recipes) {
- recipes.forEach(
- rm -> backingMap.put(rm.getRecipeId(), FlatBufferSerializer.INSTANCE.flattenRecipe(rm)));
- }
+ // First open+close should work
+ assertDoesNotThrow(() -> new SimpleRecipeMetadataDatabase.Writer(dbPath).close());
+ // Trying to open a writer when a database exists should fail
+ assertThrows(
+ RecipeMetadataDbException.class, () -> new SimpleRecipeMetadataDatabase.Writer(dbPath));
+ }
+
+ @Test
+ void saveAllIsNotAllowed(@TempDir Path dbPath) {
+ new SimpleRecipeMetadataDatabase.Writer(dbPath).close();
+ var db = new SimpleRecipeMetadataDatabase(dbPath);
+ assertThrows(
+ RecipeMetadataDbException.class,
+ () -> db.saveAll(List.of(RecipeMetadata.fromRecipe(Util.getBasicRecipe()))));
}
}

Created src/main/java/co/caio/cerberus/db/SimpleRecipeMetadataDatabase.java

@@ -1,0 +1,247
+package co.caio.cerberus.db;
+
+import co.caio.cerberus.flatbuffers.FlatRecipe;
+import com.carrotsearch.hppc.LongIntHashMap;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Read-only {@link RecipeMetadataDatabase} backed by two files inside a base directory:
+ *
+ * <ul>
+ *   <li>{@code offsets.sdb}: an int holding the number of entries, followed by one {@code (long
+ *       recipeId, int offset)} pair per entry;
+ *   <li>{@code data.sdb}: the serialized recipes, each one starting at its recorded offset.
+ * </ul>
+ *
+ * <p>The offsets are loaded into memory and the data file is memory-mapped when the database is
+ * opened. Databases are created with {@link Writer}.
+ */
+public class SimpleRecipeMetadataDatabase implements RecipeMetadataDatabase {
+
+  private static final String FILE_OFFSETS = "offsets.sdb";
+  private static final String FILE_DATA = "data.sdb";
+
+  private static final int OFFSET_NOT_FOUND = -1;
+
+  // Size of the entry-count header at the start of the offsets file
+  private static final int OFFSETS_HEADER_BYTES = Integer.BYTES;
+  // Size of one (long recipeId, int offset) pair in the offsets file
+  private static final int OFFSETS_ENTRY_BYTES = Long.BYTES + Integer.BYTES;
+
+  private final LongIntHashMap idToOffset;
+  private final ByteBuffer rawData;
+
+  /**
+   * Opens the database stored at {@code baseDir}.
+   *
+   * @param baseDir directory holding the files produced by a {@link Writer}
+   * @throws RecipeMetadataDbException if {@code baseDir} is not a directory or if either file
+   *     is missing, unreadable or inconsistent
+   */
+  public SimpleRecipeMetadataDatabase(Path baseDir) {
+
+    if (!Files.isDirectory(baseDir)) {
+      throw new RecipeMetadataDbException("Not a directory: " + baseDir);
+    }
+
+    try (var raf = new RandomAccessFile(baseDir.resolve(FILE_OFFSETS).toFile(), "r")) {
+
+      // A negative count is the placeholder left by a Writer that was never closed
+      int size = raf.readInt();
+      if (size < 0) {
+        throw new RecipeMetadataDbException("Invalid offsets file length");
+      }
+
+      // A header that promises more entries than the file holds means a truncated or
+      // corrupt file: fail before allocating a map sized after a bogus count
+      long requiredBytes = OFFSETS_HEADER_BYTES + (long) size * OFFSETS_ENTRY_BYTES;
+      if (raf.length() < requiredBytes) {
+        throw new RecipeMetadataDbException("Invalid offsets file length");
+      }
+
+      idToOffset = new LongIntHashMap(size);
+
+      for (int i = 0; i < size; i++) {
+        long recipeId = raf.readLong();
+        idToOffset.put(recipeId, raf.readInt());
+      }
+
+    } catch (IOException e) {
+      throw new RecipeMetadataDbException(e);
+    }
+
+    // Opened as read-only so that a missing data file is reported instead of being silently
+    // created. The mapping stays valid after the channel is closed, so the file handle is
+    // released right away instead of being leaked.
+    try (var dataFile = new RandomAccessFile(baseDir.resolve(FILE_DATA).toFile(), "r");
+        var channel = dataFile.getChannel()) {
+
+      rawData = channel.map(MapMode.READ_ONLY, 0, channel.size());
+
+    } catch (IOException e) {
+      throw new RecipeMetadataDbException(e);
+    }
+  }
+
+  /** Returns the number of recipes stored in this database. */
+  public int size() {
+    return idToOffset.size();
+  }
+
+  /**
+   * Looks up the metadata stored for {@code recipeId}.
+   *
+   * @return the stored metadata, or an empty {@link Optional} when the id is unknown
+   */
+  @Override
+  public Optional<RecipeMetadata> findById(long recipeId) {
+    int offset = idToOffset.getOrDefault(recipeId, OFFSET_NOT_FOUND);
+
+    if (offset == OFFSET_NOT_FOUND) {
+      return Optional.empty();
+    }
+
+    // Each lookup positions its own view, leaving the shared buffer's position untouched
+    var buffer = rawData.asReadOnlyBuffer().position(offset);
+
+    return Optional.of(RecipeMetadata.fromFlatRecipe(FlatRecipe.getRootAsFlatRecipe(buffer)));
+  }
+
+  /**
+   * Always fails: this implementation is read-only.
+   *
+   * @throws RecipeMetadataDbException unconditionally
+   */
+  @Override
+  public void saveAll(List<RecipeMetadata> recipes) {
+    throw new RecipeMetadataDbException("Read-only! Use the Writer inner class to create a db");
+  }
+
+  /**
+   * Creates a new database inside a directory. Recipes are appended with {@link #addRecipe} and
+   * the database only becomes readable after {@link #close()} records the final entry count.
+   *
+   * <p>Not thread safe.
+   */
+  public static class Writer implements AutoCloseable {
+
+    int numRecipes;
+    final FileChannel dataChannel;
+    final RandomAccessFile offsetsFile;
+
+    /**
+     * Starts a new database at {@code baseDir}, creating the directory if needed.
+     *
+     * @throws RecipeMetadataDbException if a database already exists at {@code baseDir} or if
+     *     its files cannot be created
+     */
+    public Writer(Path baseDir) {
+
+      this.numRecipes = 0;
+
+      try {
+        Files.createDirectories(baseDir);
+      } catch (IOException wrapped) {
+        throw new RecipeMetadataDbException(wrapped);
+      }
+
+      var dataPath = baseDir.resolve(FILE_DATA);
+      var offsetsPath = baseDir.resolve(FILE_OFFSETS);
+
+      if (Files.exists(dataPath) || Files.exists(offsetsPath)) {
+        throw new RecipeMetadataDbException("Database already exists at given path");
+      }
+
+      RandomAccessFile dataFile = null;
+      RandomAccessFile offsets = null;
+
+      try {
+        dataFile = new RandomAccessFile(dataPath.toFile(), "rw");
+        offsets = new RandomAccessFile(offsetsPath.toFile(), "rw");
+
+        // First bytes are for the number of items in the database
+        // we set to -1 here and, during close(), configure the
+        // correct value
+        offsets.writeInt(-1);
+      } catch (IOException wrapped) {
+        // Do not leak whichever file was already open when the failure happened
+        closeAfterFailure(offsets, wrapped);
+        closeAfterFailure(dataFile, wrapped);
+        throw new RecipeMetadataDbException(wrapped);
+      }
+
+      this.dataChannel = dataFile.getChannel();
+      this.offsetsFile = offsets;
+    }
+
+    /**
+     * Appends {@code recipe} to the data file and records the offset where it starts.
+     *
+     * @throws RecipeMetadataDbException if writing fails or if the data file has grown past
+     *     what an int offset can address
+     */
+    public void addRecipe(RecipeMetadata recipe) {
+      // XXX Not thread safe
+      try {
+        long offset = dataChannel.position();
+        if (offset > Integer.MAX_VALUE) {
+          // A narrowing cast would silently record a wrong (possibly negative) offset
+          throw new RecipeMetadataDbException("Data file is too large, offset: " + offset);
+        }
+
+        // A single write() call is allowed to consume only part of the buffer
+        var buffer = FlatBufferSerializer.INSTANCE.flattenRecipe(recipe);
+        while (buffer.hasRemaining()) {
+          dataChannel.write(buffer);
+        }
+
+        offsetsFile.writeLong(recipe.getRecipeId());
+        offsetsFile.writeInt((int) offset);
+
+        this.numRecipes++;
+      } catch (IOException e) {
+        throw new RecipeMetadataDbException(e);
+      }
+    }
+
+    /**
+     * Records the number of written recipes at the start of the offsets file and closes both
+     * files, even when recording the count fails.
+     *
+     * @throws RecipeMetadataDbException if the count cannot be written or a file fails to close
+     */
+    @Override
+    public void close() {
+      // Resources are closed in reverse order: the data channel first, then the offsets file
+      try (offsetsFile; dataChannel) {
+        // Write the number of recipes to the beginning of the offsets file
+        offsetsFile.seek(0);
+        offsetsFile.writeInt(numRecipes);
+      } catch (IOException wrapped) {
+        throw new RecipeMetadataDbException(wrapped);
+      }
+    }
+
+    // Closes {@code file} if it was opened, attaching any close failure to {@code failure}
+    private static void closeAfterFailure(RandomAccessFile file, IOException failure) {
+      if (file == null) {
+        return;
+      }
+
+      try {
+        file.close();
+      } catch (IOException suppressed) {
+        failure.addSuppressed(suppressed);
+      }
+    }
+  }
+}

Deleted src/test/java/co/caio/cerberus/db/ChronicleRecipeMetadataDatabaseTest.java

@@ -1,84 +1,0
-package co.caio.cerberus.db;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-import co.caio.cerberus.Util;
-import co.caio.cerberus.db.RecipeMetadataDatabase.RecipeMetadataDbException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-
-class ChronicleRecipeMetadataDatabaseTest {
-
- private static Path rwdbPath;
- private static RecipeMetadataDatabase testRWDb;
-
- @BeforeAll
- static void createTmpDir() throws Exception {
- rwdbPath = Files.createTempDirectory("chronicle-test").resolve("test.db");
- testRWDb = ChronicleRecipeMetadataDatabase.create(rwdbPath, 2000, Util.expectedIndexSize());
- }
-
- @AfterAll
- static void closeDb() {
- testRWDb.close();
- }
-
- @Test
- void cantOpenADbThatDoesntExist() {
- assertThrows(
- RecipeMetadataDbException.class,
- () -> ChronicleRecipeMetadataDatabase.open(Path.of("/this/doesnt/exist.db")));
- }
-
- @Test
- void canOpenAfterCreation() {
- assertDoesNotThrow(
- () -> {
- var ro = ChronicleRecipeMetadataDatabase.open(rwdbPath);
- ro.close();
- });
- }
-
- @Test
- void cannotWriteToReadOnlyDb() {
- var roDb = ChronicleRecipeMetadataDatabase.open(rwdbPath);
- assertThrows(
- RecipeMetadataDbException.class,
- () -> roDb.saveAll(List.of(RecipeMetadata.fromRecipe(Util.getBasicRecipe()))));
- roDb.close();
- }
-
- @Test
- void canWriteToRwDb() {
- var recipes =
- Util.getSampleRecipes().map(RecipeMetadata::fromRecipe).collect(Collectors.toList());
-
- assertDoesNotThrow(() -> testRWDb.saveAll(recipes));
-
- for (RecipeMetadata rm : recipes) {
- var maybeRm = testRWDb.findById(rm.getRecipeId());
- assertTrue(maybeRm.isPresent());
- assertEquals(rm.getRecipeId(), maybeRm.get().getRecipeId());
- }
-
- var recipeIds =
- recipes.stream().limit(10).map(RecipeMetadata::getRecipeId).collect(Collectors.toList());
- var fetched = testRWDb.findAllById(recipeIds);
-
- assertEquals(recipeIds.size(), fetched.size());
- for (int i = 0; i < recipeIds.size(); i++) {
- var rm = fetched.get(i);
- assertEquals(recipeIds.get(i).longValue(), rm.getRecipeId());
- }
- }
-
- @Test
- void unexistingIdResultIsEmpty() {
- assertTrue(testRWDb.findById(-42).isEmpty());
- }
-}