caio.co/de/cerberus

WIP: Simple database to replace ChronicleMap

The main reason I'm using ChronicleMap is that I wanted an easy
Map interface with persistence and off-heap memory. Getting rid of
ChronicleMap reduces complexity, increases performance (though that
is negligible: db get()s are far from being the bottleneck) and
allows me to move forward with JDK 12.

This patch implements a working alternative which can be simplified
to a flat file with one recipe serialized (as a flatbuffer) after
another. In order to speed up loading I also write to an extra file
which contains the total number of recipes and recipe_id to offset
associations. The offset lookup table is kept in heap backed by
HPPC's primitive collections. Very little validation is done and
the code is totally susceptible to bad input attacks, but I have
full control of it, so :shrug:

A trivial benchmark such as:

```
/**
 * Benchmark comparing recipe lookups on the ChronicleMap-backed database against the simple
 * flat-file database, using the same fixed set of recipe ids for both.
 */
public class MyBenchmark {

  /** Benchmark-scoped state: both databases plus the ids that every invocation looks up. */
  @State(Scope.Benchmark)
  public static class MyState {

    RecipeMetadataDatabase chronicle;
    RecipeMetadataDatabase sdb;
    long[] ids = {
      289492, 707192, 1061982, 1708006, 1659287, 1653257, 901573, 1557621, 1639379
    };

    /** Opens both databases from the paths in the "cerberus" and "sdb" system properties. */
    public MyState() {
      var chroniclePath = System.getProperty("cerberus");
      var simplePath = System.getProperty("sdb");

      chronicle = ChronicleRecipeMetadataDatabase.open(Path.of(chroniclePath));
      sdb = new SimpleRecipeMetadataDatabase(Path.of(simplePath));
    }
  }

  public MyBenchmark() {}

  /**
   * Looks up every id in {@code ids} and verifies that the returned recipe carries the id that
   * was asked for.
   *
   * @throws RuntimeException if a recipe comes back with a different id
   */
  private void check(RecipeMetadataDatabase db, long[] ids) {
    for (int i = 0; i < ids.length; i++) {
      long wanted = ids[i];
      var found = db.findById(wanted);
      assert found.isPresent();
      long actual = found.get().getRecipeId();
      if (actual != wanted) {
        throw new RuntimeException("oof!");
      }
    }
  }

  /** Measures lookups against the ChronicleMap-backed database. */
  @Benchmark
  public void getChronicle(MyState state) {
    check(state.chronicle, state.ids);
  }

  /** Measures lookups against the simple flat-file database. */
  @Benchmark
  public void getSdb(MyState state) {
    check(state.sdb, state.ids);
  }
}
```

Shows that at least things aren't broken. Yet.

> Benchmark                  Mode  Cnt        Score       Error  Units
> MyBenchmark.getChronicle  thrpt    5    62327.352 ±   214.437  ops/s
> MyBenchmark.getSdb        thrpt    5  2697423.234 ± 28008.573  ops/s
Id
562d12df4eae5467664ef83a1f0ace7fe79d0681
Author
Caio
Commit time
2019-05-20T15:58:47+02:00

Created src/main/java/co/caio/cerberus/db/SimpleRecipeMetadataDatabase.java

@@ -1,0 +1,151
+package co.caio.cerberus.db;
+
+import co.caio.cerberus.flatbuffers.FlatRecipe;
+import com.carrotsearch.hppc.LongIntHashMap;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Files;
+import java.nio.file.InvalidPathException;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Optional;
+
+public class SimpleRecipeMetadataDatabase implements RecipeMetadataDatabase {
+
+ static final String FILE_OFFSETS = "offsets.sdb";
+ static final String FILE_DATA = "data.sdb";
+
+ final Path baseDir;
+ // Fixme maybe scatter? rewrite key?
+ final LongIntHashMap idToOffset;
+ final ByteBuffer rawData;
+
+ public SimpleRecipeMetadataDatabase(Path baseDir) {
+
+ if (!baseDir.toFile().isDirectory()) {
+ throw new InvalidPathException(baseDir.toString(), "Not a directory");
+ }
+
+ this.baseDir = baseDir;
+
+ try (var raf = new RandomAccessFile(baseDir.resolve(FILE_OFFSETS).toFile(), "r")) {
+
+ int size = raf.readInt();
+ assert size > 0;
+
+ idToOffset = new LongIntHashMap(size);
+
+ while (size-- > 0) {
+ idToOffset.put(raf.readLong(), raf.readInt());
+ }
+
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ try {
+ var dataPath = baseDir.resolve(FILE_DATA);
+
+ rawData =
+ new RandomAccessFile(dataPath.toFile(), "rw")
+ .getChannel()
+ .map(MapMode.READ_ONLY, 0, Files.size(dataPath));
+
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ static final int OFFSET_NOT_FOUND = -1;
+
+ @Override
+ public Optional<RecipeMetadata> findById(long recipeId) {
+ int offset = idToOffset.getOrDefault(recipeId, OFFSET_NOT_FOUND);
+
+ if (offset == OFFSET_NOT_FOUND) {
+ return Optional.empty();
+ }
+
+ var buffer = rawData.asReadOnlyBuffer().position(offset);
+
+ return Optional.of(RecipeMetadata.fromFlatRecipe(FlatRecipe.getRootAsFlatRecipe(buffer)));
+ }
+
+ @Override
+ public List<RecipeMetadata> findAllById(List<Long> recipeIds) {
+ return null;
+ }
+
+ @Override
+ public void saveAll(List<RecipeMetadata> recipes) {
+ throw new RuntimeException("Read-only!");
+ }
+
+ public static class Writer {
+
+ int numRecipes;
+ final FileChannel dataChannel;
+ final RandomAccessFile offsetsFile;
+
+ public Writer(Path baseDir) {
+
+ this.numRecipes = 0;
+
+ try {
+ Files.createDirectories(baseDir);
+ } catch (IOException wrapped) {
+ throw new RuntimeException(wrapped);
+ }
+
+ var dataPath = baseDir.resolve(FILE_DATA);
+ var offsetsPath = baseDir.resolve(FILE_OFFSETS);
+
+ if (dataPath.toFile().exists() || offsetsPath.toFile().exists()) {
+ throw new InvalidPathException(baseDir.toString(), "Database already exists at given path");
+ }
+
+ try {
+ this.dataChannel = new RandomAccessFile(dataPath.toFile(), "rw").getChannel();
+ this.offsetsFile = new RandomAccessFile(offsetsPath.toFile(), "rw");
+
+ } catch (FileNotFoundException wrapped) {
+ throw new RuntimeException(wrapped);
+ }
+
+ try {
+ this.offsetsFile.writeInt(0);
+ } catch (IOException wrapped) {
+ throw new RuntimeException(wrapped);
+ }
+ }
+
+ public void addRecipe(RecipeMetadata recipe) {
+ try {
+ int offset = (int) dataChannel.position();
+ dataChannel.write(FlatBufferSerializer.INSTANCE.flattenRecipe(recipe));
+
+ offsetsFile.writeLong(recipe.getRecipeId());
+ offsetsFile.writeInt(offset);
+
+ this.numRecipes++;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void close() {
+ try {
+ dataChannel.close();
+ offsetsFile.seek(0);
+ offsetsFile.writeInt(numRecipes);
+ offsetsFile.close();
+ } catch (IOException wrapped) {
+ throw new RuntimeException(wrapped);
+ }
+ }
+ }
+}

Created src/test/java/co/caio/cerberus/db/SimpleRecipeMetadataDatabaseTest.java

@@ -1,0 +1,36
+package co.caio.cerberus.db;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import co.caio.cerberus.Util;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.stream.Collectors;
+import org.junit.jupiter.api.Test;
+
+/** Round-trip test: recipes written with the Writer must be readable from the database. */
+class SimpleRecipeMetadataDatabaseTest {
+
+  /** Writes every sample recipe to a fresh database and checks that each can be found by id. */
+  @Test
+  void create() throws IOException {
+    var dbPath = Files.createTempDirectory("sdb");
+
+    var samples =
+        Util.getSampleRecipes().map(RecipeMetadata::fromRecipe).collect(Collectors.toList());
+
+    // Without samples the lookups below would check nothing and the test would pass vacuously.
+    assertFalse(samples.isEmpty(), "Expected at least one sample recipe");
+
+    var writer = new SimpleRecipeMetadataDatabase.Writer(dbPath);
+    try {
+      // Explicit loop instead of Stream.peek: writing is a required side effect, not debugging.
+      samples.forEach(writer::addRecipe);
+    } finally {
+      writer.close();
+    }
+
+    var db = new SimpleRecipeMetadataDatabase(dbPath);
+
+    for (var expected : samples) {
+      var dbRecipe = db.findById(expected.getRecipeId());
+      assertTrue(dbRecipe.isPresent());
+      assertEquals(expected.getRecipeId(), dbRecipe.get().getRecipeId());
+    }
+  }
+}