caio.co/de/cerberus

Expose findSimilar(recipeId, maxResults)

This patch exposes a `Searcher.findSimilar(long, int)` API so that
we can make use of already indexed data to find similar recipes
to a given (known) recipe id.

It's effectively a more efficient `Searcher.findSimilar(String, int)`
for when we already know the recipe we're querying similarities for.
Id
87703da650bdfb83cfa0ea9673cfcae0e1a32577
Author
Caio
Commit time
2019-04-05T12:24:35+02:00

Modified src/main/java/co/caio/cerberus/search/Searcher.java

@@ -16,6 +16,8

SearchResult findSimilar(String recipeText, int maxResults);

+ SearchResult findSimilar(long recipeId, int maxResults);
+
int numDocs();

class Builder {

Modified src/main/java/co/caio/cerberus/search/SearcherImpl.java

@@ -35,6 +35,9
private static final Sort sortTotalTime = integerSorterWithDefault(TOTAL_TIME);
private static final Sort sortCalories = integerSorterWithDefault(CALORIES);

+ private static final SearchResult EMPTY_SEARCH_RESULT =
+ new SearchResult.Builder().totalHits(0).build();
+
private final IndexSearcher indexSearcher;
private final TaxonomyReader taxonomyReader;
private final IndexConfiguration indexConfiguration;
@@ -51,6 +54,8
moreLikeThis.setAnalyzer(indexConfiguration.getAnalyzer());
// Ignore words that occur in more than 50% of recipes
moreLikeThis.setMaxDocFreqPct(50);
+ // Relevant for docId-based similarity
+ moreLikeThis.setFieldNames(new String[] {FULL_RECIPE});
}

private static Sort integerSorterWithDefault(String fieldName) {
@@ -81,6 +86,38
}

return builder.build();
+ } catch (IOException wrapped) {
+ throw new SearcherException(wrapped);
+ }
+ }
+
+ @Override
+ public SearchResult findSimilar(long recipeId, int maxResults) {
+ try {
+ var docId = findDocId(recipeId);
+
+ if (docId.isEmpty()) {
+ return EMPTY_SEARCH_RESULT;
+ }
+
+ // We use `maxResults + 1` because we'll filter out the
+ // given recipeId from the results
+ var result = indexSearcher.search(moreLikeThis.like(docId.getAsInt()), maxResults + 1);
+
+ var builder = new SearchResult.Builder();
+
+ int totalHits = 0;
+ for (int i = 0; i < result.scoreDocs.length; i++) {
+ Document doc = indexSearcher.doc(result.scoreDocs[i].doc);
+ long foundRecipeId = doc.getField(RECIPE_ID).numericValue().longValue();
+
+ if (foundRecipeId != recipeId) {
+ builder.addRecipe(foundRecipeId);
+ totalHits++;
+ }
+ }
+
+ return builder.totalHits(totalHits).build();
} catch (IOException wrapped) {
throw new SearcherException(wrapped);
}

Modified src/test/java/co/caio/cerberus/search/SearcherTest.java

@@ -383,4 +383,17
// }
});
}
+
+ @Test
+ void similarToRecipeIdNeverIncludesSelf() { // findSimilar(recipeId, maxResults) must not return the queried recipe itself
+ Util.getSampleRecipes()
+ .limit(10) // only the first 10 sample recipes are checked
+ .forEach(
+ sampleRecipe -> {
+ var similar = searcher.findSimilar(sampleRecipe.recipeId(), 100);

+ assertTrue(similar.totalHits() > 0); // each sampled recipe is expected to have at least one similar recipe
+ assertEquals(-1, similar.recipeIds().indexOf(sampleRecipe.recipeId())); // -1: the queried id is absent from the results
+ });
+ }
}