Expose findSimilar(recipeId, maxResults)
This patch exposes a `Searcher.findSimilar(long, int)` API so that we can use already-indexed data to find recipes similar to a given (known) recipe id. It's effectively a more efficient `Searcher.findSimilar(String, int)` for when we already know the recipe we're querying similarities for.
- Id
- 87703da650bdfb83cfa0ea9673cfcae0e1a32577
- Author
- Caio
- Commit time
- 2019-04-05T12:24:35+02:00
Modified src/main/java/co/caio/cerberus/search/Searcher.java
SearchResult findSimilar(String recipeText, int maxResults);
+ SearchResult findSimilar(long recipeId, int maxResults);
+
int numDocs();
class Builder {
Modified src/main/java/co/caio/cerberus/search/SearcherImpl.java
private static final Sort sortTotalTime = integerSorterWithDefault(TOTAL_TIME);
private static final Sort sortCalories = integerSorterWithDefault(CALORIES);
+ private static final SearchResult EMPTY_SEARCH_RESULT =
+ new SearchResult.Builder().totalHits(0).build();
+
private final IndexSearcher indexSearcher;
private final TaxonomyReader taxonomyReader;
private final IndexConfiguration indexConfiguration;
moreLikeThis.setAnalyzer(indexConfiguration.getAnalyzer());
// Ignore words that occur in more than 50% of recipes
moreLikeThis.setMaxDocFreqPct(50);
+ // Relevant for docId-based similarity
+ moreLikeThis.setFieldNames(new String[] {FULL_RECIPE});
}
private static Sort integerSorterWithDefault(String fieldName) {
}
return builder.build();
+ } catch (IOException wrapped) {
+ throw new SearcherException(wrapped);
+ }
+ }
+
+ @Override
+ public SearchResult findSimilar(long recipeId, int maxResults) {
+ try {
+ var docId = findDocId(recipeId);
+
+ if (docId.isEmpty()) {
+ return EMPTY_SEARCH_RESULT;
+ }
+
+ // We use `maxResults + 1` because we'll filter out the
+ // given recipeId from the results
+ var result = indexSearcher.search(moreLikeThis.like(docId.getAsInt()), maxResults + 1);
+
+ var builder = new SearchResult.Builder();
+
+ int totalHits = 0;
+ for (int i = 0; i < result.scoreDocs.length; i++) {
+ Document doc = indexSearcher.doc(result.scoreDocs[i].doc);
+ long foundRecipeId = doc.getField(RECIPE_ID).numericValue().longValue();
+
+ if (foundRecipeId != recipeId) {
+ builder.addRecipe(foundRecipeId);
+ totalHits++;
+ }
+ }
+
+ return builder.totalHits(totalHits).build();
} catch (IOException wrapped) {
throw new SearcherException(wrapped);
}
Modified src/test/java/co/caio/cerberus/search/SearcherTest.java
// }
});
}
+
+ @Test // Checks that findSimilar(recipeId, maxResults) never lists the queried recipe among its own results.
+ void similarToRecipeIdNeverIncludesSelf() {
+ Util.getSampleRecipes()
+ .limit(10) // only the first 10 sample recipes are exercised
+ .forEach(
+ sampleRecipe -> {
+ var similar = searcher.findSimilar(sampleRecipe.recipeId(), 100); // ask for up to 100 similar recipes
+ // Expect at least one hit, and the queried id must not appear in the returned ids (indexOf == -1).
+ assertTrue(similar.totalHits() > 0);
+ assertEquals(-1, similar.recipeIds().indexOf(sampleRecipe.recipeId()));
+ });
+ }
}