Tune MoreLikeThis query building parameters
Filter stop words and use an absolute document-frequency threshold instead of a percentage.
- Id
- 428834263d405ad6fb2abf85516e40c77cebdc5e
- Author
- Caio
- Commit time
- 2019-04-13T20:47:22+02:00
Modified src/main/java/co/caio/cerberus/search/SearcherImpl.java
import co.caio.cerberus.model.SearchResult;
import java.io.IOException;
import java.io.StringReader;
-import java.util.OptionalInt;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.LongPoint;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
moreLikeThis = new MoreLikeThis(builder.getIndexReader());
moreLikeThis.setAnalyzer(indexConfiguration.getAnalyzer());
- // Ignore words that occur in more than 50% of recipes
- moreLikeThis.setMaxDocFreqPct(50);
- // Relevant for docId-based similarity
- moreLikeThis.setFieldNames(new String[] {FULL_RECIPE});
+ moreLikeThis.setStopWords(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
+ moreLikeThis.setMaxDocFreq(10000);
}
private static Sort integerSorterWithDefault(String fieldName) {