Stop generating custom collector code
It works! My only gripe is that this new API loses the ability to use an embedded database (say, a hashmap) to read the metadata since it requires static lifetimes everywhere. I'm focusing on bytes fast fields, so requiring ownership is fine for now. It would be nice to implement `Feature` for `&Input` and use the same collector code though, so we'll see...
- Id
- df04d7385144599c86563cafa94f60956e65e579
- Author
- Caio
- Commit time
- 2020-02-04T17:17:05+01:00
Modified cantine_derive/tests/basic.rs
-use std::{collections::HashMap, convert::TryFrom, ops::Range, sync::Arc};
+use std::{
+ collections::HashMap,
+ convert::TryFrom,
+ ops::Range,
+ sync::{Arc, Mutex},
+};
use tantivy::{
query::AllQuery,
Document, Index, SegmentReader,
};
-use cantine_derive::{FilterAndAggregation, RangeStats};
+use cantine_derive::{FeatureCollector, FilterAndAggregation, RangeStats};
#[derive(FilterAndAggregation, Default)]
pub struct Feat {
let index = Index::create_in_ram(builder.build());
- let mut writer = index.writer_with_num_threads(1, 50_000_000)?;
+ let mut writer = index.writer_with_num_threads(1, 3_000_000)?;
let mut db = HashMap::new();
let mut add_feat = |id: u64, feat| {
d: vec![42.0..100.0],
};
- let db = Arc::new(db);
- let collector = FeatCollector::new(query, move |seg_reader: &SegmentReader| {
- let id_reader = seg_reader.fast_fields().u64(id_field).unwrap();
- let db = db.clone();
- move |doc, query, agg| {
- let id = id_reader.get(doc);
- agg.collect(query, db.get(&id).unwrap());
- }
- });
+ let db = Arc::new(Mutex::new(db));
+ let collector =
+ FeatureCollector::<Feat, _, _>::new(query, move |seg_reader: &SegmentReader| {
+ let id_reader = seg_reader.fast_fields().u64(id_field).unwrap();
+ let db = db.clone();
+ move |doc| {
+ let id = id_reader.get(doc);
+ db.lock().unwrap().remove(&id)
+ }
+ });
let reader = index.reader()?;
let searcher = reader.searcher();
Modified cantine_derive/internal/src/lib.rs
let agg_query = make_agg_query(&input);
let agg_result = make_agg_result(&input);
- let collector = make_collector(&input);
+ let collector = impl_collector_traits(&input);
TokenStream::from(quote! {
#filter_query
})
}
-fn make_collector(input: &DeriveInput) -> TokenStream2 {
+fn impl_collector_traits(input: &DeriveInput) -> TokenStream2 {
let meta = &input.ident;
let agg = format_ident!("{}AggregationResult", meta);
let query = format_ident!("{}AggregationQuery", meta);
- let collector = format_ident!("{}Collector", meta);
- let segment_collector = format_ident!("{}SegmentColletor", meta);
-
quote! {
- pub struct #collector<F> {
- agg: #agg,
- query: #query,
- reader_factory: F,
- }
-
- impl<F, R> #collector<F>
- where
- F: 'static + Sync + Fn(&tantivy::SegmentReader) -> R,
- R: 'static + Fn(tantivy::DocId, &#query, &mut #agg),
- {
- pub fn new(query: #query, reader_factory: F) -> Self {
- let agg = <#agg>::from(&query);
- Self {
- agg, query, reader_factory
- }
+ impl cantine_derive::Mergeable for #agg {
+ fn merge_same_size(&mut self, other: &Self) {
+ <#agg>::merge_same_size(self, other);
}
}
- pub struct #segment_collector<F> {
- agg: #agg,
- query: #query,
- reader: F,
- }
+ impl cantine_derive::Feature<#query> for #meta {
+ type Agg = #agg;
- impl<F, R> tantivy::collector::Collector for #collector<F>
- where
- F: 'static + Sync + Fn(&tantivy::SegmentReader) -> R,
- R: 'static + Fn(tantivy::DocId, &#query, &mut #agg),
- {
- type Fruit = #agg;
- type Child = #segment_collector<R>;
-
- fn for_segment(
- &self,
- _segment_id: tantivy::SegmentLocalId,
- segment_reader: &tantivy::SegmentReader,
- ) -> tantivy::Result<Self::Child> {
- Ok(#segment_collector {
- agg: self.agg.clone(),
- query: self.query.clone(),
- reader: (self.reader_factory)(segment_reader),
- })
- }
-
- fn requires_scoring(&self) -> bool {
- false
- }
-
- fn merge_fruits(&self, fruits: Vec<Self::Fruit>) -> tantivy::Result<Self::Fruit> {
- let mut iter = fruits.into_iter();
-
- let mut first = iter.next().expect("Always at least one fruit");
-
- for fruit in iter {
- first.merge_same_size(&fruit);
- }
-
- Ok(first)
+ fn collect_into(&self, query: &#query, agg: &mut #agg) {
+ agg.collect(&query, &self);
}
}
- impl<F> tantivy::collector::SegmentCollector for #segment_collector<F>
- where
- F: 'static + Fn(tantivy::DocId, &#query, &mut #agg),
- {
- type Fruit = #agg;
+ impl cantine_derive::Feature<#query> for &#meta {
+ type Agg = #agg;
- fn collect(&mut self, doc: tantivy::DocId, _score: tantivy::Score) {
- (self.reader)(doc, &self.query, &mut self.agg);
- }
-
- fn harvest(self) -> Self::Fruit {
- self.agg
+ fn collect_into(&self, query: &#query, agg: &mut #agg) {
+ agg.collect(&query, &self);
}
}
}