caio.co/de/cantine

Store floats in a u64 field

I still hit the fast field crash when indexing fast float fields
in parallel. My guess is that somewhere in the code it assumes
that fast field readers are loaded (maybe prior to the second merging
thread?), but floats are relatively rare in the model, and then it
blows up.

Anyway, I'll investigate that later - let's get this working first.
Id
8699b095edf5715b11a3aedf61ab4bdfeee0c31a
Author
Caio
Commit time
2019-12-12T08:25:06+01:00

Modified crates/cantine_derive/src/lib.rs

@@ -8,7 +8,6
PathArguments, Type, Visibility,
};

-// TODO split derives
#[proc_macro_derive(FilterAndAggregation)]
pub fn derive_filter_and_agg(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
@@ -151,17 +150,12
let method = match field_type {
FieldType::UNSIGNED => quote!(add_u64_field),
FieldType::SIGNED => quote!(add_i64_field),
- FieldType::FLOAT => quote!(add_f64_field),
+ // NOTE floats are stored as u64
+ FieldType::FLOAT => quote!(add_u64_field),
};

- match field_type {
- // FIXME tantivy 0.11+
- FieldType::FLOAT => quote_spanned! { field.span()=>
- #name: builder.#method(#quoted, tantivy::schema::INDEXED)
- },
- _ => quote_spanned! { field.span()=>
- #name: builder.#method(#quoted, tantivy::schema::INDEXED | tantivy::schema::FAST)
- },
+ quote_spanned! { field.span()=>
+ #name: builder.#method(#quoted, tantivy::schema::INDEXED | tantivy::schema::FAST)
}
});

@@ -234,8 +228,14
let field_type = get_field_type(&ty);

let convert_code = if is_largest {
- quote_spanned! { field.span()=>
- let value = value;
+ match field_type {
+ // NOTE floats are stored as u64
+ FieldType::FLOAT => quote_spanned! { field.span()=>
+ let value = value.to_bits();
+ },
+ _ => quote_spanned! { field.span()=>
+ let value = value;
+ },
}
} else {
match field_type {
@@ -246,7 +246,7
let value = i64::from(value);
},
FieldType::FLOAT => quote_spanned! { field.span()=>
- let value = f64::from(value);
+ let value = f64::from(value).to_bits();
},
}
};
@@ -254,7 +254,8
let add_code = match field_type {
FieldType::UNSIGNED => quote!(doc.add_u64(self.#name, value);),
FieldType::SIGNED => quote!(doc.add_i64(self.#name, value);),
- FieldType::FLOAT => quote!(doc.add_f64(self.#name, value);),
+ // NOTE floats are stored as u64
+ FieldType::FLOAT => quote!(doc.add_u64(self.#name, value);),
};

if is_optional {
@@ -285,7 +286,6
}

impl std::convert::TryFrom<&tantivy::schema::Schema> for #index_name {
- // TODO better errors
type Error = tantivy::TantivyError;

fn try_from(schema: &tantivy::schema::Schema) -> std::result::Result<Self, Self::Error> {

Modified crates/cantine_derive/tests/basic.rs

@@ -204,10 +204,14

// Set values are filled properly
assert_eq!(Some(&Value::U64(10)), doc.get_first(fields.a));
- assert_eq!(Some(&Value::F64(0.0)), doc.get_first(fields.c));
- assert_eq!(Some(&Value::F64(0.42)), doc.get_first(fields.d));
// Unsed optional values aren't added
assert_eq!(None, doc.get_first(fields.b));
+ // Float values are stored as U64
+ assert_eq!(Some(&Value::U64(0.0f64.to_bits())), doc.get_first(fields.c));
+ assert_eq!(
+ Some(&Value::U64(0.42f64.to_bits())),
+ doc.get_first(fields.d)
+ );
}

#[test]