From 480763db0d670ed9b8ac40be6ab9515bf13dee51 Mon Sep 17 00:00:00 2001 From: PSeitz Date: Wed, 16 Aug 2023 18:19:42 +0200 Subject: [PATCH] track memory arena memory usage (#2148) --- examples/warmer.rs | 2 +- src/aggregation/bucket/term_agg.rs | 1 + src/collector/filter_collector_wrapper.rs | 4 ++-- src/collector/tests.rs | 2 +- src/collector/top_score_collector.rs | 16 ++++++++-------- src/core/index.rs | 2 +- src/core/tests.rs | 2 +- src/query/range_query/range_query.rs | 2 +- src/snippet/mod.rs | 2 +- stacker/src/arena_hashmap.rs | 2 +- 10 files changed, 18 insertions(+), 17 deletions(-) diff --git a/examples/warmer.rs b/examples/warmer.rs index 0574b337a5..225eda6c52 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -143,7 +143,7 @@ fn main() -> tantivy::Result<()> { const SNEAKERS: ProductId = 23222; let index = Index::create_in_ram(schema); - let mut writer = index.writer_with_num_threads(1, 10_000_000)?; + let mut writer = index.writer_with_num_threads(1, 15_000_000)?; writer.add_document(doc!(product_id=>OLIVE_OIL, text=>"cooking olive oil from greece"))?; writer.add_document(doc!(product_id=>GLOVES, text=>"kitchen gloves, perfect for cooking"))?; writer.add_document(doc!(product_id=>SNEAKERS, text=>"uber sweet sneakers"))?; diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs index 79db14c971..66db88f61e 100644 --- a/src/aggregation/bucket/term_agg.rs +++ b/src/aggregation/bucket/term_agg.rs @@ -1269,6 +1269,7 @@ mod tests { ]; let index = get_test_index_from_terms(false, &terms_per_segment)?; + assert_eq!(index.searchable_segments().unwrap().len(), 2); let agg_req: Aggregations = serde_json::from_value(json!({ "my_texts": { diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index a961c4edc1..3eb3849347 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -38,7 +38,7 @@ use crate::{DocId, Score, SegmentReader, TantivyError}; /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// -/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; +/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64))?; /// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64))?; /// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64))?; @@ -216,7 +216,7 @@ where /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// -/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; +/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// index_writer.add_document(doc!(title => "The Name of the Wind", barcode => &b"010101"[..]))?; /// index_writer.add_document(doc!(title => "The Diary of Muadib", barcode => &b"110011"[..]))?; /// index_writer.add_document(doc!(title => "A Dairy Cow", barcode => &b"110111"[..]))?; diff --git a/src/collector/tests.rs b/src/collector/tests.rs index b545413531..702657e779 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -26,7 +26,7 @@ pub fn test_filter_collector() -> crate::Result<()> { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?; index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?; index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?; diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 95f3161e5d..23152ddebc 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -105,7 +105,7 @@ where /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// -/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; +/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// index_writer.add_document(doc!(title => "The Name of the Wind"))?; /// index_writer.add_document(doc!(title => "The Diary of Muadib"))?; /// index_writer.add_document(doc!(title => "A Dairy Cow"))?; @@ -210,7 +210,7 @@ impl TopDocs { /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// - /// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + /// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// index_writer.add_document(doc!(title => "The Name of the Wind"))?; /// index_writer.add_document(doc!(title => "The Diary of Muadib"))?; /// index_writer.add_document(doc!(title => "A Dairy Cow"))?; @@ -261,7 +261,7 @@ impl TopDocs { /// # let schema = schema_builder.build(); /// # /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + /// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64))?; /// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64))?; /// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64))?; @@ -349,7 +349,7 @@ impl TopDocs { /// # let schema = schema_builder.build(); /// # /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + /// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// # index_writer.add_document(doc!(title => "MadCow Inc.", revenue => 92_000_000i64))?; /// # index_writer.add_document(doc!(title => "Zozo Cow KKK", revenue => 119_000_000i64))?; /// # index_writer.add_document(doc!(title => "Declining Cow", revenue => -63_000_000i64))?; @@ -449,7 +449,7 @@ impl TopDocs { /// fn create_index() -> tantivy::Result { /// let schema = create_schema(); /// let index = Index::create_in_ram(schema); - /// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + /// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// let product_name = index.schema().get_field("product_name").unwrap(); /// let popularity: Field = index.schema().get_field("popularity").unwrap(); /// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64))?; @@ -556,7 +556,7 @@ impl TopDocs { /// # fn main() -> tantivy::Result<()> { /// # let schema = create_schema(); /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + /// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// # let product_name = index.schema().get_field("product_name").unwrap(); /// # /// let popularity: Field = index.schema().get_field("popularity").unwrap(); @@ -752,7 +752,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?; index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?; index_writer.add_document(doc!(text_field=>"I like Droopy"))?; @@ -1122,7 +1122,7 @@ mod tests { mut doc_adder: impl FnMut(&mut IndexWriter), ) -> (Index, Box) { let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 15_000_000).unwrap(); doc_adder(&mut index_writer); index_writer.commit().unwrap(); let query_parser = QueryParser::for_index(&index, vec![query_field]); diff --git a/src/core/index.rs b/src/core/index.rs index dcc49b9022..27af48d43c 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -565,7 +565,7 @@ impl Index { /// Using a single thread gives us a deterministic allocation of DocId. #[cfg(test)] pub fn writer_for_tests(&self) -> crate::Result { - self.writer_with_num_threads(1, 10_000_000) + self.writer_with_num_threads(1, 15_000_000) } /// Creates a multithreaded writer diff --git a/src/core/tests.rs b/src/core/tests.rs index d8af5e7d0d..aaa8628bd0 100644 --- a/src/core/tests.rs +++ b/src/core/tests.rs @@ -283,7 +283,7 @@ fn test_single_segment_index_writer() -> crate::Result<()> { let directory = RamDirectory::default(); let mut single_segment_index_writer = Index::builder() .schema(schema) - .single_segment_index_writer(directory, 10_000_000)?; + .single_segment_index_writer(directory, 15_000_000)?; for _ in 0..10 { let doc = doc!(text_field=>"hello"); single_segment_index_writer.add_document(doc)?; diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 71b62ae8f5..242ea023a9 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -48,7 +48,7 @@ use crate::{DateTime, DocId, Score}; /// let schema = schema_builder.build(); /// /// let index = Index::create_in_ram(schema); -/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; +/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// for year in 1950u64..2017u64 { /// let num_docs_within_year = 10 + (year - 1950) * (year - 1950); /// for _ in 0..num_docs_within_year { diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 68d6bc6ee5..e99028b1df 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -262,7 +262,7 @@ fn is_sorted(mut it: impl Iterator) -> bool { /// # let text_field = schema_builder.add_text_field("text", TEXT); /// # let schema = schema_builder.build(); /// # let index = Index::create_in_ram(schema); -/// # let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; +/// # let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?; /// # let doc = doc!(text_field => r#"Comme je descendais des Fleuves impassibles, /// # Je ne me sentis plus guidé par les haleurs : /// # Des Peaux-Rouges criards les avaient pris pour cibles, diff --git a/stacker/src/arena_hashmap.rs b/stacker/src/arena_hashmap.rs index 254be2de3d..931ae613b7 100644 --- a/stacker/src/arena_hashmap.rs +++ b/stacker/src/arena_hashmap.rs @@ -164,7 +164,7 @@ impl ArenaHashMap { #[inline] pub fn mem_usage(&self) -> usize { - self.table.len() * mem::size_of::() + self.table.len() * mem::size_of::() + self.memory_arena.mem_usage() } #[inline]