Track memory arena usage in `ArenaHashMap::mem_usage` (#2148)

quickwit-oss · Aug 16, 2023 · 480763d · 480763d
1 parent 62ece86
commit 480763d
Show file tree

Hide file tree

Showing 10 changed files with 18 additions and 17 deletions.
diff --git a/examples/warmer.rs b/examples/warmer.rs
@@ -143,7 +143,7 @@ fn main() -> tantivy::Result<()> {
     const SNEAKERS: ProductId = 23222;
 
     let index = Index::create_in_ram(schema);
-    let mut writer = index.writer_with_num_threads(1, 10_000_000)?;
+    let mut writer = index.writer_with_num_threads(1, 15_000_000)?;
     writer.add_document(doc!(product_id=>OLIVE_OIL, text=>"cooking olive oil from greece"))?;
     writer.add_document(doc!(product_id=>GLOVES, text=>"kitchen gloves, perfect for cooking"))?;
     writer.add_document(doc!(product_id=>SNEAKERS, text=>"uber sweet sneakers"))?;

diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs
@@ -1269,6 +1269,7 @@ mod tests {
         ];
 
         let index = get_test_index_from_terms(false, &terms_per_segment)?;
+        assert_eq!(index.searchable_segments().unwrap().len(), 2);
 
         let agg_req: Aggregations = serde_json::from_value(json!({
             "my_texts": {

diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs
@@ -38,7 +38,7 @@ use crate::{DocId, Score, SegmentReader, TantivyError};
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64))?;
 /// index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64))?;
 /// index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64))?;
@@ -216,7 +216,7 @@ where
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// index_writer.add_document(doc!(title => "The Name of the Wind", barcode => &b"010101"[..]))?;
 /// index_writer.add_document(doc!(title => "The Diary of Muadib", barcode => &b"110011"[..]))?;
 /// index_writer.add_document(doc!(title => "A Dairy Cow", barcode => &b"110111"[..]))?;

diff --git a/src/collector/tests.rs b/src/collector/tests.rs
@@ -26,7 +26,7 @@ pub fn test_filter_collector() -> crate::Result<()> {
     let schema = schema_builder.build();
     let index = Index::create_in_ram(schema);
 
-    let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
     index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
     index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;

diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs
@@ -105,7 +105,7 @@ where
 /// let schema = schema_builder.build();
 /// let index = Index::create_in_ram(schema);
 ///
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
 /// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
 /// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
@@ -210,7 +210,7 @@ impl TopDocs {
     /// let schema = schema_builder.build();
     /// let index = Index::create_in_ram(schema);
     ///
-    /// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    /// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     /// index_writer.add_document(doc!(title => "The Name of the Wind"))?;
     /// index_writer.add_document(doc!(title => "The Diary of Muadib"))?;
     /// index_writer.add_document(doc!(title => "A Dairy Cow"))?;
@@ -261,7 +261,7 @@ impl TopDocs {
     /// #   let schema = schema_builder.build();
     /// #
     /// #   let index = Index::create_in_ram(schema);
-    /// #   let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    /// #   let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     /// #   index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64))?;
     /// #   index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64))?;
     /// #   index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64))?;
@@ -349,7 +349,7 @@ impl TopDocs {
     /// #   let schema = schema_builder.build();
     /// #
     /// #   let index = Index::create_in_ram(schema);
-    /// #   let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    /// #   let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     /// #   index_writer.add_document(doc!(title => "MadCow Inc.", revenue => 92_000_000i64))?;
     /// #   index_writer.add_document(doc!(title => "Zozo Cow KKK", revenue => 119_000_000i64))?;
     /// #   index_writer.add_document(doc!(title => "Declining Cow", revenue => -63_000_000i64))?;
@@ -449,7 +449,7 @@ impl TopDocs {
     /// fn create_index() -> tantivy::Result<Index> {
     ///   let schema = create_schema();
     ///   let index = Index::create_in_ram(schema);
-    ///   let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    ///   let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     ///   let product_name = index.schema().get_field("product_name").unwrap();
     ///   let popularity: Field = index.schema().get_field("popularity").unwrap();
     ///   index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64))?;
@@ -556,7 +556,7 @@ impl TopDocs {
     /// # fn main() -> tantivy::Result<()> {
     /// #   let schema = create_schema();
     /// #   let index = Index::create_in_ram(schema);
-    /// #   let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+    /// #   let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
     /// #   let product_name = index.schema().get_field("product_name").unwrap();
     /// #
     /// let popularity: Field = index.schema().get_field("popularity").unwrap();
@@ -752,7 +752,7 @@ mod tests {
         let schema = schema_builder.build();
         let index = Index::create_in_ram(schema);
         // writing the segment
-        let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+        let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
         index_writer.add_document(doc!(text_field=>"Hello happy tax payer."))?;
         index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer"))?;
         index_writer.add_document(doc!(text_field=>"I like Droopy"))?;
@@ -1122,7 +1122,7 @@ mod tests {
         mut doc_adder: impl FnMut(&mut IndexWriter),
     ) -> (Index, Box<dyn Query>) {
         let index = Index::create_in_ram(schema);
-        let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
+        let mut index_writer = index.writer_with_num_threads(1, 15_000_000).unwrap();
         doc_adder(&mut index_writer);
         index_writer.commit().unwrap();
         let query_parser = QueryParser::for_index(&index, vec![query_field]);

diff --git a/src/core/index.rs b/src/core/index.rs
@@ -565,7 +565,7 @@ impl Index {
     /// Using a single thread gives us a deterministic allocation of DocId.
     #[cfg(test)]
     pub fn writer_for_tests(&self) -> crate::Result<IndexWriter> {
-        self.writer_with_num_threads(1, 10_000_000)
+        self.writer_with_num_threads(1, 15_000_000)
     }
 
     /// Creates a multithreaded writer

diff --git a/src/core/tests.rs b/src/core/tests.rs
@@ -283,7 +283,7 @@ fn test_single_segment_index_writer() -> crate::Result<()> {
     let directory = RamDirectory::default();
     let mut single_segment_index_writer = Index::builder()
         .schema(schema)
-        .single_segment_index_writer(directory, 10_000_000)?;
+        .single_segment_index_writer(directory, 15_000_000)?;
     for _ in 0..10 {
         let doc = doc!(text_field=>"hello");
         single_segment_index_writer.add_document(doc)?;

diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs
@@ -48,7 +48,7 @@ use crate::{DateTime, DocId, Score};
 /// let schema = schema_builder.build();
 ///
 /// let index = Index::create_in_ram(schema);
-/// let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// for year in 1950u64..2017u64 {
 ///     let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
 ///     for _ in 0..num_docs_within_year {

diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs
@@ -262,7 +262,7 @@ fn is_sorted(mut it: impl Iterator<Item = usize>) -> bool {
 /// #    let text_field = schema_builder.add_text_field("text", TEXT);
 /// #    let schema = schema_builder.build();
 /// #    let index = Index::create_in_ram(schema);
-/// #    let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
+/// #    let mut index_writer = index.writer_with_num_threads(1, 20_000_000)?;
 /// #    let doc = doc!(text_field => r#"Comme je descendais des Fleuves impassibles,
 /// #   Je ne me sentis plus guidé par les haleurs :
 /// #  Des Peaux-Rouges criards les avaient pris pour cibles,

diff --git a/stacker/src/arena_hashmap.rs b/stacker/src/arena_hashmap.rs
@@ -164,7 +164,7 @@ impl ArenaHashMap {
 
     #[inline]
     pub fn mem_usage(&self) -> usize {
-        self.table.len() * mem::size_of::<KeyValue>()
+        self.table.len() * mem::size_of::<KeyValue>() + self.memory_arena.mem_usage()
     }
 
     #[inline]