From 6335df06344066b8942c2bdfcfee837abfe0da3d Mon Sep 17 00:00:00 2001
From: Charlie Swanson
Date: Mon, 12 Sep 2016 15:32:31 -0400
Subject: [PATCH 1/5] PERF-714 Add a regression test for SERVER-25950.

Adds a test for a pipeline requiring only one field out of many.
---
 testcases/pipelines.js | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/testcases/pipelines.js b/testcases/pipelines.js
index b98cfe83..4c37b22a 100644
--- a/testcases/pipelines.js
+++ b/testcases/pipelines.js
@@ -208,6 +208,18 @@ tests.push(testCaseGenerator({
     pipeline: [{$group: {_id: "$_idMod10", avg: {$avg: "$_id"}}}]
 }));
 
+tests.push(testCaseGenerator({
+    name: "Group.OneFieldReferencedOutOfMany",
+    docGenerator: function basicGroupDocGenerator(i) {
+        var doc = {_id: i, _idMod10: i % 10};
+        for (var j = 0; j < 100; j++) {
+            doc["field" + j] = i;
+        }
+        return doc;
+    },
+    pipeline: [{$group: {_id: "$_idMod10"}}]
+}));
+
 tests.push(testCaseGenerator({
     name: "Limit",
     nDocs: 500,

From ffdff4f131f00b4891bb663806aa8dc97c3e2fec Mon Sep 17 00:00:00 2001
From: David Storch
Date: Fri, 7 Oct 2016 17:21:20 -0400
Subject: [PATCH 2/5] PERF-754 add benchmarks for collation

---
 testcases/simple_insert.js |  46 +++++++++++++
 testcases/simple_query.js  | 132 +++++++++++++++++++++++++++++++++++++
 2 files changed, 178 insertions(+)

diff --git a/testcases/simple_insert.js b/testcases/simple_insert.js
index 1ca4c4b6..29e31847 100644
--- a/testcases/simple_insert.js
+++ b/testcases/simple_insert.js
@@ -213,3 +213,49 @@ tests.push( { name: "Insert.JustNumIndexed",
                   { x: { "#SEQ_INT": { seq_id: 0, start: 0, step: 1, unique: true } } } }
               ] } );
+
+/*
+ * Setup: Create an empty collection with a simple default collation and index field 'a'.
+ *
+ * Test: Repeatedly insert an indexed 10 character string.
+ */
+tests.push( { name: "InsertIndexedStringsSimpleCollation",
+              tags: ['insert','indexed','regression','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  testDB.createCollection(collName, { collation: { locale: "simple" } } );
+                  collection.ensureIndex( { a: 1 } );
+              },
+              ops: [
+                  { op: "insert", doc: { a: { "#RAND_STRING": [10] } } }
+              ] } );
+
+/*
+ * Setup: Create an empty collection with a non-simple default collation and index field 'a'. We set
+ * several collation options in an attempt to make the collation processing in ICU more expensive.
+ *
+ * Test: Repeatedly insert an indexed 10 character string.
+ *
+ * Comparing this test against InsertIndexedStringsSimpleCollation should indicate the overhead
+ * associated with generating index keys for an index with a non-simple collation.
+ */
+tests.push( { name: "InsertIndexedStringsNonSimpleCollation",
+              tags: ['insert','indexed','regression','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  var myCollation = {
+                      locale : "en",
+                      strength : 5,
+                      backwards : true,
+                      normalization : true,
+                  };
+                  testDB.createCollection(collName, { collation: myCollation } );
+                  collection.ensureIndex( { a: 1 } );
+              },
+              ops: [
+                  { op: "insert", doc: { a: { "#RAND_STRING": [10] } } }
+              ] } );

diff --git a/testcases/simple_query.js b/testcases/simple_query.js
index ceb49c84..6b63d61a 100644
--- a/testcases/simple_query.js
+++ b/testcases/simple_query.js
@@ -172,6 +172,138 @@ tests.push( { name: "Queries.TwoInts",
                   }
               ] } );
 
+/*
+ * Setup: Create a collection with a non-simple default collation, and insert indexed strings. We
+ * set several collation options in an attempt to make the collation processing in ICU more
+ * expensive.
+ *
+ * Test: Query for a range of strings using the non-simple default collation.
+ */
+tests.push( { name: "Queries.StringRangeWithNonSimpleCollation",
+              tags: ['query','indexed','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  var myCollation = {
+                      locale : "en",
+                      strength : 5,
+                      backwards : true,
+                      normalization : true,
+                  };
+                  testDB.createCollection(collName, { collation: myCollation } );
+                  var docs = [];
+                  for ( var i = 0; i < 4800; i++ ) {
+                      var j = i + (1 * 1000 * 1000 * 1000);
+                      docs.push( { x : j.toString() } );
+                  }
+                  collection.insert(docs);
+                  collection.getDB().getLastError();
+                  collection.ensureIndex( { x : 1 } );
+              },
+              ops : [
+                  { op: "find", query: { x: { $gte: "1000002400", $lt: "1000002404" } } }
+              ] } );
+
+/*
+ * Setup: Create a collection and insert indexed strings.
+ *
+ * Test: Query for a range of strings using the simple collation.
+ *
+ * Comparing this test against StringRangeWithNonSimpleCollation is useful for determining the
+ * performance impact of queries with non-simple collations whose string comparison predicates are
+ * indexed.
+ */
+tests.push( { name: "Queries.StringRangeWithSimpleCollation",
+              tags: ['query','indexed','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  testDB.createCollection(collName, { collation: { locale: "simple" } } );
+                  var docs = [];
+                  for ( var i = 0; i < 4800; i++ ) {
+                      var j = i + (1 * 1000 * 1000 * 1000);
+                      docs.push( { x : j.toString() } );
+                  }
+                  collection.insert(docs);
+                  collection.getDB().getLastError();
+                  collection.ensureIndex( { x : 1 } );
+              },
+              ops : [
+                  { op: "find", query: { x: { $gte: "1000002400", $lt: "1000002404" } } }
+              ] } );
+
+/*
+ * Setup: Create a collection with a non-simple default collation and insert a small number of
+ * documents with strings. We set several collation options in an attempt to make the collation
+ * processing in ICU more expensive.
+ *
+ * Test: Issue queries that must perform a collection scan, filtering the documents with an $in
+ * predicate. Request a sort which the query system must satisfy by sorting the documents in memory
+ * according to the collation.
+ */
+tests.push( { name: "Queries.StringUnindexedInPredWithNonSimpleCollation",
+              tags: ['query','regression','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  var myCollation = {
+                      locale : "en",
+                      strength : 5,
+                      backwards : true,
+                      normalization : true,
+                  };
+                  testDB.createCollection(collName, { collation: myCollation } );
+                  var docs = [];
+                  for ( var i = 0; i < 10; i++ ) {
+                      docs.push( { x : i.toString() } );
+                  }
+                  collection.insert(docs);
+                  collection.getDB().getLastError();
+              },
+              ops : [
+                  { op: "find",
+                    query: {
+                        $query: { x: { $in: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] } },
+                        $orderby: { x: 1 },
+                    } }
+              ] } );
+
+/*
+ * Setup: Create a collection with the simple default collation and insert a small number of
+ * documents with strings.
+ *
+ * Test: Issue queries that must perform a collection scan, filtering the documents with an $in
+ * predicate. Request a sort which the query system must satisfy by sorting the documents in memory.
+ *
+ * Comparing this test against StringUnindexedInPredWithNonSimpleCollation is useful for determining
+ * the performance impact of queries with non-simple collations whose string comparison predicates
+ * are unindexed, in addition to the perf impact of an in-memory SORT stage which uses a collator.
+ */
+tests.push( { name: "Queries.StringUnindexedInPredWithSimpleCollation",
+              tags: ['query','regression','collation'],
+              pre: function( collection ) {
+                  var testDB = collection.getDB();
+                  var collName = collection.getName();
+                  collection.drop();
+                  testDB.createCollection(collName, { collation: { locale: "simple" } } );
+                  var docs = [];
+                  for ( var i = 0; i < 10; i++ ) {
+                      docs.push( { x : i.toString() } );
+                  }
+                  collection.insert(docs);
+                  collection.getDB().getLastError();
+              },
+              ops : [
+                  { op: "find",
+                    query: {
+                        $query: { x: { $in: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] } },
+                        $orderby: { x: 1 },
+                    } }
+              ] } );
+
 // PROJECTION TESTS
 
 /*

From 11872d8e2787bf0cca58783e93142a53124db2e7 Mon Sep 17 00:00:00 2001
From: RoobinGood
Date: Fri, 21 Oct 2016 21:08:57 +0300
Subject: [PATCH 3/5] PERF-776: Pass --host arg to shell call

Signed-off-by: dalyd
---
 benchrun.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/benchrun.py b/benchrun.py
index 30673c35..690b42c0 100644
--- a/benchrun.py
+++ b/benchrun.py
@@ -150,14 +150,16 @@ def main():
         args.includeFilter = '%'
 
     # Print version info.
-    call([args.shellpath, "--norc", "--port", args.port, "--eval",
-         "print('db version: ' + db.version());"
+    call([args.shellpath, "--norc",
+          "--host", args.hostname, "--port", args.port,
+          "--eval", "print('db version: ' + db.version());"
          " db.serverBuildInfo().gitVersion;"])
 
     print("")
 
     # Open a mongo shell subprocess and load necessary files.
-    mongo_proc = Popen([args.shellpath, "--norc", "--quiet", "--port", args.port], stdin=PIPE, stdout=PIPE)
+    mongo_proc = Popen([args.shellpath, "--norc", "--quiet",
+                        "--host", args.hostname, "--port", args.port],
+                       stdin=PIPE, stdout=PIPE)
 
     # load test files
     load_file_in_shell(mongo_proc, 'util/utils.js')

From 21015e7bf053c5e6cfd51e24f33d536ffd6f61ad Mon Sep 17 00:00:00 2001
From: James Wahlin
Date: Mon, 7 Nov 2016 12:34:09 -0500
Subject: [PATCH 4/5] SERVER-23992 Add $graphLookup tests; Increase $lookup
 test throughput

---
 testcases/pipelines.js | 199 +++++++++++++++++++++++++++++++++++------
 1 file changed, 170 insertions(+), 29 deletions(-)

diff --git a/testcases/pipelines.js b/testcases/pipelines.js
index 4c37b22a..d6cf5350 100644
--- a/testcases/pipelines.js
+++ b/testcases/pipelines.js
@@ -94,7 +94,7 @@ function testCaseGenerator(options) {
         pipeline.push({$skip: 1e9});
     }
     return {
-        tags: options.tags || ["aggregation", "regression"],
+        tags: ["aggregation", "regression"].concat(options.tags || []),
         name: "Aggregation." + options.name,
         pre: options.pre || populatorGenerator(nDocs,
                                                options.indices || [],
@@ -226,35 +226,45 @@ tests.push(testCaseGenerator({
     pipeline: [{$limit: 250}]
 }));
 
-// $lookup tests need two collections, so they use their own setup code.
+/**
+ * Data population function used by the 'Lookup' and 'LookupViaGraphLookup' tests.
+ */
+function basicLookupPopulator(sourceCollection) {
+    const lookupCollName = sourceCollection.getName() + "_lookup";
+    let lookupCollection = sourceCollection.getDB()[lookupCollName];
+    const nDocs = 100;
+
+    sourceCollection.drop();
+    lookupCollection.drop();
+
+    let sourceBulk = sourceCollection.initializeUnorderedBulkOp();
+    let lookupBulk = lookupCollection.initializeUnorderedBulkOp();
+    for (let i = 0; i < nDocs; i++) {
+        sourceBulk.insert({_id: i, foreignKey: i});
+        lookupBulk.insert({_id: i});
+    }
+    sourceBulk.execute();
+    lookupBulk.execute();
+}
+
+/**
+ * Data cleanup function used by the 'Lookup' and 'LookupViaGraphLookup' tests.
+ */
+function basicLookupCleanup(sourceCollection) {
+    const lookupCollName = sourceCollection.getName() + "_lookup";
+    let lookupCollection = sourceCollection.getDB()[lookupCollName];
+    sourceCollection.drop();
+    lookupCollection.drop();
+}
+
+// Basic $lookup test. $lookup tests need two collections, so they use their own setup code.
 tests.push(testCaseGenerator({
     name: "Lookup",
     // The setup function is only given one collection, but $lookup needs two. We'll treat the given
     // one as the source collection, and create a second one with the name of the first plus
     // '_lookup', which we'll use to look up from.
-    pre: function lookupPopulator(sourceCollection) {
-        var lookupCollName = sourceCollection.getName() + "_lookup";
-        var lookupCollection = sourceCollection.getDB()[lookupCollName];
-        var nDocs = 500;
-
-        sourceCollection.drop();
-        lookupCollection.drop();
-
-        var sourceBulk = sourceCollection.initializeUnorderedBulkOp();
-        var lookupBulk = lookupCollection.initializeUnorderedBulkOp();
-        for (var i = 0; i < nDocs; i++) {
-            sourceBulk.insert({_id: i, foreignKey: i});
-            lookupBulk.insert({_id: i});
-        }
-        sourceBulk.execute();
-        lookupBulk.execute();
-    },
-    post: function lookupPost(sourceCollection) {
-        var lookupCollName = sourceCollection.getName() + "_lookup";
-        var lookupCollection = sourceCollection.getDB()[lookupCollName];
-        sourceCollection.drop();
-        lookupCollection.drop();
-    },
+    pre: basicLookupPopulator,
+    post: basicLookupCleanup,
     pipeline: [
         {
           $lookup: {
              from: "#B_COLL_lookup",
              localField: "foreignKey",
              foreignField: "_id",
              as: "match"
           }
         }
-    ]
+    ],
+    tags: ["lookup"]
 }));
+
+// Mimics the basic 'Lookup' test using $graphLookup for comparison.
+tests.push(testCaseGenerator({
+    name: "LookupViaGraphLookup",
+    pre: basicLookupPopulator,
+    post: basicLookupCleanup,
+    pipeline: [
+        {
+          $graphLookup: {
+              from: "#B_COLL_lookup",
+              startWith: "$foreignKey",
+              connectFromField: "foreignKey",
+              connectToField: "_id",
+              as: "match"
+          }
+        }
+    ],
+    tags: ["lookup"]
+}));
 
 tests.push(testCaseGenerator({
@@ -275,7 +305,7 @@ tests.push(testCaseGenerator({
     pre: function lookupPopulator(ordersCollection) {
         var productCollName = ordersCollection.getName() + "_lookup";
        var productsCollection = ordersCollection.getDB()[productCollName];
-        var nDocs = 500;
+        var nDocs = 20;
 
         productsCollection.drop();
         ordersCollection.drop();
@@ -289,7 +319,7 @@ tests.push(testCaseGenerator({
             productsBulk.insert({_id: i});
 
             // Each order will contain a random number of products in an array.
-            var nProducts = Random.randInt(100);
+            var nProducts = Random.randInt(10);
             var products = [];
             for (var p = 0; p < nProducts; p++) {
                 products.push({_id: Random.randInt(nDocs), quantity: Random.randInt(20)});
@@ -322,7 +352,118 @@ tests.push(testCaseGenerator({
                 as: "product"
             }
         }
-    ]
+    ],
+    tags: ["lookup"]
 }));
+
+tests.push(testCaseGenerator({
+    name: "GraphLookupSocialite",
+    pre: function socialitePopulator(userCollection) {
+        const followerCollName = userCollection.getName() + "_follower";
+        let followerCollection = userCollection.getDB()[followerCollName];
+
+        userCollection.drop();
+        followerCollection.drop();
+
+        const userDocs = [
+            {_id: "djw", fullname: "Darren", country: "Australia"},
+            {_id: "bmw", fullname: "Bob", country: "Germany"},
+            {_id: "jsr", fullname: "Jared", country: "USA"},
+            {_id: "ftr", fullname: "Frank", country: "Canada"},
+            {_id: "jhw", fullname: "James", country: "USA"},
+            {_id: "cxs", fullname: "Charlie", country: "USA"},
+            {_id: "sss", fullname: "Stephen", country: "Australia"},
+            {_id: "ada", fullname: "Adam", country: "Ireland"},
+            {_id: "mar", fullname: "Mark", country: "Ireland"},
+        ];
+
+        let userBulk = userCollection.initializeUnorderedBulkOp();
+        userDocs.forEach(function(userDoc) {
+            userBulk.insert(userDoc);
+        });
+        userBulk.execute();
+
+        const followers = [
+            {_f: "djw", _t: "jsr"},
+            {_f: "jsr", _t: "bmw"},
+            {_f: "ftr", _t: "bmw"},
+            {_f: "jhw", _t: "bmw"},
+            {_f: "sss", _t: "jhw"},
+            {_f: "cxs", _t: "sss"},
+            {_f: "aaa", _t: "cxs"},
+            {_f: "djw", _t: "cxs"},
+            {_f: "djw", _t: "jhw"},
+            {_f: "djw", _t: "sss"},
+            {_f: "djw", _t: "ftr"},
+            {_f: "djw", _t: "bmw"},
+            {_f: "ada", _t: "mar"},
+        ];
+
+        let followerBulk = followerCollection.initializeUnorderedBulkOp();
+        followers.forEach(function(follower) {
+            followerBulk.insert(follower);
+        });
+        followerBulk.execute();
+    },
+    post: function lookupPost(userCollection) {
+        const followerCollName = userCollection.getName() + "_follower";
+        let followerCollection = userCollection.getDB()[followerCollName];
+        userCollection.drop();
+        followerCollection.drop();
+    },
+    pipeline: [
+        {
+          $graphLookup: {
+              from: "#B_COLL_follower",
+              startWith: "$_id",
+              connectFromField: "_t",
+              connectToField: "_f",
+              as: "network"
+          }
+        },
+        {$unwind: "$network"},
+        {$project: {_id: "$network._t"}}
+    ],
+    tags: ["lookup"]
+}));
+
+tests.push(testCaseGenerator({
+    name: "GraphLookupNeighbors",
+    pre: function neighborPopulator(sourceCollection) {
+        const neighborCollName = sourceCollection.getName() + "_neighbor";
+        let neighborCollection = sourceCollection.getDB()[neighborCollName];
+
+        sourceCollection.drop();
+        neighborCollection.drop();
+
+        let bulk = neighborCollection.initializeUnorderedBulkOp();
+        for (var i = 0; i < 100; i++) {
+            bulk.insert({_id: i, neighbors: [i - 1, i + 1]});
+        }
+        bulk.execute();
+
+        sourceCollection.insert({starting: 50});
+    },
+    post: function lookupPost(sourceCollection) {
+        const neighborCollName = sourceCollection.getName() + "_neighbor";
+        let neighborCollection = sourceCollection.getDB()[neighborCollName];
+        sourceCollection.drop();
+        neighborCollection.drop();
+    },
+    pipeline: [
+        {
+          $graphLookup: {
+              from: "#B_COLL_neighbor",
+              startWith: "$starting",
+              connectFromField: "neighbors",
+              connectToField: "_id",
+              maxDepth: 10,
+              depthField: "distance",
+              as: "integers"
+          }
+        }
+    ],
+    tags: ["lookup"]
+}));
 
 tests.push(testCaseGenerator({

From 80570d3f5e3cc12ee8a469e64d025600782448bb Mon Sep 17 00:00:00 2001
From: Charlie Swanson
Date: Thu, 10 Nov 2016 13:58:42 -0500
Subject: [PATCH 5/5]
 PERF-784 Add perf tests for SERVER-22631

---
 testcases/pipelines.js | 53 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/testcases/pipelines.js b/testcases/pipelines.js
index d6cf5350..6458549e 100644
--- a/testcases/pipelines.js
+++ b/testcases/pipelines.js
@@ -472,10 +472,55 @@ tests.push(testCaseGenerator({
     docGenerator: function simpleMatchDocGenerator(i) {
         return {_id: i};
     },
-    // Add a $project stage before the $match stage to ensure the $match isn't pushed down to the
-    // query layer.
-    pipeline: [{$project: {_id: 0, _idTimes10: {$multiply: ["$_id", 10]}}},
-               {$match: {_idTimes10: {$lt: 2500}}}]
+    // Add a $skip stage before the $match stage to ensure the $match isn't pushed down to the query
+    // layer. A $skip of 0 will be optimized out, so we need to skip at least one.
+    pipeline: [{$skip: 1}, {$match: {_id: {$lt: 250}}}]
+}));
+
+/**
+ * Generates a document with 50 fields sharing the same value, plus a 'predicate' field set to 0 if
+ * 'i' is even and 1 otherwise.
+ */
+function docGenerator50FieldsOnePredicate(i) {
+    var doc = {};
+    for (var j = 0; j < 50; j++) {
+        doc["field" + j] = "placeholder kinda big";
+    }
+    doc.predicate = i % 2;
+    return doc;
+}
+
+tests.push(testCaseGenerator({
+    name: "MatchOneFieldFromBigDocument",
+    nDocs: 1000,
+    docGenerator: docGenerator50FieldsOnePredicate,
+    // Add a $skip stage before the $match stage to ensure the $match isn't pushed down to the query
+    // layer. A $skip of 0 will be optimized out, so we need to skip at least one.
+    pipeline: [{$skip: 1}, {$match: {predicate: {$eq: 0}}}]
+}));
+
+tests.push(testCaseGenerator({
+    name: "MatchManyFieldsFromBigDocument",
+    nDocs: 1000,
+    docGenerator: docGenerator50FieldsOnePredicate,
+    // Add a $skip stage before the $match stage to ensure the $match isn't pushed down to the
+    // query layer. A $skip of 0 will be optimized out, so we need to skip at least one.
+    pipeline: [
+        {$skip: 1},
+        {$match: {
+            predicate: {$eq: 0},
+            // The following are present just to increase the number of fields we need to serialize
+            // to BSON to perform the match.
+            field0: {$type: "string"},
+            field1: {$type: "string"},
+            field2: {$type: "string"},
+            field10: {$type: "string"},
+            field25: {$type: "string"},
+            field40: {$type: "string"},
+            field48: {$type: "string"},
+            field49: {$type: "string"},
+        }}
+    ]
 }));
 
 tests.push(testCaseGenerator({