PBM fix for the tests with external backup #169
GitHub Actions / JUnit Test Report: failed, Sep 18, 2024 (in 0s)
31 tests run: 20 passed, 9 skipped, 2 failed.
Annotations
Check failure on line 86 in pbm-functional/pytest/test_PBM-1297.py
github-actions / JUnit Test Report
test_PBM-1297.test_logical_pitr_PBM_T253
AssertionError: Starting restore 2024-09-18T08:46:36.896473457Z to point-in-time 2024-09-18T08:45:01 from '2024-09-18T08:43:38Z'...Started logical restore.
Waiting to finish...Error: operation failed with: reply oplog: replay chunk 1726649022.1726649100: apply oplog for chunk: applying a transaction entry: apply txn: {
"Timestamp": {
"T": 1726649070,
"I": 17
},
"Term": 1,
"Hash": null,
"Version": 2,
"Operation": "c",
"Namespace": "admin.$cmd",
"Object": [
{
"Key": "applyOps",
"Value": [
[
{
"Key": "op",
"Value": "c"
},
{
"Key": "ns",
"Value": "config.$cmd"
},
{
"Key": "ui",
"Value": {
"Subtype": 4,
"Data": "N7GDc/KIQISjbBE8Q1CJBw=="
}
},
{
"Key": "o",
"Value": [
{
"Key": "create",
"Value": "databases"
},
{
"Key": "idIndex",
"Value": [
{
"Key": "v",
"Value": 2
},
{
"Key": "key",
"Value": [
{
"Key": "_id",
"Value": 1
}
]
},
{
"Key": "name",
"Value": "_id_"
}
]
}
]
}
],
[
{
"Key": "op",
"Value": "i"
},
{
"Key": "ns",
"Value": "config.databases"
},
{
"Key": "ui",
"Value": {
"Subtype": 4,
"Data": "N7GDc/KIQISjbBE8Q1CJBw=="
}
},
{
"Key": "o",
"Value": [
{
"Key": "_id",
"Value": "test"
},
{
"Key": "primary",
"Value": "rs1"
},
{
"Key": "partitioned",
"Value": false
},
{
"Key": "version",
"Value": [
{
"Key": "uuid",
"Value": {
"Subtype": 4,
"Data": "BEPcxUUtR7O6HFM1U6/GpA=="
}
},
{
"Key": "timestamp",
"Value": {
"T": 1726649070,
"I": 15
}
},
{
"Key": "lastMod",
"Value": 1
}
]
}
]
},
{
"Key": "o2",
"Value": [
{
"Key": "_id",
"Value": "test"
}
]
}
],
[
{
"Key": "op",
"Value": "i"
},
{
"Key": "ns",
"Value": "config.placementHistory"
},
{
"Key": "ui",
"Value": {
"Subtype": 4,
"Data": "2NI40zRCT7a4NDS9E/IfEQ=="
}
},
{
"Key": "o",
"Value": [
{
"Key": "_id",
"Value": "66ea92eec79fd7809e95662f"
},
{
"Key": "nss",
"Value": "test"
},
{
"Key": "timestamp",
"Value": {
"T": 1726649070,
"I": 15
}
},
{
"Key": "shards",
"Value": [
"rs1"
]
}
]
},
{
"Key": "o2",
"Value": [
{
"Key": "_id",
"Value": "66ea92eec79fd7809e95662f"
}
]
}
]
]
}
],
"Query": null,
"UI": null,
"LSID": "ZgAAAAVpZAAQAAAABPDK6NKC+kkirn44i87XWO8FdWlkACAAAAAAY5mrDaxi8gv8RmdTsQ+1j7fmkr7JUsabhNmXAheU0fgFdHhuVVVJRAAQAAAABCtaeCb+Tkd2iTpSwpLP+SkA",
"TxnNumber": 0,
"PrevOpTime": "HAAAABF0cwAAAAAAAAAAABJ0AP//////////AA==",
"MultiOpType": null
}: applying transaction op: op: {"Timestamp":{"T":1726649070,"I":17},"Term":1,"Hash":null,"Version":0,"Operation":"i","Namespace":"config.databases","Object":[{"Key":"_id","Value":"test"},{"Key":"primary","Value":"rs1"},{"Key":"partitioned","Value":false},{"Key":"version","Value":[{"Key":"uuid","Value":{"Subtype":4,"Data":"BEPcxUUtR7O6HFM1U6/GpA=="}},{"Key":"timestamp","Value":{"T":1726649070,"I":15}},{"Key":"lastMod","Value":1}]}],"Query":[{"Key":"_id","Value":"test"}],"UI":{"Subtype":4,"Data":"N7GDc/KIQISjbBE8Q1CJBw=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops
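The entry that fails to replay is the sharding-metadata transaction written while PITR oplog slicing was active: a single `applyOps` that creates `config.databases` and inserts the `test` database record plus a matching `config.placementHistory` document. Judging by the test code in the raw output below, this appears to come from the `enableSharding` call. A minimal sketch of the triggering sequence (assuming a sharded cluster reachable through a `mongos` router; the hostname is illustrative):

```python
import pymongo

# Router connection; "mongos:27017" is illustrative, any sharded-cluster
# router works. In the failing test this runs while PITR slicing is active.
client = pymongo.MongoClient("mongodb://mongos:27017")

# enableSharding produces the config.databases / config.placementHistory
# transaction shown in the error above; shardCollection follows it in the test.
client.admin.command("enableSharding", "test")
client.admin.command("shardCollection", "test.test", key={"_id": "hashed"})
```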
Raw output
start_cluster = True, cluster = <cluster.Cluster object at 0x7f73af4f4090>
newcluster = <cluster.Cluster object at 0x7f73b05a6cd0>
@pytest.mark.timeout(600, func_only=True)
def test_logical_pitr_PBM_T253(start_cluster,cluster,newcluster):
cluster.check_pbm_status()
cluster.make_backup("logical")
cluster.enable_pitr(pitr_extra_args="--set pitr.oplogSpanMin=0.5")
#Create the first database during oplog slicing
client=pymongo.MongoClient(cluster.connection)
client.admin.command("enableSharding", "test")
client.admin.command("shardCollection", "test.test", key={"_id": "hashed"})
for i in range(100):
pymongo.MongoClient(cluster.connection)["test"]["test"].insert_one({"doc":i})
time.sleep(30)
pitr = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
backup="--time=" + pitr
Cluster.log("Time for PITR is: " + pitr)
time.sleep(30)
cluster.disable_pitr()
time.sleep(10)
cluster.destroy()
newcluster.create()
newcluster.setup_pbm()
time.sleep(10)
newcluster.check_pbm_status()
> newcluster.make_restore(backup,check_pbm_status=True)
test_PBM-1297.py:86:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cluster.Cluster object at 0x7f73b05a6cd0>
name = '--time=2024-09-18T08:45:01', kwargs = {'check_pbm_status': True}
client = MongoClient(host=['newmongos:27017'], document_class=dict, tz_aware=False, connect=True)
result = CommandResult(backend=<testinfra.backend.docker.DockerBackend object at 0x7f73b062c150>, exit_status=1, command=b'time...MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops\n', _stderr=b'')
n = <testinfra.host.Host docker://newrscfg01>, timeout = 240, error = ''
host = 'newrscfg01', container = <Container: 76580bffe185>
def make_restore(self, name, **kwargs):
if self.layout == "sharded":
client = pymongo.MongoClient(self.connection)
result = client.admin.command("balancerStop")
client.close()
Cluster.log("Stopping balancer: " + str(result))
self.stop_mongos()
self.stop_arbiters()
n = testinfra.get_host("docker://" + self.pbm_cli)
timeout = time.time() + 60
while True:
if not self.get_status()['running']:
break
if time.time() > timeout:
assert False, "Cannot start restore, another operation running"
time.sleep(1)
Cluster.log("Restore started")
timeout=kwargs.get('timeout', 240)
result = n.run('timeout ' + str(timeout) + ' pbm restore ' + name + ' --wait')
if result.rc == 0:
Cluster.log(result.stdout)
else:
# try to catch possible failures if timeout exceeded
error=''
for host in self.mongod_hosts:
try:
container = docker.from_env().containers.get(host)
get_logs = container.exec_run(
'cat /var/lib/mongo/pbm.restore.log', stderr=False)
if get_logs.exit_code == 0:
Cluster.log(
"!!!!Possible failure on {}, file pbm.restore.log was found:".format(host))
logs = get_logs.output.decode('utf-8')
Cluster.log(logs)
if '"s":"F"' in logs:
error = logs
except docker.errors.APIError:
pass
if error:
assert False, result.stdout + result.stderr + "\n" + error
else:
> assert False, result.stdout + result.stderr
E AssertionError: Starting restore 2024-09-18T08:46:36.896473457Z to point-in-time 2024-09-18T08:45:01 from '2024-09-18T08:43:38Z'...Started logical restore.
E Waiting to finish...Error: operation failed with: reply oplog: replay chunk 1726649022.1726649100: apply oplog for chunk: applying a transaction entry: apply txn: {
E "Timestamp": {
E "T": 1726649070,
E "I": 17
E },
E "Term": 1,
E "Hash": null,
E "Version": 2,
E "Operation": "c",
E "Namespace": "admin.$cmd",
E "Object": [
E {
E "Key": "applyOps",
E "Value": [
E [
E {
E "Key": "op",
E "Value": "c"
E },
E {
E "Key": "ns",
E "Value": "config.$cmd"
E },
E {
E "Key": "ui",
E "Value": {
E "Subtype": 4,
E "Data": "N7GDc/KIQISjbBE8Q1CJBw=="
E }
E },
E {
E "Key": "o",
E "Value": [
E {
E "Key": "create",
E "Value": "databases"
E },
E {
E "Key": "idIndex",
E "Value": [
E {
E "Key": "v",
E "Value": 2
E },
E {
E "Key": "key",
E "Value": [
E {
E "Key": "_id",
E "Value": 1
E }
E ]
E },
E {
E "Key": "name",
E "Value": "_id_"
E }
E ]
E }
E ]
E }
E ],
E [
E {
E "Key": "op",
E "Value": "i"
E },
E {
E "Key": "ns",
E "Value": "config.databases"
E },
E {
E "Key": "ui",
E "Value": {
E "Subtype": 4,
E "Data": "N7GDc/KIQISjbBE8Q1CJBw=="
E }
E },
E {
E "Key": "o",
E "Value": [
E {
E "Key": "_id",
E "Value": "test"
E },
E {
E "Key": "primary",
E "Value": "rs1"
E },
E {
E "Key": "partitioned",
E "Value": false
E },
E {
E "Key": "version",
E "Value": [
E {
E "Key": "uuid",
E "Value": {
E "Subtype": 4,
E "Data": "BEPcxUUtR7O6HFM1U6/GpA=="
E }
E },
E {
E "Key": "timestamp",
E "Value": {
E "T": 1726649070,
E "I": 15
E }
E },
E {
E "Key": "lastMod",
E "Value": 1
E }
E ]
E }
E ]
E },
E {
E "Key": "o2",
E "Value": [
E {
E "Key": "_id",
E "Value": "test"
E }
E ]
E }
E ],
E [
E {
E "Key": "op",
E "Value": "i"
E },
E {
E "Key": "ns",
E "Value": "config.placementHistory"
E },
E {
E "Key": "ui",
E "Value": {
E "Subtype": 4,
E "Data": "2NI40zRCT7a4NDS9E/IfEQ=="
E }
E },
E {
E "Key": "o",
E "Value": [
E {
E "Key": "_id",
E "Value": "66ea92eec79fd7809e95662f"
E },
E {
E "Key": "nss",
E "Value": "test"
E },
E {
E "Key": "timestamp",
E "Value": {
E "T": 1726649070,
E "I": 15
E }
E },
E {
E "Key": "shards",
E "Value": [
E "rs1"
E ]
E }
E ]
E },
E {
E "Key": "o2",
E "Value": [
E {
E "Key": "_id",
E "Value": "66ea92eec79fd7809e95662f"
E }
E ]
E }
E ]
E ]
E }
E ],
E "Query": null,
E "UI": null,
E "LSID": "ZgAAAAVpZAAQAAAABPDK6NKC+kkirn44i87XWO8FdWlkACAAAAAAY5mrDaxi8gv8RmdTsQ+1j7fmkr7JUsabhNmXAheU0fgFdHhuVVVJRAAQAAAABCtaeCb+Tkd2iTpSwpLP+SkA",
E "TxnNumber": 0,
E "PrevOpTime": "HAAAABF0cwAAAAAAAAAAABJ0AP//////////AA==",
E "MultiOpType": null
E }: applying transaction op: op: {"Timestamp":{"T":1726649070,"I":17},"Term":1,"Hash":null,"Version":0,"Operation":"i","Namespace":"config.databases","Object":[{"Key":"_id","Value":"test"},{"Key":"primary","Value":"rs1"},{"Key":"partitioned","Value":false},{"Key":"version","Value":[{"Key":"uuid","Value":{"Subtype":4,"Data":"BEPcxUUtR7O6HFM1U6/GpA=="}},{"Key":"timestamp","Value":{"T":1726649070,"I":15}},{"Key":"lastMod","Value":1}]}],"Query":[{"Key":"_id","Value":"test"}],"UI":{"Subtype":4,"Data":"N7GDc/KIQISjbBE8Q1CJBw=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops
cluster.py:464: AssertionError
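For reference, the restore the harness attempted is a plain `pbm` CLI call issued over testinfra; re-running it by hand against the same point-in-time reduces to the sketch below (`newrscfg01` is the config-server container the harness used as its PBM CLI host, and 240 s is the harness's default timeout, per `make_restore` above):

```python
import testinfra

# Re-issue the exact restore command from the failing run.
host = testinfra.get_host("docker://newrscfg01")
result = host.run("timeout 240 pbm restore --time=2024-09-18T08:45:01 --wait")
print(result.rc)
print(result.stdout, result.stderr)
```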
Check failure on line 110 in pbm-functional/pytest/test_replicaset.py
github-actions / JUnit Test Report
test_replicaset.test_logical_timeseries_PBM_T224
AssertionError: Starting restore 2024-09-18T09:15:05.128136343Z to point-in-time 2024-09-18T09:14:34 from '2024-09-18T09:13:23Z'...Started logical restore.
Waiting to finish....Error: operation failed with: reply oplog: replay chunk 1726650867.1726650897: apply oplog for chunk: applying an entry: op: {"Timestamp":{"T":1726650868,"I":3},"Term":1,"Hash":null,"Version":2,"Operation":"i","Namespace":"test.system.views","Object":[{"Key":"_id","Value":"test.test2"},{"Key":"viewOn","Value":"system.buckets.test2"},{"Key":"pipeline","Value":[[{"Key":"$_internalUnpackBucket","Value":[{"Key":"timeField","Value":"timestamp"},{"Key":"metaField","Value":"data"},{"Key":"bucketMaxSpanSeconds","Value":3600}]}]]}],"Query":[{"Key":"_id","Value":"test.test2"}],"UI":{"Subtype":4,"Data":"srwFx9/DQvygMZJM94/piQ=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops
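Here the failing op is not a transaction but a single insert into `test.system.views`: the view definition (`viewOn: system.buckets.test2` with the `$_internalUnpackBucket` pipeline) that MongoDB writes when a timeseries collection is created. In the test below it is produced by the second `create_collection` call, made while PITR slicing was active. A minimal sketch of the triggering call (the replica-set hostname mirrors the harness naming; any member behind a driver connection works):

```python
import pymongo
from datetime import datetime

client = pymongo.MongoClient("mongodb://rs101:27017")

# Creating a timeseries collection inserts its view definition into
# test.system.views; replaying that insert via applyOps is what fails here.
client["test"].create_collection(
    "test2", timeseries={"timeField": "timestamp", "metaField": "data"}
)
client["test"]["test2"].insert_one({"timestamp": datetime.now(), "data": 0})
```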
Raw output
start_cluster = True, cluster = <cluster.Cluster object at 0x7f73af4e5fd0>
@pytest.mark.timeout(300,func_only=True)
def test_logical_timeseries_PBM_T224(start_cluster,cluster):
cluster.check_pbm_status()
client=pymongo.MongoClient(cluster.connection)
mongod_version=client.server_info()["version"]
if version.parse(mongod_version) < version.parse("5.0.0"):
pytest.skip("Unsupported version for timeseries")
pymongo.MongoClient(cluster.connection)["test"].create_collection('test1',timeseries={'timeField':'timestamp','metaField': 'data'})
for i in range(10):
pymongo.MongoClient(cluster.connection)["test"]["test1"].insert_one({"timestamp": datetime.now(), "data": i})
time.sleep(0.1)
cluster.make_backup("logical")
cluster.enable_pitr(pitr_extra_args="--set pitr.oplogSpanMin=0.5")
time.sleep(30)
#create new timeseries collection
pymongo.MongoClient(cluster.connection)["test"].create_collection('test2',timeseries={'timeField':'timestamp','metaField': 'data'})
for i in range(10):
pymongo.MongoClient(cluster.connection)["test"]["test1"].insert_one({"timestamp": datetime.now(), "data": i})
pymongo.MongoClient(cluster.connection)["test"]["test2"].insert_one({"timestamp": datetime.now(), "data": i})
time.sleep(0.1)
time.sleep(5)
pitr = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
backup="--time=" + pitr
Cluster.log("Time for PITR is: " + pitr)
time.sleep(30)
pymongo.MongoClient(cluster.connection).drop_database('test')
> cluster.make_restore(backup,check_pbm_status=True)
test_replicaset.py:110:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cluster.Cluster object at 0x7f73af4e5fd0>
name = '--time=2024-09-18T09:14:34', kwargs = {'check_pbm_status': True}
result = CommandResult(backend=<testinfra.backend.docker.DockerBackend object at 0x7f73af0e3c10>, exit_status=1, command=b'time...MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops\n', _stderr=b'')
n = <testinfra.host.Host docker://rs101>, timeout = 240, error = ''
host = 'rs103', container = <Container: 2cfac897bcbb>
def make_restore(self, name, **kwargs):
if self.layout == "sharded":
client = pymongo.MongoClient(self.connection)
result = client.admin.command("balancerStop")
client.close()
Cluster.log("Stopping balancer: " + str(result))
self.stop_mongos()
self.stop_arbiters()
n = testinfra.get_host("docker://" + self.pbm_cli)
timeout = time.time() + 60
while True:
if not self.get_status()['running']:
break
if time.time() > timeout:
assert False, "Cannot start restore, another operation running"
time.sleep(1)
Cluster.log("Restore started")
timeout=kwargs.get('timeout', 240)
result = n.run('timeout ' + str(timeout) + ' pbm restore ' + name + ' --wait')
if result.rc == 0:
Cluster.log(result.stdout)
else:
# try to catch possible failures if timeout exceeded
error=''
for host in self.mongod_hosts:
try:
container = docker.from_env().containers.get(host)
get_logs = container.exec_run(
'cat /var/lib/mongo/pbm.restore.log', stderr=False)
if get_logs.exit_code == 0:
Cluster.log(
"!!!!Possible failure on {}, file pbm.restore.log was found:".format(host))
logs = get_logs.output.decode('utf-8')
Cluster.log(logs)
if '"s":"F"' in logs:
error = logs
except docker.errors.APIError:
pass
if error:
assert False, result.stdout + result.stderr + "\n" + error
else:
> assert False, result.stdout + result.stderr
E AssertionError: Starting restore 2024-09-18T09:15:05.128136343Z to point-in-time 2024-09-18T09:14:34 from '2024-09-18T09:13:23Z'...Started logical restore.
E Waiting to finish....Error: operation failed with: reply oplog: replay chunk 1726650867.1726650897: apply oplog for chunk: applying an entry: op: {"Timestamp":{"T":1726650868,"I":3},"Term":1,"Hash":null,"Version":2,"Operation":"i","Namespace":"test.system.views","Object":[{"Key":"_id","Value":"test.test2"},{"Key":"viewOn","Value":"system.buckets.test2"},{"Key":"pipeline","Value":[[{"Key":"$_internalUnpackBucket","Value":[{"Key":"timeField","Value":"timestamp"},{"Key":"metaField","Value":"data"},{"Key":"bucketMaxSpanSeconds","Value":3600}]}]]}],"Query":[{"Key":"_id","Value":"test.test2"}],"UI":{"Subtype":4,"Data":"srwFx9/DQvygMZJM94/piQ=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops
cluster.py:464: AssertionError
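Both failures end in the same generic `applyOps ... (UnknownError) applyOps had one or more errors applying ops`, so the actionable detail lives in the offending oplog entries themselves. A hedged sketch for listing comparable transaction entries straight from a source node's oplog, to spot metadata writes like the ones above (against `config.*` or `*.system.views`) before a restore is attempted; hostname and filter are illustrative:

```python
import pymongo

# Direct connection to one replica-set member; "rs101" mirrors the harness naming.
client = pymongo.MongoClient("mongodb://rs101:27017/?directConnection=true")
oplog = client["local"]["oplog.rs"]

# Transactions are recorded as applyOps commands; print the namespaces each
# one touches so metadata writes like the ones above stand out.
for entry in oplog.find({"op": "c", "o.applyOps": {"$exists": True}}):
    print(entry["ts"], [op.get("ns") for op in entry["o"]["applyOps"]])
```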