Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PBM-1296 fix the timestamp for restore #219

Merged
merged 1 commit into from
Sep 17, 2024

PBM-1296 fix the timestamp for restore

ec4c19f
Select commit
Loading
Failed to load commit list.
Sign in for the full log view
Merged

PBM-1296 fix the timestamp for restore #219

PBM-1296 fix the timestamp for restore
ec4c19f
Select commit
Loading
Failed to load commit list.
GitHub Actions / JUnit Test Report failed Sep 17, 2024 in 0s

30 tests run, 20 passed, 9 skipped, 1 failed.

Annotations

Check failure on line 110 in pbm-functional/pytest/test_replicaset.py

See this annotation in the file changed.

@github-actions github-actions / JUnit Test Report

test_replicaset.test_logical_timeseries_PBM_T224

AssertionError: Starting restore 2024-09-17T16:40:03.650694178Z to point-in-time 2024-09-17T16:39:33 from '2024-09-17T16:38:21Z'...Started logical restore.
Waiting to finish....Error: operation failed with: reply oplog: replay chunk 1726591166.1726591196: apply oplog for chunk: applying an entry: op: {"Timestamp":{"T":1726591166,"I":7},"Term":1,"Hash":null,"Version":2,"Operation":"i","Namespace":"test.system.views","Object":[{"Key":"_id","Value":"test.test2"},{"Key":"viewOn","Value":"system.buckets.test2"},{"Key":"pipeline","Value":[[{"Key":"$_internalUnpackBucket","Value":[{"Key":"timeField","Value":"timestamp"},{"Key":"metaField","Value":"data"},{"Key":"bucketMaxSpanSeconds","Value":3600}]}]]}],"Query":[{"Key":"_id","Value":"test.test2"}],"UI":{"Subtype":4,"Data":"L5s63/6aS1+sV2tJ0iNATg=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops
Raw output
start_cluster = True, cluster = <cluster.Cluster object at 0x7f7716d91050>

    @pytest.mark.timeout(300,func_only=True)
    def test_logical_timeseries_PBM_T224(start_cluster,cluster):
        cluster.check_pbm_status()
        client=pymongo.MongoClient(cluster.connection)
        mongod_version=client.server_info()["version"]
        if version.parse(mongod_version) < version.parse("5.0.0"):
            pytest.skip("Unsupported version for timeseries")
        pymongo.MongoClient(cluster.connection)["test"].create_collection('test1',timeseries={'timeField':'timestamp','metaField': 'data'})
        for i in range(10):
            pymongo.MongoClient(cluster.connection)["test"]["test1"].insert_one({"timestamp": datetime.now(), "data": i})
            time.sleep(0.1)
        cluster.make_backup("logical")
        cluster.enable_pitr(pitr_extra_args="--set pitr.oplogSpanMin=0.5")
        time.sleep(30)
        #create new timeseries collection
        pymongo.MongoClient(cluster.connection)["test"].create_collection('test2',timeseries={'timeField':'timestamp','metaField': 'data'})
        for i in range(10):
            pymongo.MongoClient(cluster.connection)["test"]["test1"].insert_one({"timestamp": datetime.now(), "data": i})
            pymongo.MongoClient(cluster.connection)["test"]["test2"].insert_one({"timestamp": datetime.now(), "data": i})
            time.sleep(0.1)
        time.sleep(5)
        pitr = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
        backup="--time=" + pitr
        Cluster.log("Time for PITR is: " + pitr)
        time.sleep(30)
        pymongo.MongoClient(cluster.connection).drop_database('test')
>       cluster.make_restore(backup,check_pbm_status=True)

test_replicaset.py:110: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <cluster.Cluster object at 0x7f7716d91050>
name = '--time=2024-09-17T16:39:33', kwargs = {'check_pbm_status': True}
result = CommandResult(backend=<testinfra.backend.docker.DockerBackend object at 0x7f7716e0db90>, exit_status=1, command=b'time...MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops\n', _stderr=b'')
n = <testinfra.host.Host docker://rs101>, timeout = 240, error = ''
host = 'rs103', container = <Container: 5c95b775e127>

    def make_restore(self, name, **kwargs):
        if self.layout == "sharded":
            client = pymongo.MongoClient(self.connection)
            result = client.admin.command("balancerStop")
            client.close()
            Cluster.log("Stopping balancer: " + str(result))
            self.stop_mongos()
        self.stop_arbiters()
        n = testinfra.get_host("docker://" + self.pbm_cli)
        timeout = time.time() + 60
    
        while True:
            if not self.get_status()['running']:
                break
            if time.time() > timeout:
                assert False, "Cannot start restore, another operation running"
            time.sleep(1)
        Cluster.log("Restore started")
        timeout=kwargs.get('timeout', 240)
        result = n.run('timeout ' + str(timeout) + ' pbm restore ' + name + ' --wait')
    
        if result.rc == 0:
            Cluster.log(result.stdout)
        else:
            # try to catch possible failures if timeout exceeded
            error=''
            for host in self.mongod_hosts:
                try:
                    container = docker.from_env().containers.get(host)
                    get_logs = container.exec_run(
                        'cat /var/lib/mongo/pbm.restore.log', stderr=False)
                    if get_logs.exit_code == 0:
                        Cluster.log(
                            "!!!!Possible failure on {}, file pbm.restore.log was found:".format(host))
                        logs = get_logs.output.decode('utf-8')
                        Cluster.log(logs)
                        if '"s":"F"' in logs:
                            error = logs
                except docker.errors.APIError:
                    pass
            if error:
                assert False, result.stdout + result.stderr + "\n" + error
            else:
>               assert False, result.stdout + result.stderr
E               AssertionError: Starting restore 2024-09-17T16:40:03.650694178Z to point-in-time 2024-09-17T16:39:33 from '2024-09-17T16:38:21Z'...Started logical restore.
E               Waiting to finish....Error: operation failed with: reply oplog: replay chunk 1726591166.1726591196: apply oplog for chunk: applying an entry: op: {"Timestamp":{"T":1726591166,"I":7},"Term":1,"Hash":null,"Version":2,"Operation":"i","Namespace":"test.system.views","Object":[{"Key":"_id","Value":"test.test2"},{"Key":"viewOn","Value":"system.buckets.test2"},{"Key":"pipeline","Value":[[{"Key":"$_internalUnpackBucket","Value":[{"Key":"timeField","Value":"timestamp"},{"Key":"metaField","Value":"data"},{"Key":"bucketMaxSpanSeconds","Value":3600}]}]]}],"Query":[{"Key":"_id","Value":"test.test2"}],"UI":{"Subtype":4,"Data":"L5s63/6aS1+sV2tJ0iNATg=="},"LSID":null,"TxnNumber":null,"PrevOpTime":null,"MultiOpType":null} | merr <nil>: applyOps: (UnknownError) applyOps had one or more errors applying ops

cluster.py:464: AssertionError