From 8ef978b22415dadce1f032a46facf6654206a31d Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Thu, 21 Nov 2024 20:21:12 -0500 Subject: [PATCH] initial commit Signed-off-by: Wenqi Mou --- br/pkg/backup/client.go | 3 +- br/pkg/backup/schema.go | 1 + br/pkg/checkpoint/checkpoint_test.go | 6 +- br/pkg/checkpoint/log_restore.go | 30 +- br/pkg/restore/import_mode_switcher.go | 2 +- br/pkg/restore/log_client/BUILD.bazel | 2 + .../log_client/batch_file_processor.go | 134 ++++++ br/pkg/restore/log_client/client.go | 383 +++++++++++------- br/pkg/restore/log_client/client_test.go | 96 ++--- br/pkg/restore/log_client/export_test.go | 6 +- br/pkg/restore/log_client/import.go | 4 +- br/pkg/restore/log_client/log_file_manager.go | 101 ++--- .../log_client/log_file_manager_test.go | 4 +- br/pkg/restore/snap_client/client.go | 23 +- br/pkg/restore/snap_client/client_test.go | 10 +- br/pkg/stream/BUILD.bazel | 8 +- br/pkg/stream/meta_kv_test.go | 12 +- br/pkg/stream/rewrite_meta_rawkv.go | 183 +++++++-- br/pkg/stream/rewrite_meta_rawkv_test.go | 112 ++--- br/pkg/stream/search.go | 9 +- br/pkg/stream/search_test.go | 9 +- br/pkg/stream/stream_metas.go | 3 +- br/pkg/stream/stream_metas_test.go | 1 + br/pkg/stream/stream_mgr.go | 9 + br/pkg/stream/stream_status.go | 7 +- br/pkg/stream/table_history.go | 75 ++++ br/pkg/stream/util.go | 26 -- br/pkg/stream/util_test.go | 48 --- br/pkg/task/backup.go | 3 +- br/pkg/task/common.go | 16 +- br/pkg/task/config_test.go | 2 +- br/pkg/task/restore.go | 321 ++++++++++++--- br/pkg/task/restore_raw.go | 4 +- br/pkg/task/restore_test.go | 2 + br/pkg/task/restore_txn.go | 4 +- br/pkg/task/stream.go | 240 ++++++----- br/pkg/task/stream_test.go | 4 +- br/pkg/utils/BUILD.bazel | 4 + br/pkg/utils/consts/BUILD.bazel | 8 + br/pkg/utils/consts/consts.go | 21 + br/pkg/utils/filter.go | 76 ++++ br/pkg/utils/key.go | 27 ++ br/pkg/utils/key_test.go | 39 ++ br/pkg/utils/schema.go | 10 +- br/tests/br_encryption/run.sh | 35 +- br/tests/br_pitr/run.sh | 37 +- br/tests/br_pitr_failpoint/run.sh | 38 +- br/tests/br_pitr_gc_safepoint/run.sh | 37 +- br/tests/br_pitr_table_filter/run.sh | 305 ++++++++++++++ br/tests/br_restore_checkpoint/run.sh | 38 +- br/tests/br_test_utils.sh | 51 +++ br/tests/br_tiflash_conflict/run.sh | 37 +- br/tests/utils.go | 6 +- pkg/table/tables/bench_test.go | 2 +- tests/_utils/run_services | 2 +- 55 files changed, 1805 insertions(+), 871 deletions(-) create mode 100644 br/pkg/restore/log_client/batch_file_processor.go create mode 100644 br/pkg/stream/table_history.go delete mode 100644 br/pkg/stream/util.go delete mode 100644 br/pkg/stream/util_test.go create mode 100644 br/pkg/utils/consts/BUILD.bazel create mode 100644 br/pkg/utils/consts/consts.go create mode 100644 br/pkg/utils/filter.go create mode 100755 br/tests/br_pitr_table_filter/run.sh create mode 100644 br/tests/br_test_utils.sh diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 6ad03dd45cebc..a7f17db6fe0b2 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -827,7 +827,7 @@ func BuildBackupSchemas( if err != nil { return errors.Trace(err) } - + log.Info("################ listing db", zap.Any("dbs", len(dbs))) for _, dbInfo := range dbs { // skip system databases if !tableFilter.MatchSchema(dbInfo.Name.O) || util.IsMemDB(dbInfo.Name.L) || utils.IsTemplateSysDB(dbInfo.Name) { @@ -929,6 +929,7 @@ func BuildBackupSchemas( } if !hasTable { + log.Info("################ doesn't have table", zap.Any("info", dbInfo)) fn(dbInfo, nil) } } diff --git a/br/pkg/backup/schema.go 
b/br/pkg/backup/schema.go index ac7bc98258a19..d0cf9baa0f6c5 100644 --- a/br/pkg/backup/schema.go +++ b/br/pkg/backup/schema.go @@ -96,6 +96,7 @@ func (ss *Schemas) BackupSchemas( // because the field of `dbInfo` would be modified, which affects the later iteration. // so copy the `dbInfo` for each to `newDBInfo` newDBInfo := *dbInfo + log.Info("############### backup schema ###############", zap.Any("schema", dbInfo.Name.O)) schema := &schemaInfo{ tableInfo: tableInfo, dbInfo: &newDBInfo, diff --git a/br/pkg/checkpoint/checkpoint_test.go b/br/pkg/checkpoint/checkpoint_test.go index c6756f8058c5c..b6eaed86d48a2 100644 --- a/br/pkg/checkpoint/checkpoint_test.go +++ b/br/pkg/checkpoint/checkpoint_test.go @@ -105,12 +105,12 @@ func TestCheckpointMetaForRestore(t *testing.T) { exists := checkpoint.ExistsCheckpointProgress(ctx, dom) require.False(t, exists) err = checkpoint.SaveCheckpointProgress(ctx, se, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }) require.NoError(t, err) progress, err := checkpoint.LoadCheckpointProgress(ctx, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, progress.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, progress.Progress) taskInfo, err := checkpoint.TryToGetCheckpointTaskInfo(ctx, s.Mock.Domain, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) @@ -120,7 +120,7 @@ func TestCheckpointMetaForRestore(t *testing.T) { require.Equal(t, uint64(333), taskInfo.Metadata.RewriteTS) require.Equal(t, "1.0", taskInfo.Metadata.GcRatio) require.Equal(t, true, taskInfo.HasSnapshotMetadata) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, taskInfo.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, taskInfo.Progress) exists = checkpoint.ExistsCheckpointIngestIndexRepairSQLs(ctx, dom) require.False(t, exists) diff --git a/br/pkg/checkpoint/log_restore.go b/br/pkg/checkpoint/log_restore.go index b2ae3c398a3c8..2afcffe36d8d5 100644 --- a/br/pkg/checkpoint/log_restore.go +++ b/br/pkg/checkpoint/log_restore.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/pkg/domain" + "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta/model" pmodel "github.com/pingcap/tidb/pkg/parser/model" "github.com/pingcap/tidb/pkg/util/sqlexec" @@ -120,13 +121,17 @@ func StartCheckpointLogRestoreRunnerForTest( return runner, nil } -// Notice that the session is owned by the checkpoint runner, and it will be also closed by it. func StartCheckpointRunnerForLogRestore( ctx context.Context, - se glue.Session, + g glue.Glue, + store kv.Storage, ) (*CheckpointRunner[LogRestoreKeyType, LogRestoreValueType], error) { + session, err := g.CreateSession(store) + if err != nil { + return nil, errors.Trace(err) + } runner := newCheckpointRunner[LogRestoreKeyType, LogRestoreValueType]( - newTableCheckpointStorage(se, LogRestoreCheckpointDatabaseName), + newTableCheckpointStorage(session, LogRestoreCheckpointDatabaseName), nil, valueMarshalerForLogRestore) // for restore, no need to set lock @@ -205,14 +210,14 @@ func ExistsLogRestoreCheckpointMetadata( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointMetaTableName)) } -// A progress type for snapshot + log restore. +// RestoreProgress is a progress type for snapshot + log restore. 
// -// Before the id-maps is persist into external storage, the snapshot restore and -// id-maps constructure can be repeated. So if the progress is in `InSnapshotRestore`, +// Before the id-maps is persisted into external storage, the snapshot restore and +// id-maps building can be retried. So if the progress is in `InSnapshotRestore`, // it can retry from snapshot restore. // -// After the id-maps is persist into external storage, there are some meta-kvs has -// been restored into the cluster, such as `rename ddl`. Where would be a situation: +// After the id-maps is persisted into external storage, there are some meta-kvs has +// been restored into the cluster, such as `rename ddl`. A situation could be: // // the first execution: // @@ -220,7 +225,7 @@ func ExistsLogRestoreCheckpointMetadata( // table A (id 80) --------------> table B (id 80) // ( snapshot restore ) ( log restore ) // -// the second execution if don't skip snasphot restore: +// the second execution if don't skip snapshot restore: // // table A is created again in snapshot restore, because there is no table named A // table A (id 81) --------------> [not in id-maps, so ignored] @@ -232,8 +237,8 @@ type RestoreProgress int const ( InSnapshotRestore RestoreProgress = iota - // Only when the id-maps is persist, status turns into it. - InLogRestoreAndIdMapPersist + // Only when the id-maps is persisted, status turns into it. + InLogRestoreAndIdMapPersisted ) type CheckpointProgress struct { @@ -265,8 +270,7 @@ func ExistsCheckpointProgress( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointProgressTableName)) } -// CheckpointTaskInfo is unique information within the same cluster id. It represents the last -// restore task executed for this cluster. +// CheckpointTaskInfoForLogRestore is tied to a specific cluster. It represents the last restore task executed this cluster. type CheckpointTaskInfoForLogRestore struct { Metadata *CheckpointMetadataForLogRestore HasSnapshotMetadata bool diff --git a/br/pkg/restore/import_mode_switcher.go b/br/pkg/restore/import_mode_switcher.go index 0ae69f4a6a0af..9a680e3958edd 100644 --- a/br/pkg/restore/import_mode_switcher.go +++ b/br/pkg/restore/import_mode_switcher.go @@ -148,7 +148,7 @@ func (switcher *ImportModeSwitcher) switchToImportMode( }() } -// RestorePreWork executes some prepare work before restore. +// RestorePreWork switches to import mode and removes pd schedulers if needed // TODO make this function returns a restore post work. func RestorePreWork( ctx context.Context, diff --git a/br/pkg/restore/log_client/BUILD.bazel b/br/pkg/restore/log_client/BUILD.bazel index 5975b0726aa1d..c7c90824d3a90 100644 --- a/br/pkg/restore/log_client/BUILD.bazel +++ b/br/pkg/restore/log_client/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "log_client", srcs = [ + "batch_file_processor.go", "client.go", "import.go", "import_retry.go", @@ -33,6 +34,7 @@ go_library( "//br/pkg/stream", "//br/pkg/summary", "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//br/pkg/version", "//pkg/ddl/util", diff --git a/br/pkg/restore/log_client/batch_file_processor.go b/br/pkg/restore/log_client/batch_file_processor.go new file mode 100644 index 0000000000000..242c939060ff5 --- /dev/null +++ b/br/pkg/restore/log_client/batch_file_processor.go @@ -0,0 +1,134 @@ +// Copyright 2024 PingCAP, Inc. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package logclient
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/pingcap/errors"
+	backuppb "github.com/pingcap/kvproto/pkg/brpb"
+	"github.com/pingcap/log"
+	"github.com/pingcap/tidb/br/pkg/stream"
+	"github.com/pingcap/tidb/br/pkg/utils"
+	"github.com/pingcap/tidb/br/pkg/utils/consts"
+	"github.com/pingcap/tidb/pkg/meta"
+	"github.com/pingcap/tidb/pkg/meta/model"
+	"go.uber.org/zap"
+)
+
+// BatchFileProcessor defines how to process a batch of meta KV files.
+type BatchFileProcessor interface {
+	// processBatch processes a batch of files up to filterTS and returns the entries
+	// that were not processed, for the next iteration.
+	processBatch(
+		ctx context.Context,
+		files []*backuppb.DataFileInfo,
+		entries []*KvEntryWithTS,
+		filterTS uint64,
+		cf string,
+	) ([]*KvEntryWithTS, error)
+}
+
+// RestoreProcessor implements BatchFileProcessor for restoring files.
+type RestoreProcessor struct {
+	client         *LogClient
+	schemasReplace *stream.SchemasReplace
+	updateStats    func(kvCount uint64, size uint64)
+	progressInc    func()
+}
+
+func (rp *RestoreProcessor) processBatch(
+	ctx context.Context,
+	files []*backuppb.DataFileInfo,
+	entries []*KvEntryWithTS,
+	filterTS uint64,
+	cf string,
+) ([]*KvEntryWithTS, error) {
+	return rp.client.RestoreBatchMetaKVFiles(
+		ctx, files, rp.schemasReplace, entries,
+		filterTS, rp.updateStats, rp.progressInc, cf,
+	)
+}
+
+// DDLCollector implements BatchFileProcessor for collecting DDL information.
+// 1. It collects table renaming information. The table rename operation does not change the table id, and the process
+// will drop the original table and create a new one with the same table id, so in the DDL history there will be two
+// events that correspond to the same table id.
+//
+// Add more logic in the future if needed.
+type DDLCollector struct {
+	client          *LogClient
+	tableRenameInfo *stream.LogBackupTableHistory
+}
+
+func (dc *DDLCollector) processBatch(
+	ctx context.Context,
+	files []*backuppb.DataFileInfo,
+	entries []*KvEntryWithTS,
+	filterTS uint64,
+	cf string,
+) ([]*KvEntryWithTS, error) {
+	// no need to parse the write CF, as it contains values like "p\XXXX\XXX" that are meaningless here.
+	// the default CF value should contain everything we need for DDL operations.
+	if cf == consts.WriteCF {
+		return nil, nil
+	}
+
+	curSortedEntries, filteredEntries, err := dc.client.filterAndSortKvEntriesFromFiles(ctx, files, entries, filterTS)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	// process entries to collect table IDs
+	for _, entry := range curSortedEntries {
+		value := entry.E.Value
+
+		if utils.IsMetaDBKey(entry.E.Key) {
+			rawKey, err := stream.ParseTxnMetaKeyFrom(entry.E.Key)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+
+			// collect the db id -> name mapping during log backup; it contains information about newly created databases
+			if meta.IsDBkey(rawKey.Field) {
+				var dbInfo model.DBInfo
+				if err := json.Unmarshal(value, &dbInfo); err != nil {
+					return nil, errors.Trace(err)
+				}
+				dc.tableRenameInfo.RecordDBIdToName(dbInfo.ID, dbInfo.Name.O)
+			} else if !meta.IsDBkey(rawKey.Key) {
+				// also see RewriteMetaKvEntry
+				continue
+			}
+
+			// collect table history indexed by table id; the same id may map to different table names over time
+			if meta.IsTableKey(rawKey.Field) {
+				var tableInfo model.TableInfo
+				if err := json.Unmarshal(value, &tableInfo); err != nil {
+					return nil, errors.Trace(err)
+				}
+				// cannot use the db id from the parsed table info because it might not be set (defaulting to 0),
+				// so parse it from the meta key instead
+				dbID, err := meta.ParseDBKey(rawKey.Key)
+				if err != nil {
+					return nil, errors.Trace(err)
+				}
+
+				log.Info("######################################## adding table info", zap.Int64("tableid", tableInfo.ID), zap.String("table name", tableInfo.Name.O), zap.Int64("db id", dbID))
+				dc.tableRenameInfo.AddTableHistory(tableInfo.ID, tableInfo.Name.String(), dbID)
+			}
+		}
+	}
+	return filteredEntries, nil
+}
diff --git a/br/pkg/restore/log_client/client.go b/br/pkg/restore/log_client/client.go
index d208b58bb15d2..7271204f484c7 100644
--- a/br/pkg/restore/log_client/client.go
+++ b/br/pkg/restore/log_client/client.go
@@ -55,6 +55,7 @@ import (
 	"github.com/pingcap/tidb/br/pkg/stream"
 	"github.com/pingcap/tidb/br/pkg/summary"
 	"github.com/pingcap/tidb/br/pkg/utils"
+	"github.com/pingcap/tidb/br/pkg/utils/consts"
 	"github.com/pingcap/tidb/br/pkg/utils/iter"
 	"github.com/pingcap/tidb/br/pkg/version"
 	ddlutil "github.com/pingcap/tidb/pkg/ddl/util"
@@ -114,8 +115,8 @@ type LogClient struct {
 	useCheckpoint bool
 }
 
-// NewRestoreClient returns a new RestoreClient.
-func NewRestoreClient(
+// NewLogClient returns a new LogClient.
+func NewLogClient(
 	pdClient pd.Client,
 	pdHTTPCli pdhttp.Client,
 	tlsConf *tls.Config,
@@ -133,7 +134,7 @@ func NewRestoreClient(
 
 // Close a client.
 func (rc *LogClient) Close() {
-	// close the connection, and it must be succeed when in SQL mode.
+	// close the connection; it must succeed when in SQL mode.
if rc.se != nil { rc.se.Close() } @@ -143,10 +144,14 @@ func (rc *LogClient) Close() { } if err := rc.fileImporter.Close(); err != nil { - log.Warn("failed to close file improter") + log.Warn("failed to close file importer") } - log.Info("Restore client closed") + if rc.LogFileManager != nil { + rc.LogFileManager.Close() + } + + log.Info("Log client closed") } func (rc *LogClient) SetRawKVBatchClient( @@ -215,12 +220,7 @@ func (rc *LogClient) CleanUpKVFiles( } func (rc *LogClient) StartCheckpointRunnerForLogRestore(ctx context.Context, g glue.Glue, store kv.Storage) (*checkpoint.CheckpointRunner[checkpoint.LogRestoreKeyType, checkpoint.LogRestoreValueType], error) { - se, err := g.CreateSession(store) - if err != nil { - return nil, errors.Trace(err) - } - runner, err := checkpoint.StartCheckpointRunnerForLogRestore(ctx, se) - return runner, errors.Trace(err) + return checkpoint.StartCheckpointRunnerForLogRestore(ctx, g, store) } // Init create db connection and domain for storage. @@ -256,7 +256,7 @@ func (rc *LogClient) InitClients(ctx context.Context, backend *backuppb.StorageB rc.fileImporter = NewLogFileImporter(metaClient, importCli, backend) } -func (rc *LogClient) InitCheckpointMetadataForLogRestore( +func (rc *LogClient) LoadOrCreateCheckpointMetadataForLogRestore( ctx context.Context, startTS, restoredTS uint64, gcRatio string, @@ -273,7 +273,8 @@ func (rc *LogClient) InitCheckpointMetadataForLogRestore( return "", errors.Trace(err) } - log.Info("reuse gc ratio from checkpoint metadata", zap.String("gc-ratio", gcRatio)) + log.Info("reuse gc ratio from checkpoint metadata", zap.String("old-gc-ratio", gcRatio), + zap.String("checkpoint-gc-ratio", meta.GcRatio)) return meta.GcRatio, nil } @@ -376,7 +377,7 @@ func ApplyKVFilesWithBatchMethod( } fs.deleteFiles = append(fs.deleteFiles, f) } else { - if f.GetCf() == stream.DefaultCF { + if f.GetCf() == consts.DefaultCF { if fs.defaultFiles == nil { fs.defaultFiles = make([]*LogDataFileInfo, 0, batchCount) } @@ -511,6 +512,17 @@ func (rc *LogClient) RestoreKVFiles( var applyWg sync.WaitGroup eg, ectx := errgroup.WithContext(ctx) + log.Info("################ rewrite rules", zap.Any("rules", rules)) + for _, schema := range rc.dom.InfoSchema().AllSchemas() { + log.Info("############### schema", zap.Any("schema", schema)) + info, _ := rc.dom.InfoSchema().SchemaTableInfos(ctx, schema.Name) + log.Info("################ schema table size", zap.Any("size", len(info))) + if len(info) < 10 { + for _, i := range info { + log.Info("################ tables", zap.Any("tables", i.Name.O)) + } + } + } applyFunc := func(files []*LogDataFileInfo, kvCount int64, size uint64) { if len(files) == 0 { return @@ -518,6 +530,7 @@ func (rc *LogClient) RestoreKVFiles( // get rewrite rule from table id. // because the tableID of files is the same. rule, ok := rules[files[0].TableId] + log.Info("################ checking on table id", zap.Any("tableid", files[0].TableId), zap.Any("pass?", ok)) if !ok { // TODO handle new created table // For this version we do not handle new created table after full backup. 
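For context on the checkpoint runner and checkpoint metadata handling above: a minimal, hypothetical helper (the function name is illustrative and not part of this patch) that spells out the retry contract of checkpoint.RestoreProgress described in br/pkg/checkpoint/log_restore.go.

// canRetrySnapshotRestore sketches the intended use of the renamed
// checkpoint.InLogRestoreAndIdMapPersisted progress: before the id map is
// persisted, the snapshot-restore phase is idempotent and may be rerun; once the
// id map has been persisted, restore has to resume from the log-restore phase instead.
func canRetrySnapshotRestore(progress checkpoint.RestoreProgress) bool {
	return progress == checkpoint.InSnapshotRestore
}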
@@ -586,7 +599,7 @@ func (rc *LogClient) RestoreKVFiles( return errors.Trace(err) } -func (rc *LogClient) initSchemasMap( +func (rc *LogClient) loadSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { @@ -626,10 +639,11 @@ func (rc *LogClient) initSchemasMap( return backupMeta.GetDbMaps(), nil } -func initFullBackupTables( +func readFilteredFullBackupTables( ctx context.Context, s storage.ExternalStorage, tableFilter filter.Filter, + piTRTableFilter *utils.PiTRTableFilter, cipherInfo *backuppb.CipherInfo, ) (map[int64]*metautil.Table, error) { metaData, err := s.ReadFile(ctx, metautil.MetaFile) @@ -658,24 +672,37 @@ func initFullBackupTables( tables := make(map[int64]*metautil.Table) for _, db := range databases { dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { + if name, ok := utils.StripTempTableNamePrefixIfNeeded(db.Info.Name.O); utils.IsSysDB(name) && ok { dbName = name } - - if !tableFilter.MatchSchema(dbName) { + log.Info("################# db id", zap.Any("db", dbName), zap.Any("dbid", db.Info.ID)) + if !tableFilter.MatchSchema(dbName) && !(piTRTableFilter != nil && piTRTableFilter.ContainsDB(db.Info.ID)) { continue } + tableAdded := false for _, table := range db.Tables { + log.Info("############# table id", zap.Any("table", table.Info.ID)) // check this db is empty. if table.Info == nil { tables[db.Info.ID] = table + tableAdded = true continue } - if !tableFilter.MatchTable(dbName, table.Info.Name.O) { + if !tableFilter.MatchTable(dbName, table.Info.Name.O) && + !(piTRTableFilter != nil && piTRTableFilter.ContainsTable(db.Info.ID, table.Info.ID)) { continue } tables[table.Info.ID] = table + tableAdded = true + } + // all tables in this db are filtered out, but we still need to keep this db since it passed the filter check + // and tables might get created later during log backup, if not keeping this db, those tables will be mapped to + // a new db id and thus will become data corruption. + if !tableAdded { + tables[db.Info.ID] = &metautil.Table{ + DB: db.Info, + } } } @@ -689,43 +716,40 @@ type FullBackupStorageConfig struct { type InitSchemaConfig struct { // required - IsNewTask bool - TableFilter filter.Filter + IsNewRestoreTask bool + TableFilter filter.Filter // original table filter from user // optional TiFlashRecorder *tiflashrec.TiFlashRecorder FullBackupStorage *FullBackupStorageConfig + PiTRTableFilter *utils.PiTRTableFilter // generated table filter that contain all the table id that needs to restore } const UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL = "UNSAFE_PITR_LOG_RESTORE_START_BEFORE_ANY_UPSTREAM_USER_DDL" +// generateDBReplacesFromFullBackupStorage reads the full backup schema and creates the mapping from upstream table id +// to downstream table id. The downstream tables have been created in the previous snapshot restore step, so we +// can build the mapping by looking at the table names. The current table information is in domain.InfoSchema. 
func (rc *LogClient) generateDBReplacesFromFullBackupStorage( ctx context.Context, cfg *InitSchemaConfig, cipherInfo *backuppb.CipherInfo, ) (map[stream.UpstreamID]*stream.DBReplace, error) { dbReplaces := make(map[stream.UpstreamID]*stream.DBReplace) - if cfg.FullBackupStorage == nil { - envVal, ok := os.LookupEnv(UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL) - if ok && len(envVal) > 0 { - log.Info(fmt.Sprintf("the environment variable %s is active, skip loading the base schemas.", UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL)) - return dbReplaces, nil - } - return nil, errors.Errorf("miss upstream table information at `start-ts`(%d) but the full backup path is not specified", rc.startTS) - } s, err := storage.New(ctx, cfg.FullBackupStorage.Backend, cfg.FullBackupStorage.Opts) if err != nil { return nil, errors.Trace(err) } - fullBackupTables, err := initFullBackupTables(ctx, s, cfg.TableFilter, cipherInfo) + filteredFullBackupTables, err := readFilteredFullBackupTables(ctx, s, cfg.TableFilter, cfg.PiTRTableFilter, cipherInfo) if err != nil { return nil, errors.Trace(err) } - for _, t := range fullBackupTables { + log.Info("############# getting filtered tables", zap.Int("tables", len(filteredFullBackupTables))) + for _, t := range filteredFullBackupTables { dbName, _ := utils.GetSysDBCIStrName(t.DB.Name) newDBInfo, exist := rc.dom.InfoSchema().SchemaByName(dbName) if !exist { - log.Info("db not existed", zap.String("dbname", dbName.String())) + log.Info("db does not exist", zap.String("dbName", dbName.String())) continue } @@ -741,7 +765,7 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( } newTableInfo, err := restore.GetTableSchema(rc.GetDomain(), dbName, t.Info.Name) if err != nil { - log.Info("table not existed", zap.String("tablename", dbName.String()+"."+t.Info.Name.String())) + log.Info("table doesn't exist", zap.String("tableName", dbName.String()+"."+t.Info.Name.String())) continue } @@ -771,68 +795,70 @@ func (rc *LogClient) InitSchemasReplaceForDDL( dbReplaces map[stream.UpstreamID]*stream.DBReplace ) - // not new task, load schemas map from external storage - if !cfg.IsNewTask { - log.Info("try to load pitr id maps") + // not a new task, load id map from cluster + if !cfg.IsNewRestoreTask { + log.Info("not a new task, loading existing pitr id maps") needConstructIdMap = false - dbMaps, err = rc.initSchemasMap(ctx, rc.restoreTS) + dbMaps, err = rc.loadSchemasMap(ctx, rc.restoreTS) if err != nil { return nil, errors.Trace(err) } - } - - // a new task, but without full snapshot restore, tries to load - // schemas map whose `restore-ts`` is the task's `start-ts`. 
- if len(dbMaps) <= 0 && cfg.FullBackupStorage == nil { - log.Info("try to load pitr id maps of the previous task", zap.Uint64("start-ts", rc.startTS)) - needConstructIdMap = true - dbMaps, err = rc.initSchemasMap(ctx, rc.startTS) - if err != nil { + dbReplaces = stream.FromSchemaMaps(dbMaps) + log.Info("loaded id maps", zap.Int("dbMapSize", len(dbMaps))) + } else { + // additional check that TiFlash replicas should have been cleaned up before a new task + if err = rc.validateNoTiFlashReplica(); err != nil { return nil, errors.Trace(err) } - existTiFlashTable := false - rc.dom.InfoSchema().ListTablesWithSpecialAttribute(func(tableInfo *model.TableInfo) bool { - if tableInfo.TiFlashReplica != nil && tableInfo.TiFlashReplica.Count > 0 { - existTiFlashTable = true - } - return false - }) - if existTiFlashTable { - return nil, errors.Errorf("exist table(s) have tiflash replica, please remove it before restore") - } - } - if len(dbMaps) <= 0 { - log.Info("no id maps, build the table replaces from cluster and full backup schemas") + // a new task, need to build id map needConstructIdMap = true - dbReplaces, err = rc.generateDBReplacesFromFullBackupStorage(ctx, cfg, cipherInfo) - if err != nil { - return nil, errors.Trace(err) + + // without full snapshot restore, tries to load + // schemas map whose `restore-ts`` is the task's `start-ts`. + if cfg.FullBackupStorage == nil { + log.Info("no full backup storage provided, loading pitr id maps from the previous task", zap.Uint64("start-ts", rc.startTS)) + dbMaps, err = rc.loadSchemasMap(ctx, rc.startTS) + if err != nil { + return nil, errors.Trace(err) + } + dbReplaces = stream.FromSchemaMaps(dbMaps) + if len(dbReplaces) <= 0 { + envVal, ok := os.LookupEnv(UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL) + if ok && len(envVal) > 0 { + log.Info(fmt.Sprintf("the environment variable %s is active, skip loading the base schemas.", UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL)) + } else { + return nil, errors.Errorf("miss upstream table information at `start-ts`(%d) but the full backup path is not specified", rc.startTS) + } + } + } else { + log.Info("building table replaces from full backup storage") + dbReplaces, err = rc.generateDBReplacesFromFullBackupStorage(ctx, cfg, cipherInfo) } - } else { - dbReplaces = stream.FromSchemaMaps(dbMaps) } - for oldDBID, dbReplace := range dbReplaces { - log.Info("replace info", func() []zapcore.Field { + log.Info("built db replace info", zap.Int("dbReplaceInfoSize", len(dbReplaces))) + + for upstreamDbId, dbReplace := range dbReplaces { + log.Info("db replace info", func() []zapcore.Field { fields := make([]zapcore.Field, 0, (len(dbReplace.TableMap)+1)*3) fields = append(fields, zap.String("dbName", dbReplace.Name), - zap.Int64("oldID", oldDBID), - zap.Int64("newID", dbReplace.DbID)) - for oldTableID, tableReplace := range dbReplace.TableMap { + zap.Int64("upstreamId", upstreamDbId), + zap.Int64("downstreamId", dbReplace.DbID)) + for upstreamTableID, tableReplace := range dbReplace.TableMap { fields = append(fields, zap.String("table", tableReplace.Name), - zap.Int64("oldID", oldTableID), - zap.Int64("newID", tableReplace.TableID)) + zap.Int64("upstreamId", upstreamTableID), + zap.Int64("downstreamId", tableReplace.TableID)) } return fields }()...) 
} rp := stream.NewSchemasReplace( - dbReplaces, needConstructIdMap, cfg.TiFlashRecorder, rc.currentTS, cfg.TableFilter, rc.GenGlobalID, rc.GenGlobalIDs, - rc.RecordDeleteRange) + dbReplaces, needConstructIdMap, cfg.TiFlashRecorder, rc.currentTS, cfg.TableFilter, cfg.PiTRTableFilter, + rc.GenGlobalID, rc.GenGlobalIDs, rc.RecordDeleteRange) return rp, nil } @@ -857,29 +883,8 @@ func (rc *LogClient) RestoreMetaKVFiles( updateStats func(kvCount uint64, size uint64), progressInc func(), ) error { - filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) - filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) - - // The k-v events in default CF should be restored firstly. The reason is that: - // The error of transactions of meta could happen if restore write CF events successfully, - // but failed to restore default CF events. - for _, f := range files { - if f.Cf == stream.WriteCF { - filesInWriteCF = append(filesInWriteCF, f) - continue - } - if f.Type == backuppb.FileType_Delete { - // this should happen abnormally. - // only do some preventive checks here. - log.Warn("detected delete file of meta key, skip it", zap.Any("file", f)) - continue - } - if f.Cf == stream.DefaultCF { - filesInDefaultCF = append(filesInDefaultCF, f) - } - } - filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) - filesInWriteCF = SortMetaKVFiles(filesInWriteCF) + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := separateAndSortFilesByCF(files) failpoint.Inject("failed-before-id-maps-saved", func(_ failpoint.Value) { failpoint.Return(errors.New("failpoint: failed before id maps saved")) @@ -901,30 +906,102 @@ func (rc *LogClient) RestoreMetaKVFiles( return errors.Trace(err) } } + log.Info("################### before id map failpoint") failpoint.Inject("failed-after-id-maps-saved", func(_ failpoint.Value) { failpoint.Return(errors.New("failpoint: failed after id maps saved")) }) - + log.Info("################### after id map failpoint") // run the rewrite and restore meta-kv into TiKV cluster. - if err := RestoreMetaKVFilesWithBatchMethod( + restoreProcessor := RestoreProcessor{ + client: rc, + schemasReplace: schemasReplace, + updateStats: updateStats, + progressInc: progressInc, + } + // set to restoreKV status to actually restore kv to TiKV + schemasReplace.SetRestoreKVStatus() + if err := LoadAndProcessMetaKVFilesInBatch( ctx, filesInDefaultCF, filesInWriteCF, - schemasReplace, - updateStats, - progressInc, - rc.RestoreBatchMetaKVFiles, + &restoreProcessor, ); err != nil { return errors.Trace(err) } - // Update global schema version and report all of TiDBs. + // UpdateTable global schema version to trigger a diff-reload so every TiDB node in the cluster will get synced with + // the latest schema update. if err := rc.UpdateSchemaVersion(ctx); err != nil { return errors.Trace(err) } return nil } +// validateNoTiFlashReplica makes sure no table contains TiFlash replica +func (rc *LogClient) validateNoTiFlashReplica() error { + existTiFlashTable := false + rc.dom.InfoSchema().ListTablesWithSpecialAttribute(func(tableInfo *model.TableInfo) bool { + if tableInfo.TiFlashReplica != nil && tableInfo.TiFlashReplica.Count > 0 { + existTiFlashTable = true + } + return false + }) + if existTiFlashTable { + return errors.Errorf("exist table(s) have tiflash replica, please remove it before restore") + } + return nil +} + +// LoadMetaKVFilesAndBuildTableRenameInfo reads meta kv files from log backup external storage and build the table +// rename info mapping. 
+func (rc *LogClient) LoadMetaKVFilesAndBuildTableRenameInfo( + ctx context.Context, + files []*backuppb.DataFileInfo, +) (*stream.LogBackupTableHistory, error) { + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := separateAndSortFilesByCF(files) + + ddlProcessor := DDLCollector{ + client: rc, + tableRenameInfo: stream.NewTableRenameInfo(), + } + if err := LoadAndProcessMetaKVFilesInBatch( + ctx, + filesInDefaultCF, + filesInWriteCF, + &ddlProcessor, + ); err != nil { + return nil, errors.Trace(err) + } + return ddlProcessor.tableRenameInfo, nil +} + +// separateAndSortFilesByCF filters and sorts files by column family. +// It separates files into write CF and default CF groups and then sorts them within each CF group. +func separateAndSortFilesByCF(files []*backuppb.DataFileInfo) ([]*backuppb.DataFileInfo, []*backuppb.DataFileInfo) { + filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) + filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) + + for _, f := range files { + if f.Cf == consts.WriteCF { + filesInWriteCF = append(filesInWriteCF, f) + continue + } + if f.Type == backuppb.FileType_Delete { + log.Warn("detected delete file of meta key, skip it", zap.Any("file", f)) + continue + } + if f.Cf == consts.DefaultCF { + filesInDefaultCF = append(filesInDefaultCF, f) + } + } + + filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) + filesInWriteCF = SortMetaKVFiles(filesInWriteCF) + + return filesInDefaultCF, filesInWriteCF +} + // PreConstructAndSaveIDMap constructs id mapping and save it. func (rc *LogClient) PreConstructAndSaveIDMap( ctx context.Context, @@ -952,13 +1029,16 @@ func (rc *LogClient) constructIDMap( sr *stream.SchemasReplace, ) error { for _, f := range fs { - entries, _, err := rc.ReadAllEntries(ctx, f, math.MaxUint64) + entries, _, err := rc.ReadFilteredEntriesFromFiles(ctx, f, math.MaxUint64) if err != nil { return errors.Trace(err) } for _, entry := range entries { - if _, err := sr.RewriteKvEntry(&entry.E, f.GetCf()); err != nil { + // the id map building logic is coupled with rewrite logic + // so calling rewriteMetaKv will build the id map if needed + // TODO, decouple the two + if _, err := sr.RewriteMetaKvEntry(&entry.E, f.GetCf()); err != nil { return errors.Trace(err) } } @@ -966,23 +1046,13 @@ func (rc *LogClient) constructIDMap( return nil } -func RestoreMetaKVFilesWithBatchMethod( +// LoadAndProcessMetaKVFilesInBatch restores meta kv files to TiKV in strict TS order. It does so in batch and after +// success it triggers an update so every TiDB node can pick up the restored content. +func LoadAndProcessMetaKVFilesInBatch( ctx context.Context, defaultFiles []*backuppb.DataFileInfo, writeFiles []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - restoreBatch func( - ctx context.Context, - files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - kvEntries []*KvEntryWithTS, - filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*KvEntryWithTS, error), + processor BatchFileProcessor, ) error { // the average size of each KV is 2560 Bytes // kvEntries is kvs left by the previous batch @@ -999,8 +1069,6 @@ func RestoreMetaKVFilesWithBatchMethod( defaultKvEntries = make([]*KvEntryWithTS, 0) writeKvEntries = make([]*KvEntryWithTS, 0) ) - // Set restoreKV to SchemaReplace. 
-	schemasReplace.SetRestoreKVStatus()
 
 	for i, f := range defaultFiles {
 		if i == 0 {
@@ -1015,7 +1083,7 @@
 		} else {
 			// Either f.MinTS > rangeMax or f.MinTs is the filterTs we need.
 			// So it is ok to pass f.MinTs as filterTs.
-			defaultKvEntries, err = restoreBatch(ctx, defaultFiles[defaultIdx:i], schemasReplace, defaultKvEntries, f.MinTs, updateStats, progressInc, stream.DefaultCF)
+			defaultKvEntries, err = processor.processBatch(ctx, defaultFiles[defaultIdx:i], defaultKvEntries, f.MinTs, consts.DefaultCF)
 			if err != nil {
 				return errors.Trace(err)
 			}
@@ -1032,7 +1100,7 @@
 					break
 				}
 			}
-			writeKvEntries, err = restoreBatch(ctx, writeFiles[writeIdx:toWriteIdx], schemasReplace, writeKvEntries, f.MinTs, updateStats, progressInc, stream.WriteCF)
+			writeKvEntries, err = processor.processBatch(ctx, writeFiles[writeIdx:toWriteIdx], writeKvEntries, f.MinTs, consts.WriteCF)
 			if err != nil {
 				return errors.Trace(err)
 			}
@@ -1044,11 +1112,11 @@
 	// restore the left meta kv files and entries
 	// Notice: restoreBatch needs to realize the parameter `files` and `kvEntries` might be empty
 	// Assert: defaultIdx <= len(defaultFiles) && writeIdx <= len(writeFiles)
-	_, err = restoreBatch(ctx, defaultFiles[defaultIdx:], schemasReplace, defaultKvEntries, math.MaxUint64, updateStats, progressInc, stream.DefaultCF)
+	_, err = processor.processBatch(ctx, defaultFiles[defaultIdx:], defaultKvEntries, math.MaxUint64, consts.DefaultCF)
 	if err != nil {
 		return errors.Trace(err)
 	}
-	_, err = restoreBatch(ctx, writeFiles[writeIdx:], schemasReplace, writeKvEntries, math.MaxUint64, updateStats, progressInc, stream.WriteCF)
+	_, err = processor.processBatch(ctx, writeFiles[writeIdx:], writeKvEntries, math.MaxUint64, consts.WriteCF)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -1056,6 +1124,9 @@
 	return nil
 }
 
+// RestoreBatchMetaKVFiles restores and rewrites meta KVs from external storage into TiKV. It reads the entries
+// from the given files, restores only the ones within the filter range, and returns the entries outside the filter
+// range back to the caller for the next iteration of restore.
 func (rc *LogClient) RestoreBatchMetaKVFiles(
 	ctx context.Context,
 	files []*backuppb.DataFileInfo,
@@ -1066,10 +1137,33 @@
 	progressInc func(),
 	cf string,
 ) ([]*KvEntryWithTS, error) {
-	nextKvEntries := make([]*KvEntryWithTS, 0)
+	curSortedKvEntries, filteredOutKvEntries, err := rc.filterAndSortKvEntriesFromFiles(ctx, files, kvEntries, filterTS)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	// restore and rewrite these entries to TiKV with the rawPut() method.
+	kvCount, size, err := rc.restoreAndRewriteMetaKvEntries(ctx, schemasReplace, curSortedKvEntries, cf)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+
+	if schemasReplace.IsRestoreKVStatus() {
+		updateStats(kvCount, size)
+		for i := 0; i < len(files); i++ {
+			progressInc()
+		}
+	}
+	return filteredOutKvEntries, nil
+}
+
+func (rc *LogClient) filterAndSortKvEntriesFromFiles(
+	ctx context.Context,
+	files []*backuppb.DataFileInfo,
+	kvEntries []*KvEntryWithTS,
+	filterTS uint64,
+) ([]*KvEntryWithTS, []*KvEntryWithTS, error) {
+	filteredOutKvEntries := make([]*KvEntryWithTS, 0)
 	curKvEntries := make([]*KvEntryWithTS, 0)
 	if len(files) == 0 && len(kvEntries) == 0 {
-		return nextKvEntries, nil
+		return curKvEntries, filteredOutKvEntries, nil
 	}
 
 	// filter the kv from kvEntries again.
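As an illustration of the BatchFileProcessor contract driven by LoadAndProcessMetaKVFilesInBatch above, here is a hypothetical third implementation (not part of this patch; countingProcessor and its fields are illustrative names) that only counts entries per column family.

// countingProcessor is a hypothetical BatchFileProcessor that only counts meta KV
// entries per column family, e.g. for dry-run statistics. It follows the same
// contract as RestoreProcessor and DDLCollector: consume the entries whose TS is
// below filterTS and hand the rest back for the next batch.
type countingProcessor struct {
	client *LogClient
	counts map[string]int
}

func (cp *countingProcessor) processBatch(
	ctx context.Context,
	files []*backuppb.DataFileInfo,
	entries []*KvEntryWithTS,
	filterTS uint64,
	cf string,
) ([]*KvEntryWithTS, error) {
	curSorted, filteredOut, err := cp.client.filterAndSortKvEntriesFromFiles(ctx, files, entries, filterTS)
	if err != nil {
		return nil, errors.Trace(err)
	}
	cp.counts[cf] += len(curSorted)
	return filteredOut, nil
}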
@@ -1077,42 +1171,29 @@ func (rc *LogClient) RestoreBatchMetaKVFiles( if kv.Ts < filterTS { curKvEntries = append(curKvEntries, kv) } else { - nextKvEntries = append(nextKvEntries, kv) + filteredOutKvEntries = append(filteredOutKvEntries, kv) } } - // read all of entries from files. + // read all entries from files. for _, f := range files { - es, nextEs, err := rc.ReadAllEntries(ctx, f, filterTS) + es, filteredOutEs, err := rc.ReadFilteredEntriesFromFiles(ctx, f, filterTS) if err != nil { - return nextKvEntries, errors.Trace(err) + return nil, nil, errors.Trace(err) } curKvEntries = append(curKvEntries, es...) - nextKvEntries = append(nextKvEntries, nextEs...) + filteredOutKvEntries = append(filteredOutKvEntries, filteredOutEs...) } // sort these entries. slices.SortFunc(curKvEntries, func(i, j *KvEntryWithTS) int { return cmp.Compare(i.Ts, j.Ts) }) - - // restore these entries with rawPut() method. - kvCount, size, err := rc.restoreMetaKvEntries(ctx, schemasReplace, curKvEntries, cf) - if err != nil { - return nextKvEntries, errors.Trace(err) - } - - if schemasReplace.IsRestoreKVStatus() { - updateStats(kvCount, size) - for i := 0; i < len(files); i++ { - progressInc() - } - } - return nextKvEntries, nil + return curKvEntries, filteredOutKvEntries, nil } -func (rc *LogClient) restoreMetaKvEntries( +func (rc *LogClient) restoreAndRewriteMetaKvEntries( ctx context.Context, sr *stream.SchemasReplace, entries []*KvEntryWithTS, @@ -1126,10 +1207,10 @@ func (rc *LogClient) restoreMetaKvEntries( rc.rawKVClient.SetColumnFamily(columnFamily) for _, entry := range entries { - log.Debug("before rewrte entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), + log.Debug("before rewriting entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), zap.Int("value-len", len(entry.E.Value)), zap.ByteString("key", entry.E.Key)) - newEntry, err := sr.RewriteKvEntry(&entry.E, columnFamily) + newEntry, err := sr.RewriteMetaKvEntry(&entry.E, columnFamily) if err != nil { log.Error("rewrite txn entry failed", zap.Int("klen", len(entry.E.Key)), logutil.Key("txn-key", entry.E.Key)) @@ -1598,7 +1679,7 @@ func (rc *LogClient) saveIDMap( if rc.useCheckpoint { log.Info("save checkpoint task info with InLogRestoreAndIdMapPersist status") if err := checkpoint.SaveCheckpointProgress(ctx, rc.se, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }); err != nil { return errors.Trace(err) } diff --git a/br/pkg/restore/log_client/client_test.go b/br/pkg/restore/log_client/client_test.go index 0b1baf7af5675..142b2b0488b37 100644 --- a/br/pkg/restore/log_client/client_test.go +++ b/br/pkg/restore/log_client/client_test.go @@ -31,6 +31,7 @@ import ( logclient "github.com/pingcap/tidb/br/pkg/restore/log_client" "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/br/pkg/utiltest" "github.com/pingcap/tidb/pkg/domain" @@ -89,7 +90,7 @@ func TestDeleteRangeQueryExec(t *testing.T) { ctx := context.Background() m := mc g := gluetidb.New() - client := logclient.NewRestoreClient( + client := logclient.NewLogClient( utiltest.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(g, m.Storage) require.NoError(t, err) @@ -108,7 +109,7 @@ func TestDeleteRangeQuery(t *testing.T) { m := mc g := gluetidb.New() - client := 
logclient.NewRestoreClient( + client := logclient.NewLogClient( utiltest.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(g, m.Storage) require.NoError(t, err) @@ -141,11 +142,12 @@ func MockEmptySchemasReplace() *stream.SchemasReplace { nil, nil, nil, + nil, ) } func TestRestoreBatchMetaKVFiles(t *testing.T) { - client := logclient.NewRestoreClient(nil, nil, nil, keepalive.ClientParameters{}) + client := logclient.NewLogClient(nil, nil, nil, keepalive.ClientParameters{}) files := []*backuppb.DataFileInfo{} // test empty files and entries next, err := client.RestoreBatchMetaKVFiles(context.Background(), files[0:], nil, make([]*logclient.KvEntryWithTS, 0), math.MaxUint64, nil, nil, "") @@ -159,7 +161,7 @@ func TestRestoreMetaKVFilesWithBatchMethod1(t *testing.T) { batchCount := 0 sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), files_default, files_write, @@ -198,7 +200,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { batchCount := 0 sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), files_default, files_write, @@ -244,7 +246,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_1(t *testing.T) { batchCount := 0 sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), files_default, files_write, @@ -298,7 +300,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { batchCount := 0 sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), files_default, files_write, @@ -364,7 +366,7 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { batchCount := 0 sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), files_default, files_write, @@ -471,7 +473,7 @@ func TestRestoreMetaKVFilesWithBatchMethod3(t *testing.T) { resultKV := make(map[int]int) sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), defaultFiles, writeFiles, @@ -557,7 +559,7 @@ func TestRestoreMetaKVFilesWithBatchMethod4(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), defaultFiles, writeFiles, @@ -637,7 +639,7 @@ func TestRestoreMetaKVFilesWithBatchMethod5(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), defaultFiles, writeFiles, @@ -734,7 +736,7 @@ func TestRestoreMetaKVFilesWithBatchMethod6(t *testing.T) { resultKV := make(map[int]int) sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( + err := logclient.LoadAndProcessMetaKVFilesInBatch( context.Background(), defaultFiles, writeFiles, @@ -836,20 +838,20 @@ func TestApplyKVFilesWithSingelMethod(t *testing.T) { Path: "log3", NumberOfEntries: 5, 
Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, }, { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, }, } @@ -891,28 +893,28 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log1", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -920,7 +922,7 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log2", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -974,35 +976,35 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1010,7 +1012,7 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1065,28 +1067,28 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1094,7 +1096,7 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1148,35 +1150,35 @@ func TestApplyKVFilesWithBatchMethod4(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", 
NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, TableId: 2, }, @@ -1226,35 +1228,35 @@ func TestApplyKVFilesWithBatchMethod5(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, TableId: 2, }, @@ -1376,7 +1378,7 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.InitSchemaConfig{IsNewTask: false} + cfg := &logclient.InitSchemaConfig{IsNewRestoreTask: false} _, err := client.InitSchemasReplaceForDDL(ctx, cfg, nil) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [2, 1]", err.Error()) @@ -1384,7 +1386,7 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.InitSchemaConfig{IsNewTask: true} + cfg := &logclient.InitSchemaConfig{IsNewRestoreTask: true} _, err := client.InitSchemasReplaceForDDL(ctx, cfg, nil) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [1, 1]", err.Error()) @@ -1398,7 +1400,7 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { se, err := g.CreateSession(s.Mock.Storage) require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), se) - cfg := &logclient.InitSchemaConfig{IsNewTask: true} + cfg := &logclient.InitSchemaConfig{IsNewRestoreTask: true} _, err = client.InitSchemasReplaceForDDL(ctx, cfg, nil) require.Error(t, err) require.Contains(t, err.Error(), "miss upstream table information at `start-ts`(1) but the full backup path is not specified") @@ -1470,7 +1472,7 @@ func TestPITRIDMap(t *testing.T) { require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 3, nil, se) baseSchemaReplaces := &stream.SchemasReplace{ - DbMap: getDBMap(), + DbReplaceMap: getDBMap(), } err = client.TEST_saveIDMap(ctx, baseSchemaReplaces) require.NoError(t, err) @@ -1484,9 +1486,9 @@ func TestPITRIDMap(t *testing.T) { newSchemaReplaces, err = client.TEST_initSchemasMap(ctx, 2) require.NoError(t, err) - require.Equal(t, len(baseSchemaReplaces.DbMap), len(newSchemaReplaces)) + require.Equal(t, len(baseSchemaReplaces.DbReplaceMap), len(newSchemaReplaces)) for _, dbMap := range newSchemaReplaces { - baseDbMap := baseSchemaReplaces.DbMap[dbMap.IdMap.UpstreamId] + baseDbMap := baseSchemaReplaces.DbReplaceMap[dbMap.IdMap.UpstreamId] require.NotNil(t, baseDbMap) require.Equal(t, baseDbMap.DbID, dbMap.IdMap.DownstreamId) require.Equal(t, baseDbMap.Name, dbMap.Name) diff --git a/br/pkg/restore/log_client/export_test.go b/br/pkg/restore/log_client/export_test.go index 9a35b35e8eb57..353753febf48b 100644 --- 
a/br/pkg/restore/log_client/export_test.go +++ b/br/pkg/restore/log_client/export_test.go @@ -40,12 +40,12 @@ func (rc *LogClient) TEST_initSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { - return rc.initSchemasMap(ctx, restoreTS) + return rc.loadSchemasMap(ctx, restoreTS) } // readStreamMetaByTS is used for streaming task. collect all meta file by TS, it is for test usage. -func (rc *LogFileManager) ReadStreamMeta(ctx context.Context) ([]Meta, error) { - metas, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) ReadStreamMeta(ctx context.Context) ([]Meta, error) { + metas, err := lm.streamingMeta(ctx) if err != nil { return nil, err } diff --git a/br/pkg/restore/log_client/import.go b/br/pkg/restore/log_client/import.go index 138b89d2430a9..7fa745a20929c 100644 --- a/br/pkg/restore/log_client/import.go +++ b/br/pkg/restore/log_client/import.go @@ -35,9 +35,9 @@ import ( importclient "github.com/pingcap/tidb/br/pkg/restore/internal/import_client" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/kv" pd "github.com/tikv/pd/client" "go.uber.org/multierr" @@ -253,7 +253,7 @@ func (importer *LogFileImporter) downloadAndApplyKVFile( RangeLength: file.RangeLength, IsDelete: file.Type == backuppb.FileType_Delete, StartTs: func() uint64 { - if file.Cf == stream.DefaultCF { + if file.Cf == consts.DefaultCF { return shiftStartTS } return startTS diff --git a/br/pkg/restore/log_client/log_file_manager.go b/br/pkg/restore/log_client/log_file_manager.go index 81af10cf542b0..dff5c9303f915 100644 --- a/br/pkg/restore/log_client/log_file_manager.go +++ b/br/pkg/restore/log_client/log_file_manager.go @@ -18,6 +18,8 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" @@ -59,6 +61,7 @@ type streamMetadataHelper interface { encryptionInfo *encryptionpb.FileEncryptionInfo, ) ([]byte, error) ParseToMetadata(rawMetaData []byte) (*backuppb.Metadata, error) + Close() } // LogFileManager is the manager for log files of a certain restoration, @@ -114,25 +117,25 @@ func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFil return fm, nil } -func (rc *LogFileManager) ShiftTS() uint64 { - return rc.shiftStartTS +func (lm *LogFileManager) ShiftTS() uint64 { + return lm.shiftStartTS } -func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { +func (lm *LogFileManager) loadShiftTS(ctx context.Context) error { shiftTS := struct { sync.Mutex value uint64 exists bool }{} - err := stream.FastUnmarshalMetaData(ctx, rc.storage, rc.metadataDownloadBatchSize, func(path string, raw []byte) error { - m, err := rc.helper.ParseToMetadata(raw) + err := stream.FastUnmarshalMetaData(ctx, lm.storage, lm.metadataDownloadBatchSize, func(path string, raw []byte) error { + m, err := lm.helper.ParseToMetadata(raw) if err != nil { return err } log.Info("read meta from storage and parse", zap.String("path", path), zap.Uint64("min-ts", m.MinTs), zap.Uint64("max-ts", m.MaxTs), zap.Int32("meta-version", int32(m.MetaVersion))) - ts, 
ok := stream.UpdateShiftTS(m, rc.startTS, rc.restoreTS) + ts, ok := stream.UpdateShiftTS(m, lm.startTS, lm.restoreTS) shiftTS.Lock() if ok && (!shiftTS.exists || shiftTS.value > ts) { shiftTS.value = ts @@ -146,29 +149,29 @@ func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { return err } if !shiftTS.exists { - rc.shiftStartTS = rc.startTS + lm.shiftStartTS = lm.startTS return nil } - rc.shiftStartTS = shiftTS.value + lm.shiftStartTS = shiftTS.value return nil } -func (rc *LogFileManager) streamingMeta(ctx context.Context) (MetaIter, error) { - return rc.streamingMetaByTS(ctx, rc.restoreTS) +func (lm *LogFileManager) streamingMeta(ctx context.Context) (MetaIter, error) { + return lm.streamingMetaByTS(ctx, lm.restoreTS) } -func (rc *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaIter, error) { - it, err := rc.createMetaIterOver(ctx, rc.storage) +func (lm *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaIter, error) { + it, err := lm.createMetaIterOver(ctx, lm.storage) if err != nil { return nil, err } filtered := iter.FilterOut(it, func(metadata *backuppb.Metadata) bool { - return restoreTS < metadata.MinTs || metadata.MaxTs < rc.shiftStartTS + return restoreTS < metadata.MinTs || metadata.MaxTs < lm.shiftStartTS }) return filtered, nil } -func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaIter, error) { +func (lm *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaIter, error) { opt := &storage.WalkOption{SubDir: stream.GetStreamBackupMetaPrefix()} names := []string{} err := s.WalkDir(ctx, opt, func(path string, size int64) error { @@ -187,7 +190,7 @@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte if err != nil { return nil, errors.Annotatef(err, "failed during reading file %s", name) } - meta, err := rc.helper.ParseToMetadata(f) + meta, err := lm.helper.ParseToMetadata(f) if err != nil { return nil, errors.Annotatef(err, "failed to parse metadata of file %s", name) } @@ -196,11 +199,11 @@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte // TODO: maybe we need to be able to adjust the concurrency to download files, // which currently is the same as the chunk size reader := iter.Transform(namesIter, readMeta, - iter.WithChunkSize(rc.metadataDownloadBatchSize), iter.WithConcurrency(rc.metadataDownloadBatchSize)) + iter.WithChunkSize(lm.metadataDownloadBatchSize), iter.WithConcurrency(lm.metadataDownloadBatchSize)) return reader, nil } -func (rc *LogFileManager) FilterDataFiles(ms MetaIter) LogIter { +func (lm *LogFileManager) FilterDataFiles(ms MetaIter) LogIter { return iter.FlatMap(ms, func(m *backuppb.Metadata) LogIter { return iter.FlatMap(iter.Enumerate(iter.FromSlice(m.FileGroups)), func(gi iter.Indexed[*backuppb.DataFileGroup]) LogIter { return iter.Map( @@ -209,7 +212,7 @@ func (rc *LogFileManager) FilterDataFiles(ms MetaIter) LogIter { if m.MetaVersion > backuppb.MetaVersion_V1 { di.Item.Path = gi.Item.Path } - return di.Item.IsMeta || rc.ShouldFilterOut(di.Item) + return di.Item.IsMeta || lm.ShouldFilterOutByTs(di.Item) }), func(di iter.Indexed[*backuppb.DataFileInfo]) *LogDataFileInfo { return &LogDataFileInfo{ @@ -228,14 +231,14 @@ func (rc *LogFileManager) FilterDataFiles(ms MetaIter) LogIter { }) } -// ShouldFilterOut checks whether a file should be filtered out via the current client. 
-func (rc *LogFileManager) ShouldFilterOut(d *backuppb.DataFileInfo) bool { - return d.MinTs > rc.restoreTS || - (d.Cf == stream.WriteCF && d.MaxTs < rc.startTS) || - (d.Cf == stream.DefaultCF && d.MaxTs < rc.shiftStartTS) +// ShouldFilterOutByTs checks whether a file should be filtered out via the current client. +func (lm *LogFileManager) ShouldFilterOutByTs(d *backuppb.DataFileInfo) bool { + return d.MinTs > lm.restoreTS || + (d.Cf == consts.WriteCF && d.MaxTs < lm.startTS) || + (d.Cf == consts.DefaultCF && d.MaxTs < lm.shiftStartTS) } -func (rc *LogFileManager) collectDDLFilesAndPrepareCache( +func (lm *LogFileManager) collectDDLFilesAndPrepareCache( ctx context.Context, files MetaGroupIter, ) ([]Log, error) { @@ -246,7 +249,7 @@ func (rc *LogFileManager) collectDDLFilesAndPrepareCache( dataFileInfos := make([]*backuppb.DataFileInfo, 0) for _, g := range fs.Item { - rc.helper.InitCacheEntry(g.Path, len(g.FileMetas)) + lm.helper.InitCacheEntry(g.Path, len(g.FileMetas)) dataFileInfos = append(dataFileInfos, g.FileMetas...) } @@ -256,8 +259,8 @@ func (rc *LogFileManager) collectDDLFilesAndPrepareCache( // LoadDDLFilesAndCountDMLFiles loads all DDL files needs to be restored in the restoration. // At the same time, if the `counter` isn't nil, counting the DML file needs to be restored into `counter`. // This function returns all DDL files needing directly because we need sort all of them. -func (rc *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, counter *int) ([]Log, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, counter *int) ([]Log, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } @@ -265,31 +268,31 @@ func (rc *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context, coun m = iter.Tap(m, func(m Meta) { for _, fg := range m.FileGroups { for _, f := range fg.DataFilesInfo { - if !f.IsMeta && !rc.ShouldFilterOut(f) { + if !f.IsMeta && !lm.ShouldFilterOutByTs(f) { *counter += 1 } } } }) } - mg := rc.FilterMetaFiles(m) + mg := lm.FilterMetaFiles(m) - return rc.collectDDLFilesAndPrepareCache(ctx, mg) + return lm.collectDDLFilesAndPrepareCache(ctx, mg) } // LoadDMLFiles loads all DML files needs to be restored in the restoration. // This function returns a stream, because there are usually many DML files need to be restored. -func (rc *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } - mg := rc.FilterDataFiles(m) + mg := lm.FilterDataFiles(m) return mg, nil } -func (rc *LogFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { +func (lm *LogFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { return iter.FlatMap(ms, func(m Meta) MetaGroupIter { return iter.Map(iter.FromSlice(m.FileGroups), func(g *backuppb.DataFileGroup) DDLMetaGroup { metas := iter.FilterOut(iter.FromSlice(g.DataFilesInfo), func(d Log) bool { @@ -297,7 +300,7 @@ func (rc *LogFileManager) FilterMetaFiles(ms MetaIter) MetaGroupIter { if m.MetaVersion > backuppb.MetaVersion_V1 { d.Path = g.Path } - return !d.IsMeta || rc.ShouldFilterOut(d) + return !d.IsMeta || lm.ShouldFilterOutByTs(d) }) return DDLMetaGroup{ Path: g.Path, @@ -324,17 +327,17 @@ func getKeyTS(key []byte) (uint64, error) { return ts, err } -// ReadAllEntries loads content of a log file, with filtering out no needed entries. 
-func (rc *LogFileManager) ReadAllEntries( +// ReadFilteredEntriesFromFiles loads content of a log file from external storage, and filter out entries based on TS. +func (lm *LogFileManager) ReadFilteredEntriesFromFiles( ctx context.Context, file Log, filterTS uint64, ) ([]*KvEntryWithTS, []*KvEntryWithTS, error) { kvEntries := make([]*KvEntryWithTS, 0) - nextKvEntries := make([]*KvEntryWithTS, 0) + filteredOutKvEntries := make([]*KvEntryWithTS, 0) - buff, err := rc.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, - rc.storage, file.FileEncryptionInfo) + buff, err := lm.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, + lm.storage, file.FileEncryptionInfo) if err != nil { return nil, nil, errors.Trace(err) } @@ -353,7 +356,7 @@ func (rc *LogFileManager) ReadAllEntries( txnEntry := kv.Entry{Key: iter.Key(), Value: iter.Value()} - if !stream.MaybeDBOrDDLJobHistoryKey(txnEntry.Key) { + if !utils.IsDBOrDDLJobHistoryKey(txnEntry.Key) { // only restore mDB and mDDLHistory continue } @@ -365,11 +368,11 @@ func (rc *LogFileManager) ReadAllEntries( // The commitTs in write CF need be limited on [startTs, restoreTs]. // We can restore more key-value in default CF. - if ts > rc.restoreTS { + if ts > lm.restoreTS { continue - } else if file.Cf == stream.WriteCF && ts < rc.startTS { + } else if file.Cf == consts.WriteCF && ts < lm.startTS { continue - } else if file.Cf == stream.DefaultCF && ts < rc.shiftStartTS { + } else if file.Cf == consts.DefaultCF && ts < lm.shiftStartTS { continue } @@ -385,9 +388,15 @@ func (rc *LogFileManager) ReadAllEntries( if ts < filterTS { kvEntries = append(kvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } else { - nextKvEntries = append(nextKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) + filteredOutKvEntries = append(filteredOutKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } } - return kvEntries, nextKvEntries, nil + return kvEntries, filteredOutKvEntries, nil +} + +func (lm *LogFileManager) Close() { + if lm.helper != nil { + lm.helper.Close() + } } diff --git a/br/pkg/restore/log_client/log_file_manager_test.go b/br/pkg/restore/log_client/log_file_manager_test.go index 82fcf628d0139..bd3a0bb1e5774 100644 --- a/br/pkg/restore/log_client/log_file_manager_test.go +++ b/br/pkg/restore/log_client/log_file_manager_test.go @@ -618,7 +618,7 @@ func TestReadAllEntries(t *testing.T) { fm := logclient.TEST_NewLogFileManager(35, 75, 25, &logclient.FakeStreamMetadataHelper{Data: data}) { file.Cf = stream.WriteCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 37), @@ -631,7 +631,7 @@ func TestReadAllEntries(t *testing.T) { } { file.Cf = stream.DefaultCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 27), diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index 8e255c98743db..af5589171709d 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -183,7 +183,7 @@ func (rc *SnapClient) Close() { rc.closeConn() if err := rc.fileImporter.Close(); err != nil { - log.Warn("failed to close file improter") + log.Warn("failed to close file 
importer") } log.Info("Restore client closed") @@ -406,8 +406,8 @@ func makeDBPool(size uint, dbFactory func() (*tidallocdb.DB, error)) ([]*tidallo return dbPool, nil } -// Init create db connection and domain for storage. -func (rc *SnapClient) Init(g glue.Glue, store kv.Storage) error { +// InitConnections create db connection and domain for storage. +func (rc *SnapClient) InitConnections(g glue.Glue, store kv.Storage) error { // setDB must happen after set PolicyMode. // we will use policyMode to set session variables. var err error @@ -463,18 +463,18 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage return errors.Trace(err) } -func (rc *SnapClient) needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { +func needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { return !(backupMeta.IsRawKv || backupMeta.IsTxnKv) } -// InitBackupMeta loads schemas from BackupMeta to initialize RestoreClient. -func (rc *SnapClient) InitBackupMeta( +// LoadBackupMetaAndInitClients loads schemas from BackupMeta to initialize SnapClient. +func (rc *SnapClient) LoadBackupMetaAndInitClients( c context.Context, backupMeta *backuppb.BackupMeta, backend *backuppb.StorageBackend, reader *metautil.MetaReader, loadStats bool) error { - if rc.needLoadSchemas(backupMeta) { + if needLoadSchemas(backupMeta) { databases, err := metautil.LoadBackupTables(c, reader, loadStats) if err != nil { return errors.Trace(err) @@ -580,6 +580,15 @@ func (rc *SnapClient) GetDatabases() []*metautil.Database { return dbs } +// GetDatabaseMap returns all databases in a map indexed by db id +func (rc *SnapClient) GetDatabaseMap() map[int64]*metautil.Database { + dbMap := make(map[int64]*metautil.Database) + for _, db := range rc.databases { + dbMap[db.Info.ID] = db + } + return dbMap +} + // HasBackedUpSysDB whether we have backed up system tables // br backs system tables up since 5.1.0 func (rc *SnapClient) HasBackedUpSysDB() bool { diff --git a/br/pkg/restore/snap_client/client_test.go b/br/pkg/restore/snap_client/client_test.go index 380e4421b68fd..99e63852c0f18 100644 --- a/br/pkg/restore/snap_client/client_test.go +++ b/br/pkg/restore/snap_client/client_test.go @@ -49,7 +49,7 @@ func TestCreateTables(t *testing.T) { m := mc g := gluetidb.New() client := snapclient.NewRestoreClient(m.PDClient, m.PDHTTPCli, nil, utiltest.DefaultTestKeepaliveCfg) - err := client.Init(g, m.Storage) + err := client.InitConnections(g, m.Storage) require.NoError(t, err) info, err := m.Domain.GetSnapshotInfoSchema(math.MaxUint64) @@ -120,7 +120,7 @@ func TestNeedCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, utiltest.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // not set filter and first run with checkpoint @@ -150,7 +150,7 @@ func TestCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, utiltest.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -167,7 +167,7 @@ func TestCheckTargetClusterFreshWithTable(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, utiltest.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, 
cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -202,7 +202,7 @@ func TestInitFullClusterRestore(t *testing.T) { cluster := mc g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, utiltest.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // explicit filter diff --git a/br/pkg/stream/BUILD.bazel b/br/pkg/stream/BUILD.bazel index e72ef472d26f6..22336b87101d3 100644 --- a/br/pkg/stream/BUILD.bazel +++ b/br/pkg/stream/BUILD.bazel @@ -10,7 +10,7 @@ go_library( "stream_metas.go", "stream_mgr.go", "stream_status.go", - "util.go", + "table_history.go", ], importpath = "github.com/pingcap/tidb/br/pkg/stream", visibility = ["//visibility:public"], @@ -24,6 +24,8 @@ go_library( "//br/pkg/restore/tiflashrec", "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//pkg/ddl", "//pkg/kv", @@ -60,7 +62,6 @@ go_test( "search_test.go", "stream_metas_test.go", "stream_misc_test.go", - "util_test.go", ], embed = [":stream"], flaky = True, @@ -68,6 +69,8 @@ go_test( deps = [ "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//pkg/ddl", "//pkg/meta", "//pkg/meta/model", @@ -85,7 +88,6 @@ go_test( "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", - "@com_github_tikv_client_go_v2//oracle", "@org_golang_x_exp//maps", "@org_uber_go_zap//:zap", ], diff --git a/br/pkg/stream/meta_kv_test.go b/br/pkg/stream/meta_kv_test.go index 0ac5b54763022..9f64d69b3f5f6 100644 --- a/br/pkg/stream/meta_kv_test.go +++ b/br/pkg/stream/meta_kv_test.go @@ -6,18 +6,12 @@ import ( "bytes" "testing" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/meta" - "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) -func encodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { - k := tablecodec.EncodeMetaKey(key, field) - txnKey := codec.EncodeBytes(nil, k) - return codec.EncodeUintDesc(txnKey, ts) -} - func TestRawMetaKeyForDB(t *testing.T) { var ( dbID int64 = 1 @@ -25,7 +19,7 @@ func TestRawMetaKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - txnKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + txnKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) rawMetaKey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) @@ -44,7 +38,7 @@ func TestRawMetaKeyForTable(t *testing.T) { tableID int64 = 57 ts uint64 = 400036290571534337 ) - txnKey := encodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) + txnKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) rawMetakey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) diff --git a/br/pkg/stream/rewrite_meta_rawkv.go b/br/pkg/stream/rewrite_meta_rawkv.go index e0b7ac6252958..60740e10a416e 100644 --- a/br/pkg/stream/rewrite_meta_rawkv.go +++ b/br/pkg/stream/rewrite_meta_rawkv.go @@ -25,6 +25,8 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/restore/ingestrec" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta" @@ -33,12 +35,6 @@ import ( "go.uber.org/zap" ) -// Default columnFamily and write 
columnFamily -const ( - DefaultCF = "default" - WriteCF = "write" -) - type RewriteStatus int const ( @@ -49,7 +45,7 @@ const ( type UpstreamID = int64 type DownstreamID = int64 -// TableReplace specifies table information mapping from up-stream cluster to up-stream cluster. +// TableReplace specifies table information mapping from up-stream cluster to down-stream cluster. type TableReplace struct { Name string TableID DownstreamID @@ -57,17 +53,17 @@ type TableReplace struct { IndexMap map[UpstreamID]DownstreamID } -// DBReplace specifies database information mapping from up-stream cluster to up-stream cluster. +// DBReplace specifies database information mapping from up-stream cluster to down-stream cluster. type DBReplace struct { Name string DbID DownstreamID TableMap map[UpstreamID]*TableReplace } -// SchemasReplace specifies schemas information mapping from up-stream cluster to up-stream cluster. +// SchemasReplace specifies schemas information mapping from up-stream cluster to down-stream cluster. type SchemasReplace struct { status RewriteStatus - DbMap map[UpstreamID]*DBReplace + DbReplaceMap map[UpstreamID]*DBReplace globalTableIdMap map[UpstreamID]DownstreamID needConstructIdMap bool @@ -76,11 +72,15 @@ type SchemasReplace struct { TiflashRecorder *tiflashrec.TiFlashRecorder RewriteTS uint64 // used to rewrite commit ts in meta kv. TableFilter filter.Filter // used to filter schema/table + // generated during the snapshot restore phase; it contains the ids of all dbs and tables that need to be restored during log restore + // if PiTR doesn't run a snapshot restore (for example because the same task was restarted), this filter will be empty and + // all filtered table information is already piggybacked on dbReplaceMap and persisted in the cluster. + PiTRTableFilter *utils.PiTRTableFilter genGenGlobalID func(ctx context.Context) (int64, error) genGenGlobalIDs func(ctx context.Context, n int) ([]int64, error) - AfterTableRewritten func(deleted bool, tableInfo *model.TableInfo) + AfterTableRewrittenFunc func(deleted bool, tableInfo *model.TableInfo) } // NewTableReplace creates a TableReplace struct. @@ -104,17 +104,18 @@ func NewDBReplace(name string, newID DownstreamID) *DBReplace { // NewSchemasReplace creates a SchemasReplace struct. func NewSchemasReplace( - dbMap map[UpstreamID]*DBReplace, + dbReplaceMap map[UpstreamID]*DBReplace, needConstructIdMap bool, tiflashRecorder *tiflashrec.TiFlashRecorder, restoreTS uint64, tableFilter filter.Filter, + piTRTableFilter *utils.PiTRTableFilter, genID func(ctx context.Context) (int64, error), genIDs func(ctx context.Context, n int) ([]int64, error), recordDeleteRange func(*PreDelRangeQuery), ) *SchemasReplace { globalTableIdMap := make(map[UpstreamID]DownstreamID) - for _, dr := range dbMap { + for _, dr := range dbReplaceMap { for tblID, tr := range dr.TableMap { globalTableIdMap[tblID] = tr.TableID for oldpID, newpID := range tr.PartitionMap { @@ -124,7 +125,7 @@ func NewSchemasReplace( } return &SchemasReplace{ - DbMap: dbMap, + DbReplaceMap: dbReplaceMap, globalTableIdMap: globalTableIdMap, needConstructIdMap: needConstructIdMap, delRangeRecorder: newDelRangeExecWrapper(globalTableIdMap, recordDeleteRange), @@ -132,6 +133,7 @@ func NewSchemasReplace( TiflashRecorder: tiflashRecorder, RewriteTS: restoreTS, TableFilter: tableFilter, + PiTRTableFilter: piTRTableFilter, genGenGlobalID: genID, genGenGlobalIDs: genIDs, } @@ -143,9 +145,9 @@ func (sr *SchemasReplace) NeedConstructIdMap() bool { // TidySchemaMaps produces schemas id maps from up-stream to down-stream. 
func (sr *SchemasReplace) TidySchemaMaps() []*backuppb.PitrDBMap { - dbMaps := make([]*backuppb.PitrDBMap, 0, len(sr.DbMap)) + dbMaps := make([]*backuppb.PitrDBMap, 0, len(sr.DbReplaceMap)) - for dbID, dr := range sr.DbMap { + for dbID, dr := range sr.DbReplaceMap { dbm := backuppb.PitrDBMap{ Name: dr.Name, IdMap: &backuppb.IDMap{ @@ -231,24 +233,24 @@ func (sr *SchemasReplace) rewriteKeyForDB(key []byte, cf string) ([]byte, error) } if sr.IsPreConsturctMapStatus() { - if _, exist := sr.DbMap[dbID]; !exist { + if _, exist := sr.DbReplaceMap[dbID]; !exist { newID, err := sr.genGenGlobalID(context.Background()) if err != nil { return nil, errors.Trace(err) } - sr.DbMap[dbID] = NewDBReplace("", newID) + sr.DbReplaceMap[dbID] = NewDBReplace("", newID) sr.globalTableIdMap[dbID] = newID } return nil, nil } - dbMap, exist := sr.DbMap[dbID] + dbMap, exist := sr.DbReplaceMap[dbID] if !exist { return nil, errors.Annotatef(berrors.ErrInvalidArgument, "failed to find id:%v in maps", dbID) } rawMetaKey.UpdateField(meta.DBkey(dbMap.DbID)) - if cf == WriteCF { + if cf == consts.WriteCF { rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -260,20 +262,28 @@ func (sr *SchemasReplace) rewriteDBInfo(value []byte) ([]byte, error) { return nil, errors.Trace(err) } + shouldProcess, err := sr.shouldProcessDB(dbInfo.ID) + if err != nil { + return nil, errors.Trace(err) + } + if !shouldProcess { + return nil, nil + } + if sr.IsPreConsturctMapStatus() { - if dr, exist := sr.DbMap[dbInfo.ID]; !exist { + if dr, exist := sr.DbReplaceMap[dbInfo.ID]; !exist { newID, err := sr.genGenGlobalID(context.Background()) if err != nil { return nil, errors.Trace(err) } - sr.DbMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) + sr.DbReplaceMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) } else { dr.Name = dbInfo.Name.O } return nil, nil } - dbMap, exist := sr.DbMap[dbInfo.ID] + dbMap, exist := sr.DbReplaceMap[dbInfo.ID] if !exist { return nil, errors.Annotatef(berrors.ErrInvalidArgument, "failed to find id:%v in maps", dbInfo.ID) } @@ -299,6 +309,11 @@ func (sr *SchemasReplace) rewriteEntryForDB(e *kv.Entry, cf string) (*kv.Entry, } newValue := r.NewValue + // DB is filtered out thus returning nil value + if newValue == nil { + return nil, nil + } + newKey, err := sr.rewriteKeyForDB(e.Key, cf) if err != nil { return nil, errors.Trace(err) @@ -340,7 +355,17 @@ func (sr *SchemasReplace) rewriteKeyForTable( return nil, errors.Trace(err) } - dbReplace, exist := sr.DbMap[dbID] + shouldProcess, err := sr.shouldProcessTable(dbID, tableID) + log.Info("############# should process table for table key", zap.Int64("tableID", tableID), zap.Bool("shouldProcess", shouldProcess)) + if err != nil { + return nil, errors.Trace(err) + } + if !shouldProcess { + return nil, nil + } + + log.Info("############# processing", zap.Int64("tableID", tableID)) + dbReplace, exist := sr.DbReplaceMap[dbID] if !exist { if !sr.IsPreConsturctMapStatus() { return nil, errors.Annotatef(berrors.ErrInvalidArgument, "failed to find id:%v in maps", dbID) @@ -350,7 +375,7 @@ func (sr *SchemasReplace) rewriteKeyForTable( return nil, errors.Trace(err) } dbReplace = NewDBReplace("", newID) - sr.DbMap[dbID] = dbReplace + sr.DbReplaceMap[dbID] = dbReplace } tableReplace, exist := dbReplace.TableMap[tableID] @@ -375,10 +400,11 @@ func (sr *SchemasReplace) rewriteKeyForTable( if sr.IsPreConsturctMapStatus() { return nil, nil } - + log.Info("############## rewritting table db", zap.Int64("dbId", dbID), zap.Int64("to", dbReplace.DbID)) + 
log.Info("############## rewritting table", zap.Int64("tableID", tableID), zap.Int64("to", tableReplace.TableID)) rawMetaKey.UpdateKey(meta.DBkey(dbReplace.DbID)) rawMetaKey.UpdateField(encodeField(tableReplace.TableID)) - if cf == WriteCF { + if cf == consts.WriteCF { rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -396,8 +422,18 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er return nil, errors.Trace(err) } + shouldProcess, err := sr.shouldProcessTable(dbID, tableInfo.ID) + log.Info("############# should process table for table key", zap.Int64("dbID", dbID), zap.Int64("tableID", tableInfo.ID), zap.Bool("shouldProcess", shouldProcess)) + + if err != nil { + return nil, errors.Trace(err) + } + if !shouldProcess { + return nil, nil + } + log.Info("############# processing ", zap.Int64("tableID", tableInfo.ID)) // construct or find the id map. - dbReplace, exist = sr.DbMap[dbID] + dbReplace, exist = sr.DbReplaceMap[dbID] if !exist { if sr.IsRestoreKVStatus() { return nil, errors.Annotatef(berrors.ErrInvalidArgument, "failed to find id:%v in maps", dbID) @@ -408,7 +444,7 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er return nil, errors.Trace(err) } dbReplace = NewDBReplace("", newID) - sr.DbMap[dbID] = dbReplace + sr.DbReplaceMap[dbID] = dbReplace } tableReplace, exist = dbReplace.TableMap[tableInfo.ID] @@ -465,8 +501,8 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er if tableInfo.TTLInfo != nil { tableInfo.TTLInfo.Enable = false } - if sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(false, &tableInfo) + if sr.AfterTableRewrittenFunc != nil { + sr.AfterTableRewrittenFunc(false, &tableInfo) } // marshal to json @@ -494,6 +530,12 @@ func (sr *SchemasReplace) rewriteEntryForTable(e *kv.Entry, cf string) (*kv.Entr return nil, errors.Trace(err) } + // entry is filtered out, no need to process + if result.NewValue == nil { + log.Info("############## filtered out", zap.Any("db id", dbID)) + return nil, nil + } + var newTableID int64 = 0 newKey, err := sr.rewriteKeyForTable(e.Key, cf, meta.ParseTableKey, func(tableID int64) []byte { newTableID = tableID @@ -510,8 +552,8 @@ func (sr *SchemasReplace) rewriteEntryForTable(e *kv.Entry, cf string) (*kv.Entr // for now, we rewrite key and value separately hence we cannot // get a view of (is_delete, table_id, table_info) at the same time :(. // Maybe we can extract the rewrite part from rewriteTableInfo. 
- if result.Deleted && sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(true, &model.TableInfo{ID: newTableID}) + if result.Deleted && sr.AfterTableRewrittenFunc != nil { + sr.AfterTableRewrittenFunc(true, &model.TableInfo{ID: newTableID}) } return &kv.Entry{Key: newKey, Value: result.NewValue}, nil @@ -542,6 +584,11 @@ func (sr *SchemasReplace) rewriteEntryForAutoTableIDKey(e *kv.Entry, cf string) return nil, errors.Trace(err) } + // entry is filtered out + if newKey == nil { + return nil, nil + } + return &kv.Entry{Key: newKey, Value: e.Value}, nil } @@ -556,6 +603,11 @@ func (sr *SchemasReplace) rewriteEntryForSequenceKey(e *kv.Entry, cf string) (*k return nil, errors.Trace(err) } + // entry is filtered out + if newKey == nil { + return nil, nil + } + return &kv.Entry{Key: newKey, Value: e.Value}, nil } @@ -570,6 +622,11 @@ func (sr *SchemasReplace) rewriteEntryForAutoRandomTableIDKey(e *kv.Entry, cf st return nil, errors.Trace(err) } + // entry is filtered out + if newKey == nil { + return nil, nil + } + return &kv.Entry{Key: newKey, Value: e.Value}, nil } @@ -579,10 +636,10 @@ type rewriteResult struct { } // rewriteValue rewrite the value if cf is "default", or rewrite the shortValue if cf is "write". -func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]byte) ([]byte, error)) (rewriteResult, error) { +func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewriteFunc func([]byte) ([]byte, error)) (rewriteResult, error) { switch cf { - case DefaultCF: - newValue, err := rewrite(value) + case consts.DefaultCF: + newValue, err := rewriteFunc(value) if err != nil { return rewriteResult{}, errors.Trace(err) } @@ -590,7 +647,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b NewValue: newValue, Deleted: false, }, nil - case WriteCF: + case consts.WriteCF: rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(value); err != nil { return rewriteResult{}, errors.Trace(err) @@ -614,7 +671,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b }, nil } - shortValue, err := rewrite(rawWriteCFValue.GetShortValue()) + shortValue, err := rewriteFunc(rawWriteCFValue.GetShortValue()) if err != nil { log.Info("failed to rewrite short value", zap.ByteString("write-type", []byte{rawWriteCFValue.GetWriteType()}), @@ -633,11 +690,13 @@ func (sr *SchemasReplace) GetIngestRecorder() *ingestrec.IngestRecorder { return sr.ingestRecorder } -// RewriteKvEntry uses to rewrite tableID/dbID in entry.key and entry.value -func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { - // skip mDDLJob - if !IsMetaDBKey(e.Key) { - if sr.IsRestoreKVStatus() && cf == DefaultCF && IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory +// RewriteMetaKvEntry uses to rewrite tableID/dbID in entry.key and entry.value +// TODO: decouple rewrite with build id map functionality +func (sr *SchemasReplace) RewriteMetaKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { + if !utils.IsMetaDBKey(e.Key) { + // need to special handle ddl job history during actual restore phase. The job history contains index ingestion + // and range deletion that need to be handled separately after restore. 
+ if sr.IsRestoreKVStatus() && cf == consts.DefaultCF && utils.IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory job := &model.Job{} if err := job.Decode(e.Value); err != nil { log.Debug("failed to decode the job", @@ -647,7 +706,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err return nil, nil } - return nil, sr.restoreFromHistory(job) + return nil, sr.processIngestIndexAndDeleteRangeFromJob(job) } return nil, nil } @@ -662,6 +721,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err } else if !meta.IsDBkey(rawKey.Key) { return nil, nil } + if meta.IsTableKey(rawKey.Field) { return sr.rewriteEntryForTable(e, cf) } else if meta.IsAutoIncrementIDKey(rawKey.Field) { @@ -691,7 +751,10 @@ func (sr *SchemasReplace) tryRecordIngestIndex(job *model.Job) error { return nil } -func (sr *SchemasReplace) restoreFromHistory(job *model.Job) error { +// processIngestIndexAndDeleteRangeFromJob handles two special cases during log backup meta key replay. +// 1. index ingest is not captured by the log backup, thus we need to restore them manually later +// 2. delete range also needs to be handled to clean up dropped table since it was previously relying on GC to clean it up +func (sr *SchemasReplace) processIngestIndexAndDeleteRangeFromJob(job *model.Job) error { if ddl.JobNeedGC(job) { if err := ddl.AddDelRangeJobInternal(context.TODO(), sr.delRangeRecorder, job); err != nil { return err @@ -701,6 +764,36 @@ func (sr *SchemasReplace) restoreFromHistory(job *model.Job) error { return sr.tryRecordIngestIndex(job) } +func (sr *SchemasReplace) shouldProcessDB(dbId int64) (bool, error) { + if sr.IsPreConsturctMapStatus() { + if sr.PiTRTableFilter == nil { + //return false, errors.Annotate(berrors.ErrRestoreInvalidRewrite, "expecting pitr table filter but got none") + return true, nil + } + return sr.PiTRTableFilter.ContainsDB(dbId), nil + } else { + _, ok := sr.DbReplaceMap[dbId] + return ok, nil + } +} + +func (sr *SchemasReplace) shouldProcessTable(dbId, tableId int64) (bool, error) { + if sr.IsPreConsturctMapStatus() { + if sr.PiTRTableFilter == nil { + //return false, errors.Annotate(berrors.ErrRestoreInvalidRewrite, "expecting pitr table filter but got none") + return true, nil + } + return sr.PiTRTableFilter.ContainsTable(dbId, tableId), nil + } else { + tableReplace, ok := sr.DbReplaceMap[dbId] + if !ok { + return false, nil + } + _, ok = tableReplace.TableMap[tableId] + return ok, nil + } +} + type DelRangeParams struct { JobID int64 ElemID int64 diff --git a/br/pkg/stream/rewrite_meta_rawkv_test.go b/br/pkg/stream/rewrite_meta_rawkv_test.go index fa8e1cf84bb2b..5dd143617b599 100644 --- a/br/pkg/stream/rewrite_meta_rawkv_test.go +++ b/br/pkg/stream/rewrite_meta_rawkv_test.go @@ -8,6 +8,8 @@ import ( "encoding/json" "testing" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" @@ -40,6 +42,7 @@ func MockEmptySchemasReplace(midr *mockInsertDeleteRange, dbMap map[UpstreamID]* nil, 9527, filter.All(), + nil, mockGenGenGlobalID, nil, midr.mockRecordDeleteRange, @@ -84,7 +87,7 @@ func TestTidySchemaMaps(t *testing.T) { drs[oldDBID] = dr // create schemas replace and test TidySchemaMaps(). 
- sr := NewSchemasReplace(drs, true, nil, 0, filter.All(), nil, nil, nil) + sr := NewSchemasReplace(drs, true, nil, 0, filter.All(), nil, nil, nil, nil) globalTableIdMap := sr.globalTableIdMap require.Equal(t, len(globalTableIdMap), 3) require.Equal(t, globalTableIdMap[oldTblID], newTblID) @@ -128,22 +131,22 @@ func TestRewriteKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - encodedKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + encodedKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) // create schemasReplace. sr := MockEmptySchemasReplace(nil, nil) // preConstruct Map information. sr.SetPreConstructMapStatus() - newKey, err := sr.rewriteKeyForDB(encodedKey, WriteCF) + newKey, err := sr.rewriteKeyForDB(encodedKey, consts.WriteCF) require.Nil(t, err) require.Nil(t, newKey) - require.Equal(t, len(sr.DbMap[dbID].TableMap), 0) - downID := sr.DbMap[dbID].DbID + require.Equal(t, len(sr.DbReplaceMap[dbID].TableMap), 0) + downID := sr.DbReplaceMap[dbID].DbID // set restoreKV status and rewrite it. sr.SetRestoreKVStatus() - newKey, err = sr.rewriteKeyForDB(encodedKey, DefaultCF) + newKey, err = sr.rewriteKeyForDB(encodedKey, consts.DefaultCF) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -153,7 +156,7 @@ func TestRewriteKeyForDB(t *testing.T) { require.Equal(t, newDBID, downID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForDB(encodedKey, WriteCF) + newKey, err = sr.rewriteKeyForDB(encodedKey, consts.WriteCF) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -186,7 +189,7 @@ func TestRewriteDBInfo(t *testing.T) { newValue, err := sr.rewriteDBInfo(value) require.Nil(t, err) require.Nil(t, newValue) - dr := sr.DbMap[dbID] + dr := sr.DbReplaceMap[dbID] require.Equal(t, dr.Name, dbName) // set restoreKV status and rewrite it. @@ -195,16 +198,16 @@ func TestRewriteDBInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) // rewrite agagin, and get the same result. - newId := sr.DbMap[dbID].DbID + newId := sr.DbReplaceMap[dbID].DbID newValue, err = sr.rewriteDBInfo(value) require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) - require.Equal(t, newId, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) + require.Equal(t, newId, sr.DbReplaceMap[dbID].DbID) } func TestRewriteKeyForTable(t *testing.T) { @@ -240,23 +243,23 @@ func TestRewriteKeyForTable(t *testing.T) { } for _, ca := range cases { - encodedKey := encodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) + encodedKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) // create schemasReplace. sr := MockEmptySchemasReplace(nil, nil) // set preConstruct status and construct map information. 
sr.SetPreConstructMapStatus() - newKey, err := sr.rewriteKeyForTable(encodedKey, WriteCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err := sr.rewriteKeyForTable(encodedKey, consts.WriteCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) require.Nil(t, newKey) - require.Equal(t, len(sr.DbMap), 1) - require.Equal(t, len(sr.DbMap[dbID].TableMap), 1) - downStreamDbID := sr.DbMap[dbID].DbID - downStreamTblID := sr.DbMap[dbID].TableMap[tableID].TableID + require.Equal(t, len(sr.DbReplaceMap), 1) + require.Equal(t, len(sr.DbReplaceMap[dbID].TableMap), 1) + downStreamDbID := sr.DbReplaceMap[dbID].DbID + downStreamTblID := sr.DbReplaceMap[dbID].TableMap[tableID].TableID // set restoreKV status and rewrite it. sr.SetRestoreKVStatus() - newKey, err = sr.rewriteKeyForTable(encodedKey, DefaultCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err = sr.rewriteKeyForTable(encodedKey, consts.DefaultCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -270,7 +273,7 @@ func TestRewriteKeyForTable(t *testing.T) { require.Equal(t, newTblID, downStreamTblID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForTable(encodedKey, WriteCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err = sr.rewriteKeyForTable(encodedKey, consts.WriteCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -299,7 +302,7 @@ func TestRewriteTableInfo(t *testing.T) { // create schemasReplace. sr := MockEmptySchemasReplace(nil, nil) tableCount := 0 - sr.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { + sr.AfterTableRewrittenFunc = func(deleted bool, tableInfo *model.TableInfo) { tableCount++ tableInfo.TiFlashReplica = &model.TiFlashReplicaInfo{ Count: 1, @@ -323,17 +326,17 @@ func TestRewriteTableInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableInfo.TiFlashReplica.Count, 1) // rewrite it again and get the same result. 
- newID := sr.DbMap[dbId].TableMap[tableID].TableID + newID := sr.DbReplaceMap[dbId].TableMap[tableID].TableID newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) - require.Equal(t, newID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) + require.Equal(t, newID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableCount, 2) } @@ -388,11 +391,11 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal( t, @@ -402,7 +405,7 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal( t, @@ -411,8 +414,8 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { ) // rewrite it aggin, and get the same result. - newID1 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID] - newID2 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID] + newID1 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID] + newID2 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID] newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) @@ -422,13 +425,13 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal(t, tableInfo.Partition.Definitions[0].ID, newID1) require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal(t, tableInfo.Partition.Definitions[1].ID, newID2) } @@ -494,6 +497,7 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { nil, 0, filter.All(), + nil, mockGenGenGlobalID, nil, nil, @@ -573,7 +577,7 @@ func TestRewriteTableInfoForTTLTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.NotNil(t, tableInfo.TTLInfo) require.Equal(t, colName, tableInfo.TTLInfo.ColumnName.O) require.Equal(t, "1", tableInfo.TTLInfo.IntervalExprStr) @@ -802,7 +806,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { var qargs *PreDelRangeQuery // drop schema - err := schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewTableIDSet)) @@ -812,7 +816,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop table0 - err = 
schemaReplace.restoreFromHistory(dropTable0Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)) @@ -825,42 +829,42 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable0NewID)) // drop table1 - err = schemaReplace.restoreFromHistory(dropTable1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable1NewID)) // drop table partition1 - err = schemaReplace.restoreFromHistory(dropTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobPartition1NewID)) // reorganize table partition1 - err = schemaReplace.restoreFromHistory(reorganizeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(reorganizeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // remove table partition1 - err = schemaReplace.restoreFromHistory(removeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(removeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // alter table partition1 - err = schemaReplace.restoreFromHistory(alterTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(alterTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // roll back add index for table0 - err = schemaReplace.restoreFromHistory(rollBackTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable0IndexJob) require.NoError(t, err) oldPartitionIDMap := make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -881,7 +885,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // roll back add index for table1 - err = schemaReplace.restoreFromHistory(rollBackTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 2) @@ -891,7 +895,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(tablecodec.TempIndexPrefix|2)), qargs.ParamsList[1].StartKey) // drop index for table0 - err = schemaReplace.restoreFromHistory(dropTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -905,14 +909,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop index for table1 - err = schemaReplace.restoreFromHistory(dropTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1IndexJob) 
require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(2)), qargs.ParamsList[0].StartKey) // add index for table 0 - err = schemaReplace.restoreFromHistory(addTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -926,14 +930,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // add index for table 1 - err = schemaReplace.restoreFromHistory(addTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, tempIndex2), qargs.ParamsList[0].StartKey) // drop column for table0 - err = schemaReplace.restoreFromHistory(dropTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -954,7 +958,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop column for table1 - err = schemaReplace.restoreFromHistory(dropTable1ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -964,7 +968,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // modify column for table0 - err = schemaReplace.restoreFromHistory(modifyTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -985,7 +989,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // modify column for table1 - err = schemaReplace.restoreFromHistory(modifyTable1ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -995,7 +999,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // drop indexes(multi-schema-change) for table0 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob0) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob0) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for l := 0; l < 2; l++ { @@ -1011,7 +1015,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop indexes(multi-schema-change) for table1 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob1) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob1) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) @@ -1049,7 +1053,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { }) var qargs *PreDelRangeQuery // drop schema - err := schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 
len(mDDLJobALLNewTableIDSet)) @@ -1067,7 +1071,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { schemaReplace = MockEmptySchemasReplace(midr, map[int64]*DBReplace{ mDDLJobDBOldID: dbReplace, }) - err = schemaReplace.restoreFromHistory(dropSchemaJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)+1) diff --git a/br/pkg/stream/search.go b/br/pkg/stream/search.go index 7cf940a42f135..ea3664739576c 100644 --- a/br/pkg/stream/search.go +++ b/br/pkg/stream/search.go @@ -16,6 +16,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" @@ -193,9 +194,9 @@ func (s *StreamBackupSearch) Search(ctx context.Context) ([]*StreamKVInfo, error writeCFEntries := make(map[string]*StreamKVInfo, 64) for entry := range entriesCh { - if entry.CFName == WriteCF { + if entry.CFName == consts.WriteCF { writeCFEntries[entry.EncodedKey] = entry - } else if entry.CFName == DefaultCF { + } else if entry.CFName == consts.DefaultCF { defaultCFEntries[entry.EncodedKey] = entry } } @@ -241,7 +242,7 @@ func (s *StreamBackupSearch) searchFromDataFile( return errors.Annotatef(err, "decode raw key error, file: %s", dataFile.Path) } - if dataFile.Cf == WriteCF { + if dataFile.Cf == consts.WriteCF { rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(v); err != nil { return errors.Annotatef(err, "parse raw write cf value error, file: %s", dataFile.Path) @@ -262,7 +263,7 @@ func (s *StreamBackupSearch) searchFromDataFile( ShortValue: valueStr, } ch <- kvInfo - } else if dataFile.Cf == DefaultCF { + } else if dataFile.Cf == consts.DefaultCF { kvInfo := &StreamKVInfo{ CFName: dataFile.Cf, StartTs: ts, diff --git a/br/pkg/stream/search_test.go b/br/pkg/stream/search_test.go index 224beb5ac7403..2cf3b74b8efe0 100644 --- a/br/pkg/stream/search_test.go +++ b/br/pkg/stream/search_test.go @@ -13,6 +13,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -121,7 +122,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w defaultCFCheckSum := sha256.Sum256(defaultCFBuf.Bytes()) defaultCFDataFile = &backuppb.DataFileInfo{ Path: defaultCFFile, - Cf: DefaultCF, + Cf: consts.DefaultCF, Sha256: defaultCFCheckSum[:], } @@ -135,7 +136,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w writeCFCheckSum := sha256.Sum256(writeCFBuf.Bytes()) writeCFDataFile = &backuppb.DataFileInfo{ Path: writeCFFile, - Cf: WriteCF, + Cf: consts.WriteCF, Sha256: writeCFCheckSum[:], } @@ -178,7 +179,7 @@ func TestMergeCFEntries(t *testing.T) { Key: hex.EncodeToString([]byte(defaultCF.key)), EncodedKey: encodedKey, StartTs: uint64(defaultCF.startTs), - CFName: DefaultCF, + CFName: consts.DefaultCF, Value: defaultCF.val, } } @@ -189,7 +190,7 @@ func TestMergeCFEntries(t *testing.T) { EncodedKey: encodedKey, StartTs: uint64(writeCF.startTs), CommitTs: uint64(writeCF.commitTS), - CFName: WriteCF, + CFName: consts.WriteCF, Value: writeCF.val, } } diff --git a/br/pkg/stream/stream_metas.go b/br/pkg/stream/stream_metas.go index 
b51923a9638c1..4ab9bf2648045 100644 --- a/br/pkg/stream/stream_metas.go +++ b/br/pkg/stream/stream_metas.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/mathutil" @@ -285,7 +286,7 @@ func UpdateShiftTS(m *pb.Metadata, startTS uint64, restoreTS uint64) (uint64, bo for _, ds := range m.FileGroups { for _, d := range ds.DataFilesInfo { - if d.Cf == DefaultCF || d.MinBeginTsInDefaultCf == 0 { + if d.Cf == consts.DefaultCF || d.MinBeginTsInDefaultCf == 0 { continue } if d.MinTs > restoreTS || d.MaxTs < startTS { diff --git a/br/pkg/stream/stream_metas_test.go b/br/pkg/stream/stream_metas_test.go index c6055459a26c0..840b220772588 100644 --- a/br/pkg/stream/stream_metas_test.go +++ b/br/pkg/stream/stream_metas_test.go @@ -21,6 +21,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + . "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/intest" "github.com/stretchr/testify/require" "go.uber.org/zap" diff --git a/br/pkg/stream/stream_mgr.go b/br/pkg/stream/stream_mgr.go index a9dffb23f017d..1d1add48fbdaa 100644 --- a/br/pkg/stream/stream_mgr.go +++ b/br/pkg/stream/stream_mgr.go @@ -356,6 +356,15 @@ func (*MetadataHelper) Marshal(meta *backuppb.Metadata) ([]byte, error) { return meta.Marshal() } +func (m *MetadataHelper) Close() { + if m.decoder != nil { + m.decoder.Close() + } + if m.encryptionManager != nil { + m.encryptionManager.Close() + } +} + // FastUnmarshalMetaData used a 128 worker pool to speed up // read metadata content from external_storage. func FastUnmarshalMetaData( diff --git a/br/pkg/stream/stream_status.go b/br/pkg/stream/stream_status.go index 10e916cbd6578..f18f3d1c055bd 100644 --- a/br/pkg/stream/stream_status.go +++ b/br/pkg/stream/stream_status.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" . 
"github.com/pingcap/tidb/br/pkg/streamhelper" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -120,9 +121,9 @@ func (p *printByTable) AddTask(task TaskStatus) { table := p.console.CreateTable() table.Add("name", task.Info.Name) table.Add("status", task.colorfulStatusString()) - table.Add("start", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) + table.Add("start", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) if task.Info.EndTs > 0 { - table.Add("end", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) + table.Add("end", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) } s := storage.FormatBackendURL(task.Info.GetStorage()) table.Add("storage", s.String()) @@ -136,7 +137,7 @@ func (p *printByTable) AddTask(task TaskStatus) { if gap > 10*time.Minute { gapColor = color.New(color.FgRed) } - info := fmt.Sprintf("%s; gap=%s", FormatDate(pTime), gapColor.Sprint(gap)) + info := fmt.Sprintf("%s; gap=%s", utils.FormatDate(pTime), gapColor.Sprint(gap)) return info } table.Add("checkpoint[global]", formatTS(task.globalCheckpoint)) diff --git a/br/pkg/stream/table_history.go b/br/pkg/stream/table_history.go new file mode 100644 index 0000000000000..cad3bcd80b40d --- /dev/null +++ b/br/pkg/stream/table_history.go @@ -0,0 +1,75 @@ +// Copyright 2022-present PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stream + +import ( + "github.com/pingcap/log" + "go.uber.org/zap" +) + +// TableLocationInfo stores db id and the table name to locate the table +type TableLocationInfo struct { + DbID int64 + TableName string +} + +type LogBackupTableHistory struct { + // maps table ID to its original and current names + // [0] is original location, [1] is current location + tableNameHistory map[int64][2]TableLocationInfo + // record all the db id to name that were seen during log backup DDL history + dbIdToName map[int64]string +} + +func NewTableRenameInfo() *LogBackupTableHistory { + return &LogBackupTableHistory{ + tableNameHistory: make(map[int64][2]TableLocationInfo), + } +} + +func (info *LogBackupTableHistory) AddTableHistory(tableId int64, tableName string, dbID int64) { + tableLocationInfo := TableLocationInfo{ + DbID: dbID, + TableName: tableName, + } + names, exists := info.tableNameHistory[tableId] + if !exists { + // first occurrence - store as original name + info.tableNameHistory[tableId] = [2]TableLocationInfo{tableLocationInfo, tableLocationInfo} + } else { + // update current name while preserving original name + info.tableNameHistory[tableId] = [2]TableLocationInfo{names[0], tableLocationInfo} + } +} + +func (info *LogBackupTableHistory) RecordDBIdToName(dbId int64, dbName string) { + log.Info("################## getting db id to name", zap.Int64("dbId", dbId), zap.String("dbName", dbName)) + info.dbIdToName[dbId] = dbName +} + +// GetTableHistory returns information about all tables that have been renamed. 
+// Returns a map of table IDs to their original and current locations +func (info *LogBackupTableHistory) GetTableHistory() map[int64][2]TableLocationInfo { + return info.tableNameHistory +} + +func (info *LogBackupTableHistory) GetDBNameByID(dbId int64) (string, bool) { + name, ok := info.dbIdToName[dbId] + return name, ok +} + +func (info *LogBackupTableHistory) GetNewlyCreatedDBHistory() map[int64]string { + return info.dbIdToName +} diff --git a/br/pkg/stream/util.go b/br/pkg/stream/util.go deleted file mode 100644 index 10215a68df61d..0000000000000 --- a/br/pkg/stream/util.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. - -package stream - -import ( - "strings" - "time" -) - -const DATE_FORMAT = "2006-01-02 15:04:05.999999999 -0700" - -func FormatDate(ts time.Time) string { - return ts.Format(DATE_FORMAT) -} - -func IsMetaDBKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDB") -} - -func IsMetaDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDDLJobH") -} - -func MaybeDBOrDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mD") -} diff --git a/br/pkg/stream/util_test.go b/br/pkg/stream/util_test.go deleted file mode 100644 index 6dda62a04ad60..0000000000000 --- a/br/pkg/stream/util_test.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. - -package stream - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" - "github.com/tikv/client-go/v2/oracle" -) - -func TestDateFormat(t *testing.T) { - cases := []struct { - ts uint64 - target string - }{ - { - 434604259287760897, - "2022-07-15 19:14:39.534 +0800", - }, - { - 434605479096221697, - "2022-07-15 20:32:12.734 +0800", - }, - { - 434605478903808000, - "2022-07-15 20:32:12 +0800", - }, - } - - timeZone, _ := time.LoadLocation("Asia/Shanghai") - for _, ca := range cases { - date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) - require.Equal(t, ca.target, date) - } -} - -func TestPrefix(t *testing.T) { - require.True(t, IsMetaDBKey([]byte("mDBs"))) - require.False(t, IsMetaDBKey([]byte("mDDL"))) - require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) - require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDB:"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) - require.False(t, MaybeDBOrDDLJobHistoryKey([]byte("DDL"))) -} diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index ab77b59bdc7a3..2c384d25faa4c 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -556,6 +556,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig } ranges, schemas, policies, err := client.BuildBackupRangeAndSchema(mgr.GetStorage(), cfg.TableFilter, backupTS, isFullBackup(cmdName)) + log.Info("###################### backing up schema", zap.Any("num", schemas.Len())) if err != nil { return errors.Trace(err) } @@ -583,7 +584,7 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig } // nothing to backup - if len(ranges) == 0 { + if len(ranges) == 0 && schemas.Len() == 0 { pdAddress := strings.Join(cfg.PD, ",") log.Warn("Nothing to backup, maybe connected to cluster for restoring", zap.String("PD address", pdAddress)) diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index ebb53968d5fea..f3224c88a744a 100644 --- a/br/pkg/task/common.go +++ 
b/br/pkg/task/common.go @@ -252,8 +252,13 @@ type Config struct { // should be removed after TiDB upgrades the BR dependency. Filter filter.MySQLReplicationRules - FilterStr []string `json:"filter-strings" toml:"filter-strings"` - TableFilter filter.Filter `json:"-" toml:"-"` + FilterStr []string `json:"filter-strings" toml:"filter-strings"` + // generated from FilterStr provides by user + TableFilter filter.Filter `json:"-" toml:"-"` + // PiTRTableFilter generated from TableFilter during snapshot restore, it has all the db id and table id that needs + // to be restored + PiTRTableFilter *utils.PiTRTableFilter `json:"-" toml:"-"` + SwitchModeInterval time.Duration `json:"switch-mode-interval" toml:"switch-mode-interval"` // Schemas is a database name set, to check whether the restore database has been backup Schemas map[string]struct{} @@ -884,6 +889,7 @@ func ReadBackupMeta( return nil, nil, nil, errors.Annotate(err, "parse backupmeta failed because of wrong aes cipher") } + log.Info("########### read backup meta ", zap.Any("backupMeta", backupMeta.SchemaIndex.Schemas)) return u, s, backupMeta, nil } @@ -988,3 +994,9 @@ func progressFileWriterRoutine(ctx context.Context, progress glue.Progress, tota } } } + +func WriteStringToConsole(g glue.Glue, msg string) error { + b := []byte(msg) + _, err := glue.GetConsole(g).Out().Write(b) + return err +} diff --git a/br/pkg/task/config_test.go b/br/pkg/task/config_test.go index bfb08fff2afd6..d929c1d727c5b 100644 --- a/br/pkg/task/config_test.go +++ b/br/pkg/task/config_test.go @@ -193,7 +193,7 @@ func TestCheckRestoreDBAndTable(t *testing.T) { for _, db := range ca.backupDBs { backupDBs = append(backupDBs, db) } - err := CheckRestoreDBAndTable(backupDBs, cfg) + err := VerifyDBAndTableInBackup(backupDBs, cfg) require.NoError(t, err) } } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 5bee261ceff21..4f6039d389fca 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore" snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" @@ -586,8 +587,8 @@ func CheckNewCollationEnable( return enabled, nil } -// CheckRestoreDBAndTable is used to check whether the restore dbs or tables have been backup -func CheckRestoreDBAndTable(schemas []*metautil.Database, cfg *RestoreConfig) error { +// VerifyDBAndTableInBackup is used to check whether the restore dbs or tables have been backup +func VerifyDBAndTableInBackup(schemas []*metautil.Database, cfg *RestoreConfig) error { if len(cfg.Schemas) == 0 && len(cfg.Tables) == 0 { return nil } @@ -701,7 +702,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err := version.CheckClusterVersion(c, mgr.GetPDClient(), version.CheckVersionForBR); err != nil { return errors.Trace(err) } - restoreError = runSnapshotRestore(c, mgr, g, cmdName, cfg, nil) + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + } + restoreError = runSnapshotRestore(c, mgr, g, cmdName, &snapshotRestoreConfig) } if restoreError != nil { return errors.Trace(restoreError) @@ -734,7 +738,13 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } -func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg 
*RestoreConfig, checkInfo *PiTRTaskInfo) error { +type SnapshotRestoreConfig struct { + *RestoreConfig + piTRTaskInfo *PiTRTaskInfo + logBackupTableHistory *stream.LogBackupTableHistory +} + +func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg *SnapshotRestoreConfig) error { cfg.Adjust() defer summary.Summary(cmdName) ctx, cancel := context.WithCancel(c) @@ -747,9 +757,31 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s ctx = opentracing.ContextWithSpan(ctx, span1) } - codec := mgr.GetStorage().GetCodec() + // reads out information from backup meta file and do requirement checking if needed + u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) + if err != nil { + return errors.Trace(err) + } + if cfg.CheckRequirements { + log.Info("Checking incompatible TiCDC changefeeds before restoring.", + logutil.ShortError(err), zap.Uint64("restore-ts", backupMeta.EndVersion)) + if err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()); err != nil { + return errors.Trace(err) + } - // need retrieve these configs from tikv if not set in command. + backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) + if backupVersion != nil { + if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { + return errors.Trace(versionErr) + } + } + } + if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { + return errors.Trace(err) + } + + // build restore client + // need to retrieve these configs from tikv if not set in command. kvConfigs := &pconfig.KVConfig{ ImportGoroutines: cfg.ConcurrencyPerStore, MergeRegionSize: cfg.MergeSmallRegionSizeBytes, @@ -766,63 +798,61 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s client := snapclient.NewRestoreClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) // using tikv config to set the concurrency-per-store for client. 
client.SetConcurrencyPerStore(kvConfigs.ImportGoroutines.Value) - err := configureRestoreClient(ctx, client, cfg) - if err != nil { + if err := configureRestoreClient(ctx, client, cfg.RestoreConfig); err != nil { return errors.Trace(err) } - // Init DB connection sessions - err = client.Init(g, mgr.GetStorage()) + // InitConnections DB connection sessions + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() - - if err != nil { - return errors.Trace(err) - } - u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) if err != nil { return errors.Trace(err) } - if cfg.CheckRequirements { - err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()) - log.Info("Checking incompatible TiCDC changefeeds before restoring.", - logutil.ShortError(err), zap.Uint64("restore-ts", backupMeta.EndVersion)) - if err != nil { - return errors.Trace(err) - } - } - - backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) - if cfg.CheckRequirements && backupVersion != nil { - if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { - return errors.Trace(versionErr) - } - } - if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { - return errors.Trace(err) - } - reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, cfg.LoadStats); err != nil { + metaReader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) + if err = client.LoadBackupMetaAndInitClients(ctx, backupMeta, u, metaReader, cfg.LoadStats); err != nil { return errors.Trace(err) } if client.IsRawKvMode() { return errors.Annotate(berrors.ErrRestoreModeMismatch, "cannot do transactional restore from raw kv data") } - if err = CheckRestoreDBAndTable(client.GetDatabases(), cfg); err != nil { + if err = VerifyDBAndTableInBackup(client.GetDatabases(), cfg.RestoreConfig); err != nil { return err } - files, tables, dbs := filterRestoreFiles(client, cfg) - if len(dbs) == 0 && len(tables) != 0 { - return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases") + + // filters out db/table/files using filter + fileMap, tableMap, dbMap, err := filterRestoreFiles(client, cfg.RestoreConfig) + if err != nil { + return errors.Trace(err) + } + log.Info("### found number of ", zap.Int("files", len(fileMap)), zap.Int("tables", len(tableMap)), zap.Int("db", len(dbMap))) + if cfg.logBackupTableHistory != nil { + // add additional tables and etc to restore in the snapshot restore phase since it will later be renamed during + // log restore and will fall into filter range. 
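(For context on this step: the LogBackupTableHistory introduced earlier in this patch keeps, per table id, a pair of TableLocationInfo values, where index 0 is the location when the id was first seen and index 1 is the latest one; that pair is what lets a table renamed into the filter range during log backup be traced back to its pre-rename identity and pulled into the snapshot restore. Below is a minimal standalone sketch of that bookkeeping, not the br/pkg/stream code itself; the table names are made up, and since NewTableRenameInfo as shown above only initializes tableNameHistory, the sketch initializes the db-id map as well before writing to it.)

package main

import "fmt"

// TableLocationInfo mirrors the struct added in br/pkg/stream/table_history.go:
// a db id plus the table name at that point in time.
type TableLocationInfo struct {
	DbID      int64
	TableName string
}

// tableHistory is a minimal stand-in for LogBackupTableHistory: index 0 holds
// the location when the table id was first seen, index 1 holds the latest one.
type tableHistory struct {
	tableNameHistory map[int64][2]TableLocationInfo
	dbIdToName       map[int64]string
}

func newTableHistory() *tableHistory {
	return &tableHistory{
		tableNameHistory: make(map[int64][2]TableLocationInfo),
		dbIdToName:       make(map[int64]string), // initialized here so writes below cannot panic
	}
}

func (h *tableHistory) addTableHistory(tableID int64, name string, dbID int64) {
	loc := TableLocationInfo{DbID: dbID, TableName: name}
	if old, ok := h.tableNameHistory[tableID]; ok {
		// keep the original location, only update the current one
		h.tableNameHistory[tableID] = [2]TableLocationInfo{old[0], loc}
		return
	}
	// first occurrence: original and current are the same
	h.tableNameHistory[tableID] = [2]TableLocationInfo{loc, loc}
}

func main() {
	h := newTableHistory()
	h.dbIdToName[1] = "test" // equivalent of RecordDBIdToName, safe because the map is initialized

	// table 100 starts outside the filter and is later renamed into it
	h.addTableHistory(100, "other_3", 1)
	h.addTableHistory(100, "renamed_into_filter_3", 1)

	locs := h.tableNameHistory[100]
	fmt.Printf("original=%s current=%s\n", locs[0].TableName, locs[1].TableName)
	// original=other_3 current=renamed_into_filter_3
}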
+ err := adjustTablesToRestoreAndCreateFilter(cfg.logBackupTableHistory, cfg.RestoreConfig, client, fileMap, tableMap) + if err != nil { + return errors.Trace(err) + } + + log.Info("### need to restore additional number of ", + zap.Int("files", len(fileMap)), zap.Int("tables", len(tableMap)), zap.Int("db", len(dbMap))) + + // need to update to include all eligible table id from snapshot restore + UpdatePiTRFilter(cfg.RestoreConfig, tableMap) + log.Info("############### restore piter filter content", zap.Any("filter", len(cfg.PiTRTableFilter.DbIdToTable))) } + log.Info("### restore number of ", zap.Int("files", len(fileMap)), zap.Int("tables", len(tableMap)), zap.Int("db", len(dbMap))) + + files, tables, dbs := convertMapsToSlices(fileMap, tableMap, dbMap) + // after figuring out what files to restore, check if disk has enough space if cfg.CheckRequirements { if err := checkDiskSpace(ctx, mgr, files, tables); err != nil { return errors.Trace(err) } } - archiveSize := reader.ArchiveSize(ctx, files) + archiveSize := metaReader.ArchiveSize(ctx, files) g.Record(summary.RestoreDataSize, archiveSize) //restore from tidb will fetch a general Size issue https://github.com/pingcap/tidb/issues/27247 g.Record("Size", archiveSize) @@ -832,8 +862,8 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // for full + log restore. should check the cluster is empty. - if client.IsFull() && checkInfo != nil && checkInfo.FullRestoreCheckErr != nil { - return checkInfo.FullRestoreCheckErr + if client.IsFull() && cfg.piTRTaskInfo != nil && cfg.piTRTaskInfo.FullRestoreCheckErr != nil { + return cfg.piTRTaskInfo.FullRestoreCheckErr } if client.IsIncremental() { @@ -843,23 +873,24 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) + restoreSchedulersFunc, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) if err != nil { return errors.Trace(err) } - schedulersRemovable := false + // need to know whether restore has been completed so can restore schedulers + canRestoreSchedulers := false defer func() { // don't reset pd scheduler if checkpoint mode is used and restored is not finished - if cfg.UseCheckpoint && !schedulersRemovable { - log.Info("skip removing pd schehduler for next retry") + if cfg.UseCheckpoint && !canRestoreSchedulers { + log.Info("skip removing pd scheduler for next retry") return } - log.Info("start to remove the pd scheduler") + log.Info("start to restore pd scheduler") // run the post-work to avoid being stuck in the import // mode or emptied schedulers. 
- restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) - log.Info("finish removing pd scheduler") + restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) + log.Info("finish restoring pd scheduler") }() var checkpointFirstRun = true @@ -885,7 +916,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } } else if client.IsFull() && checkpointFirstRun && cfg.CheckRequirements { if err := checkTableExistence(ctx, mgr, tables, g); err != nil { - schedulersRemovable = true + canRestoreSchedulers = true return errors.Trace(err) } } @@ -897,8 +928,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // preallocate the table id, because any ddl job or database creation(include checkpoint) also allocates the global ID - err = client.AllocTableIDs(ctx, tables) - if err != nil { + if err = client.AllocTableIDs(ctx, tables); err != nil { return errors.Trace(err) } @@ -910,7 +940,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } if restoreSchedulersConfigFromCheckpoint != nil { - restoreSchedulers = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) + restoreSchedulersFunc = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) } checkpointSetWithTableID = sets @@ -918,7 +948,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s // need to flush the whole checkpoint data so that br can quickly jump to // the log kv restore step when the next retry. log.Info("wait for flush checkpoint...") - client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !schedulersRemovable) + client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !canRestoreSchedulers) }() } @@ -992,8 +1022,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // execute DDL first - err = client.ExecDDLs(ctx, ddlJobs) - if err != nil { + if err = client.ExecDDLs(ctx, ddlJobs); err != nil { return errors.Trace(err) } @@ -1028,6 +1057,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } + codec := mgr.GetStorage().GetCodec() if len(files) == 0 { log.Info("no files, empty databases and tables are restored") summary.SetSuccessStatus(true) @@ -1124,7 +1154,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s finish := dropToBlackhole(ctx, postHandleCh, errCh) - // Reset speed limit. ResetSpeedLimit must be called after client.InitBackupMeta has been called. + // Reset speed limit. ResetSpeedLimit must be called after client.LoadBackupMetaAndInitClients has been called. defer func() { var resetErr error // In future we may need a mechanism to set speed limit in ttl. like what we do in switchmode. TODO @@ -1161,7 +1191,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } - schedulersRemovable = true + canRestoreSchedulers = true // Set task summary to success status. summary.SetSuccessStatus(true) @@ -1380,15 +1410,21 @@ func dropToBlackhole( return outCh } -// filterRestoreFiles filters tables that can't be processed after applying cfg.TableFilter.MatchTable. -// if the db has no table that can be processed, the db will be filtered too. +// filterRestoreFiles filters out tables that can't be processed after applying cfg.TableFilter.MatchTable. 
func filterRestoreFiles( client *snapclient.SnapClient, cfg *RestoreConfig, -) (files []*backuppb.File, tables []*metautil.Table, dbs []*metautil.Database) { +) (fileMap map[string]*backuppb.File, tableMap map[int64]*metautil.Table, dbMap map[int64]*metautil.Database, err error) { + // Initialize maps + fileMap = make(map[string]*backuppb.File) + tableMap = make(map[int64]*metautil.Table) + dbMap = make(map[int64]*metautil.Database) + for _, db := range client.GetDatabases() { dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { + dbMap[db.Info.ID] = db + log.Info("################### getting db", zap.Any("db", dbName)) + if name, ok := utils.StripTempTableNamePrefixIfNeeded(db.Info.Name.O); utils.IsSysDB(name) && ok { dbName = name } if checkpoint.IsCheckpointDB(db.Info.Name) { @@ -1397,18 +1433,147 @@ func filterRestoreFiles( if !cfg.TableFilter.MatchSchema(dbName) { continue } - dbs = append(dbs, db) + dbMap[db.Info.ID] = db for _, table := range db.Tables { + log.Info("################### restore getting table", zap.Any("table", table.Info)) if table.Info == nil || !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) { continue } - files = append(files, table.Files...) - tables = append(tables, table) + + // Add table to tableMap using table ID as key + tableMap[table.Info.ID] = table + + // Add files to fileMap using file name as key + for _, file := range table.Files { + fileMap[file.Name] = file + } } } + + // sanity check + if len(dbMap) == 0 && len(tableMap) != 0 { + err = errors.Annotate(berrors.ErrRestoreInvalidBackup, "contains tables but no databases") + } return } +func adjustTablesToRestoreAndCreateFilter( + logBackupTableHistory *stream.LogBackupTableHistory, + cfg *RestoreConfig, + client *snapclient.SnapClient, + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, +) (err error) { + snapshotDbMap := client.GetDatabaseMap() + + // build filter for pitr restore to use later + piTRTableFilter := utils.NewPiTRTableFilter() + + // put all the newly created db that matches the filter during log backup into the pitr filter + newlyCreatedDBs := logBackupTableHistory.GetNewlyCreatedDBHistory() + for dbId, dbName := range newlyCreatedDBs { + if cfg.TableFilter.MatchSchema(dbName) { + piTRTableFilter.UpdateDB(dbId) + } + } + + // get all the tables seen during the log backup + tableHistory := logBackupTableHistory.GetTableHistory() + + log.Info("################### table filter string", zap.Any("table str", cfg.FilterStr)) + for tableID, locations := range tableHistory { + originalLocation := locations[0] + currentLocation := locations[1] + + var dbName string + if snapDb, exists := snapshotDbMap[currentLocation.DbID]; exists { + dbName = snapDb.Info.Name.O + } else if name, exists := logBackupTableHistory.GetDBNameByID(currentLocation.DbID); exists { + // if db id does not exist in the snapshot, meaning it's created during log backup + dbName = name + } else { + err = errors.Annotate(berrors.ErrRestoreInvalidBackup, + "internal error: did not find db id to name information in snapshot and log backup history") + return + } + + if name, ok := utils.StripTempTableNamePrefixIfNeeded(dbName); utils.IsSysDB(name) && ok { + dbName = name + } + log.Info("################### getting table", zap.Any("table", currentLocation.TableName)) + // handle in filter range cases + // 1. original == current, didn't have renaming + // 2. 
original has been renamed and current is in the filter range + // we need to restore original table + if cfg.TableFilter.MatchTable(dbName, currentLocation.TableName) { + // put this db/table id into pitr filter as it matches with user's filter + // have to update filter here since table might be empty or not in snapshot so nothing will be returned . + // but we still need to capture this table id to restore during log restore. + piTRTableFilter.UpdateTable(currentLocation.DbID, tableID) + log.Info("###################matching table", zap.Any("table id", tableID)) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDbMap[originalLocation.DbID] + if !exists { + // original db created during log backup, snapshot doesn't have information about this db so doesn't + // need to restore at snapshot + continue + } + + // need to restore the matching table in snapshot restore phase + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range originalTable.Files { + fileMap[file.Name] = file + } + tableMap[originalTable.Info.ID] = originalTable + // only one table id will match + break + } + } + // handle case where current is not in range and original was in range, we need to remove the original from + // restoring + } else if cfg.TableFilter.MatchTable(dbName, originalLocation.TableName) { + // remove it from the filter, will not remove db even table size becomes 0 + _ = piTRTableFilter.Remove(originalLocation.DbID, tableID) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDbMap[originalLocation.DbID] + if !exists { + // original db created during log backup, no need to process further + continue + } + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range originalTable.Files { + delete(fileMap, file.Name) + } + delete(tableMap, originalTable.Info.ID) + // only one table id will match + break + } + } + + } + } + // store the filter into config + cfg.PiTRTableFilter = piTRTableFilter + log.Info("###################### table filter", zap.Any("piTRTableFilter", piTRTableFilter)) + return +} + +func UpdatePiTRFilter(cfg *RestoreConfig, tableMap map[int64]*metautil.Table) { + for _, table := range tableMap { + cfg.PiTRTableFilter.UpdateTable(table.DB.ID, table.Info.ID) + } +} + // enableTiDBConfig tweaks some of configs of TiDB to make the restore progress go well. // return a function that could restore the config to origin. 
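(The core of adjustTablesToRestoreAndCreateFilter above boils down to two cases per table seen in the log backup history: if the current name matches the user filter, record the table id in the PiTR filter and keep its snapshot data; if only the original name matched, drop the id and its snapshot data. The sketch below isolates that decision with a stripped-down stand-in for PiTRTableFilter and a plain name predicate in place of cfg.TableFilter; the db-name resolution and the original-db-in-snapshot checks are deliberately omitted.)

package main

import (
	"fmt"
	"strings"
)

// pitrFilter is a stand-in for utils.PiTRTableFilter: db id -> set of table ids.
type pitrFilter map[int64]map[int64]struct{}

func (f pitrFilter) updateTable(dbID, tableID int64) {
	if f[dbID] == nil {
		f[dbID] = make(map[int64]struct{})
	}
	f[dbID][tableID] = struct{}{}
}

func (f pitrFilter) remove(dbID, tableID int64) {
	if tables, ok := f[dbID]; ok {
		delete(tables, tableID)
	}
}

// decide applies the two cases handled above; snapshotSet stands in for the
// table/file maps that control what the snapshot restore phase picks up.
func decide(f pitrFilter, matches func(table string) bool, dbID, tableID int64,
	origName, curName string, snapshotSet map[int64]bool) {
	switch {
	case matches(curName):
		// current name is in the filter: track the id for log restore and
		// make sure the pre-rename table is restored in the snapshot phase
		f.updateTable(dbID, tableID)
		snapshotSet[tableID] = true
	case matches(origName):
		// only the original name matched: the table moved out of the filter,
		// so drop it from both the PiTR filter and the snapshot phase
		f.remove(dbID, tableID)
		delete(snapshotSet, tableID)
	}
}

func main() {
	f := pitrFilter{}
	snapshotSet := map[int64]bool{}
	matches := func(table string) bool { return strings.HasPrefix(table, "in_filter") }

	// table 100 was renamed into the filter range during log backup: keep it
	decide(f, matches, 1, 100, "other_1", "in_filter_1", snapshotSet)

	// table 101 started inside the filter range but was renamed away: drop it
	snapshotSet[101] = true
	decide(f, matches, 1, 101, "in_filter_2", "moved_away_2", snapshotSet)

	fmt.Println(f, snapshotSet) // map[1:map[100:{}]] map[100:true]
}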
func enableTiDBConfig() func() { @@ -1577,6 +1742,9 @@ func FilterDDLJobs(allDDLJobs []*model.Job, tables []*metautil.Table) (ddlJobs [ } } } + slices.SortFunc(ddlJobs, func(i, j *model.Job) int { + return cmp.Compare(i.BinlogInfo.SchemaVersion, j.BinlogInfo.SchemaVersion) + }) return ddlJobs } @@ -1659,3 +1827,26 @@ func afterTableRestoredCh(ctx context.Context, createdTables []*snapclient.Creat }() return outCh } + +func convertMapsToSlices( + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, + dbMap map[int64]*metautil.Database, +) ([]*backuppb.File, []*metautil.Table, []*metautil.Database) { + files := make([]*backuppb.File, 0, len(fileMap)) + for _, file := range fileMap { + files = append(files, file) + } + + tables := make([]*metautil.Table, 0, len(tableMap)) + for _, table := range tableMap { + tables = append(tables, table) + } + + dbs := make([]*metautil.Database, 0, len(dbMap)) + for _, db := range dbMap { + dbs = append(dbs, db) + } + + return files, tables, dbs +} diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index 13a7382d6092c..cbda2f3365853 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -98,7 +98,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) client.SetConcurrencyPerStore(cfg.ConcurrencyPerStore.Value) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) @@ -109,7 +109,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR return errors.Trace(err) } reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, true); err != nil { + if err = client.LoadBackupMetaAndInitClients(c, backupMeta, u, reader, true); err != nil { return errors.Trace(err) } diff --git a/br/pkg/task/restore_test.go b/br/pkg/task/restore_test.go index 4713e5a540ab7..5fe7c4c489a41 100644 --- a/br/pkg/task/restore_test.go +++ b/br/pkg/task/restore_test.go @@ -307,6 +307,8 @@ func TestFilterDDLJobs(t *testing.T) { ddlJobs := task.FilterDDLJobs(allDDLJobs, tables) for _, job := range ddlJobs { t.Logf("get ddl job: %s", job.Query) + t.Logf("table name: %s", job.TableName) + t.Logf("dbid: %s", job.SchemaName) } require.Equal(t, 7, len(ddlJobs)) } diff --git a/br/pkg/task/restore_txn.go b/br/pkg/task/restore_txn.go index 4a4a832aad660..5ea8e28aa435b 100644 --- a/br/pkg/task/restore_txn.go +++ b/br/pkg/task/restore_txn.go @@ -43,7 +43,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) client.SetConcurrencyPerStore(uint(cfg.Concurrency)) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) @@ -54,7 +54,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) return errors.Trace(err) } reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.InitBackupMeta(c, backupMeta, u, reader, true); err != nil { + if err = client.LoadBackupMetaAndInitClients(c, backupMeta, u, reader, true); err != nil { return errors.Trace(err) } diff --git a/br/pkg/task/stream.go b/br/pkg/task/stream.go index 6009eb47a6c70..6f35ceb68f842 100644 --- a/br/pkg/task/stream.go +++ b/br/pkg/task/stream.go @@ -455,11 
+455,11 @@ func (s *streamMgr) checkStreamStartEnable(ctx context.Context) error { return nil } -type RestoreFunc func(string) error +type RestoreGcFunc func(string) error -// KeepGcDisabled keeps GC disabled and return a function that used to gc enabled. +// DisableGc disables and returns a function that can enable gc back. // gc.ratio-threshold = "-1.0", which represents disable gc in TiKV. -func KeepGcDisabled(g glue.Glue, store kv.Storage) (RestoreFunc, string, error) { +func DisableGc(g glue.Glue, store kv.Storage) (RestoreGcFunc, string, error) { se, err := g.CreateSession(store) if err != nil { return nil, "", errors.Trace(err) @@ -570,7 +570,7 @@ func RunStreamStart( // locked means this is a stream task restart. Or create a new stream task. if locked { - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } @@ -684,13 +684,13 @@ func RunStreamMetadata( ctx = opentracing.ContextWithSpan(ctx, span1) } - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } - logMinDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) - logMaxDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) + logMinDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) + logMaxDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) summary.Log(cmdName, zap.Uint64("log-min-ts", logInfo.logMinTS), zap.String("log-min-date", logMinDate), zap.Uint64("log-max-ts", logInfo.logMaxTS), @@ -1141,7 +1141,8 @@ func checkIncompatibleChangefeed(ctx context.Context, backupTS uint64, etcdCLI * return nil } -// RunStreamRestore restores stream log. +// RunStreamRestore is the entry point to do PiTR restore. It can optionally start a full/snapshot restore followed +// by the log restore. func RunStreamRestore( c context.Context, mgr *conn.Mgr, @@ -1160,7 +1161,7 @@ func RunStreamRestore( if err != nil { return errors.Trace(err) } - logInfo, err := getLogRangeWithStorage(ctx, s) + logInfo, err := getLogInfoFromStorage(ctx, s) if err != nil { return errors.Trace(err) } @@ -1198,7 +1199,7 @@ func RunStreamRestore( return errors.Trace(err) } - checkInfo, err := checkPiTRTaskInfo(ctx, mgr, g, cfg) + taskInfo, err := generatePiTRTaskInfo(ctx, mgr, g, cfg) if err != nil { return errors.Trace(err) } @@ -1210,39 +1211,61 @@ func RunStreamRestore( recorder := tiflashrec.New() cfg.tiflashRecorder = recorder // restore full snapshot. - if checkInfo.NeedFullRestore { + if taskInfo.NeedFullRestore { + // if table filter is specified, go through log backup DDL change and build table rename history to figure out + // the tables that need to be restored currently out of filter range but later renamed into the filter range. + var logBackupTableHistory *stream.LogBackupTableHistory + // TODO need to do more + if cfg.ExplicitFilter { + log.Info("### building log backup table history") + logClient, err := createLogClient(ctx, g, cfg, mgr) + if err != nil { + return errors.Trace(err) + } + defer logClient.Close() + dataFileCount := 0 + ddlFiles, err := logClient.LoadDDLFilesAndCountDMLFiles(ctx, &dataFileCount) + logBackupTableHistory, err = logClient.LoadMetaKVFilesAndBuildTableRenameInfo(ctx, ddlFiles) + if err != nil { + return errors.Trace(err) + } + } logStorage := cfg.Config.Storage cfg.Config.Storage = cfg.FullBackupStorage // TiFlash replica is restored to down-stream on 'pitr' currently. 
- if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, cfg, checkInfo); err != nil { + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + piTRTaskInfo: taskInfo, + logBackupTableHistory: logBackupTableHistory, + } + if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, &snapshotRestoreConfig); err != nil { return errors.Trace(err) } cfg.Config.Storage = logStorage } else if len(cfg.FullBackupStorage) > 0 { - skipMsg := []byte(fmt.Sprintf("%s command is skipped due to checkpoint mode for restore\n", FullRestoreCmd)) - if _, err := glue.GetConsole(g).Out().Write(skipMsg); err != nil { + if err = WriteStringToConsole(g, fmt.Sprintf("%s is skipped due to checkpoint mode for restore\n", FullRestoreCmd)); err != nil { return errors.Trace(err) } - if checkInfo.CheckpointInfo != nil && checkInfo.CheckpointInfo.Metadata != nil && checkInfo.CheckpointInfo.Metadata.TiFlashItems != nil { + if taskInfo.CheckpointInfo != nil && taskInfo.CheckpointInfo.Metadata != nil && taskInfo.CheckpointInfo.Metadata.TiFlashItems != nil { log.Info("load tiflash records of snapshot restore from checkpoint") - cfg.tiflashRecorder.Load(checkInfo.CheckpointInfo.Metadata.TiFlashItems) + cfg.tiflashRecorder.Load(taskInfo.CheckpointInfo.Metadata.TiFlashItems) } } // restore log. cfg.adjustRestoreConfigForStreamRestore() - if err := restoreStream(ctx, mgr, g, cfg, checkInfo.CheckpointInfo); err != nil { + if err := restoreStream(ctx, mgr, g, cfg, taskInfo.CheckpointInfo); err != nil { return errors.Trace(err) } return nil } -// RunStreamRestore start restore job +// restoreStream starts the log restore func restoreStream( c context.Context, mgr *conn.Mgr, g glue.Glue, cfg *RestoreConfig, - taskInfo *checkpoint.CheckpointTaskInfoForLogRestore, + checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore, ) (err error) { var ( totalKVCount uint64 @@ -1262,9 +1285,9 @@ func restoreStream( zap.Uint64("source-start-point", cfg.StartTS), zap.Uint64("source-end-point", cfg.RestoreTS), zap.Uint64("target-end-point", currentTS), - zap.String("source-start", stream.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), - zap.String("source-end", stream.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), - zap.String("target-end", stream.FormatDate(oracle.GetTimeFromTS(currentTS))), + zap.String("source-start", utils.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), + zap.String("source-end", utils.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), + zap.String("target-end", utils.FormatDate(oracle.GetTimeFromTS(currentTS))), zap.Uint64("total-kv-count", totalKVCount), zap.Uint64("skipped-kv-count-by-checkpoint", checkpointTotalKVCount), zap.String("total-size", units.HumanSize(float64(totalSize))), @@ -1286,16 +1309,16 @@ func restoreStream( ctx = opentracing.ContextWithSpan(ctx, span1) } - client, err := createRestoreClient(ctx, g, cfg, mgr) + client, err := createLogClient(ctx, g, cfg, mgr) + defer client.Close() if err != nil { - return errors.Annotate(err, "failed to create restore client") + return errors.Annotate(err, "failed to create log client") } - defer client.Close() - if taskInfo != nil && taskInfo.Metadata != nil { - // reuse the task's rewrite ts - log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", taskInfo.Metadata.RewriteTS)) - currentTS = taskInfo.Metadata.RewriteTS + if checkpointTaskInfo != nil && checkpointTaskInfo.Metadata != nil { + // reuse the checkpoint task's rewrite ts + log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", checkpointTaskInfo.Metadata.RewriteTS)) 
+ currentTS = checkpointTaskInfo.Metadata.RewriteTS } else { currentTS, err = restore.GetTSWithRetry(ctx, mgr.GetPDClient()) if err != nil { @@ -1307,17 +1330,17 @@ func restoreStream( } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) + restoreSchedulersFunc, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) + // Always run the post-work even on error, so we don't stuck in the import + // mode or emptied schedulers + defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) if err != nil { return errors.Trace(err) } - // Always run the post-work even on error, so we don't stuck in the import - // mode or emptied schedulers - defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) // It need disable GC in TiKV when PiTR. // because the process of PITR is concurrent and kv events isn't sorted by tso. - restoreGc, oldRatio, err := KeepGcDisabled(g, mgr.GetStorage()) + restoreGcFunc, oldGcRatio, err := DisableGc(g, mgr.GetStorage()) if err != nil { return errors.Trace(err) } @@ -1329,80 +1352,44 @@ func restoreStream( return } - // If the oldRatio is negative, which is not normal status. + // If the oldGcRatio is negative, which is not normal status. // It should set default value "1.1" after PiTR finished. - if strings.HasPrefix(oldRatio, "-") { - log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldRatio)) - oldRatio = utils.DefaultGcRatioVal + if strings.HasPrefix(oldGcRatio, "-") { + log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldGcRatio)) + oldGcRatio = utils.DefaultGcRatioVal } - log.Info("start to restore gc", zap.String("ratio", oldRatio)) - if err := restoreGc(oldRatio); err != nil { - log.Error("failed to set gc enabled", zap.Error(err)) + log.Info("start to restore gc", zap.String("ratio", oldGcRatio)) + if err := restoreGcFunc(oldGcRatio); err != nil { + log.Error("failed to restore gc", zap.Error(err)) } log.Info("finish restoring gc") }() var checkpointRunner *checkpoint.CheckpointRunner[checkpoint.LogRestoreKeyType, checkpoint.LogRestoreValueType] if cfg.UseCheckpoint { - oldRatioFromCheckpoint, err := client.InitCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldRatio, cfg.tiflashRecorder) + gcRatioFromCheckpoint, err := client.LoadOrCreateCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldGcRatio, cfg.tiflashRecorder) if err != nil { return errors.Trace(err) } - oldRatio = oldRatioFromCheckpoint + oldGcRatio = gcRatioFromCheckpoint checkpointRunner, err = client.StartCheckpointRunnerForLogRestore(ctx, g, mgr.GetStorage()) if err != nil { return errors.Trace(err) } defer func() { - log.Info("wait for flush checkpoint...") + log.Info("wait for flushing checkpoint...") checkpointRunner.WaitForFinish(ctx, !gcDisabledRestorable) }() } - encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) - if err != nil { - return errors.Annotate(err, "failed to create encryption manager for log restore") - } - defer encryptionManager.Close() - err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager) - if err != nil { - return err - } - - // get full backup meta storage to generate rewrite 
rules. - fullBackupStorage, err := parseFullBackupTablesStorage(cfg) - if err != nil { - return errors.Trace(err) - } // load the id maps only when the checkpoint mode is used and not the first execution - newTask := true - if taskInfo != nil && taskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersist { - newTask = false - } // get the schemas ID replace information. - // since targeted full backup storage, need to use the full backup cipher - schemasReplace, err := client.InitSchemasReplaceForDDL(ctx, &logclient.InitSchemaConfig{ - IsNewTask: newTask, - TableFilter: cfg.TableFilter, - TiFlashRecorder: cfg.tiflashRecorder, - FullBackupStorage: fullBackupStorage, - }, &cfg.Config.CipherInfo) + schemasReplace, err := buildSchemaReplace(ctx, client, cfg, isNewRestoreTask(checkpointTaskInfo)) if err != nil { return errors.Trace(err) } - schemasReplace.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { - // When the table replica changed to 0, the tiflash replica might be set to `nil`. - // We should remove the table if we meet. - if deleted || tableInfo.TiFlashReplica == nil { - cfg.tiflashRecorder.DelTable(tableInfo.ID) - return - } - cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) - // Remove the replica firstly. Let's restore them at the end. - tableInfo.TiFlashReplica = nil - } - + log.Info("############ schema replace before kv restore ", zap.Any("replace", schemasReplace.DbReplaceMap)) updateStats := func(kvCount uint64, size uint64) { mu.Lock() defer mu.Unlock() @@ -1414,6 +1401,7 @@ func restoreStream( if err != nil { return err } + pm := g.StartProgress(ctx, "Restore Meta Files", int64(len(ddlFiles)), !cfg.LogProgress) if err = withProgress(pm, func(p glue.Progress) error { client.RunGCRowsLoader(ctx) @@ -1421,8 +1409,8 @@ func restoreStream( }); err != nil { return errors.Annotate(err, "failed to restore meta files") } - - rewriteRules := initRewriteRules(schemasReplace) + log.Info("############ schema replace after restore", zap.Any("replace", schemasReplace.DbReplaceMap)) + rewriteRules := buildRewriteRules(schemasReplace) ingestRecorder := schemasReplace.GetIngestRecorder() if err := rangeFilterFromIngestRecorder(ingestRecorder, rewriteRules); err != nil { @@ -1479,15 +1467,18 @@ func restoreStream( return errors.Annotate(err, "failed to clean up") } + // to delete range that's dropped previously if err = client.InsertGCRows(ctx); err != nil { return errors.Annotate(err, "failed to insert rows into gc_delete_range") } + // might need to skip some index due to filtered out if err = client.RepairIngestIndex(ctx, ingestRecorder, g); err != nil { return errors.Annotate(err, "failed to repair ingest index") } if cfg.tiflashRecorder != nil { + // might need to check filter too sqls := cfg.tiflashRecorder.GenerateAlterTableDDLs(mgr.GetDomain().InfoSchema()) log.Info("Generating SQLs for restoring TiFlash Replica", zap.Strings("sqls", sqls)) @@ -1518,11 +1509,11 @@ func restoreStream( return nil } -func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { +func createLogClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { var err error keepaliveCfg := GetKeepalive(&cfg.Config) keepaliveCfg.PermitWithoutStream = true - client := logclient.NewRestoreClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) + client := logclient.NewLogClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), 
keepaliveCfg) err = client.Init(g, mgr.GetStorage()) if err != nil { return nil, errors.Trace(err) @@ -1552,6 +1543,14 @@ func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, m return nil, errors.Trace(err) } + encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) + if err != nil { + return nil, errors.Annotate(err, "failed to create encryption manager for log restore") + } + if err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager); err != nil { + return nil, errors.Trace(err) + } + return client, nil } @@ -1613,8 +1612,8 @@ type backupLogInfo struct { clusterID uint64 } -// getLogRange gets the log-min-ts and log-max-ts of starting log backup. -func getLogRange( +// getLogInfo gets the log-min-ts and log-max-ts of starting log backup. +func getLogInfo( ctx context.Context, cfg *Config, ) (backupLogInfo, error) { @@ -1622,10 +1621,10 @@ func getLogRange( if err != nil { return backupLogInfo{}, errors.Trace(err) } - return getLogRangeWithStorage(ctx, s) + return getLogInfoFromStorage(ctx, s) } -func getLogRangeWithStorage( +func getLogInfoFromStorage( ctx context.Context, s storage.ExternalStorage, ) (backupLogInfo, error) { @@ -1732,20 +1731,11 @@ func parseFullBackupTablesStorage( }, nil } -func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { +func buildRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { rules := make(map[int64]*restoreutils.RewriteRules) - filter := schemasReplace.TableFilter - - for _, dbReplace := range schemasReplace.DbMap { - if utils.IsSysDB(dbReplace.Name) || !filter.MatchSchema(dbReplace.Name) { - continue - } + for _, dbReplace := range schemasReplace.DbReplaceMap { for oldTableID, tableReplace := range dbReplace.TableMap { - if !filter.MatchTable(dbReplace.Name, tableReplace.Name) { - continue - } - if _, exist := rules[oldTableID]; !exist { log.Info("add rewrite rule", zap.String("tableName", dbReplace.Name+"."+tableReplace.Name), @@ -1768,7 +1758,7 @@ func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreu } // ShiftTS gets a smaller shiftTS than startTS. -// It has a safe duration between shiftTS and startTS for trasaction. +// It has a safe duration between shiftTS and startTS for transaction. func ShiftTS(startTS uint64) uint64 { physical := oracle.ExtractPhysical(startTS) logical := oracle.ExtractLogical(startTS) @@ -1794,7 +1784,7 @@ type PiTRTaskInfo struct { FullRestoreCheckErr error } -func checkPiTRTaskInfo( +func generatePiTRTaskInfo( ctx context.Context, mgr *conn.Mgr, g glue.Glue, @@ -1866,3 +1856,47 @@ func checkPiTRTaskInfo( return checkInfo, nil } + +func isNewRestoreTask(checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore) bool { + newTask := true + if checkpointTaskInfo != nil && checkpointTaskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersisted { + newTask = false + } + return newTask +} + +func buildSchemaReplace(ctx context.Context, client *logclient.LogClient, cfg *RestoreConfig, isNewRestoreTask bool) ( + *stream.SchemasReplace, error) { + + // get full backup meta storage to generate rewrite rules. 
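(As background on the rules these helpers produce: each rewrite rule conceptually maps the table-id key prefix recorded in the backup to the key prefix of the id allocated in the target cluster, and with the PiTR filter applied upstream, buildRewriteRules above simply walks every table in DbReplaceMap instead of re-filtering by name. Below is a minimal sketch of that prefix mapping using tablecodec.EncodeTablePrefix, assuming the tidb module is on the build path; the actual rule construction lives in the restoreutils package and is not shown in this hunk.)

package main

import (
	"fmt"

	"github.com/pingcap/tidb/pkg/tablecodec"
)

// rewriteRule is a sketch of the idea only, not the restoreutils type: it maps
// the key prefix of the backed-up table id to the prefix of the downstream id.
type rewriteRule struct {
	OldKeyPrefix []byte
	NewKeyPrefix []byte
}

func buildRule(oldTableID, newTableID int64) rewriteRule {
	return rewriteRule{
		OldKeyPrefix: tablecodec.EncodeTablePrefix(oldTableID),
		NewKeyPrefix: tablecodec.EncodeTablePrefix(newTableID),
	}
}

func main() {
	// e.g. table id 104 in the backup was re-created as id 208 downstream
	r := buildRule(104, 208)
	fmt.Printf("old prefix=%x\nnew prefix=%x\n", r.OldKeyPrefix, r.NewKeyPrefix)
}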
+ fullBackupStorage, err := parseFullBackupTablesStorage(cfg) + if err != nil { + return nil, errors.Trace(err) + } + + initSchemaConfig := logclient.InitSchemaConfig{ + IsNewRestoreTask: isNewRestoreTask, + TableFilter: cfg.TableFilter, + TiFlashRecorder: cfg.tiflashRecorder, + FullBackupStorage: fullBackupStorage, + PiTRTableFilter: cfg.PiTRTableFilter, + } + + schemasReplace, err := client.InitSchemasReplaceForDDL(ctx, &initSchemaConfig, &cfg.Config.CipherInfo) + if err != nil { + return nil, errors.Trace(err) + } + + schemasReplace.AfterTableRewrittenFunc = func(deleted bool, tableInfo *model.TableInfo) { + // When the table replica changed to 0, the tiflash replica might be set to `nil`. + // We should remove the table if we meet. + if deleted || tableInfo.TiFlashReplica == nil { + cfg.tiflashRecorder.DelTable(tableInfo.ID) + return + } + cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) + // Remove the replica firstly. Let's restore them at the end. + tableInfo.TiFlashReplica = nil + } + return schemasReplace, nil +} diff --git a/br/pkg/task/stream_test.go b/br/pkg/task/stream_test.go index 847699bc152cd..bdf2fdf41aa80 100644 --- a/br/pkg/task/stream_test.go +++ b/br/pkg/task/stream_test.go @@ -192,7 +192,7 @@ func TestGetLogRangeWithFullBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - _, err = getLogRange(context.TODO(), &cfg) + _, err = getLogInfo(context.TODO(), &cfg) require.Error(t, err, errors.Annotate(berrors.ErrStorageUnknown, "the storage has been used for full backup")) } @@ -215,7 +215,7 @@ func TestGetLogRangeWithLogBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - logInfo, err := getLogRange(context.TODO(), &cfg) + logInfo, err := getLogInfo(context.TODO(), &cfg) require.Nil(t, err) require.Equal(t, logInfo.logMinTS, startLogBackupTS) } diff --git a/br/pkg/utils/BUILD.bazel b/br/pkg/utils/BUILD.bazel index fa18a8317b234..3f36947b3032b 100644 --- a/br/pkg/utils/BUILD.bazel +++ b/br/pkg/utils/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "dyn_pprof_unix.go", "encryption.go", "error_handling.go", + "filter.go", "json.go", "key.go", "misc.go", @@ -35,7 +36,9 @@ go_library( "//pkg/parser/terror", "//pkg/parser/types", "//pkg/sessionctx", + "//pkg/tablecodec", "//pkg/util", + "//pkg/util/codec", "//pkg/util/encrypt", "//pkg/util/logutil", "//pkg/util/sqlexec", @@ -103,6 +106,7 @@ go_test( "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_kvproto//pkg/errorpb", "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_pd_client//:client", "@io_etcd_go_etcd_tests_v3//integration", diff --git a/br/pkg/utils/consts/BUILD.bazel b/br/pkg/utils/consts/BUILD.bazel new file mode 100644 index 0000000000000..1c9766fe93e1a --- /dev/null +++ b/br/pkg/utils/consts/BUILD.bazel @@ -0,0 +1,8 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "consts", + srcs = ["consts.go"], + importpath = "github.com/pingcap/tidb/br/pkg/utils/consts", + visibility = ["//visibility:public"], +) diff --git a/br/pkg/utils/consts/consts.go b/br/pkg/utils/consts/consts.go new file mode 100644 index 0000000000000..93390bcbacc38 --- /dev/null +++ b/br/pkg/utils/consts/consts.go @@ -0,0 +1,21 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package consts + +// Default columnFamily and write columnFamily +const ( + DefaultCF = "default" + WriteCF = "write" +) diff --git a/br/pkg/utils/filter.go b/br/pkg/utils/filter.go new file mode 100644 index 0000000000000..da5170ef82971 --- /dev/null +++ b/br/pkg/utils/filter.go @@ -0,0 +1,76 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +type PiTRTableFilter struct { + DbIdToTable map[int64]map[int64]struct{} +} + +func NewPiTRTableFilter() *PiTRTableFilter { + return &PiTRTableFilter{ + DbIdToTable: make(map[int64]map[int64]struct{}), + } +} + +// UpdateTable adds a table ID to the filter for the given database ID +func (f *PiTRTableFilter) UpdateTable(dbID, tableID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + f.DbIdToTable[dbID] = make(map[int64]struct{}) + } + + f.DbIdToTable[dbID][tableID] = struct{}{} +} + +// UpdateDB adds the database id +func (f *PiTRTableFilter) UpdateDB(dbID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + f.DbIdToTable[dbID] = make(map[int64]struct{}) + } +} + +// Remove removes a table ID from the filter for the given database ID. +// Returns true if the table was found and removed, false otherwise. 
+func (f *PiTRTableFilter) Remove(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + if _, exists := tables[tableID]; exists { + delete(tables, tableID) + return true + } + } + return false +} + +// ContainsTable checks if the given database ID and table ID combination exists in the filter +func (f *PiTRTableFilter) ContainsTable(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + _, exists := tables[tableID] + return exists + } + return false +} + +// ContainsDB checks if the given database ID exists in the filter +func (f *PiTRTableFilter) ContainsDB(dbID int64) bool { + _, ok := f.DbIdToTable[dbID] + return ok +} diff --git a/br/pkg/utils/key.go b/br/pkg/utils/key.go index 3e26211adf59e..51414d1b014b9 100644 --- a/br/pkg/utils/key.go +++ b/br/pkg/utils/key.go @@ -8,12 +8,15 @@ import ( "fmt" "io" "strings" + "time" "github.com/pingcap/errors" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" ) @@ -197,3 +200,27 @@ func IntersectAll(s1 []kv.KeyRange, s2 []kv.KeyRange) []kv.KeyRange { } return rs } + +const DateFormat = "2006-01-02 15:04:05.999999999 -0700" + +func FormatDate(ts time.Time) string { + return ts.Format(DateFormat) +} + +func IsMetaDBKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDB") +} + +func IsMetaDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDDLJobH") +} + +func IsDBOrDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mD") +} + +func EncodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { + k := tablecodec.EncodeMetaKey(key, field) + txnKey := codec.EncodeBytes(nil, k) + return codec.EncodeUintDesc(txnKey, ts) +} diff --git a/br/pkg/utils/key_test.go b/br/pkg/utils/key_test.go index 10723ed4c8a16..00ddd3d381c4e 100644 --- a/br/pkg/utils/key_test.go +++ b/br/pkg/utils/key_test.go @@ -6,9 +6,11 @@ import ( "encoding/hex" "fmt" "testing" + "time" "github.com/pingcap/tidb/pkg/kv" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" ) func TestParseKey(t *testing.T) { @@ -176,3 +178,40 @@ func TestClampKeyRanges(t *testing.T) { }) } } + +func TestDateFormat(t *testing.T) { + cases := []struct { + ts uint64 + target string + }{ + { + 434604259287760897, + "2022-07-15 19:14:39.534 +0800", + }, + { + 434605479096221697, + "2022-07-15 20:32:12.734 +0800", + }, + { + 434605478903808000, + "2022-07-15 20:32:12 +0800", + }, + } + + timeZone, _ := time.LoadLocation("Asia/Shanghai") + for _, ca := range cases { + date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) + require.Equal(t, ca.target, date) + } +} + +func TestPrefix(t *testing.T) { + require.True(t, IsMetaDBKey([]byte("mDBs"))) + require.False(t, IsMetaDBKey([]byte("mDDL"))) + require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) + require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDB:"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) + require.False(t, IsDBOrDDLJobHistoryKey([]byte("DDL"))) +} diff --git a/br/pkg/utils/schema.go b/br/pkg/utils/schema.go index 47ea86dcc9370..04da73817b239 100644 --- a/br/pkg/utils/schema.go +++ b/br/pkg/utils/schema.go @@ -47,12 +47,12 @@ func TemporaryDBName(db string) pmodel.CIStr { return 
pmodel.NewCIStr(temporaryDBNamePrefix + db) } -// GetSysDBName get the original name of system DB -func GetSysDBName(tempDB pmodel.CIStr) (string, bool) { - if ok := strings.HasPrefix(tempDB.O, temporaryDBNamePrefix); !ok { - return tempDB.O, false +// StripTempTableNamePrefixIfNeeded get the original name of system DB +func StripTempTableNamePrefixIfNeeded(tempDB string) (string, bool) { + if ok := strings.HasPrefix(tempDB, temporaryDBNamePrefix); !ok { + return tempDB, false } - return tempDB.O[len(temporaryDBNamePrefix):], true + return tempDB[len(temporaryDBNamePrefix):], true } // GetSysDBCIStrName get the CIStr name of system DB diff --git a/br/tests/br_encryption/run.sh b/br/tests/br_encryption/run.sh index 3934dd3b6103c..5f18b8be80406 100755 --- a/br/tests/br_encryption/run.sh +++ b/br/tests/br_encryption/run.sh @@ -59,39 +59,6 @@ insert_additional_data() { done } -wait_log_checkpoint_advance() { - echo "wait for log checkpoint to advance" - sleep 10 - local current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") - echo "current ts: $current_ts" - i=0 - while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name $TASK_NAME --json 2>br.log) - echo "log backup status: $log_backup_status" - local checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi - done -} - calculate_checksum() { local db=$1 local checksum=$(run_sql "USE $db; ADMIN CHECKSUM TABLE $TABLE;" | awk '/CHECKSUM/{print $2}') @@ -170,7 +137,7 @@ run_backup_restore_test() { checksum_ori[${i}]=$(calculate_checksum "$DB${i}") || { echo "Failed to calculate checksum after insertion"; exit 1; } done - wait_log_checkpoint_advance || { echo "Failed to wait for log checkpoint"; exit 1; } + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME || { echo "Failed to wait for log checkpoint"; exit 1; } #sanity check pause still works run_br log pause --task-name $TASK_NAME --pd $PD_ADDR || { echo "Failed to pause log backup"; exit 1; } diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh index 02d85e1170589..f01ff02c6a68f 100644 --- a/br/tests/br_pitr/run.sh +++ b/br/tests/br_pitr/run.sh @@ -21,6 +21,7 @@ CUR=$(cd `dirname $0`; pwd) # const value PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. 
res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_pitr" # start a new cluster echo "restart a services" @@ -38,7 +39,7 @@ echo "prepare_delete_range_count: $prepare_delete_range_count" # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # run snapshot backup echo "run snapshot backup" @@ -70,39 +71,7 @@ incremental_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from echo "incremental_delete_range_count: $incremental_delete_range_count" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # dump some info from upstream cluster # ... diff --git a/br/tests/br_pitr_failpoint/run.sh b/br/tests/br_pitr_failpoint/run.sh index dc6e9b463367e..5854bdd71f32c 100644 --- a/br/tests/br_pitr_failpoint/run.sh +++ b/br/tests/br_pitr_failpoint/run.sh @@ -17,6 +17,7 @@ set -eu . run_services CUR=$(cd `dirname $0`; pwd) +TASK_NAME="br_pitr_failpoint" # const value PREFIX="pitr_backup_failpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. @@ -42,7 +43,7 @@ sql_pid=$! # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # wait until the index creation is running retry_cnt=0 @@ -121,42 +122,9 @@ check_contains "Column_name: y" check_contains "Column_name: z" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. 
If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>/dev/null) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # start a new cluster -echo "restart a services" restart_services # PITR restore - 1 diff --git a/br/tests/br_pitr_gc_safepoint/run.sh b/br/tests/br_pitr_gc_safepoint/run.sh index 26b3b533c1d69..0edcb1c57d221 100644 --- a/br/tests/br_pitr_gc_safepoint/run.sh +++ b/br/tests/br_pitr_gc_safepoint/run.sh @@ -21,6 +21,7 @@ CUR=$(cd `dirname $0`; pwd) # const value PREFIX="pitr_backup" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_pitr_gc_safepoint" # start a new cluster echo "restart a services" @@ -28,7 +29,7 @@ restart_services # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # prepare the data echo "prepare the data" @@ -41,39 +42,7 @@ prepare_delete_range_count=$(run_sql "select count(*) DELETE_RANGE_CNT from (sel echo "prepare_delete_range_count: $prepare_delete_range_count" # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" run_br --pd $PD_ADDR log pause --task-name integration_test diff --git a/br/tests/br_pitr_table_filter/run.sh b/br/tests/br_pitr_table_filter/run.sh new file mode 100755 index 0000000000000..6b1f87efa6a21 --- /dev/null +++ b/br/tests/br_pitr_table_filter/run.sh @@ -0,0 +1,305 @@ +#!/bin/sh +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux +DB="$TEST_NAME" +CUR=$(cd `dirname $0`; pwd) +TASK_NAME="pitr_table_filter" +. run_services + +# helper methods +create_tables_with_values() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to create + + for i in $(seq 1 $count); do + run_sql "create table $DB.${prefix}_${i}(c int); insert into $DB.${prefix}_${i} values ($i);" + done +} + +verify_tables() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to verify + local should_exist=$3 # true/false - whether tables should exist + + for i in $(seq 1 $count); do + if [ "$should_exist" = "true" ]; then + run_sql "select count(*) = 1 from $DB.${prefix}_${i} where c = $i" || { + echo "Table $DB.${prefix}_${i} doesn't have expected value $i" + exit 1 + } + else + if run_sql "select * from $DB.${prefix}_${i}" 2>/dev/null; then + echo "Table $DB.${prefix}_${i} exists but should not" + exit 1 + fi + fi + done +} + +rename_tables() { + local old_prefix=$1 # original table name prefix + local new_prefix=$2 # new table name prefix + local count=$3 # number of tables to rename + + for i in $(seq 1 $count); do + run_sql "rename table $DB.${old_prefix}_${i} to $DB.${new_prefix}_${i};" + done +} + +test_basic_filter() { + echo "start basic filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup_lower" 3 + create_tables_with_values "LOG_BACKUP_UPPER" 3 + create_tables_with_values "other" 3 + + . 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "case 1 sanity check, zero filter" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + + echo "case 2 with log backup table filter" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + + echo "case 3 with multiple filters" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" -f "$DB.full*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 false + + echo "case 4 with negative filters" + run_sql "drop schema $DB;" + # have to use a match all filter before using negative filters + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "*.*" -f "!$DB.log*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + + echo "basic filter test cases passed" +} + +test_table_rename() { + echo "start table rename with filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + # has been renamed, should not visible anymore + verify_tables "renamed_in" 3 false + # also renamed out of filter range, should not be visible for both + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + echo "table rename with filter passed" +} + +test_with_checkpoint_and_rename() { + echo "start table filter with checkpoint" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return(\"corrupt-last-table-files\")" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore metakv stage + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return(\"only-last-table-files\");github.com/pingcap/tidb/br/pkg/restore/log_client/failed-after-id-maps-saved=return(true)" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore datakv stage + # skip the snapshot restore stage + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/task/corrupt-files=return(\"corrupt-last-table-files\")" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/task/corrupt-files=return(\"only-last-table-files\")" + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" + export GO_FAILPOINTS="" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + 
verify_tables "full_backup" 3 false + verify_tables "renamed_in" 3 false + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + echo "table rename with checkpoint passed" +} + +test_exchange_partition() { + echo "start table filter with checkpoint" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return(\"corrupt-last-table-files\")" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore metakv stage + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return(\"only-last-table-files\");github.com/pingcap/tidb/br/pkg/restore/log_client/failed-after-id-maps-saved=return(true)" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore datakv stage + # skip the snapshot restore stage + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/task/corrupt-files=return(\"corrupt-last-table-files\")" + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failure but success' + exit 1 + fi + + # PITR with checkpoint + export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/task/corrupt-files=return(\"only-last-table-files\")" + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" + export GO_FAILPOINTS="" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + verify_tables "renamed_in" 3 false + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + echo "table rename with checkpoint passed" +} + +# Run all test cases +#test_basic_filter +#test_table_rename +#test_with_checkpoint_and_rename + + +echo "br pitr table filter all tests passed" diff --git a/br/tests/br_restore_checkpoint/run.sh b/br/tests/br_restore_checkpoint/run.sh index 2a4b1104916de..da45692cdcb62 100644 --- a/br/tests/br_restore_checkpoint/run.sh +++ 
b/br/tests/br_restore_checkpoint/run.sh @@ -22,9 +22,9 @@ CUR=$(cd `dirname $0`; pwd) PREFIX="checkpoint" # NOTICE: don't start with 'br' because `restart services` would remove file/directory br*. DB=$TEST_NAME res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +TASK_NAME="br_restore_checkpoint" # start a new cluster -echo "restart a services" restart_services # prepare snapshot data @@ -37,7 +37,7 @@ run_sql "INSERT INTO $DB.tbl2 values (2, 'b');" # start the log backup task echo "start log task" -run_br --pd $PD_ADDR log start --task-name integration_test -s "local://$TEST_DIR/$PREFIX/log" +run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$PREFIX/log" # run snapshot backup echo "run snapshot backup" @@ -53,41 +53,9 @@ run_sql "INSERT INTO $DB.tbl3 values (33, 'cc');" # wait checkpoint advance echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # start a new cluster -echo "restart a services" restart_services # PITR but failed in the snapshot restore stage diff --git a/br/tests/br_test_utils.sh b/br/tests/br_test_utils.sh new file mode 100644 index 0000000000000..9102415a77e14 --- /dev/null +++ b/br/tests/br_test_utils.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +wait_log_checkpoint_advance() { + local task_name=${1:-$TASK_NAME} + echo "wait for log checkpoint to advance for task: $task_name" + sleep 10 + local current_ts=$(python3 -c "import time; print(int(time.time() * 1000) << 18)") + echo "current ts: $current_ts" + i=0 + while true; do + # extract the checkpoint ts of the log backup task. 
If there is some error, the checkpoint ts should be empty + log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak --pd $PD_ADDR log status --task-name $task_name --json 2>br.log) + echo "log backup status: $log_backup_status" + local checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') + echo "checkpoint ts: $checkpoint_ts" + + # check whether the checkpoint ts is a number + if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then + if [ $checkpoint_ts -gt $current_ts ]; then + echo "the checkpoint has advanced" + break + fi + echo "the checkpoint hasn't advanced" + i=$((i+1)) + if [ "$i" -gt 50 ]; then + echo 'the checkpoint lag is too large' + exit 1 + fi + sleep 10 + else + echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" + exit 1 + fi + done +} diff --git a/br/tests/br_tiflash_conflict/run.sh b/br/tests/br_tiflash_conflict/run.sh index f224a1497bf00..5f7e1f7726ae4 100644 --- a/br/tests/br_tiflash_conflict/run.sh +++ b/br/tests/br_tiflash_conflict/run.sh @@ -22,6 +22,7 @@ CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) backup_dir=$TEST_DIR/keep/${TEST_NAME} pitr_dir=${backup_dir}_pitr br_log_file=$TEST_DIR/br.log +TASK_NAME="br_tiflash_conflict" # start a new cluster echo "restart a services" @@ -33,7 +34,7 @@ run_sql_file $CUR/prepare_data/prepare_data.sql #run pitr backup echo "run pitr backup" -run_br log start --task-name integration_test -s "local://$pitr_dir" +run_br log start --task-name $TASK_NAME -s "local://$pitr_dir" # run snapshot backup echo "run snapshot backup" @@ -47,39 +48,7 @@ echo "load the incremental data" run_sql_file $CUR/prepare_data/insert_data.sql # wait checkpoint advance -echo "wait checkpoint advance" -sleep 10 -current_ts=$(echo $(($(date +%s%3N) << 18))) -echo "current ts: $current_ts" -i=0 -while true; do - # extract the checkpoint ts of the log backup task. If there is some error, the checkpoint ts should be empty - log_backup_status=$(unset BR_LOG_TO_TERM && run_br --skip-goleak log status --task-name integration_test --json 2>br.log) - echo "log backup status: $log_backup_status" - checkpoint_ts=$(echo "$log_backup_status" | head -n 1 | jq 'if .[0].last_errors | length == 0 then .[0].checkpoint else empty end') - echo "checkpoint ts: $checkpoint_ts" - - # check whether the checkpoint ts is a number - if [ $checkpoint_ts -gt 0 ] 2>/dev/null; then - # check whether the checkpoint has advanced - if [ $checkpoint_ts -gt $current_ts ]; then - echo "the checkpoint has advanced" - break - fi - # the checkpoint hasn't advanced - echo "the checkpoint hasn't advanced" - i=$((i+1)) - if [ "$i" -gt 50 ]; then - echo 'the checkpoint lag is too large' - exit 1 - fi - sleep 10 - else - # unknown status, maybe somewhere is wrong - echo "TEST: [$TEST_NAME] failed to wait checkpoint advance!" - exit 1 - fi -done +. 
"$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance $TASK_NAME # start a new cluster echo "restart a services" diff --git a/br/tests/utils.go b/br/tests/utils.go index e8653aaaabb43..1884c00a38b00 100644 --- a/br/tests/utils.go +++ b/br/tests/utils.go @@ -77,14 +77,16 @@ func runValidateBackupFiles(cmd *cobra.Command, args []string) { func parseCommand(cmd string) (string, bool) { // Create a temporary cobra command to parse the input tempCmd := &cobra.Command{} - tempCmd.Flags().String("s", "", "Storage path (short)") - tempCmd.Flags().String("storage", "", "Storage path (long)") + tempCmd.Flags().StringP("storage", "s", "", "log backup storage path") + tempCmd.Flags().String("pd", "", "placement driver") + tempCmd.Flags().String("full-backup-storage", "", "full backup storage path") // Split the command string into args args := strings.Fields(cmd) // Parse the args if err := tempCmd.Flags().Parse(args); err != nil { + fmt.Printf("error parsing flags %s \n", err.Error()) return "", false } diff --git a/pkg/table/tables/bench_test.go b/pkg/table/tables/bench_test.go index c389a947778c5..db0b96e831e0d 100644 --- a/pkg/table/tables/bench_test.go +++ b/pkg/table/tables/bench_test.go @@ -197,7 +197,7 @@ func BenchmarkUpdateRecordInPipelinedDML(b *testing.B) { b.StartTimer() for j := 0; j < batchSize; j++ { - // Update record + // UpdateTable record handle := kv.IntHandle(j) err := tb.UpdateRecord(se.GetTableCtx(), txn, handle, records[j], newData[j], touched, table.WithCtx(context.TODO())) if err != nil { diff --git a/tests/_utils/run_services b/tests/_utils/run_services index 8f8a31caf7f96..7829843a9b172 100644 --- a/tests/_utils/run_services +++ b/tests/_utils/run_services @@ -252,7 +252,7 @@ start_tiflash() { i=0 while ! run_curl "https://$TIFLASH_HTTP/tiflash/store-status" 1>/dev/null 2>&1; do i=$((i+1)) - if [ "$i" -gt 20 ]; then + if [ "$i" -gt 1 ]; then echo "failed to start tiflash" return 1 fi