From 79e7cdb48e610ce8e7dc24812cc57637be443109 Mon Sep 17 00:00:00 2001 From: Wenqi Mou Date: Thu, 19 Dec 2024 15:57:58 -0500 Subject: [PATCH] initial Signed-off-by: Wenqi Mou --- br/pkg/backup/client.go | 1 - br/pkg/checkpoint/checkpoint_test.go | 6 +- br/pkg/checkpoint/log_restore.go | 29 +- br/pkg/checkpoint/restore.go | 9 +- br/pkg/errors/errors.go | 1 + br/pkg/restore/import_mode_switcher.go | 2 +- br/pkg/restore/log_client/BUILD.bazel | 4 +- .../log_client/batch_meta_processor.go | 226 +++++++++ br/pkg/restore/log_client/client.go | 401 +++++++--------- br/pkg/restore/log_client/client_test.go | 390 +++++++-------- br/pkg/restore/log_client/export_test.go | 8 +- br/pkg/restore/log_client/import.go | 4 +- br/pkg/restore/log_client/log_file_manager.go | 137 +++--- .../log_client/log_file_manager_test.go | 15 +- br/pkg/restore/snap_client/client.go | 17 +- br/pkg/restore/snap_client/client_test.go | 10 +- .../snap_client/systable_restore_test.go | 2 +- br/pkg/restore/tiflashrec/tiflash_recorder.go | 8 +- br/pkg/stream/BUILD.bazel | 9 +- br/pkg/stream/logging_helper.go | 40 ++ br/pkg/stream/meta_kv_test.go | 12 +- br/pkg/stream/rewrite_meta_rawkv.go | 85 ++-- br/pkg/stream/rewrite_meta_rawkv_test.go | 103 ++-- br/pkg/stream/search.go | 9 +- br/pkg/stream/search_test.go | 9 +- br/pkg/stream/stream_metas.go | 3 +- br/pkg/stream/stream_metas_test.go | 1 + br/pkg/stream/stream_mgr.go | 9 + br/pkg/stream/stream_status.go | 7 +- br/pkg/stream/table_history.go | 69 +++ br/pkg/stream/table_mapping.go | 305 ++++++++---- br/pkg/stream/util.go | 26 - br/pkg/stream/util_test.go | 48 -- br/pkg/task/backup.go | 4 +- br/pkg/task/common.go | 15 +- br/pkg/task/config_test.go | 2 +- br/pkg/task/restore.go | 312 +++++++++--- br/pkg/task/restore_raw.go | 2 +- br/pkg/task/restore_test.go | 2 + br/pkg/task/restore_txn.go | 2 +- br/pkg/task/stream.go | 362 ++++++++------ br/pkg/task/stream_test.go | 4 +- br/pkg/utils/BUILD.bazel | 6 +- br/pkg/utils/consts/BUILD.bazel | 8 + br/pkg/utils/consts/consts.go | 21 + br/pkg/utils/filter.go | 76 +++ br/pkg/utils/key.go | 27 ++ br/pkg/utils/key_test.go | 39 ++ br/pkg/utils/schema.go | 10 +- br/tests/br_pitr_table_filter/run.sh | 448 ++++++++++++++++++ br/tests/run_group_br_tests.sh | 2 +- br/tests/utils.go | 47 +- pkg/table/tables/bench_test.go | 2 +- tests/_utils/run_services | 2 +- 54 files changed, 2312 insertions(+), 1086 deletions(-) create mode 100644 br/pkg/restore/log_client/batch_meta_processor.go create mode 100644 br/pkg/stream/logging_helper.go create mode 100644 br/pkg/stream/table_history.go delete mode 100644 br/pkg/stream/util.go delete mode 100644 br/pkg/stream/util_test.go create mode 100644 br/pkg/utils/consts/BUILD.bazel create mode 100644 br/pkg/utils/consts/consts.go create mode 100644 br/pkg/utils/filter.go create mode 100755 br/tests/br_pitr_table_filter/run.sh diff --git a/br/pkg/backup/client.go b/br/pkg/backup/client.go index 6ad03dd45cebc..3ee5c395ede8e 100644 --- a/br/pkg/backup/client.go +++ b/br/pkg/backup/client.go @@ -827,7 +827,6 @@ func BuildBackupSchemas( if err != nil { return errors.Trace(err) } - for _, dbInfo := range dbs { // skip system databases if !tableFilter.MatchSchema(dbInfo.Name.O) || util.IsMemDB(dbInfo.Name.L) || utils.IsTemplateSysDB(dbInfo.Name) { diff --git a/br/pkg/checkpoint/checkpoint_test.go b/br/pkg/checkpoint/checkpoint_test.go index b70348aaa5fd9..3facea55229f3 100644 --- a/br/pkg/checkpoint/checkpoint_test.go +++ b/br/pkg/checkpoint/checkpoint_test.go @@ -105,12 +105,12 @@ func TestCheckpointMetaForRestore(t *testing.T) { exists := checkpoint.ExistsCheckpointProgress(ctx, dom) require.False(t, exists) err = checkpoint.SaveCheckpointProgress(ctx, se, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }) require.NoError(t, err) progress, err := checkpoint.LoadCheckpointProgress(ctx, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, progress.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, progress.Progress) taskInfo, err := checkpoint.TryToGetCheckpointTaskInfo(ctx, s.Mock.Domain, se.GetSessionCtx().GetRestrictedSQLExecutor()) require.NoError(t, err) @@ -120,7 +120,7 @@ func TestCheckpointMetaForRestore(t *testing.T) { require.Equal(t, uint64(333), taskInfo.Metadata.RewriteTS) require.Equal(t, "1.0", taskInfo.Metadata.GcRatio) require.Equal(t, true, taskInfo.HasSnapshotMetadata) - require.Equal(t, checkpoint.InLogRestoreAndIdMapPersist, taskInfo.Progress) + require.Equal(t, checkpoint.InLogRestoreAndIdMapPersisted, taskInfo.Progress) exists = checkpoint.ExistsCheckpointIngestIndexRepairSQLs(ctx, dom) require.False(t, exists) diff --git a/br/pkg/checkpoint/log_restore.go b/br/pkg/checkpoint/log_restore.go index 0fd046b67ad7c..d4f4f702335e5 100644 --- a/br/pkg/checkpoint/log_restore.go +++ b/br/pkg/checkpoint/log_restore.go @@ -109,13 +109,16 @@ func StartCheckpointLogRestoreRunnerForTest( return runner, nil } -// Notice that the session is owned by the checkpoint runner, and it will be also closed by it. func StartCheckpointRunnerForLogRestore( ctx context.Context, - se glue.Session, + createSessionFn func() (glue.Session, error), ) (*CheckpointRunner[LogRestoreKeyType, LogRestoreValueType], error) { + session, err := createSessionFn() + if err != nil { + return nil, errors.Trace(err) + } runner := newCheckpointRunner[LogRestoreKeyType, LogRestoreValueType]( - newTableCheckpointStorage(se, LogRestoreCheckpointDatabaseName), + newTableCheckpointStorage(session, LogRestoreCheckpointDatabaseName), nil, valueMarshalerForLogRestore) // for restore, no need to set lock @@ -194,14 +197,14 @@ func ExistsLogRestoreCheckpointMetadata( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointMetaTableName)) } -// A progress type for snapshot + log restore. +// RestoreProgress is a progress type for snapshot + log restore. // -// Before the id-maps is persist into external storage, the snapshot restore and -// id-maps constructure can be repeated. So if the progress is in `InSnapshotRestore`, +// Before the id-maps is persisted into external storage, the snapshot restore and +// id-maps building can be retried. So if the progress is in `InSnapshotRestore`, // it can retry from snapshot restore. // -// After the id-maps is persist into external storage, there are some meta-kvs has -// been restored into the cluster, such as `rename ddl`. Where would be a situation: +// After the id-maps is persisted into external storage, there are some meta-kvs has +// been restored into the cluster, such as `rename ddl`. A situation could be: // // the first execution: // @@ -209,7 +212,7 @@ func ExistsLogRestoreCheckpointMetadata( // table A (id 80) --------------> table B (id 80) // ( snapshot restore ) ( log restore ) // -// the second execution if don't skip snasphot restore: +// the second execution if don't skip snapshot restore: // // table A is created again in snapshot restore, because there is no table named A // table A (id 81) --------------> [not in id-maps, so ignored] @@ -221,8 +224,8 @@ type RestoreProgress int const ( InSnapshotRestore RestoreProgress = iota - // Only when the id-maps is persist, status turns into it. - InLogRestoreAndIdMapPersist + // Only when the id-maps is persisted, status turns into it. + InLogRestoreAndIdMapPersisted ) type CheckpointProgress struct { @@ -254,8 +257,8 @@ func ExistsCheckpointProgress( TableExists(pmodel.NewCIStr(LogRestoreCheckpointDatabaseName), pmodel.NewCIStr(checkpointProgressTableName)) } -// CheckpointTaskInfo is unique information within the same cluster id. It represents the last -// restore task executed for this cluster. +// CheckpointTaskInfoForLogRestore is tied to a specific cluster. +// It represents the last restore task executed in this cluster. type CheckpointTaskInfoForLogRestore struct { Metadata *CheckpointMetadataForLogRestore HasSnapshotMetadata bool diff --git a/br/pkg/checkpoint/restore.go b/br/pkg/checkpoint/restore.go index 2e55cc3eb81c2..6af63f834f228 100644 --- a/br/pkg/checkpoint/restore.go +++ b/br/pkg/checkpoint/restore.go @@ -77,14 +77,17 @@ func StartCheckpointRestoreRunnerForTest( return runner, nil } -// Notice that the session is owned by the checkpoint runner, and it will be also closed by it. func StartCheckpointRunnerForRestore( ctx context.Context, - se glue.Session, + createSessionFn func() (glue.Session, error), dbName string, ) (*CheckpointRunner[RestoreKeyType, RestoreValueType], error) { + session, err := createSessionFn() + if err != nil { + return nil, errors.Trace(err) + } runner := newCheckpointRunner[RestoreKeyType, RestoreValueType]( - newTableCheckpointStorage(se, dbName), + newTableCheckpointStorage(session, dbName), nil, valueMarshalerForRestore) // for restore, no need to set lock diff --git a/br/pkg/errors/errors.go b/br/pkg/errors/errors.go index 6a9449eff95d1..cb9cafdbee294 100644 --- a/br/pkg/errors/errors.go +++ b/br/pkg/errors/errors.go @@ -33,6 +33,7 @@ func IsContextCanceled(err error) bool { var ( ErrUnknown = errors.Normalize("internal error", errors.RFCCodeText("BR:Common:ErrUnknown")) ErrInvalidArgument = errors.Normalize("invalid argument", errors.RFCCodeText("BR:Common:ErrInvalidArgument")) + ErrInvalidState = errors.Normalize("invalid state", errors.RFCCodeText("BR:Common:ErrInvalidState")) ErrUndefinedRestoreDbOrTable = errors.Normalize("undefined restore databases or tables", errors.RFCCodeText("BR:Common:ErrUndefinedDbOrTable")) ErrVersionMismatch = errors.Normalize("version mismatch", errors.RFCCodeText("BR:Common:ErrVersionMismatch")) ErrFailedToConnect = errors.Normalize("failed to make gRPC channels", errors.RFCCodeText("BR:Common:ErrFailedToConnect")) diff --git a/br/pkg/restore/import_mode_switcher.go b/br/pkg/restore/import_mode_switcher.go index 0bec6a4d1e384..5f796292c8879 100644 --- a/br/pkg/restore/import_mode_switcher.go +++ b/br/pkg/restore/import_mode_switcher.go @@ -176,7 +176,7 @@ func (switcher *ImportModeSwitcher) GoSwitchToImportMode( return nil } -// RestorePreWork executes some prepare work before restore. +// RestorePreWork switches to import mode and removes pd schedulers if needed // TODO make this function returns a restore post work. func RestorePreWork( ctx context.Context, diff --git a/br/pkg/restore/log_client/BUILD.bazel b/br/pkg/restore/log_client/BUILD.bazel index 7fb781e7ad0ef..bb9da80979226 100644 --- a/br/pkg/restore/log_client/BUILD.bazel +++ b/br/pkg/restore/log_client/BUILD.bazel @@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "log_client", srcs = [ + "batch_file_processor.go", "client.go", "compacted_file_strategy.go", "import.go", @@ -36,6 +37,7 @@ go_library( "//br/pkg/stream", "//br/pkg/summary", "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//br/pkg/version", "//pkg/ddl/util", @@ -103,6 +105,7 @@ go_test( "//br/pkg/storage", "//br/pkg/stream", "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//br/pkg/utiltest", "//pkg/domain", @@ -117,7 +120,6 @@ go_test( "//pkg/util/chunk", "//pkg/util/codec", "//pkg/util/sqlexec", - "//pkg/util/table-filter", "@com_github_docker_go_units//:go-units", "@com_github_pingcap_errors//:errors", "@com_github_pingcap_failpoint//:failpoint", diff --git a/br/pkg/restore/log_client/batch_meta_processor.go b/br/pkg/restore/log_client/batch_meta_processor.go new file mode 100644 index 0000000000000..7253679a605dd --- /dev/null +++ b/br/pkg/restore/log_client/batch_meta_processor.go @@ -0,0 +1,226 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package logclient + +import ( + "context" + "encoding/json" + + "github.com/pingcap/errors" + backuppb "github.com/pingcap/kvproto/pkg/brpb" + "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" + "github.com/pingcap/tidb/pkg/meta" + "github.com/pingcap/tidb/pkg/meta/model" + "go.uber.org/zap" +) + +// BatchMetaKVProcessor defines how to process a batch of files +type BatchMetaKVProcessor interface { + // ProcessBatch processes a batch of files and with a filterTS and return what's not processed for next iteration + ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, + ) ([]*KvEntryWithTS, error) +} + +// RestoreMetaKVProcessor implements BatchMetaKVProcessor for restoring files in batches +type RestoreMetaKVProcessor struct { + client *LogClient + schemasReplace *stream.SchemasReplace + updateStats func(kvCount uint64, size uint64) + progressInc func() +} + +func NewRestoreMetaKVProcessor(client *LogClient, schemasReplace *stream.SchemasReplace, + updateStats func(kvCount uint64, size uint64), + progressInc func()) *RestoreMetaKVProcessor { + return &RestoreMetaKVProcessor{ + client: client, + schemasReplace: schemasReplace, + updateStats: updateStats, + progressInc: progressInc, + } +} + +// RestoreAndRewriteMetaKVFiles tries to restore files about meta kv-event from stream-backup. +func (rp *RestoreMetaKVProcessor) RestoreAndRewriteMetaKVFiles( + ctx context.Context, + files []*backuppb.DataFileInfo, +) error { + // starts gc row collector + rp.client.RunGCRowsLoader(ctx) + + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := SeparateAndSortFilesByCF(files) + + log.Info("start to restore meta files", + zap.Int("total files", len(files)), + zap.Int("default files", len(filesInDefaultCF)), + zap.Int("write files", len(filesInWriteCF))) + + if err := LoadAndProcessMetaKVFilesInBatch( + ctx, + filesInDefaultCF, + filesInWriteCF, + rp, + ); err != nil { + return errors.Trace(err) + } + + // UpdateTable global schema version to trigger a full reload so every TiDB node in the cluster will get synced with + // the latest schema update. + if err := rp.client.UpdateSchemaVersionFullReload(ctx); err != nil { + return errors.Trace(err) + } + return nil +} + +func (rp *RestoreMetaKVProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*KvEntryWithTS, error) { + return rp.client.RestoreBatchMetaKVFiles( + ctx, files, rp.schemasReplace, entries, + filterTS, rp.updateStats, rp.progressInc, cf, + ) +} + +// MetaKVInfoProcessor implements BatchMetaKVProcessor to iterate meta kv and collect information. +// +// 1. It collects table renaming information. The table rename operation will not change the table id, and the process +// will drop the original table and create a new one with the same table id, so in DDL history there will be two events +// that corresponds to the same table id. +// +// 2. It builds the id mapping from upstream to downstream. This logic was nested into table rewrite previously and now +// separated out to its own component. +type MetaKVInfoProcessor struct { + client *LogClient + tableHistoryManager *stream.LogBackupTableHistoryManager + tableMappingManager *stream.TableMappingManager +} + +func NewMetaKVInfoProcessor(client *LogClient) *MetaKVInfoProcessor { + return &MetaKVInfoProcessor{ + client: client, + tableHistoryManager: stream.NewTableHistoryManager(), + tableMappingManager: stream.NewTableMappingManager(), + } +} + +func (mp *MetaKVInfoProcessor) ReadMetaKVFilesAndBuildInfo( + ctx context.Context, + files []*backuppb.DataFileInfo, +) error { + // separate the files by CF and sort each group by TS + filesInDefaultCF, filesInWriteCF := SeparateAndSortFilesByCF(files) + + if err := LoadAndProcessMetaKVFilesInBatch( + ctx, + filesInDefaultCF, + filesInWriteCF, + mp, + ); err != nil { + return errors.Trace(err) + } + return nil +} + +func (mp *MetaKVInfoProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*KvEntryWithTS, error) { + // doesn't need to parse writeCF as it contains value like "p\XXXX\XXX" which is meaningless. + // DefaultCF value should contain everything we want for DDL operation + if cf == consts.WriteCF { + return nil, nil + } + + curSortedEntries, filteredEntries, err := mp.client.filterAndSortKvEntriesFromFiles(ctx, files, entries, filterTS) + if err != nil { + return nil, errors.Trace(err) + } + + // process entries to collect table IDs + for _, entry := range curSortedEntries { + value := entry.E.Value + + if utils.IsMetaDBKey(entry.E.Key) { + rawKey, err := stream.ParseTxnMetaKeyFrom(entry.E.Key) + if err != nil { + return nil, errors.Trace(err) + } + + if meta.IsDBkey(rawKey.Field) { + var dbInfo model.DBInfo + if err := json.Unmarshal(value, &dbInfo); err != nil { + return nil, errors.Trace(err) + } + + // collect db id -> name mapping during log backup, it will contain information about newly created db + mp.tableHistoryManager.RecordDBIdToName(dbInfo.ID, dbInfo.Name.O) + + // update the id map + if err = mp.tableMappingManager.ProcessDBEntryAndUpdateIdMapping(dbInfo); err != nil { + return nil, errors.Trace(err) + } + } else if !meta.IsDBkey(rawKey.Key) { + // also see RewriteMetaKvEntry + continue + } + + // collect table history indexed by table id, same id may have different table names in history + if meta.IsTableKey(rawKey.Field) { + var tableInfo model.TableInfo + if err := json.Unmarshal(value, &tableInfo); err != nil { + return nil, errors.Trace(err) + } + // cannot use dbib in the parsed table info cuz it might not set so default to 0 + dbID, err := meta.ParseDBKey(rawKey.Key) + if err != nil { + return nil, errors.Trace(err) + } + + // add to table rename history + mp.tableHistoryManager.AddTableHistory(tableInfo.ID, tableInfo.Name.String(), dbID) + + // update the id map + if err = mp.tableMappingManager.ProcessTableEntryAndUpdateIdMapping(dbID, tableInfo); err != nil { + return nil, errors.Trace(err) + } + } + } + } + return filteredEntries, nil +} + +func (mp *MetaKVInfoProcessor) GetTableMappingManager() *stream.TableMappingManager { + return mp.tableMappingManager +} + +func (mp *MetaKVInfoProcessor) GetTableHistoryManager() *stream.LogBackupTableHistoryManager { + return mp.tableHistoryManager +} diff --git a/br/pkg/restore/log_client/client.go b/br/pkg/restore/log_client/client.go index 474578a05e8dc..30c1a735fa6d4 100644 --- a/br/pkg/restore/log_client/client.go +++ b/br/pkg/restore/log_client/client.go @@ -55,6 +55,7 @@ import ( "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/br/pkg/version" ddlutil "github.com/pingcap/tidb/pkg/ddl/util" @@ -70,7 +71,6 @@ import ( pd "github.com/tikv/pd/client" pdhttp "github.com/tikv/pd/client/http" "go.uber.org/zap" - "go.uber.org/zap/zapcore" "golang.org/x/sync/errgroup" "google.golang.org/grpc/keepalive" ) @@ -211,8 +211,8 @@ type LogClient struct { useCheckpoint bool } -// NewRestoreClient returns a new RestoreClient. -func NewRestoreClient( +// NewLogClient returns a new RestoreClient. +func NewLogClient( pdClient pd.Client, pdHTTPCli pdhttp.Client, tlsConf *tls.Config, @@ -230,24 +230,27 @@ func NewRestoreClient( // Close a client. func (rc *LogClient) Close(ctx context.Context) { - defer func() { - if rc.logRestoreManager != nil { - rc.logRestoreManager.Close(ctx) - } - if rc.sstRestoreManager != nil { - rc.sstRestoreManager.Close(ctx) - } - }() - - // close the connection, and it must be succeed when in SQL mode. + // close the connection, and it must be succeeded when in SQL mode. if rc.unsafeSession != nil { rc.unsafeSession.Close() } + if rc.LogFileManager != nil { + rc.LogFileManager.Close() + } + if rc.rawKVClient != nil { rc.rawKVClient.Close() } - log.Info("Restore client closed") + + if rc.logRestoreManager != nil { + rc.logRestoreManager.Close(ctx) + } + + if rc.sstRestoreManager != nil { + rc.sstRestoreManager.Close(ctx) + } + log.Info("Log client closed") } func (rc *LogClient) RestoreCompactedSstFiles( @@ -473,7 +476,7 @@ func (rc *LogClient) InitCheckpointMetadataForCompactedSstRestore( return sstCheckpointSets, nil } -func (rc *LogClient) InitCheckpointMetadataForLogRestore( +func (rc *LogClient) LoadOrCreateCheckpointMetadataForLogRestore( ctx context.Context, startTS, restoredTS uint64, gcRatio string, @@ -490,7 +493,8 @@ func (rc *LogClient) InitCheckpointMetadataForLogRestore( return "", errors.Trace(err) } - log.Info("reuse gc ratio from checkpoint metadata", zap.String("gc-ratio", gcRatio)) + log.Info("reuse gc ratio from checkpoint metadata", zap.String("old-gc-ratio", gcRatio), + zap.String("checkpoint-gc-ratio", meta.GcRatio)) return meta.GcRatio, nil } @@ -606,7 +610,7 @@ func ApplyKVFilesWithBatchMethod( } fs.deleteFiles = append(fs.deleteFiles, f) } else { - if f.GetCf() == stream.DefaultCF { + if f.GetCf() == consts.DefaultCF { if fs.defaultFiles == nil { fs.defaultFiles = make([]*LogDataFileInfo, 0, batchCount) } @@ -739,6 +743,17 @@ func (rc *LogClient) RestoreKVFiles( var applyWg sync.WaitGroup eg, ectx := errgroup.WithContext(ctx) + log.Info("################ rewrite rules", zap.Any("rules", rules)) + for _, schema := range rc.dom.InfoSchema().AllSchemas() { + log.Info("############### schema", zap.Any("schema", schema)) + info, _ := rc.dom.InfoSchema().SchemaTableInfos(ctx, schema.Name) + log.Info("################ schema table size", zap.Any("size", len(info))) + if len(info) < 100 { + for _, i := range info { + log.Info("################ tables", zap.Any("tables", i.Name.O), zap.Any("id", i.ID)) + } + } + } applyFunc := func(files []*LogDataFileInfo, kvCount int64, size uint64) { if len(files) == 0 { return @@ -809,7 +824,7 @@ func (rc *LogClient) RestoreKVFiles( return errors.Trace(err) } -func (rc *LogClient) initSchemasMap( +func (rc *LogClient) loadSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { @@ -849,10 +864,11 @@ func (rc *LogClient) initSchemasMap( return backupMeta.GetDbMaps(), nil } -func initFullBackupTables( +func readFilteredFullBackupTables( ctx context.Context, s storage.ExternalStorage, tableFilter filter.Filter, + piTRTableFilter *utils.PiTRTableFilter, cipherInfo *backuppb.CipherInfo, ) (map[int64]*metautil.Table, error) { metaData, err := s.ReadFile(ctx, metautil.MetaFile) @@ -881,24 +897,35 @@ func initFullBackupTables( tables := make(map[int64]*metautil.Table) for _, db := range databases { dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { + if name, ok := utils.StripTempTableNamePrefixIfNeeded(db.Info.Name.O); utils.IsSysDB(name) && ok { dbName = name } - - if !tableFilter.MatchSchema(dbName) { + if !tableFilter.MatchSchema(dbName) && !(piTRTableFilter != nil && piTRTableFilter.ContainsDB(db.Info.ID)) { continue } + tableAdded := false for _, table := range db.Tables { // check this db is empty. if table.Info == nil { tables[db.Info.ID] = table + tableAdded = true continue } - if !tableFilter.MatchTable(dbName, table.Info.Name.O) { + if !tableFilter.MatchTable(dbName, table.Info.Name.O) && + !(piTRTableFilter != nil && piTRTableFilter.ContainsTable(db.Info.ID, table.Info.ID)) { continue } tables[table.Info.ID] = table + tableAdded = true + } + // all tables in this db are filtered out, but we still need to keep this db since it passed the filter check + // and tables might get created later during log backup, if not keeping this db, those tables will be mapped to + // a new db id and thus will become data corruption. + if !tableAdded { + tables[db.Info.ID] = &metautil.Table{ + DB: db.Info, + } } } @@ -910,22 +937,25 @@ type FullBackupStorageConfig struct { Opts *storage.ExternalStorageOptions } -type BuildTableMappingManagerConfig struct { +type GetIDMapConfig struct { // required - CurrentIdMapSaved bool - TableFilter filter.Filter + LoadSavedIDMap bool + TableFilter filter.Filter // original table filter from user // optional FullBackupStorage *FullBackupStorageConfig CipherInfo *backuppb.CipherInfo - Files []*backuppb.DataFileInfo + PiTRTableFilter *utils.PiTRTableFilter // generated table filter that contain all the table id that needs to restore } const UnsafePITRLogRestoreStartBeforeAnyUpstreamUserDDL = "UNSAFE_PITR_LOG_RESTORE_START_BEFORE_ANY_UPSTREAM_USER_DDL" +// generateDBReplacesFromFullBackupStorage reads the full backup schema and creates the mapping from upstream table id +// to downstream table id. The downstream tables have been created in the previous snapshot restore step, so we +// can build the mapping by looking at the table names. The current table information is in domain.InfoSchema. func (rc *LogClient) generateDBReplacesFromFullBackupStorage( ctx context.Context, - cfg *BuildTableMappingManagerConfig, + cfg *GetIDMapConfig, ) (map[stream.UpstreamID]*stream.DBReplace, error) { dbReplaces := make(map[stream.UpstreamID]*stream.DBReplace) if cfg.FullBackupStorage == nil { @@ -940,15 +970,15 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( if err != nil { return nil, errors.Trace(err) } - fullBackupTables, err := initFullBackupTables(ctx, s, cfg.TableFilter, cfg.CipherInfo) + filteredFullBackupTables, err := readFilteredFullBackupTables(ctx, s, cfg.TableFilter, cfg.PiTRTableFilter, cfg.CipherInfo) if err != nil { return nil, errors.Trace(err) } - for _, t := range fullBackupTables { + for _, t := range filteredFullBackupTables { dbName, _ := utils.GetSysDBCIStrName(t.DB.Name) newDBInfo, exist := rc.dom.InfoSchema().SchemaByName(dbName) if !exist { - log.Info("db not existed", zap.String("dbname", dbName.String())) + log.Info("db does not exist", zap.String("dbName", dbName.String())) continue } @@ -964,7 +994,7 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( } newTableInfo, err := restore.GetTableSchema(rc.GetDomain(), dbName, t.Info.Name) if err != nil { - log.Info("table not existed", zap.String("tablename", dbName.String()+"."+t.Info.Name.String())) + log.Info("table doesn't exist", zap.String("tableName", dbName.String()+"."+t.Info.Name.String())) continue } @@ -978,26 +1008,26 @@ func (rc *LogClient) generateDBReplacesFromFullBackupStorage( return dbReplaces, nil } -// BuildTableMappingManager builds the table mapping manager. It reads the full backup storage to get the full backup -// table info to initialize the manager, or it reads the id map from previous task, -// or it loads the saved mapping from last time of run of the same task. -func (rc *LogClient) BuildTableMappingManager( +// GetIDMapFromCheckpointOrFullBackup updates the table mapping manager. It reads the full backup storage to get the full backup +// table info to initialize the manager, or it loads the saved mapping from last time of run. +func (rc *LogClient) GetIDMapFromCheckpointOrFullBackup( ctx context.Context, - cfg *BuildTableMappingManagerConfig, -) (*stream.TableMappingManager, error) { + cfg *GetIDMapConfig, +) (map[stream.UpstreamID]*stream.DBReplace, error) { var ( err error dbMaps []*backuppb.PitrDBMap // the id map doesn't need to construct only when it is not the first execution needConstructIdMap bool - dbReplaces map[stream.UpstreamID]*stream.DBReplace + + dbReplaces map[stream.UpstreamID]*stream.DBReplace ) // this is a retry, id map saved last time, load it from external storage if cfg.CurrentIdMapSaved { log.Info("try to load previously saved pitr id maps") needConstructIdMap = false - dbMaps, err = rc.initSchemasMap(ctx, rc.restoreTS) + dbMaps, err = rc.loadSchemasMap(ctx, rc.restoreTS) if err != nil { return nil, errors.Trace(err) } @@ -1008,7 +1038,7 @@ func (rc *LogClient) BuildTableMappingManager( if len(dbMaps) <= 0 && cfg.FullBackupStorage == nil { log.Info("try to load pitr id maps of the previous task", zap.Uint64("start-ts", rc.startTS)) needConstructIdMap = true - dbMaps, err = rc.initSchemasMap(ctx, rc.startTS) + dbMaps, err = rc.loadSchemasMap(ctx, rc.startTS) if err != nil { return nil, errors.Trace(err) } @@ -1027,7 +1057,7 @@ func (rc *LogClient) BuildTableMappingManager( if len(dbMaps) <= 0 { log.Info("no id maps, build the table replaces from cluster and full backup schemas") needConstructIdMap = true - dbReplaces, err = rc.generateDBReplacesFromFullBackupStorage(ctx, cfg) + dbReplaces, err = rc.generateDBReplacesFromFullBackupStorage(ctx, cfg, cipherInfo) if err != nil { return nil, errors.Trace(err) } @@ -1035,33 +1065,9 @@ func (rc *LogClient) BuildTableMappingManager( dbReplaces = stream.FromDBMapProto(dbMaps) } - for oldDBID, dbReplace := range dbReplaces { - log.Info("base replace info", func() []zapcore.Field { - fields := make([]zapcore.Field, 0, (len(dbReplace.TableMap)+1)*3) - fields = append(fields, - zap.String("dbName", dbReplace.Name), - zap.Int64("oldID", oldDBID), - zap.Int64("newID", dbReplace.DbID)) - for oldTableID, tableReplace := range dbReplace.TableMap { - fields = append(fields, - zap.String("table", tableReplace.Name), - zap.Int64("oldID", oldTableID), - zap.Int64("newID", tableReplace.TableID)) - } - return fields - }()...) - } - - tableMappingManager := stream.NewTableMappingManager(dbReplaces, rc.GenGlobalID) - - // not loaded from previously saved, need to iter meta kv and build and save the map - if needConstructIdMap { - if err = rc.IterMetaKVToBuildAndSaveIdMap(ctx, tableMappingManager, cfg.Files); err != nil { - return nil, errors.Trace(err) - } - } + stream.LogDBReplaceMap("base db replace info", dbReplaces) - return tableMappingManager, nil + return dbReplaces, nil } func SortMetaKVFiles(files []*backuppb.DataFileInfo) []*backuppb.DataFileInfo { @@ -1077,14 +1083,24 @@ func SortMetaKVFiles(files []*backuppb.DataFileInfo) []*backuppb.DataFileInfo { return files } -// RestoreAndRewriteMetaKVFiles tries to restore files about meta kv-event from stream-backup. -func (rc *LogClient) RestoreAndRewriteMetaKVFiles( - ctx context.Context, - files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - updateStats func(kvCount uint64, size uint64), - progressInc func(), -) error { +// validateNoTiFlashReplica makes sure no table contains TiFlash replica +func (rc *LogClient) validateNoTiFlashReplica() error { + existTiFlashTable := false + rc.dom.InfoSchema().ListTablesWithSpecialAttribute(func(tableInfo *model.TableInfo) bool { + if tableInfo.TiFlashReplica != nil && tableInfo.TiFlashReplica.Count > 0 { + existTiFlashTable = true + } + return false + }) + if existTiFlashTable { + return errors.Errorf("exist table(s) have tiflash replica, please remove it before restore") + } + return nil +} + +// SeparateAndSortFilesByCF filters and sorts files by column family. +// It separates files into write CF and default CF groups and then sorts them within each CF group. +func SeparateAndSortFilesByCF(files []*backuppb.DataFileInfo) ([]*backuppb.DataFileInfo, []*backuppb.DataFileInfo) { filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) @@ -1092,70 +1108,15 @@ func (rc *LogClient) RestoreAndRewriteMetaKVFiles( // The error of transactions of meta could happen if restore write CF events successfully, // but failed to restore default CF events. for _, f := range files { - if f.Cf == stream.WriteCF { + if f.Cf == consts.WriteCF { filesInWriteCF = append(filesInWriteCF, f) continue } if f.Type == backuppb.FileType_Delete { - // this should happen abnormally. - // only do some preventive checks here. - log.Warn("detected delete file of meta key, skip it", zap.Any("file", f)) - continue - } - if f.Cf == stream.DefaultCF { - filesInDefaultCF = append(filesInDefaultCF, f) - } - } - filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) - filesInWriteCF = SortMetaKVFiles(filesInWriteCF) - - log.Info("start to restore meta files", - zap.Int("total files", len(files)), - zap.Int("default files", len(filesInDefaultCF)), - zap.Int("write files", len(filesInWriteCF))) - - // run the rewrite and restore meta-kv into TiKV cluster. - if err := RestoreMetaKVFilesWithBatchMethod( - ctx, - filesInDefaultCF, - filesInWriteCF, - schemasReplace, - updateStats, - progressInc, - rc.RestoreBatchMetaKVFiles, - ); err != nil { - return errors.Trace(err) - } - - // Update global schema version and report all of TiDBs. - if err := rc.UpdateSchemaVersion(ctx); err != nil { - return errors.Trace(err) - } - return nil -} - -// IterMetaKVToBuildAndSaveIdMap iterates meta kv and builds id mapping and saves it to storage. -func (rc *LogClient) IterMetaKVToBuildAndSaveIdMap( - ctx context.Context, - tableMappingManager *stream.TableMappingManager, - files []*backuppb.DataFileInfo, -) error { - filesInDefaultCF := make([]*backuppb.DataFileInfo, 0, len(files)) - // need to look at write cf for "short value", which inlines the actual values without redirecting to default cf - filesInWriteCF := make([]*backuppb.DataFileInfo, 0, len(files)) - - for _, f := range files { - if f.Type == backuppb.FileType_Delete { - // it should not happen - // only do some preventive checks here. log.Warn("internal error: detected delete file of meta key, skip it", zap.Any("file", f)) continue } - if f.Cf == stream.WriteCF { - filesInWriteCF = append(filesInWriteCF, f) - continue - } - if f.Cf == stream.DefaultCF { + if f.Cf == consts.DefaultCF { filesInDefaultCF = append(filesInDefaultCF, f) } } @@ -1163,88 +1124,16 @@ func (rc *LogClient) IterMetaKVToBuildAndSaveIdMap( filesInDefaultCF = SortMetaKVFiles(filesInDefaultCF) filesInWriteCF = SortMetaKVFiles(filesInWriteCF) - failpoint.Inject("failed-before-id-maps-saved", func(_ failpoint.Value) { - failpoint.Return(errors.New("failpoint: failed before id maps saved")) - }) - - log.Info("start to iterate meta kv and build id map", - zap.Int("total files", len(files)), - zap.Int("default files", len(filesInDefaultCF)), - zap.Int("write files", len(filesInWriteCF))) - - // build the map and save it into external storage. - if err := rc.buildAndSaveIDMap( - ctx, - filesInDefaultCF, - filesInWriteCF, - tableMappingManager, - ); err != nil { - return errors.Trace(err) - } - failpoint.Inject("failed-after-id-maps-saved", func(_ failpoint.Value) { - failpoint.Return(errors.New("failpoint: failed after id maps saved")) - }) - return nil -} - -// buildAndSaveIDMap build id mapping and save it. -func (rc *LogClient) buildAndSaveIDMap( - ctx context.Context, - fsInDefaultCF []*backuppb.DataFileInfo, - fsInWriteCF []*backuppb.DataFileInfo, - tableMappingManager *stream.TableMappingManager, -) error { - if err := rc.iterAndBuildIDMap(ctx, fsInWriteCF, tableMappingManager); err != nil { - return errors.Trace(err) - } - - if err := rc.iterAndBuildIDMap(ctx, fsInDefaultCF, tableMappingManager); err != nil { - return errors.Trace(err) - } - - if err := rc.saveIDMap(ctx, tableMappingManager); err != nil { - return errors.Trace(err) - } - return nil -} - -func (rc *LogClient) iterAndBuildIDMap( - ctx context.Context, - fs []*backuppb.DataFileInfo, - tableMappingManager *stream.TableMappingManager, -) error { - for _, f := range fs { - entries, _, err := rc.ReadAllEntries(ctx, f, math.MaxUint64) - if err != nil { - return errors.Trace(err) - } - - for _, entry := range entries { - if err := tableMappingManager.ParseMetaKvAndUpdateIdMapping(&entry.E, f.GetCf()); err != nil { - return errors.Trace(err) - } - } - } - return nil + return filesInDefaultCF, filesInWriteCF } -func RestoreMetaKVFilesWithBatchMethod( +// LoadAndProcessMetaKVFilesInBatch restores meta kv files to TiKV in strict TS order. It does so in batch and after +// success it triggers an update so every TiDB node can pick up the restored content. +func LoadAndProcessMetaKVFilesInBatch( ctx context.Context, defaultFiles []*backuppb.DataFileInfo, writeFiles []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - restoreBatch func( - ctx context.Context, - files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, - kvEntries []*KvEntryWithTS, - filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*KvEntryWithTS, error), + processor BatchMetaKVProcessor, ) error { // the average size of each KV is 2560 Bytes // kvEntries is kvs left by the previous batch @@ -1275,7 +1164,7 @@ func RestoreMetaKVFilesWithBatchMethod( } else { // Either f.MinTS > rangeMax or f.MinTs is the filterTs we need. // So it is ok to pass f.MinTs as filterTs. - defaultKvEntries, err = restoreBatch(ctx, defaultFiles[defaultIdx:i], schemasReplace, defaultKvEntries, f.MinTs, updateStats, progressInc, stream.DefaultCF) + defaultKvEntries, err = processor.ProcessBatch(ctx, defaultFiles[defaultIdx:i], defaultKvEntries, f.MinTs, consts.DefaultCF) if err != nil { return errors.Trace(err) } @@ -1292,7 +1181,7 @@ func RestoreMetaKVFilesWithBatchMethod( break } } - writeKvEntries, err = restoreBatch(ctx, writeFiles[writeIdx:toWriteIdx], schemasReplace, writeKvEntries, f.MinTs, updateStats, progressInc, stream.WriteCF) + writeKvEntries, err = processor.ProcessBatch(ctx, writeFiles[writeIdx:toWriteIdx], writeKvEntries, f.MinTs, consts.WriteCF) if err != nil { return errors.Trace(err) } @@ -1304,11 +1193,11 @@ func RestoreMetaKVFilesWithBatchMethod( // restore the left meta kv files and entries // Notice: restoreBatch needs to realize the parameter `files` and `kvEntries` might be empty // Assert: defaultIdx <= len(defaultFiles) && writeIdx <= len(writeFiles) - _, err = restoreBatch(ctx, defaultFiles[defaultIdx:], schemasReplace, defaultKvEntries, math.MaxUint64, updateStats, progressInc, stream.DefaultCF) + _, err = processor.ProcessBatch(ctx, defaultFiles[defaultIdx:], defaultKvEntries, math.MaxUint64, consts.DefaultCF) if err != nil { return errors.Trace(err) } - _, err = restoreBatch(ctx, writeFiles[writeIdx:], schemasReplace, writeKvEntries, math.MaxUint64, updateStats, progressInc, stream.WriteCF) + _, err = processor.ProcessBatch(ctx, writeFiles[writeIdx:], writeKvEntries, math.MaxUint64, consts.WriteCF) if err != nil { return errors.Trace(err) } @@ -1316,6 +1205,9 @@ func RestoreMetaKVFilesWithBatchMethod( return nil } +// RestoreBatchMetaKVFiles tries to restore and rewrite meta kv to TiKV from external storage. It reads out entries +// from the given files and only restores ones that's in filter range, then it returns those entries out of the filter +// range back to caller for next iteration of restore. func (rc *LogClient) RestoreBatchMetaKVFiles( ctx context.Context, files []*backuppb.DataFileInfo, @@ -1326,10 +1218,34 @@ func (rc *LogClient) RestoreBatchMetaKVFiles( progressInc func(), cf string, ) ([]*KvEntryWithTS, error) { - nextKvEntries := make([]*KvEntryWithTS, 0) + curSortedKvEntries, filteredOutKvEntries, err := rc.filterAndSortKvEntriesFromFiles(ctx, files, kvEntries, filterTS) + if err != nil { + return nil, errors.Trace(err) + } + + // restore and rewrite these entries to TiKV with rawPut() method. + kvCount, size, err := rc.restoreAndRewriteMetaKvEntries(ctx, schemasReplace, curSortedKvEntries, cf) + if err != nil { + return nil, errors.Trace(err) + } + + updateStats(kvCount, size) + for i := 0; i < len(files); i++ { + progressInc() + } + return filteredOutKvEntries, nil +} + +func (rc *LogClient) filterAndSortKvEntriesFromFiles( + ctx context.Context, + files []*backuppb.DataFileInfo, + kvEntries []*KvEntryWithTS, + filterTS uint64, +) ([]*KvEntryWithTS, []*KvEntryWithTS, error) { + filteredOutKvEntries := make([]*KvEntryWithTS, 0) curKvEntries := make([]*KvEntryWithTS, 0) if len(files) == 0 && len(kvEntries) == 0 { - return nextKvEntries, nil + return curKvEntries, filteredOutKvEntries, nil } // filter the kv from kvEntries again. @@ -1337,40 +1253,29 @@ func (rc *LogClient) RestoreBatchMetaKVFiles( if kv.Ts < filterTS { curKvEntries = append(curKvEntries, kv) } else { - nextKvEntries = append(nextKvEntries, kv) + filteredOutKvEntries = append(filteredOutKvEntries, kv) } } - // read all of entries from files. + // read all entries from files. for _, f := range files { - es, nextEs, err := rc.ReadAllEntries(ctx, f, filterTS) + es, filteredOutEs, err := rc.ReadFilteredEntriesFromFiles(ctx, f, filterTS) if err != nil { - return nextKvEntries, errors.Trace(err) + return nil, nil, errors.Trace(err) } curKvEntries = append(curKvEntries, es...) - nextKvEntries = append(nextKvEntries, nextEs...) + filteredOutKvEntries = append(filteredOutKvEntries, filteredOutEs...) } // sort these entries. slices.SortFunc(curKvEntries, func(i, j *KvEntryWithTS) int { return cmp.Compare(i.Ts, j.Ts) }) - - // restore these entries with rawPut() method. - kvCount, size, err := rc.restoreMetaKvEntries(ctx, schemasReplace, curKvEntries, cf) - if err != nil { - return nextKvEntries, errors.Trace(err) - } - - updateStats(kvCount, size) - for i := 0; i < len(files); i++ { - progressInc() - } - return nextKvEntries, nil + return curKvEntries, filteredOutKvEntries, nil } -func (rc *LogClient) restoreMetaKvEntries( +func (rc *LogClient) restoreAndRewriteMetaKvEntries( ctx context.Context, sr *stream.SchemasReplace, entries []*KvEntryWithTS, @@ -1384,10 +1289,10 @@ func (rc *LogClient) restoreMetaKvEntries( rc.rawKVClient.SetColumnFamily(columnFamily) for _, entry := range entries { - log.Debug("before rewrte entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), + log.Debug("before rewriting entry", zap.Uint64("key-ts", entry.Ts), zap.Int("key-len", len(entry.E.Key)), zap.Int("value-len", len(entry.E.Value)), zap.ByteString("key", entry.E.Key)) - newEntry, err := sr.RewriteKvEntry(&entry.E, columnFamily) + newEntry, err := sr.RewriteMetaKvEntry(&entry.E, columnFamily) if err != nil { log.Error("rewrite txn entry failed", zap.Int("klen", len(entry.E.Key)), logutil.Key("txn-key", entry.E.Key)) @@ -1457,8 +1362,8 @@ func (rc *LogClient) GenGlobalIDs(ctx context.Context, n int) ([]int64, error) { return ids, err } -// UpdateSchemaVersion updates schema version by transaction way. -func (rc *LogClient) UpdateSchemaVersion(ctx context.Context) error { +// UpdateSchemaVersionFullReload updates schema version to trigger a full reload in transaction way. +func (rc *LogClient) UpdateSchemaVersionFullReload(ctx context.Context) error { storage := rc.GetDomain().Store() var schemaVersion int64 @@ -1813,6 +1718,24 @@ func (rc *LogClient) GetGCRows() []*stream.PreDelRangeQuery { const PITRIdMapBlockSize int = 524288 +func (rc *LogClient) SaveIdMapWithFailPoints( + ctx context.Context, + manager *stream.TableMappingManager, +) error { + failpoint.Inject("failed-before-id-maps-saved", func(_ failpoint.Value) { + failpoint.Return(errors.New("failpoint: failed before id maps saved")) + }) + + if err := rc.saveIDMap(ctx, manager); err != nil { + return errors.Trace(err) + } + + failpoint.Inject("failed-after-id-maps-saved", func(_ failpoint.Value) { + failpoint.Return(errors.New("failpoint: failed after id maps saved")) + }) + return nil +} + // saveIDMap saves the id mapping information. func (rc *LogClient) saveIDMap( ctx context.Context, @@ -1844,7 +1767,7 @@ func (rc *LogClient) saveIDMap( if rc.useCheckpoint { log.Info("save checkpoint task info with InLogRestoreAndIdMapPersist status") if err := checkpoint.SaveCheckpointProgress(ctx, rc.unsafeSession, &checkpoint.CheckpointProgress{ - Progress: checkpoint.InLogRestoreAndIdMapPersist, + Progress: checkpoint.InLogRestoreAndIdMapPersisted, }); err != nil { return errors.Trace(err) } diff --git a/br/pkg/restore/log_client/client_test.go b/br/pkg/restore/log_client/client_test.go index 1b16b25ecfa46..1c395bb5766e7 100644 --- a/br/pkg/restore/log_client/client_test.go +++ b/br/pkg/restore/log_client/client_test.go @@ -35,6 +35,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore/split" "github.com/pingcap/tidb/br/pkg/restore/utils" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/br/pkg/utiltest" "github.com/pingcap/tidb/pkg/domain" @@ -47,7 +48,6 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/sqlexec" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/stretchr/testify/require" "google.golang.org/grpc/keepalive" ) @@ -95,7 +95,7 @@ func TestDeleteRangeQueryExec(t *testing.T) { ctx := context.Background() m := mc g := gluetidb.New() - client := logclient.NewRestoreClient( + client := logclient.NewLogClient( split.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(ctx, g, m.Storage) require.NoError(t, err) @@ -114,7 +114,7 @@ func TestDeleteRangeQuery(t *testing.T) { m := mc g := gluetidb.New() - client := logclient.NewRestoreClient( + client := logclient.NewLogClient( split.NewFakePDClient(nil, false, nil), nil, nil, keepalive.ClientParameters{}) err := client.Init(ctx, g, m.Storage) require.NoError(t, err) @@ -136,19 +136,8 @@ func TestDeleteRangeQuery(t *testing.T) { } } -func MockEmptySchemasReplace() *stream.SchemasReplace { - dbMap := make(map[stream.UpstreamID]*stream.DBReplace) - return stream.NewSchemasReplace( - dbMap, - nil, - 1, - filter.All(), - nil, - ) -} - func TestRestoreBatchMetaKVFiles(t *testing.T) { - client := logclient.NewRestoreClient(nil, nil, nil, keepalive.ClientParameters{}) + client := logclient.NewLogClient(nil, nil, nil, keepalive.ClientParameters{}) files := []*backuppb.DataFileInfo{} // test empty files and entries next, err := client.RestoreBatchMetaKVFiles(context.Background(), files[0:], nil, make([]*logclient.KvEntryWithTS, 0), math.MaxUint64, nil, nil, "") @@ -157,41 +146,35 @@ func TestRestoreBatchMetaKVFiles(t *testing.T) { } func TestRestoreMetaKVFilesWithBatchMethod1(t *testing.T) { - files_default := []*backuppb.DataFileInfo{} - files_write := []*backuppb.DataFileInfo{} + var filesDefault []*backuppb.DataFileInfo + var filesWrite []*backuppb.DataFileInfo batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { require.Equal(t, 0, len(entries)) require.Equal(t, 0, len(files)) batchCount++ return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) } func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { - files_default := []*backuppb.DataFileInfo{} - files_write := []*backuppb.DataFileInfo{ + var filesDefault []*backuppb.DataFileInfo + filesWrite := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -200,89 +183,78 @@ func TestRestoreMetaKVFilesWithBatchMethod2_default_empty(t *testing.T) { } batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) batchCount++ } else { require.Equal(t, 0, len(entries)) require.Equal(t, 1, len(files)) require.Equal(t, uint64(100), files[0].MinTs) - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) } require.Equal(t, uint64(math.MaxUint64), filterTS) return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 1) } func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_1(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, MaxTs: 120, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) batchCount++ } else { require.Equal(t, 0, len(entries)) require.Equal(t, 1, len(files)) require.Equal(t, uint64(100), files[0].MinTs) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) } require.Equal(t, uint64(math.MaxUint64), filterTS) return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 1) } func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -296,31 +268,19 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { Length: logclient.MetaKVBatchSize, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo emptyCount := 0 batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) emptyCount++ if emptyCount == 1 { require.Equal(t, uint64(110), filterTS) @@ -331,7 +291,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { // default - default batchCount++ require.Equal(t, 1, len(files)) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) if batchCount == 1 { require.Equal(t, uint64(100), files[0].MinTs) require.Equal(t, uint64(110), filterTS) @@ -341,6 +301,13 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { } return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) @@ -348,7 +315,7 @@ func TestRestoreMetaKVFilesWithBatchMethod2_write_empty_2(t *testing.T) { } func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { - files_default := []*backuppb.DataFileInfo{ + filesDefault := []*backuppb.DataFileInfo{ { Path: "f1", MinTs: 100, @@ -362,31 +329,19 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { Length: logclient.MetaKVBatchSize, }, } - files_write := []*backuppb.DataFileInfo{} + var filesWrite []*backuppb.DataFileInfo emptyCount := 0 batchCount := 0 - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - files_default, - files_write, - sr, - nil, - nil, - func( - ctx context.Context, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, files []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { + cf string) ([]*logclient.KvEntryWithTS, error) { if len(entries) == 0 && len(files) == 0 { // write - write - require.Equal(t, stream.WriteCF, cf) + require.Equal(t, consts.WriteCF, cf) emptyCount++ if emptyCount == 1 { require.Equal(t, uint64(110), filterTS) @@ -397,7 +352,7 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { // default - default batchCount++ require.Equal(t, 1, len(files)) - require.Equal(t, stream.DefaultCF, cf) + require.Equal(t, consts.DefaultCF, cf) if batchCount == 1 { require.Equal(t, uint64(100), files[0].MinTs) require.Equal(t, uint64(110), filterTS) @@ -407,6 +362,13 @@ func TestRestoreMetaKVFilesWithBatchMethod_with_entries(t *testing.T) { } return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + filesDefault, + filesWrite, + mockProcessor, ) require.Nil(t, err) require.Equal(t, batchCount, 2) @@ -473,31 +435,27 @@ func TestRestoreMetaKVFilesWithBatchMethod3(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) resultKV := make(map[int]int) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ return make([]*logclient.KvEntryWithTS, batchCount), nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) + require.Nil(t, err) require.Equal(t, len(result), 4) require.Equal(t, result[0], defaultFiles[0:3]) @@ -559,29 +517,25 @@ func TestRestoreMetaKVFilesWithBatchMethod4(t *testing.T) { batchCount := 0 result := make(map[int][]*backuppb.DataFileInfo) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files batchCount++ return nil, nil }, + } + + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) + require.Nil(t, err) require.Equal(t, len(result), 4) require.Equal(t, result[0], defaultFiles[0:2]) @@ -639,28 +593,22 @@ func TestRestoreMetaKVFilesWithBatchMethod5(t *testing.T) { batchCount := 0 result := make(map[int][]*backuppb.DataFileInfo) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files batchCount++ return nil, nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) require.Nil(t, err) require.Equal(t, len(result), 4) @@ -736,30 +684,24 @@ func TestRestoreMetaKVFilesWithBatchMethod6(t *testing.T) { result := make(map[int][]*backuppb.DataFileInfo) resultKV := make(map[int]int) - sr := MockEmptySchemasReplace() - err := logclient.RestoreMetaKVFilesWithBatchMethod( - context.Background(), - defaultFiles, - writeFiles, - sr, - nil, - nil, - func( - ctx context.Context, - fs []*backuppb.DataFileInfo, - schemasReplace *stream.SchemasReplace, + mockProcessor := &mockBatchProcessor{ + processFunc: func(ctx context.Context, + files []*backuppb.DataFileInfo, entries []*logclient.KvEntryWithTS, filterTS uint64, - updateStats func(kvCount uint64, size uint64), - progressInc func(), - cf string, - ) ([]*logclient.KvEntryWithTS, error) { - result[batchCount] = fs + cf string) ([]*logclient.KvEntryWithTS, error) { + result[batchCount] = files t.Log(filterTS) resultKV[batchCount] = len(entries) batchCount++ return make([]*logclient.KvEntryWithTS, batchCount), nil }, + } + err := logclient.LoadAndProcessMetaKVFilesInBatch( + context.Background(), + defaultFiles, + writeFiles, + mockProcessor, ) require.Nil(t, err) require.Equal(t, len(result), 6) @@ -839,20 +781,20 @@ func TestApplyKVFilesWithSingelMethod(t *testing.T) { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, }, { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, }, } @@ -894,28 +836,28 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log1", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -923,7 +865,7 @@ func TestApplyKVFilesWithBatchMethod1(t *testing.T) { Path: "log2", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -977,35 +919,35 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1013,7 +955,7 @@ func TestApplyKVFilesWithBatchMethod2(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 1, }, @@ -1068,28 +1010,28 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, RegionId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, RegionId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 800, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1097,7 +1039,7 @@ func TestApplyKVFilesWithBatchMethod3(t *testing.T) { Path: "log6", NumberOfEntries: 5, Length: 200, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, RegionId: 3, }, @@ -1151,35 +1093,35 @@ func TestApplyKVFilesWithBatchMethod4(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, TableId: 2, }, @@ -1229,35 +1171,35 @@ func TestApplyKVFilesWithBatchMethod5(t *testing.T) { Path: "log1", NumberOfEntries: 5, Length: 2000, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Delete, TableId: 1, }, { Path: "log2", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log3", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 2, }, { Path: "log4", NumberOfEntries: 5, Length: 100, - Cf: stream.WriteCF, + Cf: consts.WriteCF, Type: backuppb.FileType_Put, TableId: 1, }, { Path: "log5", NumberOfEntries: 5, Length: 100, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, Type: backuppb.FileType_Put, TableId: 2, }, @@ -1384,16 +1326,16 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: false} - _, err := client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: false} + _, err := client.GetIDMapFromCheckpointOrFullBackup(ctx, cfg) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [2, 1]", err.Error()) } { client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), fakeSession{}) - cfg := &logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: true} - _, err := client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: true} + _, err := client.GetIDMapFromCheckpointOrFullBackup(ctx, cfg) require.Error(t, err) require.Regexp(t, "failed to get pitr id map from mysql.tidb_pitr_id_map.* [1, 1]", err.Error()) } @@ -1406,8 +1348,8 @@ func TestInitSchemasReplaceForDDL(t *testing.T) { se, err := g.CreateSession(s.Mock.Storage) require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 1, domain.NewMockDomain(), se) - cfg := &logclient.BuildTableMappingManagerConfig{CurrentIdMapSaved: true} - _, err = client.BuildTableMappingManager(ctx, cfg) + cfg := &logclient.GetIDMapConfig{LoadSavedIDMap: true} + _, err = client.GetIDMapFromCheckpointOrFullBackup(ctx, cfg) require.Error(t, err) require.Contains(t, err.Error(), "miss upstream table information at `start-ts`(1) but the full backup path is not specified") } @@ -1478,7 +1420,7 @@ func TestPITRIDMap(t *testing.T) { require.NoError(t, err) client := logclient.TEST_NewLogClient(123, 1, 2, 3, nil, se) baseTableMappingManager := &stream.TableMappingManager{ - DbReplaceMap: getDBMap(), + DBReplaceMap: getDBMap(), } err = client.TEST_saveIDMap(ctx, baseTableMappingManager) require.NoError(t, err) @@ -1492,9 +1434,9 @@ func TestPITRIDMap(t *testing.T) { newSchemaReplaces, err = client.TEST_initSchemasMap(ctx, 2) require.NoError(t, err) - require.Equal(t, len(baseTableMappingManager.DbReplaceMap), len(newSchemaReplaces)) + require.Equal(t, len(baseTableMappingManager.DBReplaceMap), len(newSchemaReplaces)) for _, dbMap := range newSchemaReplaces { - baseDbMap := baseTableMappingManager.DbReplaceMap[dbMap.IdMap.UpstreamId] + baseDbMap := baseTableMappingManager.DBReplaceMap[dbMap.IdMap.UpstreamId] require.NotNil(t, baseDbMap) require.Equal(t, baseDbMap.DbID, dbMap.IdMap.DownstreamId) require.Equal(t, baseDbMap.Name, dbMap.Name) @@ -1986,3 +1928,23 @@ func fakeRowKey(tableID, rowID int64) kv.Key { func fakeRowRawKey(tableID, rowID int64) kv.Key { return tablecodec.EncodeRecordKey(tablecodec.GenTableRecordPrefix(tableID), kv.IntHandle(rowID)) } + +type mockBatchProcessor struct { + processFunc func( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*logclient.KvEntryWithTS, + filterTS uint64, + cf string, + ) ([]*logclient.KvEntryWithTS, error) +} + +func (m *mockBatchProcessor) ProcessBatch( + ctx context.Context, + files []*backuppb.DataFileInfo, + entries []*logclient.KvEntryWithTS, + filterTS uint64, + cf string, +) ([]*logclient.KvEntryWithTS, error) { + return m.processFunc(ctx, files, entries, filterTS, cf) +} diff --git a/br/pkg/restore/log_client/export_test.go b/br/pkg/restore/log_client/export_test.go index f78a54bf50c8a..37b0f1e14779f 100644 --- a/br/pkg/restore/log_client/export_test.go +++ b/br/pkg/restore/log_client/export_test.go @@ -65,19 +65,19 @@ func (rc *LogClient) TEST_saveIDMap( ctx context.Context, m *stream.TableMappingManager, ) error { - return rc.saveIDMap(ctx, m) + return rc.SaveIDMap(ctx, m) } func (rc *LogClient) TEST_initSchemasMap( ctx context.Context, restoreTS uint64, ) ([]*backuppb.PitrDBMap, error) { - return rc.initSchemasMap(ctx, restoreTS) + return rc.loadSchemasMap(ctx, restoreTS) } // readStreamMetaByTS is used for streaming task. collect all meta file by TS, it is for test usage. -func (rc *LogFileManager) ReadStreamMeta(ctx context.Context) ([]*MetaName, error) { - metas, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) ReadStreamMeta(ctx context.Context) ([]*MetaName, error) { + metas, err := lm.streamingMeta(ctx) if err != nil { return nil, err } diff --git a/br/pkg/restore/log_client/import.go b/br/pkg/restore/log_client/import.go index a4dbf4ca73839..0b5581c706fa2 100644 --- a/br/pkg/restore/log_client/import.go +++ b/br/pkg/restore/log_client/import.go @@ -35,9 +35,9 @@ import ( importclient "github.com/pingcap/tidb/br/pkg/restore/internal/import_client" "github.com/pingcap/tidb/br/pkg/restore/split" restoreutils "github.com/pingcap/tidb/br/pkg/restore/utils" - "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/kv" pd "github.com/tikv/pd/client" "go.uber.org/multierr" @@ -253,7 +253,7 @@ func (importer *LogFileImporter) downloadAndApplyKVFile( RangeLength: file.RangeLength, IsDelete: file.Type == backuppb.FileType_Delete, StartTs: func() uint64 { - if file.Cf == stream.DefaultCF { + if file.Cf == consts.DefaultCF { return shiftStartTS } return startTS diff --git a/br/pkg/restore/log_client/log_file_manager.go b/br/pkg/restore/log_client/log_file_manager.go index 4c2992467a2ab..ae44b3c4fbb4e 100644 --- a/br/pkg/restore/log_client/log_file_manager.go +++ b/br/pkg/restore/log_client/log_file_manager.go @@ -19,6 +19,8 @@ import ( berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" @@ -84,6 +86,7 @@ type streamMetadataHelper interface { encryptionInfo *encryptionpb.FileEncryptionInfo, ) ([]byte, error) ParseToMetadata(rawMetaData []byte) (*backuppb.Metadata, error) + Close() } // LogFileManager is the manager for log files of a certain restoration, @@ -103,8 +106,8 @@ type LogFileManager struct { storage storage.ExternalStorage helper streamMetadataHelper - withMigraionBuilder *WithMigrationsBuilder - withMigrations *WithMigrations + withMigrationBuilder *WithMigrationsBuilder + withMigrations *WithMigrations metadataDownloadBatchSize uint } @@ -130,12 +133,12 @@ type DDLMetaGroup struct { // Generally the config cannot be changed during its lifetime. func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFileManager, error) { fm := &LogFileManager{ - startTS: init.StartTS, - restoreTS: init.RestoreTS, - storage: init.Storage, - helper: stream.NewMetadataHelper(stream.WithEncryptionManager(init.EncryptionManager)), - withMigraionBuilder: init.MigrationsBuilder, - withMigrations: init.Migrations, + startTS: init.StartTS, + restoreTS: init.RestoreTS, + storage: init.Storage, + helper: stream.NewMetadataHelper(stream.WithEncryptionManager(init.EncryptionManager)), + withMigrationBuilder: init.MigrationsBuilder, + withMigrations: init.Migrations, metadataDownloadBatchSize: init.MetadataDownloadBatchSize, } @@ -146,30 +149,30 @@ func CreateLogFileManager(ctx context.Context, init LogFileManagerInit) (*LogFil return fm, nil } -func (rc *LogFileManager) BuildMigrations(migs []*backuppb.Migration) { - w := rc.withMigraionBuilder.Build(migs) - rc.withMigrations = &w +func (lm *LogFileManager) BuildMigrations(migs []*backuppb.Migration) { + w := lm.withMigrationBuilder.Build(migs) + lm.withMigrations = &w } -func (rc *LogFileManager) ShiftTS() uint64 { - return rc.shiftStartTS +func (lm *LogFileManager) ShiftTS() uint64 { + return lm.shiftStartTS } -func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { +func (lm *LogFileManager) loadShiftTS(ctx context.Context) error { shiftTS := struct { sync.Mutex value uint64 exists bool }{} - err := stream.FastUnmarshalMetaData(ctx, rc.storage, rc.metadataDownloadBatchSize, func(path string, raw []byte) error { - m, err := rc.helper.ParseToMetadata(raw) + err := stream.FastUnmarshalMetaData(ctx, lm.storage, lm.metadataDownloadBatchSize, func(path string, raw []byte) error { + m, err := lm.helper.ParseToMetadata(raw) if err != nil { return err } log.Info("read meta from storage and parse", zap.String("path", path), zap.Uint64("min-ts", m.MinTs), zap.Uint64("max-ts", m.MaxTs), zap.Int32("meta-version", int32(m.MetaVersion))) - ts, ok := stream.UpdateShiftTS(m, rc.startTS, rc.restoreTS) + ts, ok := stream.UpdateShiftTS(m, lm.startTS, lm.restoreTS) shiftTS.Lock() if ok && (!shiftTS.exists || shiftTS.value > ts) { shiftTS.value = ts @@ -183,31 +186,31 @@ func (rc *LogFileManager) loadShiftTS(ctx context.Context) error { return err } if !shiftTS.exists { - rc.shiftStartTS = rc.startTS - rc.withMigraionBuilder.SetShiftStartTS(rc.shiftStartTS) + lm.shiftStartTS = lm.startTS + lm.withMigrationBuilder.SetShiftStartTS(lm.shiftStartTS) return nil } - rc.shiftStartTS = shiftTS.value - rc.withMigraionBuilder.SetShiftStartTS(rc.shiftStartTS) + lm.shiftStartTS = shiftTS.value + lm.withMigrationBuilder.SetShiftStartTS(lm.shiftStartTS) return nil } -func (rc *LogFileManager) streamingMeta(ctx context.Context) (MetaNameIter, error) { - return rc.streamingMetaByTS(ctx, rc.restoreTS) +func (lm *LogFileManager) streamingMeta(ctx context.Context) (MetaNameIter, error) { + return lm.streamingMetaByTS(ctx, lm.restoreTS) } -func (rc *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaNameIter, error) { - it, err := rc.createMetaIterOver(ctx, rc.storage) +func (lm *LogFileManager) streamingMetaByTS(ctx context.Context, restoreTS uint64) (MetaNameIter, error) { + it, err := lm.createMetaIterOver(ctx, lm.storage) if err != nil { return nil, err } filtered := iter.FilterOut(it, func(metaname *MetaName) bool { - return restoreTS < metaname.meta.MinTs || metaname.meta.MaxTs < rc.shiftStartTS + return restoreTS < metaname.meta.MinTs || metaname.meta.MaxTs < lm.shiftStartTS }) return filtered, nil } -func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaNameIter, error) { +func (lm *LogFileManager) createMetaIterOver(ctx context.Context, s storage.ExternalStorage) (MetaNameIter, error) { opt := &storage.WalkOption{SubDir: stream.GetStreamBackupMetaPrefix()} names := []string{} err := s.WalkDir(ctx, opt, func(path string, size int64) error { @@ -226,7 +229,7 @@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte if err != nil { return nil, errors.Annotatef(err, "failed during reading file %s", name) } - meta, err := rc.helper.ParseToMetadata(f) + meta, err := lm.helper.ParseToMetadata(f) if err != nil { return nil, errors.Annotatef(err, "failed to parse metadata of file %s", name) } @@ -235,12 +238,12 @@ func (rc *LogFileManager) createMetaIterOver(ctx context.Context, s storage.Exte // TODO: maybe we need to be able to adjust the concurrency to download files, // which currently is the same as the chunk size reader := iter.Transform(namesIter, readMeta, - iter.WithChunkSize(rc.metadataDownloadBatchSize), iter.WithConcurrency(rc.metadataDownloadBatchSize)) + iter.WithChunkSize(lm.metadataDownloadBatchSize), iter.WithConcurrency(lm.metadataDownloadBatchSize)) return reader, nil } -func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { - ms := rc.withMigrations.Metas(m) +func (lm *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { + ms := lm.withMigrations.Metas(m) return iter.FlatMap(ms, func(m *MetaWithMigrations) LogIter { gs := m.Physicals(iter.Enumerate(iter.FromSlice(m.meta.FileGroups))) return iter.FlatMap(gs, func(gim *PhysicalWithMigrations) LogIter { @@ -251,7 +254,7 @@ func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { if m.meta.MetaVersion > backuppb.MetaVersion_V1 { di.Item.Path = gim.physical.Item.Path } - return di.Item.IsMeta || rc.ShouldFilterOut(di.Item) + return di.Item.IsMeta || lm.ShouldFilterOutByTs(di.Item) }) return iter.Map(fs, func(di FileIndex) *LogDataFileInfo { return &LogDataFileInfo{ @@ -270,14 +273,14 @@ func (rc *LogFileManager) FilterDataFiles(m MetaNameIter) LogIter { }) } -// ShouldFilterOut checks whether a file should be filtered out via the current client. -func (rc *LogFileManager) ShouldFilterOut(d *backuppb.DataFileInfo) bool { - return d.MinTs > rc.restoreTS || - (d.Cf == stream.WriteCF && d.MaxTs < rc.startTS) || - (d.Cf == stream.DefaultCF && d.MaxTs < rc.shiftStartTS) +// ShouldFilterOutByTs checks whether a file should be filtered out via the current client. +func (lm *LogFileManager) ShouldFilterOutByTs(d *backuppb.DataFileInfo) bool { + return d.MinTs > lm.restoreTS || + (d.Cf == consts.WriteCF && d.MaxTs < lm.startTS) || + (d.Cf == consts.DefaultCF && d.MaxTs < lm.shiftStartTS) } -func (rc *LogFileManager) collectDDLFilesAndPrepareCache( +func (lm *LogFileManager) collectDDLFilesAndPrepareCache( ctx context.Context, files MetaGroupIter, ) ([]Log, error) { @@ -291,38 +294,38 @@ func (rc *LogFileManager) collectDDLFilesAndPrepareCache( dataFileInfos := make([]*backuppb.DataFileInfo, 0) for _, g := range fs.Item { - rc.helper.InitCacheEntry(g.Path, len(g.FileMetas)) + lm.helper.InitCacheEntry(g.Path, len(g.FileMetas)) dataFileInfos = append(dataFileInfos, g.FileMetas...) } return dataFileInfos, nil } -// LoadDDLFilesAndCountDMLFiles loads all DDL files needs to be restored in the restoration. +// LoadDDLFiles loads all DDL files needs to be restored in the restoration. // This function returns all DDL files needing directly because we need sort all of them. -func (rc *LogFileManager) LoadDDLFilesAndCountDMLFiles(ctx context.Context) ([]Log, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDDLFiles(ctx context.Context) ([]Log, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } - mg := rc.FilterMetaFiles(m) + mg := lm.FilterMetaFiles(m) - return rc.collectDDLFilesAndPrepareCache(ctx, mg) + return lm.collectDDLFilesAndPrepareCache(ctx, mg) } // LoadDMLFiles loads all DML files needs to be restored in the restoration. // This function returns a stream, because there are usually many DML files need to be restored. -func (rc *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { - m, err := rc.streamingMeta(ctx) +func (lm *LogFileManager) LoadDMLFiles(ctx context.Context) (LogIter, error) { + m, err := lm.streamingMeta(ctx) if err != nil { return nil, err } - l := rc.FilterDataFiles(m) + l := lm.FilterDataFiles(m) return l, nil } -func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { +func (lm *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { return iter.FlatMap(ms, func(m *MetaName) MetaGroupIter { return iter.Map(iter.FromSlice(m.meta.FileGroups), func(g *backuppb.DataFileGroup) DDLMetaGroup { metas := iter.FilterOut(iter.FromSlice(g.DataFilesInfo), func(d Log) bool { @@ -330,7 +333,7 @@ func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { if m.meta.MetaVersion > backuppb.MetaVersion_V1 { d.Path = g.Path } - if rc.ShouldFilterOut(d) { + if lm.ShouldFilterOutByTs(d) { return true } // count the progress @@ -346,12 +349,12 @@ func (rc *LogFileManager) FilterMetaFiles(ms MetaNameIter) MetaGroupIter { }) } -// Fetch compactions that may contain file less than the TS. -func (rc *LogFileManager) GetCompactionIter(ctx context.Context) iter.TryNextor[*backuppb.LogFileSubcompaction] { - return rc.withMigrations.Compactions(ctx, rc.storage) +// GetCompactionIter fetches compactions that may contain file less than the TS. +func (lm *LogFileManager) GetCompactionIter(ctx context.Context) iter.TryNextor[*backuppb.LogFileSubcompaction] { + return lm.withMigrations.Compactions(ctx, lm.storage) } -// the kv entry with ts, the ts is decoded from entry. +// KvEntryWithTS is kv entry with ts, the ts is decoded from entry. type KvEntryWithTS struct { E kv.Entry Ts uint64 @@ -367,17 +370,17 @@ func getKeyTS(key []byte) (uint64, error) { return ts, err } -// ReadAllEntries loads content of a log file, with filtering out no needed entries. -func (rc *LogFileManager) ReadAllEntries( +// ReadFilteredEntriesFromFiles loads content of a log file from external storage, and filter out entries based on TS. +func (lm *LogFileManager) ReadFilteredEntriesFromFiles( ctx context.Context, file Log, filterTS uint64, ) ([]*KvEntryWithTS, []*KvEntryWithTS, error) { kvEntries := make([]*KvEntryWithTS, 0) - nextKvEntries := make([]*KvEntryWithTS, 0) + filteredOutKvEntries := make([]*KvEntryWithTS, 0) - buff, err := rc.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, - rc.storage, file.FileEncryptionInfo) + buff, err := lm.helper.ReadFile(ctx, file.Path, file.RangeOffset, file.RangeLength, file.CompressionType, + lm.storage, file.FileEncryptionInfo) if err != nil { return nil, nil, errors.Trace(err) } @@ -396,7 +399,7 @@ func (rc *LogFileManager) ReadAllEntries( txnEntry := kv.Entry{Key: iter.Key(), Value: iter.Value()} - if !stream.MaybeDBOrDDLJobHistoryKey(txnEntry.Key) { + if !utils.IsDBOrDDLJobHistoryKey(txnEntry.Key) { // only restore mDB and mDDLHistory continue } @@ -408,11 +411,11 @@ func (rc *LogFileManager) ReadAllEntries( // The commitTs in write CF need be limited on [startTs, restoreTs]. // We can restore more key-value in default CF. - if ts > rc.restoreTS { + if ts > lm.restoreTS { continue - } else if file.Cf == stream.WriteCF && ts < rc.startTS { + } else if file.Cf == consts.WriteCF && ts < lm.startTS { continue - } else if file.Cf == stream.DefaultCF && ts < rc.shiftStartTS { + } else if file.Cf == consts.DefaultCF && ts < lm.shiftStartTS { continue } @@ -428,11 +431,17 @@ func (rc *LogFileManager) ReadAllEntries( if ts < filterTS { kvEntries = append(kvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } else { - nextKvEntries = append(nextKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) + filteredOutKvEntries = append(filteredOutKvEntries, &KvEntryWithTS{E: txnEntry, Ts: ts}) } } - return kvEntries, nextKvEntries, nil + return kvEntries, filteredOutKvEntries, nil +} + +func (lm *LogFileManager) Close() { + if lm.helper != nil { + lm.helper.Close() + } } func Subcompactions(ctx context.Context, prefix string, s storage.ExternalStorage) SubCompactionIter { diff --git a/br/pkg/restore/log_client/log_file_manager_test.go b/br/pkg/restore/log_client/log_file_manager_test.go index 79813d6ef78f2..490c26f3ad6dc 100644 --- a/br/pkg/restore/log_client/log_file_manager_test.go +++ b/br/pkg/restore/log_client/log_file_manager_test.go @@ -23,6 +23,7 @@ import ( logclient "github.com/pingcap/tidb/br/pkg/restore/log_client" "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/stream" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/util/codec" @@ -55,7 +56,7 @@ func wr(start, end uint64, minBegin uint64) *backuppb.DataFileInfo { MinTs: start, MaxTs: end, MinBeginTsInDefaultCf: minBegin, - Cf: stream.WriteCF, + Cf: consts.WriteCF, } } @@ -66,7 +67,7 @@ func dr(start, end uint64) *backuppb.DataFileInfo { Path: fmt.Sprintf("write-%06d", id), MinTs: start, MaxTs: end, - Cf: stream.DefaultCF, + Cf: consts.DefaultCF, } } @@ -491,7 +492,7 @@ func testFileManagerWithMeta(t *testing.T, m metaMaker) { ), ).Item } else { - data, err := fm.LoadDDLFilesAndCountDMLFiles(ctx) + data, err := fm.LoadDDLFiles(ctx) req.NoError(err) r = data } @@ -618,8 +619,8 @@ func TestReadAllEntries(t *testing.T) { data, file := generateKvData() fm := logclient.TEST_NewLogFileManager(35, 75, 25, &logclient.FakeStreamMetadataHelper{Data: data}) { - file.Cf = stream.WriteCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + file.Cf = consts.WriteCF + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 37), @@ -631,8 +632,8 @@ func TestReadAllEntries(t *testing.T) { }, nextKvEntries) } { - file.Cf = stream.DefaultCF - kvEntries, nextKvEntries, err := fm.ReadAllEntries(ctx, file, 50) + file.Cf = consts.DefaultCF + kvEntries, nextKvEntries, err := fm.ReadFilteredEntriesFromFiles(ctx, file, 50) require.NoError(t, err) require.Equal(t, []*logclient.KvEntryWithTS{ encodekvEntryWithTS("mDDL", 27), diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go index ae878b0e9e0ca..2e16f39e3312b 100644 --- a/br/pkg/restore/snap_client/client.go +++ b/br/pkg/restore/snap_client/client.go @@ -422,8 +422,8 @@ func makeDBPool(size uint, dbFactory func() (*tidallocdb.DB, error)) ([]*tidallo return dbPool, nil } -// Init create db connection and domain for storage. -func (rc *SnapClient) Init(g glue.Glue, store kv.Storage) error { +// InitConnections create db connection and domain for storage. +func (rc *SnapClient) InitConnections(g glue.Glue, store kv.Storage) error { // setDB must happen after set PolicyMode. // we will use policyMode to set session variables. var err error @@ -565,7 +565,7 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage return nil } -func (rc *SnapClient) needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { +func needLoadSchemas(backupMeta *backuppb.BackupMeta) bool { return !(backupMeta.IsRawKv || backupMeta.IsTxnKv) } @@ -579,7 +579,7 @@ func (rc *SnapClient) LoadSchemaIfNeededAndInitClient( RawStartKey []byte, RawEndKey []byte, ) error { - if rc.needLoadSchemas(backupMeta) { + if needLoadSchemas(backupMeta) { databases, err := metautil.LoadBackupTables(c, reader, loadStats) if err != nil { return errors.Trace(err) @@ -685,6 +685,15 @@ func (rc *SnapClient) GetDatabases() []*metautil.Database { return dbs } +// GetDatabaseMap returns all databases in a map indexed by db id +func (rc *SnapClient) GetDatabaseMap() map[int64]*metautil.Database { + dbMap := make(map[int64]*metautil.Database) + for _, db := range rc.databases { + dbMap[db.Info.ID] = db + } + return dbMap +} + // HasBackedUpSysDB whether we have backed up system tables // br backs system tables up since 5.1.0 func (rc *SnapClient) HasBackedUpSysDB() bool { diff --git a/br/pkg/restore/snap_client/client_test.go b/br/pkg/restore/snap_client/client_test.go index 4b96877949e23..ded9e6ea24ed6 100644 --- a/br/pkg/restore/snap_client/client_test.go +++ b/br/pkg/restore/snap_client/client_test.go @@ -49,7 +49,7 @@ func TestCreateTables(t *testing.T) { m := mc g := gluetidb.New() client := snapclient.NewRestoreClient(m.PDClient, m.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, m.Storage) + err := client.InitConnections(g, m.Storage) require.NoError(t, err) info, err := m.Domain.GetSnapshotInfoSchema(math.MaxUint64) @@ -120,7 +120,7 @@ func TestNeedCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // not set filter and first run with checkpoint @@ -150,7 +150,7 @@ func TestCheckTargetClusterFresh(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -167,7 +167,7 @@ func TestCheckTargetClusterFreshWithTable(t *testing.T) { g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) ctx := context.Background() @@ -202,7 +202,7 @@ func TestInitFullClusterRestore(t *testing.T) { cluster := mc g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) // explicit filter diff --git a/br/pkg/restore/snap_client/systable_restore_test.go b/br/pkg/restore/snap_client/systable_restore_test.go index 6142a270745db..3aa9fb6f87801 100644 --- a/br/pkg/restore/snap_client/systable_restore_test.go +++ b/br/pkg/restore/snap_client/systable_restore_test.go @@ -36,7 +36,7 @@ func TestCheckSysTableCompatibility(t *testing.T) { cluster := mc g := gluetidb.New() client := snapclient.NewRestoreClient(cluster.PDClient, cluster.PDHTTPCli, nil, split.DefaultTestKeepaliveCfg) - err := client.Init(g, cluster.Storage) + err := client.InitConnections(g, cluster.Storage) require.NoError(t, err) info, err := cluster.Domain.GetSnapshotInfoSchema(math.MaxUint64) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder.go b/br/pkg/restore/tiflashrec/tiflash_recorder.go index c87f0372f86a6..8cdcfa10182f5 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder.go @@ -130,15 +130,15 @@ func (r *TiFlashRecorder) GenerateResetAlterTableDDLs(info infoschema.InfoSchema func (r *TiFlashRecorder) GenerateAlterTableDDLs(info infoschema.InfoSchema) []string { items := make([]string, 0, len(r.items)) - r.Iterate(func(id int64, replica model.TiFlashReplicaInfo) { - table, ok := info.TableByID(context.Background(), id) + r.Iterate(func(tableId int64, replica model.TiFlashReplicaInfo) { + table, ok := info.TableByID(context.Background(), tableId) if !ok { - log.Warn("Table do not exist, skipping", zap.Int64("id", id)) + log.Warn("Table does not exist, might get filtered out if a custom filter is specified, skipping", zap.Int64("tableId", tableId)) return } schema, ok := infoschema.SchemaByTable(info, table.Meta()) if !ok { - log.Warn("Schema do not exist, skipping", zap.Int64("id", id), zap.Stringer("table", table.Meta().Name)) + log.Warn("Schema do not exist, skipping", zap.Int64("tableId", tableId), zap.Stringer("table", table.Meta().Name)) return } altTableSpec, err := alterTableSpecOf(replica, false) diff --git a/br/pkg/stream/BUILD.bazel b/br/pkg/stream/BUILD.bazel index 252f789c78b75..e9c52b98d0435 100644 --- a/br/pkg/stream/BUILD.bazel +++ b/br/pkg/stream/BUILD.bazel @@ -10,8 +10,8 @@ go_library( "stream_metas.go", "stream_mgr.go", "stream_status.go", + "table_history.go", "table_mapping.go", - "util.go", ], importpath = "github.com/pingcap/tidb/br/pkg/stream", visibility = ["//visibility:public"], @@ -25,6 +25,8 @@ go_library( "//br/pkg/restore/tiflashrec", "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//br/pkg/utils/iter", "//pkg/ddl", "//pkg/kv", @@ -63,7 +65,6 @@ go_test( "stream_metas_test.go", "stream_misc_test.go", "table_mapping_test.go", - "util_test.go", ], embed = [":stream"], flaky = True, @@ -71,7 +72,10 @@ go_test( deps = [ "//br/pkg/storage", "//br/pkg/streamhelper", + "//br/pkg/utils", + "//br/pkg/utils/consts", "//pkg/ddl", + "//pkg/kv", "//pkg/meta", "//pkg/meta/model", "//pkg/parser/ast", @@ -88,7 +92,6 @@ go_test( "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", - "@com_github_tikv_client_go_v2//oracle", "@org_golang_x_exp//maps", "@org_uber_go_multierr//:multierr", "@org_uber_go_zap//:zap", diff --git a/br/pkg/stream/logging_helper.go b/br/pkg/stream/logging_helper.go new file mode 100644 index 0000000000000..b67571aa77a47 --- /dev/null +++ b/br/pkg/stream/logging_helper.go @@ -0,0 +1,40 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stream + +import ( + "github.com/pingcap/log" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func LogDBReplaceMap(title string, dbReplaces map[UpstreamID]*DBReplace) { + for upstreamDbId, dbReplace := range dbReplaces { + log.Info(title, func() []zapcore.Field { + fields := make([]zapcore.Field, 0, (len(dbReplace.TableMap)+1)*3) + fields = append(fields, + zap.String("dbName", dbReplace.Name), + zap.Int64("upstreamId", upstreamDbId), + zap.Int64("downstreamId", dbReplace.DbID)) + for upstreamTableID, tableReplace := range dbReplace.TableMap { + fields = append(fields, + zap.String("table", tableReplace.Name), + zap.Int64("upstreamId", upstreamTableID), + zap.Int64("downstreamId", tableReplace.TableID)) + } + return fields + }()...) + } +} diff --git a/br/pkg/stream/meta_kv_test.go b/br/pkg/stream/meta_kv_test.go index 0ac5b54763022..9f64d69b3f5f6 100644 --- a/br/pkg/stream/meta_kv_test.go +++ b/br/pkg/stream/meta_kv_test.go @@ -6,18 +6,12 @@ import ( "bytes" "testing" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/meta" - "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) -func encodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { - k := tablecodec.EncodeMetaKey(key, field) - txnKey := codec.EncodeBytes(nil, k) - return codec.EncodeUintDesc(txnKey, ts) -} - func TestRawMetaKeyForDB(t *testing.T) { var ( dbID int64 = 1 @@ -25,7 +19,7 @@ func TestRawMetaKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - txnKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + txnKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) rawMetaKey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) @@ -44,7 +38,7 @@ func TestRawMetaKeyForTable(t *testing.T) { tableID int64 = 57 ts uint64 = 400036290571534337 ) - txnKey := encodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) + txnKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), meta.TableKey(tableID), ts) rawMetakey, err := ParseTxnMetaKeyFrom(txnKey) require.NoError(t, err) diff --git a/br/pkg/stream/rewrite_meta_rawkv.go b/br/pkg/stream/rewrite_meta_rawkv.go index 94e05221b2424..50f7692b5605c 100644 --- a/br/pkg/stream/rewrite_meta_rawkv.go +++ b/br/pkg/stream/rewrite_meta_rawkv.go @@ -21,27 +21,22 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" - berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/restore/ingestrec" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "go.uber.org/zap" ) -// Default columnFamily and write columnFamily -const ( - DefaultCF = "default" - WriteCF = "write" -) - +type RewriteStatus int type UpstreamID = int64 type DownstreamID = int64 -// TableReplace specifies table information mapping from up-stream cluster to up-stream cluster. +// TableReplace specifies table information mapping from up-stream cluster to down-stream cluster. type TableReplace struct { Name string TableID DownstreamID @@ -49,24 +44,23 @@ type TableReplace struct { IndexMap map[UpstreamID]DownstreamID } -// DBReplace specifies database information mapping from up-stream cluster to up-stream cluster. +// DBReplace specifies database information mapping from up-stream cluster to down-stream cluster. type DBReplace struct { Name string DbID DownstreamID TableMap map[UpstreamID]*TableReplace } -// SchemasReplace specifies schemas information mapping from up-stream cluster to up-stream cluster. +// SchemasReplace specifies schemas information mapping from up-stream cluster to down-stream cluster. type SchemasReplace struct { - DbMap map[UpstreamID]*DBReplace + DbReplaceMap map[UpstreamID]*DBReplace delRangeRecorder *brDelRangeExecWrapper ingestRecorder *ingestrec.IngestRecorder TiflashRecorder *tiflashrec.TiFlashRecorder - RewriteTS uint64 // used to rewrite commit ts in meta kv. - TableFilter filter.Filter // used to filter schema/table + RewriteTS uint64 // used to rewrite commit ts in meta kv. - AfterTableRewritten func(deleted bool, tableInfo *model.TableInfo) + AfterTableRewrittenFn func(deleted bool, tableInfo *model.TableInfo) } // NewTableReplace creates a TableReplace struct. @@ -90,14 +84,13 @@ func NewDBReplace(name string, newID DownstreamID) *DBReplace { // NewSchemasReplace creates a SchemasReplace struct. func NewSchemasReplace( - dbMap map[UpstreamID]*DBReplace, + dbReplaceMap map[UpstreamID]*DBReplace, tiflashRecorder *tiflashrec.TiFlashRecorder, restoreTS uint64, - tableFilter filter.Filter, recordDeleteRange func(*PreDelRangeQuery), ) *SchemasReplace { globalTableIdMap := make(map[UpstreamID]DownstreamID) - for _, dr := range dbMap { + for _, dr := range dbReplaceMap { for tblID, tr := range dr.TableMap { globalTableIdMap[tblID] = tr.TableID for oldpID, newpID := range tr.PartitionMap { @@ -107,12 +100,11 @@ func NewSchemasReplace( } return &SchemasReplace{ - DbMap: dbMap, + DbReplaceMap: dbReplaceMap, delRangeRecorder: newDelRangeExecWrapper(globalTableIdMap, recordDeleteRange), ingestRecorder: ingestrec.New(), TiflashRecorder: tiflashRecorder, RewriteTS: restoreTS, - TableFilter: tableFilter, } } @@ -127,14 +119,14 @@ func (sr *SchemasReplace) rewriteKeyForDB(key []byte, cf string) ([]byte, error) return nil, errors.Trace(err) } - dbMap, exist := sr.DbMap[dbID] + dbMap, exist := sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil } rawMetaKey.UpdateField(meta.DBkey(dbMap.DbID)) - if cf == WriteCF { + if cf == consts.WriteCF { rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -146,7 +138,7 @@ func (sr *SchemasReplace) rewriteDBInfo(value []byte) ([]byte, error) { return nil, errors.Trace(err) } - dbMap, exist := sr.DbMap[dbInfo.ID] + dbMap, exist := sr.DbReplaceMap[dbInfo.ID] if !exist { // db filtered out return nil, nil @@ -206,7 +198,7 @@ func (sr *SchemasReplace) rewriteKeyForTable( return nil, errors.Trace(err) } - dbReplace, exist := sr.DbMap[dbID] + dbReplace, exist := sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil @@ -217,10 +209,9 @@ func (sr *SchemasReplace) rewriteKeyForTable( // table filtered out return nil, nil } - rawMetaKey.UpdateKey(meta.DBkey(dbReplace.DbID)) rawMetaKey.UpdateField(encodeField(tableReplace.TableID)) - if cf == WriteCF { + if cf == consts.WriteCF { rawMetaKey.UpdateTS(sr.RewriteTS) } return rawMetaKey.EncodeMetaKey(), nil @@ -239,7 +230,7 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er } // construct or find the id map. - dbReplace, exist = sr.DbMap[dbID] + dbReplace, exist = sr.DbReplaceMap[dbID] if !exist { // db filtered out return nil, nil @@ -269,8 +260,8 @@ func (sr *SchemasReplace) rewriteTableInfo(value []byte, dbID int64) ([]byte, er if tableInfo.TTLInfo != nil { tableInfo.TTLInfo.Enable = false } - if sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(false, &tableInfo) + if sr.AfterTableRewrittenFn != nil { + sr.AfterTableRewrittenFn(false, &tableInfo) } // marshal to json @@ -311,8 +302,8 @@ func (sr *SchemasReplace) rewriteEntryForTable(e *kv.Entry, cf string) (*kv.Entr // for now, we rewrite key and value separately hence we cannot // get a view of (is_delete, table_id, table_info) at the same time :(. // Maybe we can extract the rewrite part from rewriteTableInfo. - if result.Deleted && sr.AfterTableRewritten != nil { - sr.AfterTableRewritten(true, &model.TableInfo{ID: newTableID}) + if result.Deleted && sr.AfterTableRewrittenFn != nil { + sr.AfterTableRewrittenFn(true, &model.TableInfo{ID: newTableID}) } return &kv.Entry{Key: newKey, Value: result.NewValue}, nil @@ -380,10 +371,10 @@ type rewriteResult struct { } // rewriteValue rewrite the value if cf is "default", or rewrite the shortValue if cf is "write". -func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]byte) ([]byte, error)) (rewriteResult, error) { +func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewriteFunc func([]byte) ([]byte, error)) (rewriteResult, error) { switch cf { - case DefaultCF: - newValue, err := rewrite(value) + case consts.DefaultCF: + newValue, err := rewriteFunc(value) if err != nil { return rewriteResult{}, errors.Trace(err) } @@ -391,7 +382,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b NewValue: newValue, Deleted: false, }, nil - case WriteCF: + case consts.WriteCF: rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(value); err != nil { return rewriteResult{}, errors.Trace(err) @@ -415,7 +406,7 @@ func (sr *SchemasReplace) rewriteValue(value []byte, cf string, rewrite func([]b }, nil } - shortValue, err := rewrite(rawWriteCFValue.GetShortValue()) + shortValue, err := rewriteFunc(rawWriteCFValue.GetShortValue()) if err != nil { log.Info("failed to rewrite short value", zap.ByteString("write-type", []byte{rawWriteCFValue.GetWriteType()}), @@ -434,11 +425,12 @@ func (sr *SchemasReplace) GetIngestRecorder() *ingestrec.IngestRecorder { return sr.ingestRecorder } -// RewriteKvEntry uses to rewrite tableID/dbID in entry.key and entry.value -func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { - // skip mDDLJob - if !IsMetaDBKey(e.Key) { - if cf == DefaultCF && IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory +// RewriteMetaKvEntry uses to rewrite tableID/dbID in entry.key and entry.value +func (sr *SchemasReplace) RewriteMetaKvEntry(e *kv.Entry, cf string) (*kv.Entry, error) { + if !utils.IsMetaDBKey(e.Key) { + // need to special handle ddl job history during actual restore phase. The job history contains index ingestion + // and range deletion that need to be handled separately after restore. + if cf == consts.DefaultCF && utils.IsMetaDDLJobHistoryKey(e.Key) { // mDDLJobHistory job := &model.Job{} if err := job.Decode(e.Value); err != nil { log.Debug("failed to decode the job", @@ -448,7 +440,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err return nil, nil } - return nil, sr.restoreFromHistory(job) + return nil, sr.processIngestIndexAndDeleteRangeFromJob(job) } return nil, nil } @@ -463,6 +455,7 @@ func (sr *SchemasReplace) RewriteKvEntry(e *kv.Entry, cf string) (*kv.Entry, err } else if !meta.IsDBkey(rawKey.Key) { return nil, nil } + if meta.IsTableKey(rawKey.Field) { return sr.rewriteEntryForTable(e, cf) } else if meta.IsAutoIncrementIDKey(rawKey.Field) { @@ -492,7 +485,10 @@ func (sr *SchemasReplace) tryRecordIngestIndex(job *model.Job) error { return nil } -func (sr *SchemasReplace) restoreFromHistory(job *model.Job) error { +// processIngestIndexAndDeleteRangeFromJob handles two special cases during log backup meta key replay. +// 1. index ingestion is not captured by the log backup, thus we need to restore them manually later +// 2. delete range also needs to be handled to clean up dropped table since it was previously relying on GC to clean it up +func (sr *SchemasReplace) processIngestIndexAndDeleteRangeFromJob(job *model.Job) error { if ddl.JobNeedGC(job) { if err := ddl.AddDelRangeJobInternal(context.TODO(), sr.delRangeRecorder, job); err != nil { return err @@ -549,7 +545,8 @@ func (bdr *brDelRangeExecWrapper) PrepareParamsList(sz int) { func (bdr *brDelRangeExecWrapper) RewriteTableID(tableID int64) (int64, bool) { newTableID, exists := bdr.globalTableIdMap[tableID] if !exists { - log.Warn("failed to find the downstream id when rewrite delete range", zap.Int64("old tableID", tableID)) + log.Warn("failed to find the downstream id when rewrite delete range, "+ + "it might due to table has been filtered out if filters have been specified", zap.Int64("old tableID", tableID)) } return newTableID, exists } diff --git a/br/pkg/stream/rewrite_meta_rawkv_test.go b/br/pkg/stream/rewrite_meta_rawkv_test.go index 81d21e5e5b5a7..be49b313378df 100644 --- a/br/pkg/stream/rewrite_meta_rawkv_test.go +++ b/br/pkg/stream/rewrite_meta_rawkv_test.go @@ -7,6 +7,8 @@ import ( "encoding/json" "testing" + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/meta" "github.com/pingcap/tidb/pkg/meta/model" @@ -15,7 +17,6 @@ import ( "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" - filter "github.com/pingcap/tidb/pkg/util/table-filter" "github.com/stretchr/testify/require" ) @@ -30,7 +31,6 @@ func MockEmptySchemasReplace(midr *mockInsertDeleteRange, dbMap map[UpstreamID]* dbMap, nil, 9527, - filter.All(), midr.mockRecordDeleteRange, ) } @@ -60,7 +60,7 @@ func TestRewriteKeyForDB(t *testing.T) { mDbs = []byte("DBs") ) - encodedKey := encodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) + encodedKey := utils.EncodeTxnMetaKey(mDbs, meta.DBkey(dbID), ts) dbMap := make(map[UpstreamID]*DBReplace) downstreamID := dbID + 100 @@ -70,7 +70,7 @@ func TestRewriteKeyForDB(t *testing.T) { sr := MockEmptySchemasReplace(nil, dbMap) // set restoreKV status and rewrite it. - newKey, err := sr.rewriteKeyForDB(encodedKey, DefaultCF) + newKey, err := sr.rewriteKeyForDB(encodedKey, consts.DefaultCF) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -80,7 +80,7 @@ func TestRewriteKeyForDB(t *testing.T) { require.Equal(t, newDBID, downstreamID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForDB(encodedKey, WriteCF) + newKey, err = sr.rewriteKeyForDB(encodedKey, consts.WriteCF) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -111,16 +111,16 @@ func TestRewriteDBInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) // rewrite again, and get the same result. - newId := sr.DbMap[dbID].DbID + newId := sr.DbReplaceMap[dbID].DbID newValue, err = sr.rewriteDBInfo(value) require.Nil(t, err) err = json.Unmarshal(newValue, &DBInfo) require.Nil(t, err) - require.Equal(t, DBInfo.ID, sr.DbMap[dbID].DbID) - require.Equal(t, newId, sr.DbMap[dbID].DbID) + require.Equal(t, DBInfo.ID, sr.DbReplaceMap[dbID].DbID) + require.Equal(t, newId, sr.DbReplaceMap[dbID].DbID) } func TestRewriteKeyForTable(t *testing.T) { @@ -158,7 +158,7 @@ func TestRewriteKeyForTable(t *testing.T) { } for _, ca := range cases { - encodedKey := encodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) + encodedKey := utils.EncodeTxnMetaKey(meta.DBkey(dbID), ca.encodeTableFn(tableID), ts) dbMap := make(map[UpstreamID]*DBReplace) downStreamDbID := dbID + 100 @@ -170,7 +170,7 @@ func TestRewriteKeyForTable(t *testing.T) { sr := MockEmptySchemasReplace(nil, dbMap) // set restoreKV status and rewrite it. - newKey, err := sr.rewriteKeyForTable(encodedKey, DefaultCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err := sr.rewriteKeyForTable(encodedKey, consts.DefaultCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err := ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -184,7 +184,7 @@ func TestRewriteKeyForTable(t *testing.T) { require.Equal(t, newTblID, downStreamTblID) // rewrite it again, and get the same result. - newKey, err = sr.rewriteKeyForTable(encodedKey, WriteCF, ca.decodeTableFn, ca.encodeTableFn) + newKey, err = sr.rewriteKeyForTable(encodedKey, consts.WriteCF, ca.decodeTableFn, ca.encodeTableFn) require.Nil(t, err) decodedKey, err = ParseTxnMetaKeyFrom(newKey) require.Nil(t, err) @@ -218,7 +218,7 @@ func TestRewriteTableInfo(t *testing.T) { // create schemasReplace. sr := MockEmptySchemasReplace(nil, dbMap) tableCount := 0 - sr.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { + sr.AfterTableRewrittenFn = func(deleted bool, tableInfo *model.TableInfo) { tableCount++ tableInfo.TiFlashReplica = &model.TiFlashReplicaInfo{ Count: 1, @@ -230,17 +230,17 @@ func TestRewriteTableInfo(t *testing.T) { require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableInfo.TiFlashReplica.Count, 1) // rewrite it again and get the same result. - newID := sr.DbMap[dbId].TableMap[tableID].TableID + newID := sr.DbReplaceMap[dbId].TableMap[tableID].TableID newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) - require.Equal(t, newID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) + require.Equal(t, newID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.EqualValues(t, tableCount, 2) } @@ -292,7 +292,6 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { dbMap, nil, 0, - filter.All(), nil, ) @@ -302,11 +301,11 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal( t, @@ -316,7 +315,7 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal( t, @@ -325,8 +324,8 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { ) // rewrite it aggin, and get the same result. - newID1 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID] - newID2 := sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID] + newID1 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID] + newID2 := sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID] newValue, err = sr.rewriteTableInfo(value, dbId) require.Nil(t, err) @@ -336,13 +335,13 @@ func TestRewriteTableInfoForPartitionTable(t *testing.T) { require.Equal( t, tableInfo.Partition.Definitions[0].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt1ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt1ID], ) require.Equal(t, tableInfo.Partition.Definitions[0].ID, newID1) require.Equal( t, tableInfo.Partition.Definitions[1].ID, - sr.DbMap[dbId].TableMap[tableID].PartitionMap[pt2ID], + sr.DbReplaceMap[dbId].TableMap[tableID].PartitionMap[pt2ID], ) require.Equal(t, tableInfo.Partition.Definitions[1].ID, newID2) } @@ -402,7 +401,8 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { dbMap[dbID2] = NewDBReplace(db2.Name.O, dbID2+100) dbMap[dbID2].TableMap[tableID2] = NewTableReplace(t2.Name.O, tableID2+100) - tc := NewTableMappingManager(dbMap, mockGenGenGlobalID) + tc := NewTableMappingManager() + tc.MergeBaseDBReplace(dbMap) //exchange partition, t1 partition0 with the t2 t1Copy := t1.Clone() @@ -412,14 +412,13 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { value, err := json.Marshal(&t1Copy) require.Nil(t, err) - err = tc.parseTableValueAndUpdateIdMapping(dbID1, value) + err = tc.ProcessTableEntryAndUpdateIdMapping(dbID1, *t1Copy) require.Nil(t, err) sr := NewSchemasReplace( - tc.DbReplaceMap, + tc.DBReplaceMap, nil, 0, - filter.All(), nil, ) @@ -435,7 +434,7 @@ func TestRewriteTableInfoForExchangePartition(t *testing.T) { // rewrite no partition table value, err = json.Marshal(&t2Copy) require.Nil(t, err) - err = tc.parseTableValueAndUpdateIdMapping(dbID2, value) + err = tc.ProcessTableEntryAndUpdateIdMapping(dbID2, *t2Copy) require.Nil(t, err) value, err = sr.rewriteTableInfo(value, dbID2) require.Nil(t, err) @@ -489,7 +488,7 @@ func TestRewriteTableInfoForTTLTable(t *testing.T) { err = json.Unmarshal(newValue, &tableInfo) require.Nil(t, err) require.Equal(t, tableInfo.Name.String(), tableName) - require.Equal(t, tableInfo.ID, sr.DbMap[dbId].TableMap[tableID].TableID) + require.Equal(t, tableInfo.ID, sr.DbReplaceMap[dbId].TableMap[tableID].TableID) require.NotNil(t, tableInfo.TTLInfo) require.Equal(t, colName, tableInfo.TTLInfo.ColumnName.O) require.Equal(t, "1", tableInfo.TTLInfo.IntervalExprStr) @@ -706,7 +705,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { var qargs *PreDelRangeQuery // drop schema - err := schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewTableIDSet)) @@ -716,7 +715,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop table0 - err = schemaReplace.restoreFromHistory(dropTable0Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)) @@ -729,42 +728,42 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable0NewID)) // drop table1 - err = schemaReplace.restoreFromHistory(dropTable1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobTable1NewID)) // drop table partition1 - err = schemaReplace.restoreFromHistory(dropTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, qargs.ParamsList[0].StartKey, encodeTableKey(mDDLJobPartition1NewID)) // reorganize table partition1 - err = schemaReplace.restoreFromHistory(reorganizeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(reorganizeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // remove table partition1 - err = schemaReplace.restoreFromHistory(removeTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(removeTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // alter table partition1 - err = schemaReplace.restoreFromHistory(alterTable0Partition1Job) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(alterTable0Partition1Job) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableKey(mDDLJobPartition1NewID), qargs.ParamsList[0].StartKey) // roll back add index for table0 - err = schemaReplace.restoreFromHistory(rollBackTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable0IndexJob) require.NoError(t, err) oldPartitionIDMap := make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -785,7 +784,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // roll back add index for table1 - err = schemaReplace.restoreFromHistory(rollBackTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(rollBackTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 2) @@ -795,7 +794,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(tablecodec.TempIndexPrefix|2)), qargs.ParamsList[1].StartKey) // drop index for table0 - err = schemaReplace.restoreFromHistory(dropTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -809,14 +808,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop index for table1 - err = schemaReplace.restoreFromHistory(dropTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(2)), qargs.ParamsList[0].StartKey) // add index for table 0 - err = schemaReplace.restoreFromHistory(addTable0IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable0IndexJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -830,14 +829,14 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // add index for table 1 - err = schemaReplace.restoreFromHistory(addTable1IndexJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(addTable1IndexJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, tempIndex2), qargs.ParamsList[0].StartKey) // drop column for table0 - err = schemaReplace.restoreFromHistory(dropTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -858,7 +857,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop column for table1 - err = schemaReplace.restoreFromHistory(dropTable1ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -868,7 +867,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // modify column for table0 - err = schemaReplace.restoreFromHistory(modifyTable0ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable0ColumnJob) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for i := 0; i < len(mDDLJobALLNewPartitionIDSet); i++ { @@ -889,7 +888,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // modify column for table1 - err = schemaReplace.restoreFromHistory(modifyTable1ColumnJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(modifyTable1ColumnJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLIndexesIDSet)) @@ -899,7 +898,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { require.Equal(t, encodeTableIndexKey(mDDLJobTable1NewID, int64(3)), qargs.ParamsList[1].StartKey) // drop indexes(multi-schema-change) for table0 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob0) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob0) require.NoError(t, err) oldPartitionIDMap = make(map[string]struct{}) for l := 0; l < 2; l++ { @@ -915,7 +914,7 @@ func TestDeleteRangeForMDDLJob(t *testing.T) { } // drop indexes(multi-schema-change) for table1 - err = schemaReplace.restoreFromHistory(multiSchemaChangeJob1) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(multiSchemaChangeJob1) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), 1) @@ -953,7 +952,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { }) var qargs *PreDelRangeQuery // drop schema - err := schemaReplace.restoreFromHistory(dropSchemaJob) + err := schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewTableIDSet)) @@ -971,7 +970,7 @@ func TestDeleteRangeForMDDLJob2(t *testing.T) { schemaReplace = MockEmptySchemasReplace(midr, map[int64]*DBReplace{ mDDLJobDBOldID: dbReplace, }) - err = schemaReplace.restoreFromHistory(dropSchemaJob) + err = schemaReplace.processIngestIndexAndDeleteRangeFromJob(dropSchemaJob) require.NoError(t, err) qargs = <-midr.queryCh require.Equal(t, len(qargs.ParamsList), len(mDDLJobALLNewPartitionIDSet)+1) diff --git a/br/pkg/stream/search.go b/br/pkg/stream/search.go index 7cf940a42f135..ea3664739576c 100644 --- a/br/pkg/stream/search.go +++ b/br/pkg/stream/search.go @@ -16,6 +16,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" @@ -193,9 +194,9 @@ func (s *StreamBackupSearch) Search(ctx context.Context) ([]*StreamKVInfo, error writeCFEntries := make(map[string]*StreamKVInfo, 64) for entry := range entriesCh { - if entry.CFName == WriteCF { + if entry.CFName == consts.WriteCF { writeCFEntries[entry.EncodedKey] = entry - } else if entry.CFName == DefaultCF { + } else if entry.CFName == consts.DefaultCF { defaultCFEntries[entry.EncodedKey] = entry } } @@ -241,7 +242,7 @@ func (s *StreamBackupSearch) searchFromDataFile( return errors.Annotatef(err, "decode raw key error, file: %s", dataFile.Path) } - if dataFile.Cf == WriteCF { + if dataFile.Cf == consts.WriteCF { rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(v); err != nil { return errors.Annotatef(err, "parse raw write cf value error, file: %s", dataFile.Path) @@ -262,7 +263,7 @@ func (s *StreamBackupSearch) searchFromDataFile( ShortValue: valueStr, } ch <- kvInfo - } else if dataFile.Cf == DefaultCF { + } else if dataFile.Cf == consts.DefaultCF { kvInfo := &StreamKVInfo{ CFName: dataFile.Cf, StartTs: ts, diff --git a/br/pkg/stream/search_test.go b/br/pkg/stream/search_test.go index 224beb5ac7403..2cf3b74b8efe0 100644 --- a/br/pkg/stream/search_test.go +++ b/br/pkg/stream/search_test.go @@ -13,6 +13,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/codec" "github.com/stretchr/testify/require" ) @@ -121,7 +122,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w defaultCFCheckSum := sha256.Sum256(defaultCFBuf.Bytes()) defaultCFDataFile = &backuppb.DataFileInfo{ Path: defaultCFFile, - Cf: DefaultCF, + Cf: consts.DefaultCF, Sha256: defaultCFCheckSum[:], } @@ -135,7 +136,7 @@ func fakeDataFile(t *testing.T, s storage.ExternalStorage) (defaultCFDataFile, w writeCFCheckSum := sha256.Sum256(writeCFBuf.Bytes()) writeCFDataFile = &backuppb.DataFileInfo{ Path: writeCFFile, - Cf: WriteCF, + Cf: consts.WriteCF, Sha256: writeCFCheckSum[:], } @@ -178,7 +179,7 @@ func TestMergeCFEntries(t *testing.T) { Key: hex.EncodeToString([]byte(defaultCF.key)), EncodedKey: encodedKey, StartTs: uint64(defaultCF.startTs), - CFName: DefaultCF, + CFName: consts.DefaultCF, Value: defaultCF.val, } } @@ -189,7 +190,7 @@ func TestMergeCFEntries(t *testing.T) { EncodedKey: encodedKey, StartTs: uint64(writeCF.startTs), CommitTs: uint64(writeCF.commitTS), - CFName: WriteCF, + CFName: consts.WriteCF, Value: writeCF.val, } } diff --git a/br/pkg/stream/stream_metas.go b/br/pkg/stream/stream_metas.go index 6801035ce7214..064ec6579a4b7 100644 --- a/br/pkg/stream/stream_metas.go +++ b/br/pkg/stream/stream_metas.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/glue" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/br/pkg/utils/iter" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/mathutil" @@ -295,7 +296,7 @@ func UpdateShiftTS(m *pb.Metadata, startTS uint64, restoreTS uint64) (uint64, bo for _, ds := range m.FileGroups { for _, d := range ds.DataFilesInfo { - if d.Cf == DefaultCF || d.MinBeginTsInDefaultCf == 0 { + if d.Cf == consts.DefaultCF || d.MinBeginTsInDefaultCf == 0 { continue } if d.MinTs > restoreTS || d.MaxTs < startTS { diff --git a/br/pkg/stream/stream_metas_test.go b/br/pkg/stream/stream_metas_test.go index c0fcbbae623ce..b7a2acdf35e89 100644 --- a/br/pkg/stream/stream_metas_test.go +++ b/br/pkg/stream/stream_metas_test.go @@ -22,6 +22,7 @@ import ( backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + . "github.com/pingcap/tidb/br/pkg/utils/consts" "github.com/pingcap/tidb/pkg/util/intest" "github.com/stretchr/testify/require" "go.uber.org/multierr" diff --git a/br/pkg/stream/stream_mgr.go b/br/pkg/stream/stream_mgr.go index a9dffb23f017d..1d1add48fbdaa 100644 --- a/br/pkg/stream/stream_mgr.go +++ b/br/pkg/stream/stream_mgr.go @@ -356,6 +356,15 @@ func (*MetadataHelper) Marshal(meta *backuppb.Metadata) ([]byte, error) { return meta.Marshal() } +func (m *MetadataHelper) Close() { + if m.decoder != nil { + m.decoder.Close() + } + if m.encryptionManager != nil { + m.encryptionManager.Close() + } +} + // FastUnmarshalMetaData used a 128 worker pool to speed up // read metadata content from external_storage. func FastUnmarshalMetaData( diff --git a/br/pkg/stream/stream_status.go b/br/pkg/stream/stream_status.go index 10e916cbd6578..f18f3d1c055bd 100644 --- a/br/pkg/stream/stream_status.go +++ b/br/pkg/stream/stream_status.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/br/pkg/storage" . "github.com/pingcap/tidb/br/pkg/streamhelper" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -120,9 +121,9 @@ func (p *printByTable) AddTask(task TaskStatus) { table := p.console.CreateTable() table.Add("name", task.Info.Name) table.Add("status", task.colorfulStatusString()) - table.Add("start", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) + table.Add("start", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.StartTs)))) if task.Info.EndTs > 0 { - table.Add("end", fmt.Sprint(FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) + table.Add("end", fmt.Sprint(utils.FormatDate(oracle.GetTimeFromTS(task.Info.EndTs)))) } s := storage.FormatBackendURL(task.Info.GetStorage()) table.Add("storage", s.String()) @@ -136,7 +137,7 @@ func (p *printByTable) AddTask(task TaskStatus) { if gap > 10*time.Minute { gapColor = color.New(color.FgRed) } - info := fmt.Sprintf("%s; gap=%s", FormatDate(pTime), gapColor.Sprint(gap)) + info := fmt.Sprintf("%s; gap=%s", utils.FormatDate(pTime), gapColor.Sprint(gap)) return info } table.Add("checkpoint[global]", formatTS(task.globalCheckpoint)) diff --git a/br/pkg/stream/table_history.go b/br/pkg/stream/table_history.go new file mode 100644 index 0000000000000..9b79d4a18ac7e --- /dev/null +++ b/br/pkg/stream/table_history.go @@ -0,0 +1,69 @@ +// Copyright 2022-present PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stream + +// TableLocationInfo stores db id and the table name to locate the table +type TableLocationInfo struct { + DbID int64 + TableName string +} + +type LogBackupTableHistoryManager struct { + // maps table ID to its original and current names + // [0] is original location, [1] is current location + tableNameHistory map[int64][2]TableLocationInfo + // record all the db id to name that were seen during log backup DDL history + dbIdToName map[int64]string +} + +func NewTableHistoryManager() *LogBackupTableHistoryManager { + return &LogBackupTableHistoryManager{ + tableNameHistory: make(map[int64][2]TableLocationInfo), + } +} + +func (info *LogBackupTableHistoryManager) AddTableHistory(tableId int64, tableName string, dbID int64) { + tableLocationInfo := TableLocationInfo{ + DbID: dbID, + TableName: tableName, + } + names, exists := info.tableNameHistory[tableId] + if !exists { + // first occurrence - store as original name + info.tableNameHistory[tableId] = [2]TableLocationInfo{tableLocationInfo, tableLocationInfo} + } else { + // update current name while preserving original name + info.tableNameHistory[tableId] = [2]TableLocationInfo{names[0], tableLocationInfo} + } +} + +func (info *LogBackupTableHistoryManager) RecordDBIdToName(dbId int64, dbName string) { + info.dbIdToName[dbId] = dbName +} + +// GetTableHistory returns information about all tables that have been renamed. +// Returns a map of table IDs to their original and current locations +func (info *LogBackupTableHistoryManager) GetTableHistory() map[int64][2]TableLocationInfo { + return info.tableNameHistory +} + +func (info *LogBackupTableHistoryManager) GetDBNameByID(dbId int64) (string, bool) { + name, ok := info.dbIdToName[dbId] + return name, ok +} + +func (info *LogBackupTableHistoryManager) GetNewlyCreatedDBHistory() map[int64]string { + return info.dbIdToName +} diff --git a/br/pkg/stream/table_mapping.go b/br/pkg/stream/table_mapping.go index ff44f1fdc7b35..c92e052f4d908 100644 --- a/br/pkg/stream/table_mapping.go +++ b/br/pkg/stream/table_mapping.go @@ -22,26 +22,41 @@ import ( "github.com/pingcap/errors" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" - "github.com/pingcap/tidb/pkg/kv" - "github.com/pingcap/tidb/pkg/meta" + berrors "github.com/pingcap/tidb/br/pkg/errors" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/pkg/meta/model" + "go.uber.org/zap" ) -// TableMappingManager iterates on log backup meta kvs and generate new id for DB, table and partition for +const InitialTempId int64 = 0 + +// TableMappingManager processes each log backup meta kv and generate new id for DB, table and partition for // downstream cluster. It maintains the id mapping and passes down later to the rewrite logic. type TableMappingManager struct { - DbReplaceMap map[UpstreamID]*DBReplace - globalIdMap map[UpstreamID]DownstreamID - genGlobalIdFn func(ctx context.Context) (int64, error) + DBReplaceMap map[UpstreamID]*DBReplace + globalIdMap map[UpstreamID]DownstreamID + + // a counter for temporary IDs, need to get real global id + // once full restore completes + tempIDCounter DownstreamID } -func NewTableMappingManager( - dbReplaceMap map[UpstreamID]*DBReplace, - genGlobalIdFn func(ctx context.Context) (int64, error)) *TableMappingManager { +func NewTableMappingManager() *TableMappingManager { + return &TableMappingManager{ + DBReplaceMap: make(map[UpstreamID]*DBReplace), + globalIdMap: make(map[UpstreamID]DownstreamID), + tempIDCounter: InitialTempId, + } +} + +func (tm *TableMappingManager) FromDBReplaceMap(dbReplaceMap map[UpstreamID]*DBReplace) error { + if !tm.IsEmpty() { + return errors.Annotate(berrors.ErrInvalidState, "expect table mapping manager empty when need to load ID map") + } + if dbReplaceMap == nil { dbReplaceMap = make(map[UpstreamID]*DBReplace) } - globalTableIdMap := make(map[UpstreamID]DownstreamID) for _, dr := range dbReplaceMap { for tblID, tr := range dr.TableMap { @@ -51,71 +66,24 @@ func NewTableMappingManager( } } } + tm.globalIdMap = globalTableIdMap + tm.DBReplaceMap = dbReplaceMap - return &TableMappingManager{ - DbReplaceMap: dbReplaceMap, - globalIdMap: globalTableIdMap, - genGlobalIdFn: genGlobalIdFn, - } -} - -// ParseMetaKvAndUpdateIdMapping collect table information -func (tc *TableMappingManager) ParseMetaKvAndUpdateIdMapping(e *kv.Entry, cf string) error { - if !IsMetaDBKey(e.Key) { - return nil - } - - rawKey, err := ParseTxnMetaKeyFrom(e.Key) - if err != nil { - return errors.Trace(err) - } - - value, err := extractValue(e, cf) - if err != nil { - return errors.Trace(err) - } - // sanity check - if value == nil { - log.Warn("entry suggests having short value but is nil") - return nil - } - - if meta.IsDBkey(rawKey.Field) { - return tc.parseDBValueAndUpdateIdMapping(value) - } else if !meta.IsDBkey(rawKey.Key) { - return nil - } - - if meta.IsTableKey(rawKey.Field) { - dbID, err := ParseDBIDFromTableKey(e.Key) - if err != nil { - return errors.Trace(err) - } - return tc.parseTableValueAndUpdateIdMapping(dbID, value) - } return nil } -func (tc *TableMappingManager) parseDBValueAndUpdateIdMapping(value []byte) error { - dbInfo := new(model.DBInfo) - if err := json.Unmarshal(value, dbInfo); err != nil { - return errors.Trace(err) - } - - if dr, exist := tc.DbReplaceMap[dbInfo.ID]; !exist { - newID, err := tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.DbReplaceMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) - tc.globalIdMap[dbInfo.ID] = newID +func (tm *TableMappingManager) ProcessDBEntryAndUpdateIdMapping(dbInfo model.DBInfo) error { + if dr, exist := tm.DBReplaceMap[dbInfo.ID]; !exist { + newID := tm.generateTempID() + tm.DBReplaceMap[dbInfo.ID] = NewDBReplace(dbInfo.Name.O, newID) + tm.globalIdMap[dbInfo.ID] = newID } else { dr.Name = dbInfo.Name.O } return nil } -func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, value []byte) error { +func (tm *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, value []byte) error { var ( tableInfo model.TableInfo err error @@ -129,26 +97,20 @@ func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, val } // construct or find the id map. - dbReplace, exist = tc.DbReplaceMap[dbID] + dbReplace, exist = tm.DBReplaceMap[dbID] if !exist { - newID, err := tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.globalIdMap[dbID] = newID + newID := tm.generateTempID() + tm.globalIdMap[dbID] = newID dbReplace = NewDBReplace("", newID) - tc.DbReplaceMap[dbID] = dbReplace + tm.DBReplaceMap[dbID] = dbReplace } tableReplace, exist = dbReplace.TableMap[tableInfo.ID] if !exist { - newID, exist := tc.globalIdMap[tableInfo.ID] + newID, exist := tm.globalIdMap[tableInfo.ID] if !exist { - newID, err = tc.genGlobalIdFn(context.Background()) - if err != nil { - return errors.Trace(err) - } - tc.globalIdMap[tableInfo.ID] = newID + newID = tm.generateTempID() + tm.globalIdMap[tableInfo.ID] = newID } tableReplace = NewTableReplace(tableInfo.Name.O, newID) @@ -164,13 +126,14 @@ func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, val for i, partition := range partitions.Definitions { newID, exist := tableReplace.PartitionMap[partition.ID] if !exist { - newID, exist = tc.globalIdMap[partition.ID] + newID, exist = tm.globalIdMap[partition.ID] + newID, exist = tm.globalIdMap[partition.ID] if !exist { - newID, err = tc.genGlobalIdFn(context.Background()) + newID, err = tm.generateTempID() if err != nil { return errors.Trace(err) } - tc.globalIdMap[partition.ID] = newID + tm.globalIdMap[partition.ID] = newID } tableReplace.PartitionMap[partition.ID] = newID } @@ -180,11 +143,175 @@ func (tc *TableMappingManager) parseTableValueAndUpdateIdMapping(dbID int64, val return nil } +func (tm *TableMappingManager) MergeBaseDBReplace(baseMap map[UpstreamID]*DBReplace) { + for upstreamID, dbReplace := range baseMap { + tm.globalIdMap[upstreamID] = dbReplace.DbID + + for tableUpID, tableReplace := range dbReplace.TableMap { + log.Info("########### merging table", zap.Any("up", tableUpID), zap.Any("new", tableReplace.TableID)) + tm.globalIdMap[tableUpID] = tableReplace.TableID + for partUpID, partDownID := range tableReplace.PartitionMap { + tm.globalIdMap[partUpID] = partDownID + } + } + } + + for upstreamID, baseDBReplace := range baseMap { + if existingDBReplace, exists := tm.DBReplaceMap[upstreamID]; exists { + existingDBReplace.DbID = baseDBReplace.DbID + + for tableUpID, baseTableReplace := range baseDBReplace.TableMap { + if existingTableReplace, tableExists := existingDBReplace.TableMap[tableUpID]; tableExists { + existingTableReplace.TableID = baseTableReplace.TableID + + for partUpID, basePartDownID := range baseTableReplace.PartitionMap { + existingTableReplace.PartitionMap[partUpID] = basePartDownID + } + } else { + existingDBReplace.TableMap[tableUpID] = baseTableReplace + } + } + } else { + tm.DBReplaceMap[upstreamID] = baseDBReplace + } + } +} + +func (tm *TableMappingManager) IsEmpty() bool { + return len(tm.DBReplaceMap) == 0 && len(tm.globalIdMap) == 0 +} + +func (tm *TableMappingManager) ReplaceTemporaryIDs( + ctx context.Context, genGenGlobalIDs func(ctx context.Context, n int) ([]int64, error)) error { + if tm.tempIDCounter == InitialTempId { + // no temporary IDs were allocated + return nil + } + + // Find actually used temporary IDs + usedTempIDs := make(map[DownstreamID]struct{}) + + // Check DBReplaceMap for used temporary IDs + for _, dr := range tm.DBReplaceMap { + if dr.DbID < 0 { // is temporary ID + usedTempIDs[dr.DbID] = struct{}{} + } + for _, tr := range dr.TableMap { + if tr.TableID < 0 { // is temporary ID + usedTempIDs[tr.TableID] = struct{}{} + } + for _, partID := range tr.PartitionMap { + if partID < 0 { // is temporary ID + usedTempIDs[partID] = struct{}{} + } + } + } + } + + // Check globalIdMap for used temporary IDs + for _, downID := range tm.globalIdMap { + if downID < 0 { // is temporary ID + usedTempIDs[downID] = struct{}{} + } + } + + if len(usedTempIDs) == 0 { + tm.tempIDCounter = InitialTempId + return nil + } + + // Generate new IDs only for actually used temporary IDs + newIDs, err := genGenGlobalIDs(ctx, len(usedTempIDs)) + if err != nil { + return errors.Trace(err) + } + + // Create mapping from temp IDs to new IDs + idMapping := make(map[DownstreamID]DownstreamID, len(usedTempIDs)) + i := 0 + for tempID := range usedTempIDs { + idMapping[tempID] = newIDs[i] + i++ + } + + // Update all mappings + for upID, downID := range tm.globalIdMap { + if newID, exists := idMapping[downID]; exists { + tm.globalIdMap[upID] = newID + } + } + + // Update DBReplace mappings + for _, dr := range tm.DBReplaceMap { + if newID, exists := idMapping[dr.DbID]; exists { + dr.DbID = newID + } + + for _, tr := range dr.TableMap { + if newID, exists := idMapping[tr.TableID]; exists { + tr.TableID = newID + } + + for oldPID, tempPID := range tr.PartitionMap { + if newID, exists := idMapping[tempPID]; exists { + tr.PartitionMap[oldPID] = newID + } + } + } + } + + tm.tempIDCounter = InitialTempId + return nil +} + +func (tm *TableMappingManager) FilterDBMap(filter *utils.PiTRTableFilter) { + // First collect all IDs that should be kept + keepIDs := make(map[UpstreamID]struct{}) + + // Iterate through existing DBReplaceMap + for dbID, dbReplace := range tm.DBReplaceMap { + // Remove entire database if not in filter + if !filter.ContainsDB(dbID) { + delete(tm.DBReplaceMap, dbID) + continue + } + + keepIDs[dbID] = struct{}{} // keep database ID + + // Filter tables in this database + for tableID, tableReplace := range dbReplace.TableMap { + if !filter.ContainsTable(dbID, tableID) { + delete(dbReplace.TableMap, tableID) + } else { + keepIDs[tableID] = struct{}{} // keep table ID + // keep all partition IDs for kept tables + for partitionID := range tableReplace.PartitionMap { + keepIDs[partitionID] = struct{}{} + } + } + } + + // Remove database if it has no tables after filtering + if len(dbReplace.TableMap) == 0 { + delete(tm.DBReplaceMap, dbID) + } + } + + // Remove any ID from globalIdMap that isn't in keepIDs + for id := range tm.globalIdMap { + if _, ok := keepIDs[id]; !ok { + delete(tm.globalIdMap, id) + } + } + + return +} + // ToProto produces schemas id maps from up-stream to down-stream. -func (tc *TableMappingManager) ToProto() []*backuppb.PitrDBMap { - dbMaps := make([]*backuppb.PitrDBMap, 0, len(tc.DbReplaceMap)) +func (tm *TableMappingManager) ToProto() []*backuppb.PitrDBMap { + dbMaps := make([]*backuppb.PitrDBMap, 0, len(tm.DBReplaceMap)) - for dbID, dr := range tc.DbReplaceMap { + for dbID, dr := range tm.DBReplaceMap { dbm := backuppb.PitrDBMap{ Name: dr.Name, IdMap: &backuppb.IDMap{ @@ -215,7 +342,6 @@ func (tc *TableMappingManager) ToProto() []*backuppb.PitrDBMap { } dbMaps = append(dbMaps, &dbm) } - return dbMaps } @@ -234,15 +360,14 @@ func FromDBMapProto(dbMaps []*backuppb.PitrDBMap) map[UpstreamID]*DBReplace { } } } - return dbReplaces } func extractValue(e *kv.Entry, cf string) ([]byte, error) { switch cf { - case DefaultCF: + case consts.DefaultCF: return e.Value, nil - case WriteCF: + case consts.WriteCF: rawWriteCFValue := new(RawWriteCFValue) if err := rawWriteCFValue.ParseFrom(e.Value); err != nil { return nil, errors.Trace(err) @@ -255,3 +380,7 @@ func extractValue(e *kv.Entry, cf string) ([]byte, error) { panic(fmt.Sprintf("not support cf:%s", cf)) } } +func (tm *TableMappingManager) generateTempID() DownstreamID { + tm.tempIDCounter-- + return tm.tempIDCounter +} diff --git a/br/pkg/stream/util.go b/br/pkg/stream/util.go deleted file mode 100644 index 10215a68df61d..0000000000000 --- a/br/pkg/stream/util.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. - -package stream - -import ( - "strings" - "time" -) - -const DATE_FORMAT = "2006-01-02 15:04:05.999999999 -0700" - -func FormatDate(ts time.Time) string { - return ts.Format(DATE_FORMAT) -} - -func IsMetaDBKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDB") -} - -func IsMetaDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mDDLJobH") -} - -func MaybeDBOrDDLJobHistoryKey(key []byte) bool { - return strings.HasPrefix(string(key), "mD") -} diff --git a/br/pkg/stream/util_test.go b/br/pkg/stream/util_test.go deleted file mode 100644 index 6dda62a04ad60..0000000000000 --- a/br/pkg/stream/util_test.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2022 PingCAP, Inc. Licensed under Apache-2.0. - -package stream - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" - "github.com/tikv/client-go/v2/oracle" -) - -func TestDateFormat(t *testing.T) { - cases := []struct { - ts uint64 - target string - }{ - { - 434604259287760897, - "2022-07-15 19:14:39.534 +0800", - }, - { - 434605479096221697, - "2022-07-15 20:32:12.734 +0800", - }, - { - 434605478903808000, - "2022-07-15 20:32:12 +0800", - }, - } - - timeZone, _ := time.LoadLocation("Asia/Shanghai") - for _, ca := range cases { - date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) - require.Equal(t, ca.target, date) - } -} - -func TestPrefix(t *testing.T) { - require.True(t, IsMetaDBKey([]byte("mDBs"))) - require.False(t, IsMetaDBKey([]byte("mDDL"))) - require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) - require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDL"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDB:"))) - require.True(t, MaybeDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) - require.False(t, MaybeDBOrDDLJobHistoryKey([]byte("DDL"))) -} diff --git a/br/pkg/task/backup.go b/br/pkg/task/backup.go index af92518043a90..b7f05f14552fe 100644 --- a/br/pkg/task/backup.go +++ b/br/pkg/task/backup.go @@ -585,8 +585,8 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig }) } - // nothing to backup - if len(ranges) == 0 { + // check on ranges and schemas and if nothing to back up do early return + if len(ranges) == 0 && (schemas == nil || schemas.Len() == 0) { pdAddress := strings.Join(cfg.PD, ",") log.Warn("Nothing to backup, maybe connected to cluster for restoring", zap.String("PD address", pdAddress)) diff --git a/br/pkg/task/common.go b/br/pkg/task/common.go index 1813741634609..5be1921f965e7 100644 --- a/br/pkg/task/common.go +++ b/br/pkg/task/common.go @@ -252,8 +252,13 @@ type Config struct { // should be removed after TiDB upgrades the BR dependency. Filter filter.MySQLReplicationRules - FilterStr []string `json:"filter-strings" toml:"filter-strings"` - TableFilter filter.Filter `json:"-" toml:"-"` + FilterStr []string `json:"filter-strings" toml:"filter-strings"` + // generated from FilterStr provides by user + TableFilter filter.Filter `json:"-" toml:"-"` + // PiTRTableFilter generated from TableFilter during snapshot restore, it has all the db id and table id that needs + // to be restored + PiTRTableFilter *utils.PiTRTableFilter `json:"-" toml:"-"` + SwitchModeInterval time.Duration `json:"switch-mode-interval" toml:"switch-mode-interval"` // Schemas is a database name set, to check whether the restore database has been backup Schemas map[string]struct{} @@ -1000,3 +1005,9 @@ func progressFileWriterRoutine(ctx context.Context, progress glue.Progress, tota } } } + +func WriteStringToConsole(g glue.Glue, msg string) error { + b := []byte(msg) + _, err := glue.GetConsole(g).Out().Write(b) + return err +} diff --git a/br/pkg/task/config_test.go b/br/pkg/task/config_test.go index 9d225163d5f9c..b19f1ecfe757c 100644 --- a/br/pkg/task/config_test.go +++ b/br/pkg/task/config_test.go @@ -193,7 +193,7 @@ func TestCheckRestoreDBAndTable(t *testing.T) { for _, db := range ca.backupDBs { backupDBs = append(backupDBs, db) } - err := CheckRestoreDBAndTable(backupDBs, cfg) + err := VerifyDBAndTableInBackup(backupDBs, cfg) require.NoError(t, err) } } diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 7aa5303e9c6e2..a23bac1371de4 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/br/pkg/restore" snapclient "github.com/pingcap/tidb/br/pkg/restore/snap_client" "github.com/pingcap/tidb/br/pkg/restore/tiflashrec" + "github.com/pingcap/tidb/br/pkg/stream" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" "github.com/pingcap/tidb/br/pkg/version" @@ -597,8 +598,8 @@ func CheckNewCollationEnable( return enabled, nil } -// CheckRestoreDBAndTable is used to check whether the restore dbs or tables have been backup -func CheckRestoreDBAndTable(schemas []*metautil.Database, cfg *RestoreConfig) error { +// VerifyDBAndTableInBackup is used to check whether the restore dbs or tables have been backup +func VerifyDBAndTableInBackup(schemas []*metautil.Database, cfg *RestoreConfig) error { if len(cfg.Schemas) == 0 && len(cfg.Tables) == 0 { return nil } @@ -708,7 +709,10 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf if err := version.CheckClusterVersion(c, mgr.GetPDClient(), version.CheckVersionForBR); err != nil { return errors.Trace(err) } - restoreError = runSnapshotRestore(c, mgr, g, cmdName, cfg, nil) + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + } + restoreError = runSnapshotRestore(c, mgr, g, cmdName, &snapshotRestoreConfig) } if restoreError != nil { return errors.Trace(restoreError) @@ -745,7 +749,13 @@ func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConf return nil } -func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg *RestoreConfig, checkInfo *PiTRTaskInfo) error { +type SnapshotRestoreConfig struct { + *RestoreConfig + piTRTaskInfo *PiTRTaskInfo + logTableHistoryManager *stream.LogBackupTableHistoryManager +} + +func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName string, cfg *SnapshotRestoreConfig) error { cfg.Adjust() defer summary.Summary(cmdName) ctx, cancel := context.WithCancel(c) @@ -758,9 +768,31 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s ctx = opentracing.ContextWithSpan(ctx, span1) } - codec := mgr.GetStorage().GetCodec() + // reads out information from backup meta file and do requirement checking if needed + u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) + if err != nil { + return errors.Trace(err) + } + if cfg.CheckRequirements { + log.Info("Checking incompatible TiCDC changefeeds before restoring.", + logutil.ShortError(err), zap.Uint64("restore-ts", backupMeta.EndVersion)) + if err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()); err != nil { + return errors.Trace(err) + } - // need retrieve these configs from tikv if not set in command. + backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) + if backupVersion != nil { + if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { + return errors.Trace(versionErr) + } + } + } + if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { + return errors.Trace(err) + } + + // build restore client + // need to retrieve these configs from tikv if not set in command. kvConfigs := &pconfig.KVConfig{ ImportGoroutines: cfg.ConcurrencyPerStore, MergeRegionSize: cfg.MergeSmallRegionSizeBytes, @@ -779,56 +811,58 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s cfg.ConcurrencyPerStore = kvConfigs.ImportGoroutines // using tikv config to set the concurrency-per-store for client. client.SetConcurrencyPerStore(cfg.ConcurrencyPerStore.Value) - err := configureRestoreClient(ctx, client, cfg) + err = configureRestoreClient(ctx, client, cfg.RestoreConfig) if err != nil { return errors.Trace(err) } - // Init DB connection sessions - err = client.Init(g, mgr.GetStorage()) + // InitConnections DB connection sessions + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() - if err != nil { return errors.Trace(err) } - u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config) - if err != nil { - return errors.Trace(err) - } - if cfg.CheckRequirements { - err := checkIncompatibleChangefeed(ctx, backupMeta.EndVersion, mgr.GetDomain().GetEtcdClient()) - log.Info("Checking incompatible TiCDC changefeeds before restoring.", - logutil.ShortError(err), zap.Uint64("restore-ts", backupMeta.EndVersion)) - if err != nil { - return errors.Trace(err) - } - } - backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion) - if cfg.CheckRequirements && backupVersion != nil { - if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil { - return errors.Trace(versionErr) - } - } - if _, err = CheckNewCollationEnable(backupMeta.GetNewCollationsEnabled(), g, mgr.GetStorage(), cfg.CheckRequirements); err != nil { - return errors.Trace(err) - } - - reader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) - if err = client.LoadSchemaIfNeededAndInitClient(c, backupMeta, u, reader, cfg.LoadStats, nil, nil); err != nil { + metaReader := metautil.NewMetaReader(backupMeta, s, &cfg.CipherInfo) + if err = client.LoadSchemaIfNeededAndInitClient(ctx, backupMeta, u, metaReader, cfg.LoadStats, nil, nil); err != nil { return errors.Trace(err) } if client.IsRawKvMode() { return errors.Annotate(berrors.ErrRestoreModeMismatch, "cannot do transactional restore from raw kv data") } - if err = CheckRestoreDBAndTable(client.GetDatabases(), cfg); err != nil { + if err = VerifyDBAndTableInBackup(client.GetDatabases(), cfg.RestoreConfig); err != nil { return err } - files, tables, dbs := filterRestoreFiles(client, cfg) - if len(dbs) == 0 && len(tables) != 0 { - return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases") + + // filters out db/table/files using filter + fileMap, tableMap, dbMap, err := filterRestoreFiles(client, cfg.RestoreConfig) + if err != nil { + return errors.Trace(err) + } + log.Info("found items to restore after filtering", + zap.Int("files", len(fileMap)), + zap.Int("tables", len(tableMap)), + zap.Int("db", len(dbMap))) + + if cfg.logTableHistoryManager != nil { + // adjust tables to restore in the snapshot restore phase since it will later be renamed during + // log restore and will fall into or out of the filter range. + err := adjustTablesToRestoreAndCreateFilter(cfg.logTableHistoryManager, cfg.RestoreConfig, client, fileMap, tableMap) + if err != nil { + return errors.Trace(err) + } + + log.Info("adjusted items to restore", + zap.Int("files", len(fileMap)), + zap.Int("tables", len(tableMap)), + zap.Int("db", len(dbMap))) + + // need to update to include all eligible table id from snapshot restore + UpdatePiTRFilter(cfg.RestoreConfig, tableMap) } + files, tables, dbs := convertMapsToSlices(fileMap, tableMap, dbMap) + // after figuring out what files to restore, check if disk has enough space if cfg.CheckRequirements { if err := checkDiskSpace(ctx, mgr, files, tables); err != nil { return errors.Trace(err) @@ -845,8 +879,8 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // for full + log restore. should check the cluster is empty. - if client.IsFull() && checkInfo != nil && checkInfo.FullRestoreCheckErr != nil { - return checkInfo.FullRestoreCheckErr + if client.IsFull() && cfg.piTRTaskInfo != nil && cfg.piTRTaskInfo.FullRestoreCheckErr != nil { + return cfg.piTRTaskInfo.FullRestoreCheckErr } if client.IsIncremental() { @@ -856,23 +890,24 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) + restoreSchedulersFunc, schedulersConfig, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, true) if err != nil { return errors.Trace(err) } - schedulersRemovable := false + // need to know whether restore has been completed so can restore schedulers + canRestoreSchedulers := false defer func() { // don't reset pd scheduler if checkpoint mode is used and restored is not finished - if cfg.UseCheckpoint && !schedulersRemovable { - log.Info("skip removing pd schehduler for next retry") + if cfg.UseCheckpoint && !canRestoreSchedulers { + log.Info("skip removing pd scheduler for next retry") return } - log.Info("start to remove the pd scheduler") + log.Info("start to restore pd scheduler") // run the post-work to avoid being stuck in the import // mode or emptied schedulers. - restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) - log.Info("finish removing pd scheduler") + restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) + log.Info("finish restoring pd scheduler") }() var checkpointFirstRun = true @@ -898,7 +933,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } } else if client.IsFull() && checkpointFirstRun && cfg.CheckRequirements { if err := checkTableExistence(ctx, mgr, tables, g); err != nil { - schedulersRemovable = true + canRestoreSchedulers = true return errors.Trace(err) } } @@ -910,8 +945,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // preallocate the table id, because any ddl job or database creation(include checkpoint) also allocates the global ID - err = client.AllocTableIDs(ctx, tables) - if err != nil { + if err = client.AllocTableIDs(ctx, tables); err != nil { return errors.Trace(err) } @@ -923,7 +957,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } if restoreSchedulersConfigFromCheckpoint != nil { - restoreSchedulers = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) + restoreSchedulersFunc = mgr.MakeUndoFunctionByConfig(*restoreSchedulersConfigFromCheckpoint) } checkpointSetWithTableID = sets @@ -931,7 +965,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s // need to flush the whole checkpoint data so that br can quickly jump to // the log kv restore step when the next retry. log.Info("wait for flush checkpoint...") - client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !schedulersRemovable) + client.WaitForFinishCheckpoint(ctx, len(cfg.FullBackupStorage) > 0 || !canRestoreSchedulers) }() } @@ -1005,8 +1039,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s } // execute DDL first - err = client.ExecDDLs(ctx, ddlJobs) - if err != nil { + if err = client.ExecDDLs(ctx, ddlJobs); err != nil { return errors.Trace(err) } @@ -1041,6 +1074,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } + codec := mgr.GetStorage().GetCodec() if len(files) == 0 { log.Info("no files, empty databases and tables are restored") summary.SetSuccessStatus(true) @@ -1163,7 +1197,7 @@ func runSnapshotRestore(c context.Context, mgr *conn.Mgr, g glue.Glue, cmdName s return errors.Trace(err) } - schedulersRemovable = true + canRestoreSchedulers = true // Set task summary to success status. summary.SetSuccessStatus(true) @@ -1378,15 +1412,20 @@ func dropToBlackhole( return outCh } -// filterRestoreFiles filters tables that can't be processed after applying cfg.TableFilter.MatchTable. -// if the db has no table that can be processed, the db will be filtered too. +// filterRestoreFiles filters out tables that can't be processed after applying cfg.TableFilter.MatchTable. func filterRestoreFiles( client *snapclient.SnapClient, cfg *RestoreConfig, -) (files []*backuppb.File, tables []*metautil.Table, dbs []*metautil.Database) { +) (fileMap map[string]*backuppb.File, tableMap map[int64]*metautil.Table, dbMap map[int64]*metautil.Database, err error) { + // Initialize maps + fileMap = make(map[string]*backuppb.File) + tableMap = make(map[int64]*metautil.Table) + dbMap = make(map[int64]*metautil.Database) + for _, db := range client.GetDatabases() { dbName := db.Info.Name.O - if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok { + dbMap[db.Info.ID] = db + if name, ok := utils.StripTempTableNamePrefixIfNeeded(db.Info.Name.O); utils.IsSysDB(name) && ok { dbName = name } if checkpoint.IsCheckpointDB(db.Info.Name) { @@ -1395,18 +1434,141 @@ func filterRestoreFiles( if !cfg.TableFilter.MatchSchema(dbName) { continue } - dbs = append(dbs, db) + dbMap[db.Info.ID] = db for _, table := range db.Tables { if table.Info == nil || !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) { continue } - files = append(files, table.Files...) - tables = append(tables, table) + + // Add table to tableMap using table ID as key + tableMap[table.Info.ID] = table + + // Add files to fileMap using file name as key + for _, file := range table.Files { + fileMap[file.Name] = file + } + } + } + + // sanity check + if len(dbMap) == 0 && len(tableMap) != 0 { + err = errors.Annotate(berrors.ErrRestoreInvalidBackup, "contains tables but no databases") + } + return +} + +func adjustTablesToRestoreAndCreateFilter( + logBackupTableHistory *stream.LogBackupTableHistoryManager, + cfg *RestoreConfig, + client *snapclient.SnapClient, + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, +) (err error) { + snapshotDbMap := client.GetDatabaseMap() + + // build filter for pitr restore to use later + piTRTableFilter := utils.NewPiTRTableFilter() + + // put all the newly created db that matches the filter during log backup into the pitr filter + newlyCreatedDBs := logBackupTableHistory.GetNewlyCreatedDBHistory() + for dbId, dbName := range newlyCreatedDBs { + if cfg.TableFilter.MatchSchema(dbName) { + piTRTableFilter.UpdateDB(dbId) + } + } + + // get all the tables seen during the log backup + tableHistory := logBackupTableHistory.GetTableHistory() + + for tableID, locations := range tableHistory { + originalLocation := locations[0] + currentLocation := locations[1] + + var dbName string + if snapDb, exists := snapshotDbMap[currentLocation.DbID]; exists { + dbName = snapDb.Info.Name.O + } else if name, exists := logBackupTableHistory.GetDBNameByID(currentLocation.DbID); exists { + // if db id does not exist in the snapshot, meaning it's created during log backup + dbName = name + } else { + err = errors.Annotate(berrors.ErrRestoreInvalidBackup, + "internal error: did not find db id to name information in snapshot and log backup history") + return + } + + if name, ok := utils.StripTempTableNamePrefixIfNeeded(dbName); utils.IsSysDB(name) && ok { + dbName = name + } + // handle in filter range cases + // 1. original == current, didn't have renaming + // 2. original has been renamed and current is in the filter range + // we need to restore original table + if cfg.TableFilter.MatchTable(dbName, currentLocation.TableName) { + // put this db/table id into pitr filter as it matches with user's filter + // have to update filter here since table might be empty or not in snapshot so nothing will be returned . + // but we still need to capture this table id to restore during log restore. + piTRTableFilter.UpdateTable(currentLocation.DbID, tableID) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDbMap[originalLocation.DbID] + if !exists { + // original db created during log backup, snapshot doesn't have information about this db so doesn't + // need to restore at snapshot + continue + } + + // need to restore the matching table in snapshot restore phase + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range originalTable.Files { + fileMap[file.Name] = file + } + tableMap[originalTable.Info.ID] = originalTable + // only one table id will match + break + } + } + // handle case where current is not in range and original was in range, we need to remove the original from + // restoring + } else if cfg.TableFilter.MatchTable(dbName, originalLocation.TableName) { + // remove it from the filter, will not remove db even table size becomes 0 + _ = piTRTableFilter.Remove(originalLocation.DbID, tableID) + + // check if snapshot contains the original db/table + originalDB, exists := snapshotDbMap[originalLocation.DbID] + if !exists { + // original db created during log backup, no need to process further + continue + } + for _, originalTable := range originalDB.Tables { + if originalTable.Info == nil { + continue + } + if originalTable.Info.ID == tableID { + for _, file := range originalTable.Files { + delete(fileMap, file.Name) + } + delete(tableMap, originalTable.Info.ID) + // only one table id will match + break + } + } } } + // store the filter into config + cfg.PiTRTableFilter = piTRTableFilter return } +func UpdatePiTRFilter(cfg *RestoreConfig, tableMap map[int64]*metautil.Table) { + for _, table := range tableMap { + cfg.PiTRTableFilter.UpdateTable(table.DB.ID, table.Info.ID) + } +} + // enableTiDBConfig tweaks some of configs of TiDB to make the restore progress go well. // return a function that could restore the config to origin. func enableTiDBConfig() func() { @@ -1575,6 +1737,9 @@ func FilterDDLJobs(allDDLJobs []*model.Job, tables []*metautil.Table) (ddlJobs [ } } } + slices.SortFunc(ddlJobs, func(i, j *model.Job) int { + return cmp.Compare(i.BinlogInfo.SchemaVersion, j.BinlogInfo.SchemaVersion) + }) return ddlJobs } @@ -1657,3 +1822,26 @@ func afterTableRestoredCh(ctx context.Context, createdTables []*snapclient.Creat }() return outCh } + +func convertMapsToSlices( + fileMap map[string]*backuppb.File, + tableMap map[int64]*metautil.Table, + dbMap map[int64]*metautil.Database, +) ([]*backuppb.File, []*metautil.Table, []*metautil.Database) { + files := make([]*backuppb.File, 0, len(fileMap)) + for _, file := range fileMap { + files = append(files, file) + } + + tables := make([]*metautil.Table, 0, len(tableMap)) + for _, table := range tableMap { + tables = append(tables, table) + } + + dbs := make([]*metautil.Database, 0, len(dbMap)) + for _, db := range dbMap { + dbs = append(dbs, db) + } + + return files, tables, dbs +} diff --git a/br/pkg/task/restore_raw.go b/br/pkg/task/restore_raw.go index 1680e60472f7b..f38040d569b70 100644 --- a/br/pkg/task/restore_raw.go +++ b/br/pkg/task/restore_raw.go @@ -101,7 +101,7 @@ func RunRestoreRaw(c context.Context, g glue.Glue, cmdName string, cfg *RestoreR client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) client.SetConcurrencyPerStore(cfg.ConcurrencyPerStore.Value) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) diff --git a/br/pkg/task/restore_test.go b/br/pkg/task/restore_test.go index 86ceb3755ee09..5cb3c8a67abe7 100644 --- a/br/pkg/task/restore_test.go +++ b/br/pkg/task/restore_test.go @@ -308,6 +308,8 @@ func TestFilterDDLJobs(t *testing.T) { ddlJobs := task.FilterDDLJobs(allDDLJobs, tables) for _, job := range ddlJobs { t.Logf("get ddl job: %s", job.Query) + t.Logf("table name: %s", job.TableName) + t.Logf("dbid: %s", job.SchemaName) } require.Equal(t, 7, len(ddlJobs)) } diff --git a/br/pkg/task/restore_txn.go b/br/pkg/task/restore_txn.go index 16d8e099f659d..720d40dc6bf69 100644 --- a/br/pkg/task/restore_txn.go +++ b/br/pkg/task/restore_txn.go @@ -44,7 +44,7 @@ func RunRestoreTxn(c context.Context, g glue.Glue, cmdName string, cfg *Config) client.SetRateLimit(cfg.RateLimit) client.SetCrypter(&cfg.CipherInfo) client.SetConcurrencyPerStore(uint(cfg.Concurrency)) - err = client.Init(g, mgr.GetStorage()) + err = client.InitConnections(g, mgr.GetStorage()) defer client.Close() if err != nil { return errors.Trace(err) diff --git a/br/pkg/task/stream.go b/br/pkg/task/stream.go index 5bf2c0eddc51a..8362e68f4f5e2 100644 --- a/br/pkg/task/stream.go +++ b/br/pkg/task/stream.go @@ -467,11 +467,11 @@ func (s *streamMgr) checkStreamStartEnable(ctx context.Context) error { return nil } -type RestoreFunc func(string) error +type RestoreGcFunc func(string) error -// KeepGcDisabled keeps GC disabled and return a function that used to gc enabled. +// DisableGc disables and returns a function that can enable gc back. // gc.ratio-threshold = "-1.0", which represents disable gc in TiKV. -func KeepGcDisabled(g glue.Glue, store kv.Storage) (RestoreFunc, string, error) { +func DisableGc(g glue.Glue, store kv.Storage) (RestoreGcFunc, string, error) { se, err := g.CreateSession(store) if err != nil { return nil, "", errors.Trace(err) @@ -582,7 +582,7 @@ func RunStreamStart( // locked means this is a stream task restart. Or create a new stream task. if locked { - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } @@ -696,13 +696,13 @@ func RunStreamMetadata( ctx = opentracing.ContextWithSpan(ctx, span1) } - logInfo, err := getLogRange(ctx, &cfg.Config) + logInfo, err := getLogInfo(ctx, &cfg.Config) if err != nil { return errors.Trace(err) } - logMinDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) - logMaxDate := stream.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) + logMinDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMinTS)) + logMaxDate := utils.FormatDate(oracle.GetTimeFromTS(logInfo.logMaxTS)) summary.Log(cmdName, zap.Uint64("log-min-ts", logInfo.logMinTS), zap.String("log-min-date", logMinDate), zap.Uint64("log-max-ts", logInfo.logMaxTS), @@ -1185,7 +1185,8 @@ func checkIncompatibleChangefeed(ctx context.Context, backupTS uint64, etcdCLI * return nil } -// RunStreamRestore restores stream log. +// RunStreamRestore is the entry point to do PiTR restore. It can optionally start a full/snapshot restore followed +// by the log restore. func RunStreamRestore( c context.Context, mgr *conn.Mgr, @@ -1204,7 +1205,7 @@ func RunStreamRestore( if err != nil { return errors.Trace(err) } - logInfo, err := getLogRangeWithStorage(ctx, s) + logInfo, err := getLogInfoFromStorage(ctx, s) if err != nil { return errors.Trace(err) } @@ -1242,7 +1243,7 @@ func RunStreamRestore( return errors.Trace(err) } - checkInfo, err := checkPiTRTaskInfo(ctx, mgr, g, cfg) + taskInfo, err := generatePiTRTaskInfo(ctx, mgr, g, cfg) if err != nil { return errors.Trace(err) } @@ -1251,42 +1252,92 @@ func RunStreamRestore( failpoint.Return(errors.New("failpoint: failed before full restore")) }) - recorder := tiflashrec.New() - cfg.tiflashRecorder = recorder + cfg.tiflashRecorder = tiflashrec.New() + logClient, err := createLogClient(ctx, g, cfg, mgr) + if err != nil { + return errors.Trace(err) + } + defer logClient.Close(ctx) + + ddlFiles, err := logClient.LoadDDLFiles(ctx) + if err != nil { + return errors.Trace(err) + } + metaInfoProcessor := logclient.NewMetaKVInfoProcessor(logClient) + // only doesn't need to build if id map has been saved during log restore + buildIdMap := needToBuildIdMap(taskInfo.CheckpointInfo) + if buildIdMap { + // we restore additional tables at full snapshot phase when it is renamed into the filter range + // later in log backup. + // we also ignore the tables that currently in filter range but later renamed out of the filter. + log.Info("reading meta kv files to collect table renaming and id mapping information") + err = metaInfoProcessor.ReadMetaKVFilesAndBuildInfo(ctx, ddlFiles) + dbreplace := metaInfoProcessor.GetTableMappingManager().DBReplaceMap + for id, replace := range dbreplace { + log.Info("#### db replace", zap.Any("name", replace.Name), zap.Any("upid", id), zap.Any("downid", replace.DbID)) + for id, table := range replace.TableMap { + log.Info("#### table ", zap.Any("name", table.Name), zap.Any("upId", id), zap.Any("downId", table.TableID)) + } + } + + log.Info("got db replace size", zap.Int("size", len(metaInfoProcessor.GetTableMappingManager().DBReplaceMap))) + if err != nil { + return errors.Trace(err) + } + } + // restore full snapshot. - if checkInfo.NeedFullRestore { + if taskInfo.NeedFullRestore { logStorage := cfg.Config.Storage cfg.Config.Storage = cfg.FullBackupStorage + snapshotRestoreConfig := SnapshotRestoreConfig{ + RestoreConfig: cfg, + piTRTaskInfo: taskInfo, + logTableHistoryManager: metaInfoProcessor.GetTableHistoryManager(), + } // TiFlash replica is restored to down-stream on 'pitr' currently. - if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, cfg, checkInfo); err != nil { + if err = runSnapshotRestore(ctx, mgr, g, FullRestoreCmd, &snapshotRestoreConfig); err != nil { return errors.Trace(err) } cfg.Config.Storage = logStorage } else if len(cfg.FullBackupStorage) > 0 { - skipMsg := []byte(fmt.Sprintf("%s command is skipped due to checkpoint mode for restore\n", FullRestoreCmd)) - if _, err := glue.GetConsole(g).Out().Write(skipMsg); err != nil { + if err = WriteStringToConsole(g, fmt.Sprintf("%s is skipped due to checkpoint mode for restore\n", FullRestoreCmd)); err != nil { return errors.Trace(err) } - if checkInfo.CheckpointInfo != nil && checkInfo.CheckpointInfo.Metadata != nil && checkInfo.CheckpointInfo.Metadata.TiFlashItems != nil { + if taskInfo.hasTiFlashItemsInCheckpoint() { log.Info("load tiflash records of snapshot restore from checkpoint") - cfg.tiflashRecorder.Load(checkInfo.CheckpointInfo.Metadata.TiFlashItems) + cfg.tiflashRecorder.Load(taskInfo.CheckpointInfo.Metadata.TiFlashItems) } } // restore log. cfg.adjustRestoreConfigForStreamRestore() - if err := restoreStream(ctx, mgr, g, cfg, checkInfo.CheckpointInfo); err != nil { + logRestoreConfig := &LogRestoreConfig{ + RestoreConfig: cfg, + checkpointTaskInfo: taskInfo.CheckpointInfo, + tableMappingManager: metaInfoProcessor.GetTableMappingManager(), + logClient: logClient, + ddlFiles: ddlFiles, + } + if err := restoreStream(ctx, mgr, g, logRestoreConfig); err != nil { return errors.Trace(err) } return nil } -// RunStreamRestore start restore job +type LogRestoreConfig struct { + *RestoreConfig + checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore + tableMappingManager *stream.TableMappingManager + logClient *logclient.LogClient + ddlFiles []logclient.Log +} + +// restoreStream starts the log restore func restoreStream( c context.Context, mgr *conn.Mgr, g glue.Glue, - cfg *RestoreConfig, - taskInfo *checkpoint.CheckpointTaskInfoForLogRestore, + cfg *LogRestoreConfig, ) (err error) { var ( totalKVCount uint64 @@ -1306,9 +1357,9 @@ func restoreStream( zap.Uint64("source-start-point", cfg.StartTS), zap.Uint64("source-end-point", cfg.RestoreTS), zap.Uint64("target-end-point", currentTS), - zap.String("source-start", stream.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), - zap.String("source-end", stream.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), - zap.String("target-end", stream.FormatDate(oracle.GetTimeFromTS(currentTS))), + zap.String("source-start", utils.FormatDate(oracle.GetTimeFromTS(cfg.StartTS))), + zap.String("source-end", utils.FormatDate(oracle.GetTimeFromTS(cfg.RestoreTS))), + zap.String("target-end", utils.FormatDate(oracle.GetTimeFromTS(currentTS))), zap.Uint64("total-kv-count", totalKVCount), zap.Uint64("skipped-kv-count-by-checkpoint", checkpointTotalKVCount), zap.String("total-size", units.HumanSize(float64(totalSize))), @@ -1330,38 +1381,31 @@ func restoreStream( ctx = opentracing.ContextWithSpan(ctx, span1) } - client, err := createRestoreClient(ctx, g, cfg, mgr) + client := cfg.logClient + ddlFiles := cfg.ddlFiles + tableMappingManager := cfg.tableMappingManager + + currentTS, err = getCurrentTSFromCheckpointOrPD(ctx, mgr, cfg) if err != nil { - return errors.Annotate(err, "failed to create restore client") + return errors.Trace(err) } - defer client.Close(ctx) - if taskInfo != nil && taskInfo.Metadata != nil { - // reuse the task's rewrite ts - log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", taskInfo.Metadata.RewriteTS)) - currentTS = taskInfo.Metadata.RewriteTS - } else { - currentTS, err = restore.GetTSWithRetry(ctx, mgr.GetPDClient()) - if err != nil { - return errors.Trace(err) - } - } if err := client.SetCurrentTS(currentTS); err != nil { return errors.Trace(err) } importModeSwitcher := restore.NewImportModeSwitcher(mgr.GetPDClient(), cfg.Config.SwitchModeInterval, mgr.GetTLSConfig()) - restoreSchedulers, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) + restoreSchedulersFunc, _, err := restore.RestorePreWork(ctx, mgr, importModeSwitcher, cfg.Online, false) + // Always run the post-work even on error, so we don't stuck in the import + // mode or emptied schedulers + defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulersFunc, cfg.Online) if err != nil { return errors.Trace(err) } - // Always run the post-work even on error, so we don't stuck in the import - // mode or emptied schedulers - defer restore.RestorePostWork(ctx, importModeSwitcher, restoreSchedulers, cfg.Online) // It need disable GC in TiKV when PiTR. // because the process of PITR is concurrent and kv events isn't sorted by tso. - restoreGc, oldRatio, err := KeepGcDisabled(g, mgr.GetStorage()) + restoreGcFunc, oldGcRatio, err := DisableGc(g, mgr.GetStorage()) if err != nil { return errors.Trace(err) } @@ -1373,89 +1417,42 @@ func restoreStream( return } - // If the oldRatio is negative, which is not normal status. + // If the oldGcRatio is negative, which is not normal status. // It should set default value "1.1" after PiTR finished. - if strings.HasPrefix(oldRatio, "-") { - log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldRatio)) - oldRatio = utils.DefaultGcRatioVal + if strings.HasPrefix(oldGcRatio, "-") { + log.Warn("the original gc-ratio is negative, reset by default value 1.1", zap.String("old-gc-ratio", oldGcRatio)) + oldGcRatio = utils.DefaultGcRatioVal } - log.Info("start to restore gc", zap.String("ratio", oldRatio)) - if err := restoreGc(oldRatio); err != nil { - log.Error("failed to set gc enabled", zap.Error(err)) + log.Info("start to restore gc", zap.String("ratio", oldGcRatio)) + if err := restoreGcFunc(oldGcRatio); err != nil { + log.Error("failed to restore gc", zap.Error(err)) } log.Info("finish restoring gc") }() var sstCheckpointSets map[string]struct{} if cfg.UseCheckpoint { - oldRatioFromCheckpoint, err := client.InitCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldRatio, cfg.tiflashRecorder) + gcRatioFromCheckpoint, err := client.LoadOrCreateCheckpointMetadataForLogRestore(ctx, cfg.StartTS, cfg.RestoreTS, oldGcRatio, cfg.tiflashRecorder) if err != nil { return errors.Trace(err) } - oldRatio = oldRatioFromCheckpoint + oldGcRatio = gcRatioFromCheckpoint sstCheckpointSets, err = client.InitCheckpointMetadataForCompactedSstRestore(ctx) if err != nil { return errors.Trace(err) } } - encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) - if err != nil { - return errors.Annotate(err, "failed to create encryption manager for log restore") - } - defer encryptionManager.Close() - err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager) - if err != nil { - return err - } - migs, err := client.GetMigrations(ctx) - if err != nil { - return errors.Trace(err) - } - client.BuildMigrations(migs) - // get full backup meta storage to generate rewrite rules. - fullBackupStorage, err := parseFullBackupTablesStorage(cfg) - if err != nil { + // build and save id map + if err := buildAndSaveIDMapIfNeeded(ctx, client, cfg, tableMappingManager); err != nil { return errors.Trace(err) } - // load the id maps only when the checkpoint mode is used and not the first execution - currentIdMapSaved := false - if taskInfo != nil && taskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersist { - currentIdMapSaved = true - } - - ddlFiles, err := client.LoadDDLFilesAndCountDMLFiles(ctx) - if err != nil { - return err - } - - // get the schemas ID replace information. - // since targeted full backup storage, need to use the full backup cipher - tableMappingManager, err := client.BuildTableMappingManager(ctx, &logclient.BuildTableMappingManagerConfig{ - CurrentIdMapSaved: currentIdMapSaved, - TableFilter: cfg.TableFilter, - FullBackupStorage: fullBackupStorage, - CipherInfo: &cfg.Config.CipherInfo, - Files: ddlFiles, - }) + // build schema replace + schemasReplace, err := buildSchemaReplace(client, cfg, tableMappingManager) if err != nil { return errors.Trace(err) } - schemasReplace := stream.NewSchemasReplace(tableMappingManager.DbReplaceMap, cfg.tiflashRecorder, - client.CurrentTS(), cfg.TableFilter, client.RecordDeleteRange) - schemasReplace.AfterTableRewritten = func(deleted bool, tableInfo *model.TableInfo) { - // When the table replica changed to 0, the tiflash replica might be set to `nil`. - // We should remove the table if we meet. - if deleted || tableInfo.TiFlashReplica == nil { - cfg.tiflashRecorder.DelTable(tableInfo.ID) - return - } - cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) - // Remove the replica firstly. Let's restore them at the end. - tableInfo.TiFlashReplica = nil - } - updateStats := func(kvCount uint64, size uint64) { mu.Lock() defer mu.Unlock() @@ -1464,14 +1461,14 @@ func restoreStream( } pm := g.StartProgress(ctx, "Restore Meta Files", int64(len(ddlFiles)), !cfg.LogProgress) + var rp *logclient.RestoreMetaKVProcessor if err = withProgress(pm, func(p glue.Progress) error { - client.RunGCRowsLoader(ctx) - return client.RestoreAndRewriteMetaKVFiles(ctx, ddlFiles, schemasReplace, updateStats, p.Inc) + rp = logclient.NewRestoreMetaKVProcessor(client, schemasReplace, updateStats, p.Inc) + return rp.RestoreAndRewriteMetaKVFiles(ctx, ddlFiles) }); err != nil { return errors.Annotate(err, "failed to restore meta files") } - - rewriteRules := initRewriteRules(schemasReplace) + rewriteRules := buildRewriteRules(schemasReplace) ingestRecorder := schemasReplace.GetIngestRecorder() if err := rangeFilterFromIngestRecorder(ingestRecorder, rewriteRules); err != nil { @@ -1558,10 +1555,12 @@ func restoreStream( return errors.Annotate(err, "failed to clean up") } + // to delete range(table, schema) that's dropped during log backup if err = client.InsertGCRows(ctx); err != nil { return errors.Annotate(err, "failed to insert rows into gc_delete_range") } + // index ingestion is not captured by regular log backup, so we need to manually ingest again if err = client.RepairIngestIndex(ctx, ingestRecorder, g); err != nil { return errors.Annotate(err, "failed to repair ingest index") } @@ -1597,11 +1596,11 @@ func restoreStream( return nil } -func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { +func createLogClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, mgr *conn.Mgr) (*logclient.LogClient, error) { var err error keepaliveCfg := GetKeepalive(&cfg.Config) keepaliveCfg.PermitWithoutStream = true - client := logclient.NewRestoreClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) + client := logclient.NewLogClient(mgr.GetPDClient(), mgr.GetPDHTTPClient(), mgr.GetTLSConfig(), keepaliveCfg) err = client.Init(ctx, g, mgr.GetStorage()) if err != nil { @@ -1625,15 +1624,12 @@ func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, m client.SetCrypter(&cfg.CipherInfo) client.SetUpstreamClusterID(cfg.upstreamClusterID) - createCheckpointSessionFn := func() (glue.Session, error) { - // always create a new session for checkpoint runner - // because session is not thread safe - if cfg.UseCheckpoint { - return g.CreateSession(mgr.GetStorage()) - } - return nil, nil + // always create a new session for checkpoint runner + // because session is not thread safe + createSessionFn := func() (glue.Session, error) { + return g.CreateSession(mgr.GetStorage()) } - err = client.InitClients(ctx, u, createCheckpointSessionFn, uint(cfg.Concurrency), cfg.ConcurrencyPerStore.Value) + err = client.InitClients(ctx, u, createSessionFn, cfg.UseCheckpoint, uint(cfg.Concurrency), cfg.ConcurrencyPerStore.Value) if err != nil { return nil, errors.Trace(err) } @@ -1643,6 +1639,20 @@ func createRestoreClient(ctx context.Context, g glue.Glue, cfg *RestoreConfig, m return nil, errors.Trace(err) } + encryptionManager, err := encryption.NewManager(&cfg.LogBackupCipherInfo, &cfg.MasterKeyConfig) + if err != nil { + return nil, errors.Annotate(err, "failed to create encryption manager for log restore") + } + if err = client.InstallLogFileManager(ctx, cfg.StartTS, cfg.RestoreTS, cfg.MetadataDownloadBatchSize, encryptionManager); err != nil { + return nil, errors.Trace(err) + } + + migs, err := client.GetMigrations(ctx) + if err != nil { + return nil, errors.Trace(err) + } + client.BuildMigrations(migs) + return client, nil } @@ -1704,8 +1714,8 @@ type backupLogInfo struct { clusterID uint64 } -// getLogRange gets the log-min-ts and log-max-ts of starting log backup. -func getLogRange( +// getLogInfo gets the log-min-ts and log-max-ts of starting log backup. +func getLogInfo( ctx context.Context, cfg *Config, ) (backupLogInfo, error) { @@ -1713,10 +1723,10 @@ func getLogRange( if err != nil { return backupLogInfo{}, errors.Trace(err) } - return getLogRangeWithStorage(ctx, s) + return getLogInfoFromStorage(ctx, s) } -func getLogRangeWithStorage( +func getLogInfoFromStorage( ctx context.Context, s storage.ExternalStorage, ) (backupLogInfo, error) { @@ -1823,20 +1833,11 @@ func parseFullBackupTablesStorage( }, nil } -func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { +func buildRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreutils.RewriteRules { rules := make(map[int64]*restoreutils.RewriteRules) - filter := schemasReplace.TableFilter - - for _, dbReplace := range schemasReplace.DbMap { - if utils.IsSysDB(dbReplace.Name) || !filter.MatchSchema(dbReplace.Name) { - continue - } + for _, dbReplace := range schemasReplace.DbReplaceMap { for oldTableID, tableReplace := range dbReplace.TableMap { - if !filter.MatchTable(dbReplace.Name, tableReplace.Name) { - continue - } - if _, exist := rules[oldTableID]; !exist { log.Info("add rewrite rule", zap.String("tableName", dbReplace.Name+"."+tableReplace.Name), @@ -1859,7 +1860,7 @@ func initRewriteRules(schemasReplace *stream.SchemasReplace) map[int64]*restoreu } // ShiftTS gets a smaller shiftTS than startTS. -// It has a safe duration between shiftTS and startTS for trasaction. +// It has a safe duration between shiftTS and startTS for transaction. func ShiftTS(startTS uint64) uint64 { physical := oracle.ExtractPhysical(startTS) logical := oracle.ExtractLogical(startTS) @@ -1885,7 +1886,11 @@ type PiTRTaskInfo struct { FullRestoreCheckErr error } -func checkPiTRTaskInfo( +func (p *PiTRTaskInfo) hasTiFlashItemsInCheckpoint() bool { + return p.CheckpointInfo != nil && p.CheckpointInfo.Metadata != nil && p.CheckpointInfo.Metadata.TiFlashItems != nil +} + +func generatePiTRTaskInfo( ctx context.Context, mgr *conn.Mgr, g glue.Glue, @@ -1908,7 +1913,7 @@ func checkPiTRTaskInfo( if err != nil { return checkInfo, errors.Trace(err) } - // the log restore checkpoint metadata is persist, so the PITR is in the log restore stage. + // the log restore checkpoint metadata is persisted, so the PITR is in the log restore stage. if curTaskInfo.Metadata != nil { // TODO: check whether user has manually modified the cluster(ddl). If so, regard the behavior // as restore from scratch. (update `curTaskInfo.RewriteTs` to 0 as an uninitial value) @@ -1970,3 +1975,90 @@ func waitUntilSchemaReload(ctx context.Context, client *logclient.LogClient) err log.Info("reloading schema finished", zap.Duration("timeTaken", time.Since(reloadStart))) return nil } + +func needToBuildIdMap(checkpointTaskInfo *checkpoint.CheckpointTaskInfoForLogRestore) bool { + newTask := true + if checkpointTaskInfo != nil && checkpointTaskInfo.Progress == checkpoint.InLogRestoreAndIdMapPersisted { + newTask = false + } + return newTask +} + +func buildSchemaReplace( + client *logclient.LogClient, + cfg *LogRestoreConfig, + tableMappingManager *stream.TableMappingManager) (*stream.SchemasReplace, error) { + + schemasReplace := stream.NewSchemasReplace(tableMappingManager.DBReplaceMap, cfg.tiflashRecorder, + client.CurrentTS(), client.RecordDeleteRange) + schemasReplace.AfterTableRewrittenFn = func(deleted bool, tableInfo *model.TableInfo) { + // When the table replica changed to 0, the tiflash replica might be set to `nil`. + // We should remove the table if we meet. + if deleted || tableInfo.TiFlashReplica == nil { + cfg.tiflashRecorder.DelTable(tableInfo.ID) + return + } + cfg.tiflashRecorder.AddTable(tableInfo.ID, *tableInfo.TiFlashReplica) + // Remove the replica first and restore them at the end. + tableInfo.TiFlashReplica = nil + } + return schemasReplace, nil +} + +func buildAndSaveIDMapIfNeeded(ctx context.Context, client *logclient.LogClient, cfg *LogRestoreConfig, + tableMappingManager *stream.TableMappingManager) error { + // get full backup meta storage if needed. + fullBackupStorage, err := parseFullBackupTablesStorage(cfg.RestoreConfig) + if err != nil { + return errors.Trace(err) + } + + // get the schemas ID replace information. + // since targeted full backup storage, need to use the full backup cipher + loadSavedIdMap := !needToBuildIdMap(cfg.checkpointTaskInfo) + dbReplaces, err := client.GetIDMapFromCheckpointOrFullBackup(ctx, &logclient.GetIDMapConfig{ + LoadSavedIDMap: loadSavedIdMap, + TableFilter: cfg.TableFilter, + PiTRTableFilter: cfg.PiTRTableFilter, + FullBackupStorage: fullBackupStorage, + CipherInfo: &cfg.Config.CipherInfo, + }) + if err != nil { + return errors.Trace(err) + } + + if loadSavedIdMap { + err := tableMappingManager.FromDBReplaceMap(dbReplaces) + if err != nil { + return errors.Trace(err) + } + stream.LogDBReplaceMap("######## load from saved id map", tableMappingManager.DBReplaceMap) + } else { + stream.LogDBReplaceMap("######## before merge", tableMappingManager.DBReplaceMap) + tableMappingManager.MergeBaseDBReplace(dbReplaces) + tableMappingManager.FilterDBMap(cfg.PiTRTableFilter) + err = tableMappingManager.ReplaceTemporaryIDs(ctx, client.GenGlobalIDs) + stream.LogDBReplaceMap("######## after filter", tableMappingManager.DBReplaceMap) + if err != nil { + return errors.Trace(err) + } + if err = client.SaveIdMapWithFailPoints(ctx, tableMappingManager); err != nil { + return errors.Trace(err) + } + } + return nil +} + +func getCurrentTSFromCheckpointOrPD(ctx context.Context, mgr *conn.Mgr, cfg *LogRestoreConfig) (uint64, error) { + if cfg.checkpointTaskInfo != nil && cfg.checkpointTaskInfo.Metadata != nil { + // reuse the checkpoint task's rewrite ts + rewriteTS := cfg.checkpointTaskInfo.Metadata.RewriteTS + log.Info("reuse the task's rewrite ts", zap.Uint64("rewrite-ts", rewriteTS)) + return rewriteTS, nil + } + currentTS, err := restore.GetTSWithRetry(ctx, mgr.GetPDClient()) + if err != nil { + return 0, errors.Trace(err) + } + return currentTS, nil +} diff --git a/br/pkg/task/stream_test.go b/br/pkg/task/stream_test.go index 847699bc152cd..bdf2fdf41aa80 100644 --- a/br/pkg/task/stream_test.go +++ b/br/pkg/task/stream_test.go @@ -192,7 +192,7 @@ func TestGetLogRangeWithFullBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - _, err = getLogRange(context.TODO(), &cfg) + _, err = getLogInfo(context.TODO(), &cfg) require.Error(t, err, errors.Annotate(berrors.ErrStorageUnknown, "the storage has been used for full backup")) } @@ -215,7 +215,7 @@ func TestGetLogRangeWithLogBackupDir(t *testing.T) { cfg := Config{ Storage: testDir, } - logInfo, err := getLogRange(context.TODO(), &cfg) + logInfo, err := getLogInfo(context.TODO(), &cfg) require.Nil(t, err) require.Equal(t, logInfo.logMinTS, startLogBackupTS) } diff --git a/br/pkg/utils/BUILD.bazel b/br/pkg/utils/BUILD.bazel index 424329b7b3134..32eec7b9243b3 100644 --- a/br/pkg/utils/BUILD.bazel +++ b/br/pkg/utils/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "dyn_pprof_unix.go", "encryption.go", "error_handling.go", + "filter.go", "json.go", "key.go", "misc.go", @@ -36,7 +37,9 @@ go_library( "//pkg/parser/terror", "//pkg/parser/types", "//pkg/sessionctx", + "//pkg/tablecodec", "//pkg/util", + "//pkg/util/codec", "//pkg/util/encrypt", "//pkg/util/logutil", "//pkg/util/sqlexec", @@ -86,7 +89,7 @@ go_test( ], embed = [":utils"], flaky = True, - shard_count = 34, + shard_count = 36, deps = [ "//br/pkg/errors", "//pkg/kv", @@ -104,6 +107,7 @@ go_test( "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_kvproto//pkg/errorpb", "@com_github_stretchr_testify//require", + "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//tikv", "@com_github_tikv_pd_client//:client", "@io_etcd_go_etcd_tests_v3//integration", diff --git a/br/pkg/utils/consts/BUILD.bazel b/br/pkg/utils/consts/BUILD.bazel new file mode 100644 index 0000000000000..1c9766fe93e1a --- /dev/null +++ b/br/pkg/utils/consts/BUILD.bazel @@ -0,0 +1,8 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "consts", + srcs = ["consts.go"], + importpath = "github.com/pingcap/tidb/br/pkg/utils/consts", + visibility = ["//visibility:public"], +) diff --git a/br/pkg/utils/consts/consts.go b/br/pkg/utils/consts/consts.go new file mode 100644 index 0000000000000..93390bcbacc38 --- /dev/null +++ b/br/pkg/utils/consts/consts.go @@ -0,0 +1,21 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package consts + +// Default columnFamily and write columnFamily +const ( + DefaultCF = "default" + WriteCF = "write" +) diff --git a/br/pkg/utils/filter.go b/br/pkg/utils/filter.go new file mode 100644 index 0000000000000..da5170ef82971 --- /dev/null +++ b/br/pkg/utils/filter.go @@ -0,0 +1,76 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +type PiTRTableFilter struct { + DbIdToTable map[int64]map[int64]struct{} +} + +func NewPiTRTableFilter() *PiTRTableFilter { + return &PiTRTableFilter{ + DbIdToTable: make(map[int64]map[int64]struct{}), + } +} + +// UpdateTable adds a table ID to the filter for the given database ID +func (f *PiTRTableFilter) UpdateTable(dbID, tableID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + f.DbIdToTable[dbID] = make(map[int64]struct{}) + } + + f.DbIdToTable[dbID][tableID] = struct{}{} +} + +// UpdateDB adds the database id +func (f *PiTRTableFilter) UpdateDB(dbID int64) { + if f.DbIdToTable == nil { + f.DbIdToTable = make(map[int64]map[int64]struct{}) + } + + if _, ok := f.DbIdToTable[dbID]; !ok { + f.DbIdToTable[dbID] = make(map[int64]struct{}) + } +} + +// Remove removes a table ID from the filter for the given database ID. +// Returns true if the table was found and removed, false otherwise. +func (f *PiTRTableFilter) Remove(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + if _, exists := tables[tableID]; exists { + delete(tables, tableID) + return true + } + } + return false +} + +// ContainsTable checks if the given database ID and table ID combination exists in the filter +func (f *PiTRTableFilter) ContainsTable(dbID, tableID int64) bool { + if tables, ok := f.DbIdToTable[dbID]; ok { + _, exists := tables[tableID] + return exists + } + return false +} + +// ContainsDB checks if the given database ID exists in the filter +func (f *PiTRTableFilter) ContainsDB(dbID int64) bool { + _, ok := f.DbIdToTable[dbID] + return ok +} diff --git a/br/pkg/utils/key.go b/br/pkg/utils/key.go index d8371a023fca7..4eceaf9ea2162 100644 --- a/br/pkg/utils/key.go +++ b/br/pkg/utils/key.go @@ -8,12 +8,15 @@ import ( "fmt" "io" "strings" + "time" "github.com/pingcap/errors" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/logutil" "github.com/pingcap/tidb/pkg/kv" + "github.com/pingcap/tidb/pkg/tablecodec" + "github.com/pingcap/tidb/pkg/util/codec" "go.uber.org/zap" ) @@ -190,3 +193,27 @@ func IntersectAll(s1 []kv.KeyRange, s2 []kv.KeyRange) []kv.KeyRange { } return rs } + +const DateFormat = "2006-01-02 15:04:05.999999999 -0700" + +func FormatDate(ts time.Time) string { + return ts.Format(DateFormat) +} + +func IsMetaDBKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDB") +} + +func IsMetaDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mDDLJobH") +} + +func IsDBOrDDLJobHistoryKey(key []byte) bool { + return strings.HasPrefix(string(key), "mD") +} + +func EncodeTxnMetaKey(key []byte, field []byte, ts uint64) []byte { + k := tablecodec.EncodeMetaKey(key, field) + txnKey := codec.EncodeBytes(nil, k) + return codec.EncodeUintDesc(txnKey, ts) +} diff --git a/br/pkg/utils/key_test.go b/br/pkg/utils/key_test.go index 28e24055443de..86dec2c546609 100644 --- a/br/pkg/utils/key_test.go +++ b/br/pkg/utils/key_test.go @@ -7,9 +7,11 @@ import ( "fmt" "slices" "testing" + "time" "github.com/pingcap/tidb/pkg/kv" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" ) func TestParseKey(t *testing.T) { @@ -177,3 +179,40 @@ func TestClampKeyRanges(t *testing.T) { }) } } + +func TestDateFormat(t *testing.T) { + cases := []struct { + ts uint64 + target string + }{ + { + 434604259287760897, + "2022-07-15 19:14:39.534 +0800", + }, + { + 434605479096221697, + "2022-07-15 20:32:12.734 +0800", + }, + { + 434605478903808000, + "2022-07-15 20:32:12 +0800", + }, + } + + timeZone, _ := time.LoadLocation("Asia/Shanghai") + for _, ca := range cases { + date := FormatDate(oracle.GetTimeFromTS(ca.ts).In(timeZone)) + require.Equal(t, ca.target, date) + } +} + +func TestPrefix(t *testing.T) { + require.True(t, IsMetaDBKey([]byte("mDBs"))) + require.False(t, IsMetaDBKey([]byte("mDDL"))) + require.True(t, IsMetaDDLJobHistoryKey([]byte("mDDLJobHistory"))) + require.False(t, IsMetaDDLJobHistoryKey([]byte("mDDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDL"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDB:"))) + require.True(t, IsDBOrDDLJobHistoryKey([]byte("mDDLHistory"))) + require.False(t, IsDBOrDDLJobHistoryKey([]byte("DDL"))) +} diff --git a/br/pkg/utils/schema.go b/br/pkg/utils/schema.go index 47ea86dcc9370..04da73817b239 100644 --- a/br/pkg/utils/schema.go +++ b/br/pkg/utils/schema.go @@ -47,12 +47,12 @@ func TemporaryDBName(db string) pmodel.CIStr { return pmodel.NewCIStr(temporaryDBNamePrefix + db) } -// GetSysDBName get the original name of system DB -func GetSysDBName(tempDB pmodel.CIStr) (string, bool) { - if ok := strings.HasPrefix(tempDB.O, temporaryDBNamePrefix); !ok { - return tempDB.O, false +// StripTempTableNamePrefixIfNeeded get the original name of system DB +func StripTempTableNamePrefixIfNeeded(tempDB string) (string, bool) { + if ok := strings.HasPrefix(tempDB, temporaryDBNamePrefix); !ok { + return tempDB, false } - return tempDB.O[len(temporaryDBNamePrefix):], true + return tempDB[len(temporaryDBNamePrefix):], true } // GetSysDBCIStrName get the CIStr name of system DB diff --git a/br/tests/br_pitr_table_filter/run.sh b/br/tests/br_pitr_table_filter/run.sh new file mode 100755 index 0000000000000..98784f9d563ca --- /dev/null +++ b/br/tests/br_pitr_table_filter/run.sh @@ -0,0 +1,448 @@ +#!/bin/sh +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux +DB="$TEST_NAME" +CUR=$(cd `dirname $0`; pwd) +TASK_NAME="pitr_table_filter" +. run_services + +# helper methods +create_tables_with_values() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to create + + for i in $(seq 1 $count); do + run_sql "create table $DB.${prefix}_${i}(c int); insert into $DB.${prefix}_${i} values ($i);" + done +} + +verify_tables() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to verify + local should_exist=$3 # true/false - whether tables should exist + + for i in $(seq 1 $count); do + if [ "$should_exist" = "true" ]; then + run_sql "select count(*) = 1 from $DB.${prefix}_${i} where c = $i" || { + echo "Table $DB.${prefix}_${i} doesn't have expected value $i" + exit 1 + } + else + if run_sql "select * from $DB.${prefix}_${i}" 2>/dev/null; then + echo "Table $DB.${prefix}_${i} exists but should not" + exit 1 + fi + fi + done +} + +rename_tables() { + local old_prefix=$1 # original table name prefix + local new_prefix=$2 # new table name prefix + local count=$3 # number of tables to rename + + for i in $(seq 1 $count); do + run_sql "rename table $DB.${old_prefix}_${i} to $DB.${new_prefix}_${i};" + done +} + +drop_tables() { + local prefix=$1 # table name prefix + local count=$2 # number of tables to drop + + for i in $(seq 1 $count); do + run_sql "drop table $DB.${prefix}_${i};" + done +} + +test_basic_filter() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start basic filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "table_to_drop" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup_lower" 3 + create_tables_with_values "LOG_BACKUP_UPPER" 3 + create_tables_with_values "other" 3 + drop_tables "table_to_drop" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "case 1 sanity check, zero filter" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + verify_tables "table_to_drop" 3 false + + echo "case 2 with log backup table filter" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + + echo "case 3 with multiple filters" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" -f "$DB.full*" + + verify_tables "log_backup_lower" 3 true + verify_tables "LOG_BACKUP_UPPER" 3 true + verify_tables "full_backup" 3 true + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + + echo "case 4 with negative filters" + run_sql "drop schema $DB;" + # have to use a match all filter before using negative filters + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "*.*" -f "!$DB.log*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 true + verify_tables "other" 3 true + verify_tables "table_to_drop" 3 false + + echo "case 5 restore dropped table" + run_sql "drop schema $DB;" + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.table*" + + verify_tables "log_backup_lower" 3 false + verify_tables "LOG_BACKUP_UPPER" 3 false + verify_tables "full_backup" 3 false + verify_tables "other" 3 false + verify_tables "table_to_drop" 3 false + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "basic filter test cases passed" +} + +test_table_rename() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start table rename with filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + # has been renamed, should not visible anymore + verify_tables "renamed_in" 3 false + # also renamed out of filter range, should not be visible for both + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "table rename with filter passed" +} + +test_with_checkpoint() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start table filter with checkpoint" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + echo "write initial data and do snapshot backup" + create_tables_with_values "full_backup" 3 + create_tables_with_values "renamed_in" 3 + create_tables_with_values "log_renamed_out" 3 + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + echo "write more data and wait for log backup to catch up" + create_tables_with_values "log_backup" 3 + rename_tables "renamed_in" "log_backup_renamed_in" 3 + rename_tables "log_renamed_out" "renamed_out" 3 + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # restart services to clean up the cluster + restart_services || { echo "Failed to restart services"; exit 1; } + + # Using single quotes to prevent shell interpretation + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return("corrupt-last-table-files")' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting full backup last table corruption but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore metakv stage + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/restore/snap_client/corrupt-files=return("only-last-table-files");github.com/pingcap/tidb/br/pkg/restore/log_client/failed-after-id-maps-saved=return(true)' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting failed after id map saved but success' + exit 1 + fi + + # PITR with checkpoint but failed in the log restore datakv stage + # skip the snapshot restore stage + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/task/corrupt-files=return("corrupt-last-table-files")' + restore_fail=0 + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" || restore_fail=1 + export GO_FAILPOINTS="" + if [ $restore_fail -ne 1 ]; then + echo 'expecting log restore last table corruption but success' + exit 1 + fi + + # PITR with checkpoint + export GO_FAILPOINTS='github.com/pingcap/tidb/br/pkg/task/corrupt-files=return("only-last-table-files")' + run_br --pd $PD_ADDR restore point --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -s "local://$TEST_DIR/$TASK_NAME/log" -f "$DB.log*" + export GO_FAILPOINTS="" + + verify_tables "log_backup" 3 true + verify_tables "log_backup_renamed_in" 3 true + + verify_tables "full_backup" 3 false + # has been renamed, should not visible anymore + verify_tables "renamed_in" 3 false + # also renamed out of filter range, should not be visible for both + verify_tables "renamed_out" 3 false + verify_tables "log_renamed_out" 3 false + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "table filter checkpoint passed" +} + +test_exchange_partition() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start testing exchange partition with filter" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + # create a partitioned table and a normal table for exchange + run_sql "CREATE TABLE $DB.full_partitioned ( + id INT, + value INT + ) PARTITION BY RANGE (id) ( + PARTITION p0 VALUES LESS THAN (100), + PARTITION p1 VALUES LESS THAN (200) + );" + + run_sql "CREATE TABLE $DB.log_table ( + id INT, + value INT + );" + + run_sql "INSERT INTO $DB.full_partitioned VALUES (50, 1), (150, 2);" + run_sql "INSERT INTO $DB.log_table VALUES (75, 3);" + + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + # exchange partition and create some new tables with log_ prefix + run_sql "ALTER TABLE $DB.full_partitioned EXCHANGE PARTITION p0 WITH TABLE $DB.log_table;" + # some sanity check + run_sql "CREATE TABLE $DB.log_after_exchange (id INT, value INT);" + run_sql "INSERT INTO $DB.log_after_exchange VALUES (1, 1);" + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + restart_services || { echo "Failed to restart services"; exit 1; } + + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + # verify the results + run_sql "SELECT count(*) = 1 FROM $DB.log_after_exchange WHERE id = 1 AND value = 1" || { + echo "Table log_after_exchange doesn't have expected values" + exit 1 + } + + # table should be exchanged and kept after restore + run_sql "SELECT count(*) = 1 FROM $DB.log_table WHERE id = 50 AND value = 1" || { + echo "log_table doesn't have the exchanged partition data (50,1)" + exit 1 + } + + if run_sql "SELECT * FROM $DB.full_partitioned" 2>/dev/null; then + echo "Table full_partitioned exists but should not" + exit 1 + fi + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "exchange partition with filter test passed" +} + +test_random_operations() { + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "start random operations filter testing" + run_br --pd $PD_ADDR log start --task-name $TASK_NAME -s "local://$TEST_DIR/$TASK_NAME/log" + + run_sql "create schema $DB;" + + # Define our constants + NUM_TABLES=10 + PREFIXES=("log_table" "full_table" "test_table") + + create_tables_with_values "log_table" $NUM_TABLES + create_tables_with_values "full_table" $NUM_TABLES + create_tables_with_values "test_table" $NUM_TABLES + + # Take full backup + run_br backup full -f "$DB.*" -s "local://$TEST_DIR/$TASK_NAME/full" --pd $PD_ADDR + + # Perform random operations + echo "Performing random operations..." + for i in $(seq 1 $((NUM_TABLES * 2))); do + operation=$((RANDOM % 3)) # 0: rename, 1: drop, 2: create new + table_idx=$((RANDOM % NUM_TABLES + 1)) + prefix=${PREFIXES[$((RANDOM % 3))]} + + case $operation in + 0) # Rename + old_prefix=${PREFIXES[$((RANDOM % 3))]} + new_prefix=${PREFIXES[$((RANDOM % 3))]} + if run_sql "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$DB' AND table_name='${old_prefix}_${table_idx}'" | grep -q "COUNT(\*): 1"; then + run_sql "RENAME TABLE $DB.${old_prefix}_${table_idx} TO $DB.${new_prefix}_${table_idx}_renamed;" + echo "Renamed ${old_prefix}_${table_idx} to ${new_prefix}_${table_idx}_renamed" + fi + ;; + 1) # Drop + if run_sql "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$DB' AND table_name='${prefix}_${table_idx}'" | grep -q "COUNT(\*): 1"; then + run_sql "DROP TABLE $DB.${prefix}_${table_idx};" + echo "Dropped ${prefix}_${table_idx}" + fi + ;; + 2) # Create new + if ! run_sql "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$DB' AND table_name='${prefix}_new_${table_idx}'" | grep -q "COUNT(\*): 1"; then + run_sql "CREATE TABLE $DB.${prefix}_new_${table_idx} ( + id INT PRIMARY KEY, + value INT + );" + num_rows=$((RANDOM % 10 + 1)) + for j in $(seq 1 $num_rows); do + value=$((RANDOM % 1000)) + run_sql "INSERT INTO $DB.${prefix}_new_${table_idx} (id, value) VALUES ($j, $value);" + done + echo "Created new table ${prefix}_new_${table_idx}" + fi + ;; + esac + done + + . "$CUR/../br_test_utils.sh" && wait_log_checkpoint_advance "$TASK_NAME" + + # Save checksums before restore + echo "Saving checksums of log_ prefixed tables..." + original_checksums="" + for table in $(run_sql "SHOW TABLES FROM $DB" | grep "^log_"); do + checksum=$(run_sql "CHECKSUM TABLE $DB.$table") + original_checksums="$original_checksums $table:$checksum" + echo $original_checksums + done + + # Restart and restore with filter + restart_services || { echo "Failed to restart services"; exit 1; } + + echo "Restoring with log_ filter..." + run_br --pd "$PD_ADDR" restore point -s "local://$TEST_DIR/$TASK_NAME/log" --full-backup-storage "local://$TEST_DIR/$TASK_NAME/full" -f "$DB.log*" + + # Verify checksums after restore + echo "Verifying checksums..." + for pair in $original_checksums; do + table=${pair%%:*} + original_checksum=${pair#*:} + new_checksum=$(run_sql "CHECKSUM TABLE $DB.$table") + echo $new_checksum + if [ "$original_checksum" != "$new_checksum" ]; then + echo "Checksum mismatch for table $table" + echo "Original: $original_checksum" + echo "After restore: $new_checksum" + exit 1 + fi + done + + # Verify non-log tables don't exist + tables=$(run_sql "SELECT table_name FROM information_schema.tables WHERE table_schema='$DB'" | grep "table_name:" | awk '{print $2}') + for table in $tables; do + if ! echo "$table" | grep -q "^log_"; then + echo "Non-log table $table exists but should not" + exit 1 + fi + done + + # cleanup + rm -rf "$TEST_DIR/$TASK_NAME" + + echo "Random operations test passed" +} + + +# run all test cases +test_basic_filter +test_table_rename +test_with_checkpoint +test_exchange_partition +test_random_operations + +echo "br pitr table filter all tests passed" diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh index 0c9518f69fb97..eb5b4fa0bbe6a 100755 --- a/br/tests/run_group_br_tests.sh +++ b/br/tests/run_group_br_tests.sh @@ -26,7 +26,7 @@ groups=( ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup br_mv_index' ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table ' ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter br_partition_add_index' - ["G06"]='br_tikv_outage br_tikv_outage3 br_restore_checkpoint br_encryption' + ["G06"]='br_tikv_outage br_tikv_outage3 br_restore_checkpoint br_encryption br_pitr_table_filter' ["G07"]='br_pitr' ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption br_tiflash_conflict' ) diff --git a/br/tests/utils.go b/br/tests/utils.go index e8653aaaabb43..2033c712a756c 100644 --- a/br/tests/utils.go +++ b/br/tests/utils.go @@ -75,41 +75,42 @@ func runValidateBackupFiles(cmd *cobra.Command, args []string) { // as full backup will have backup files ready in the storage path after returning from the command // and log backup will not, so we can only use restore point to validate. func parseCommand(cmd string) (string, bool) { - // Create a temporary cobra command to parse the input - tempCmd := &cobra.Command{} - tempCmd.Flags().String("s", "", "Storage path (short)") - tempCmd.Flags().String("storage", "", "Storage path (long)") - - // Split the command string into args + // not using cobra since it has to define all the possible flags otherwise will report parsing error args := strings.Fields(cmd) - // Parse the args - if err := tempCmd.Flags().Parse(args); err != nil { - return "", false - } - - // Check for backup or restore point command + // check for backup or restore point command hasBackupOrRestorePoint := false - for i, arg := range args { + storagePath := "" + + for i := 0; i < len(args); i++ { + arg := args[i] if arg == "backup" { hasBackupOrRestorePoint = true - break + continue } if i < len(args)-1 && arg == "restore" && args[i+1] == "point" { hasBackupOrRestorePoint = true - break + continue } - } - // Get the storage path from either -s or -storage flag - storagePath, _ := tempCmd.Flags().GetString("s") - if storagePath == "" { - storagePath, _ = tempCmd.Flags().GetString("storage") + // check for storage path in various formats + if arg == "-s" || arg == "--storage" { + if i+1 < len(args) { + storagePath = args[i+1] + i++ // skip the next arg since we consumed it + } + } else if strings.HasPrefix(arg, "--storage=") { + storagePath = strings.TrimPrefix(arg, "--storage=") + } else if strings.HasPrefix(arg, "-s=") { + storagePath = strings.TrimPrefix(arg, "-s=") + } } - storagePath = strings.TrimPrefix(storagePath, "local://") - if hasBackupOrRestorePoint && storagePath != "" { - return storagePath, true + if strings.HasPrefix(storagePath, "local://") { + storagePath = strings.TrimPrefix(storagePath, "local://") + if hasBackupOrRestorePoint && storagePath != "" { + return storagePath, true + } } return "", false } diff --git a/pkg/table/tables/bench_test.go b/pkg/table/tables/bench_test.go index c389a947778c5..db0b96e831e0d 100644 --- a/pkg/table/tables/bench_test.go +++ b/pkg/table/tables/bench_test.go @@ -197,7 +197,7 @@ func BenchmarkUpdateRecordInPipelinedDML(b *testing.B) { b.StartTimer() for j := 0; j < batchSize; j++ { - // Update record + // UpdateTable record handle := kv.IntHandle(j) err := tb.UpdateRecord(se.GetTableCtx(), txn, handle, records[j], newData[j], touched, table.WithCtx(context.TODO())) if err != nil { diff --git a/tests/_utils/run_services b/tests/_utils/run_services index 617821cfc7577..8b9d3f89f9b08 100644 --- a/tests/_utils/run_services +++ b/tests/_utils/run_services @@ -261,7 +261,7 @@ start_tiflash() { i=0 while ! run_curl "https://$TIFLASH_HTTP/tiflash/store-status" 1>/dev/null 2>&1; do i=$((i+1)) - if [ "$i" -gt 20 ]; then + if [ "$i" -gt 1 ]; then echo "failed to start tiflash" return 1 fi