From 33d63ac202113a601898ff4d1bad35c499bcfd05 Mon Sep 17 00:00:00 2001 From: Molly Gao <31704180+mgao0@users.noreply.github.com> Date: Mon, 24 May 2021 16:35:11 -0700 Subject: [PATCH] Integrate spot restoration tool with CLI restore command (#52) This commit integrates the SpotRestorationTool into RestoreFromBackupTool: when a user triggers a restore command using the ZK CLI and a znode path is specified, the tool runs spot restoration on top of an offline restoration of the backup files; the spot restoration uses the restored backup files. --- .../apache/zookeeper/cli/RestoreCommand.java | 84 ++++++-- .../server/backup/RestoreFromBackupTool.java | 189 +++++++++++++----- .../server/backup/SpotRestorationTool.java | 17 +- .../server/backup/RestorationToolTest.java | 93 +++++++-- 4 files changed, 302 insertions(+), 81 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/cli/RestoreCommand.java b/zookeeper-server/src/main/java/org/apache/zookeeper/cli/RestoreCommand.java index 7d4acdd453a..71aa042a290 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/cli/RestoreCommand.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/cli/RestoreCommand.java @@ -44,7 +44,10 @@ public class RestoreCommand extends CliCommand { + OptionFullCommand.LOG_DESTINATION + "] [" + OptionFullCommand.TIMETABLE_STORAGE_PATH + "](needed if restore to a timestamp) [" + OptionFullCommand.LOCAL_RESTORE_TEMP_DIR_PATH + "](optional) [" + OptionFullCommand.DRY_RUN + "](optional) [" - + OptionFullCommand.OVERWRITE + "](optional)"; + + OptionFullCommand.OVERWRITE + "](optional)\n Options for spot restoration: \n[" + + OptionFullCommand.ZNODE_PATH_TO_RESTORE + "] [" + + OptionFullCommand.ZK_SERVER_CONNECTION_STRING + "] [" + + OptionFullCommand.RECURSIVE_SPOT_RESTORE + "](optional)"; public final class OptionLongForm { /* Required if no restore timestamp is specified */ @@ -66,6 +69,14 @@ public final class OptionLongForm { /* Optional. 
Default value false */ public static final String OVERWRITE = "overwrite"; + //For spot restoration + /* Required */ + public static final String ZNODE_PATH_TO_RESTORE = "znode_path_to_restore"; + /* Required */ + public static final String ZK_SERVER_CONNECTION_STRING = "zk_server_connection_string"; + /* Optional. Default value false */ + public static final String RECURSIVE_SPOT_RESTORE = "recursive_spot_restore"; + // Create a private constructor so it can't be instantiated private OptionLongForm() { } @@ -83,6 +94,11 @@ public final class OptionShortForm { public static final String HELP = "h"; public static final String OVERWRITE = "f"; + //For spot restoration + public static final String ZNODE_PATH_TO_RESTORE = "p"; + public static final String ZK_SERVER_CONNECTION_STRING = "c"; + public static final String RECURSIVE_SPOT_RESTORE = "a"; + // Create a private constructor so it can't be instantiated private OptionShortForm() { } @@ -107,6 +123,15 @@ public final class OptionFullCommand { public static final String DRY_RUN = "-" + OptionShortForm.DRY_RUN; public static final String OVERWRITE = "-" + OptionShortForm.OVERWRITE; + //For spot restoration + public static final String ZNODE_PATH_TO_RESTORE = + "-" + OptionShortForm.ZNODE_PATH_TO_RESTORE + " " + OptionLongForm.ZNODE_PATH_TO_RESTORE; + public static final String ZK_SERVER_CONNECTION_STRING = + "-" + OptionShortForm.ZK_SERVER_CONNECTION_STRING + " " + + OptionLongForm.ZK_SERVER_CONNECTION_STRING; + public static final String RECURSIVE_SPOT_RESTORE = + "-" + OptionShortForm.RECURSIVE_SPOT_RESTORE; + // Create a private constructor so it can't be instantiated private OptionFullCommand() { } @@ -127,6 +152,14 @@ private OptionFullCommand() { OptionLongForm.LOCAL_RESTORE_TEMP_DIR_PATH)); options.addOption(new Option(OptionShortForm.DRY_RUN, false, OptionLongForm.DRY_RUN)); options.addOption(new Option(OptionShortForm.OVERWRITE, false, OptionLongForm.OVERWRITE)); + + //For spot restoration + 
options.addOption(new Option(OptionShortForm.ZNODE_PATH_TO_RESTORE, true, + OptionLongForm.ZNODE_PATH_TO_RESTORE)); + options.addOption(new Option(OptionShortForm.ZK_SERVER_CONNECTION_STRING, true, + OptionLongForm.ZK_SERVER_CONNECTION_STRING)); + options.addOption(new Option(OptionShortForm.RECURSIVE_SPOT_RESTORE, false, + OptionLongForm.RECURSIVE_SPOT_RESTORE)); } public RestoreCommand() { @@ -136,26 +169,46 @@ public RestoreCommand() { @Override public String getUsageStr() { - return "Usage: RestoreFromBackupTool " + RESTORE_CMD_STR + " " + OPTION_STR + "\n " + return "Usage: RestoreFromBackupTool " + RESTORE_CMD_STR + " " + OPTION_STR + + "\n Options for both offline restoration and spot restoration:\n " + OptionFullCommand.RESTORE_ZXID - + ": the point to restore to, either the string 'latest' or a zxid in hex format. Choose one between this option or " - + OptionFullCommand.RESTORE_TIMESTAMP - + ", if both are specified, this option will be prioritized\n " + + ": the point to restore to, either the string 'latest' or a zxid in hex format. " + + "Choose one between this option or " + OptionFullCommand.RESTORE_TIMESTAMP + + ", if both are specified, this option will be prioritized. " + + "Required for both offline restoration and spot restoration.\n " + OptionFullCommand.RESTORE_TIMESTAMP + ": the point to restore to, a timestamp in long format. Choose one between this option or " - + OptionFullCommand.RESTORE_ZXID + ".\n " + OptionFullCommand.BACKUP_STORE - + ": the connection information for the backup store\n For GPFS the format is: gpfs:::\n " - + OptionFullCommand.SNAP_DESTINATION - + ": local destination path for restored snapshots\n " - + OptionFullCommand.LOG_DESTINATION + ": local destination path for restored txlogs\n " - + OptionFullCommand.TIMETABLE_STORAGE_PATH - + ": Needed if restore to a timestamp. 
Backup storage path for timetable files, for GPFS the format is: gpfs:::, if not set, default to be same as backup storage path\n " + + OptionFullCommand.RESTORE_ZXID + + ". Required for both offline restoration and spot restoration.\n " + + OptionFullCommand.BACKUP_STORE + + ": the connection information for the backup store\n " + + "For GPFS the format is: gpfs:::\n " + + "Required for both offline restoration and spot restoration.\n " + + OptionFullCommand.SNAP_DESTINATION + ": local destination path for restored snapshots. " + + "Required for offline restoration.\n " + OptionFullCommand.LOG_DESTINATION + + ": local destination path for restored txlogs. " + + "Required for offline restoration.\n " + OptionFullCommand.TIMETABLE_STORAGE_PATH + + ": Needed if restore to a timestamp. Backup storage path for timetable files. " + + "For GPFS the format is: gpfs:::. " + + "If not set, default to be same as backup storage path\n " + OptionFullCommand.LOCAL_RESTORE_TEMP_DIR_PATH - + ": Optional, local path for creating a temporary intermediate directory for restoration, the directory will be deleted after restoration is done\n " + + ": Required for spot restoration, and optional for offline restoration. " + + "The restore tool will use this local path to stage temporary files needed for restoration work, " + + "this directory will be deleted after restoration is done\n " + OptionFullCommand.DRY_RUN + " " + OptionLongForm.DRY_RUN + ": Optional, no files will be actually copied in a dry run\n " + OptionFullCommand.OVERWRITE + " " + OptionLongForm.OVERWRITE - + ": Optional, default false. If true, the destination directories will be overwritten\n"; + + ": Optional, default false. 
If true, all existing files will be wiped out and the directories " + + "be populated with restored files\n " + "Options for spot restoration only:\n " + + OptionFullCommand.ZNODE_PATH_TO_RESTORE + + ": The znode path to restore in the zk server\n " + + OptionFullCommand.ZK_SERVER_CONNECTION_STRING + + ": The connection string used to establish a client to server connection " + + "in order to do spot restoration on zk server. " + + "The format of this string should be host:port, " + "for example: 127.0.0.1:3000\n " + + OptionFullCommand.RECURSIVE_SPOT_RESTORE + + ": Optional, default false. If false, the spot restoration will be done on one single node only; " + + "if true, it will be done recursively on all of its descendants as well"; } @Override @@ -168,8 +221,7 @@ public CliCommand parse(String[] cmdArgs) throws CliParseException { } if ((!cl.hasOption(OptionShortForm.RESTORE_ZXID) && !cl .hasOption(OptionShortForm.RESTORE_TIMESTAMP)) || !cl - .hasOption(OptionShortForm.BACKUP_STORE) || !cl.hasOption(OptionShortForm.SNAP_DESTINATION) - || !cl.hasOption(OptionShortForm.LOG_DESTINATION)) { + .hasOption(OptionShortForm.BACKUP_STORE)) { throw new CliParseException("Missing required argument(s).\n" + getUsageStr()); } return this; diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/RestoreFromBackupTool.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/RestoreFromBackupTool.java index cc9f021c30a..0edd97c7d4f 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/RestoreFromBackupTool.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/RestoreFromBackupTool.java @@ -29,9 +29,11 @@ import java.util.Map; import java.util.Objects; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Range; import org.apache.commons.cli.CommandLine; +import org.apache.zookeeper.ZooKeeper; import 
org.apache.zookeeper.cli.RestoreCommand; import org.apache.zookeeper.common.ConfigException; import org.apache.zookeeper.server.backup.BackupUtil.BackupFileType; @@ -57,6 +59,7 @@ public class RestoreFromBackupTool { private static final int MAX_RETRIES = 10; private static final String HEX_PREFIX = "0x"; + private static final int CONNECTION_TIMEOUT = 300000; BackupStorageProvider storage; FileTxnSnapLog snapLog; @@ -65,6 +68,14 @@ public class RestoreFromBackupTool { File restoreTempDir; boolean overwrite = false; + // Spot restoration + boolean isSpotRestoration = false; + String znodePathToRestore; + String zkServerConnectionStr; + boolean restoreRecursively = false; + ZooKeeper zk; + SpotRestorationTool spotRestorationTool; + List logs; List snaps; List filesToCopy; @@ -138,7 +149,10 @@ public void parseArgs(CommandLine cl) { parseRestoreTimestamp(cl, backupStoragePath); } - parseRestoreDestination(cl); + parseAndValidateSpotRestorationArgs(cl); + + parseAndValidateOfflineRestoreDestination(cl); + parseRestoreTempDir(cl); // Check if overwriting the destination directories is allowed @@ -180,16 +194,19 @@ private void createBackupStorageProvider(String backupStoragePath) { } catch (IllegalArgumentException e) { System.err.println("Could not find a valid backup storage option based on the input: " + userProvidedStorageName + ". 
Error message: " + e.getMessage()); + e.printStackTrace(); System.exit(1); } catch (ConfigException e) { System.err.println( "Could not generate a backup config based on the input, error message: " + e .getMessage()); + e.printStackTrace(); System.exit(1); } catch (InstantiationException | InvocationTargetException | NoSuchMethodException | IllegalAccessException | ClassNotFoundException e) { System.err.println( "Could not generate a backup storage provider based on the input, error message: " + e .getMessage()); + e.printStackTrace(); System.exit(1); } } @@ -250,16 +267,30 @@ private void parseRestoreTimestamp(CommandLine cl, String backupStoragePath) { System.err.println( "Could not find a valid zxid from timetable using the timestamp provided: " + timestampStr + ". The error message is: " + e.getMessage()); + e.printStackTrace(); System.exit(2); } } - private void parseRestoreDestination(CommandLine cl) { + private void parseAndValidateOfflineRestoreDestination(CommandLine cl) { + if (isSpotRestoration) { + return; + } // Read restore destination: dataDir and logDir try { - File snapDir = new File(cl.getOptionValue(RestoreCommand.OptionShortForm.SNAP_DESTINATION)); - File logDir = new File(cl.getOptionValue(RestoreCommand.OptionShortForm.LOG_DESTINATION)); + String snapDirPath = cl.getOptionValue(RestoreCommand.OptionShortForm.SNAP_DESTINATION); + String logDirPath = cl.getOptionValue(RestoreCommand.OptionShortForm.LOG_DESTINATION); + + if (snapDirPath == null || logDirPath == null) { + throw new BackupException( + "Snap destination path and log destination path are not defined for offline restoration. SnapDirPath: " + + snapDirPath + ", logDirPath: " + logDirPath); + } + + File snapDir = new File(snapDirPath); + File logDir = new File(logDirPath); snapLog = new FileTxnSnapLog(logDir, snapDir); + checkSnapDataDirFileExistence(); } catch (IOException ioe) { System.err.println("Could not setup transaction log utility." 
+ ioe); System.exit(3); @@ -274,8 +305,38 @@ private void parseRestoreTempDir(CommandLine cl) { } if (restoreTempDir == null) { - // Default address for restore temp dir if not set. It will be deleted after the restoration is done. - this.restoreTempDir = new File(snapLog.getDataDir(), "RestoreTempDir_" + zxidToRestore); + if (isSpotRestoration) { + throw new BackupException( + "Local restore temp dir path is not defined for spot restoration."); + } else { + // This is an offline restoration + // If the user hasn't provided the restore temp dir parameter, + //then the tool will just create a temporary folder inside snapLog and delete it afterwards. + this.restoreTempDir = new File(snapLog.getDataDir(), "RestoreTempDir_" + zxidToRestore); + } + } + } + + private void parseAndValidateSpotRestorationArgs(CommandLine cl) { + if (cl.hasOption(RestoreCommand.OptionShortForm.ZNODE_PATH_TO_RESTORE)) { + znodePathToRestore = cl.getOptionValue(RestoreCommand.OptionShortForm.ZNODE_PATH_TO_RESTORE); + } + if (cl.hasOption(RestoreCommand.OptionShortForm.ZK_SERVER_CONNECTION_STRING)) { + zkServerConnectionStr = + cl.getOptionValue(RestoreCommand.OptionShortForm.ZK_SERVER_CONNECTION_STRING); + } + if (cl.hasOption(RestoreCommand.OptionShortForm.RECURSIVE_SPOT_RESTORE)) { + restoreRecursively = true; + } + if (znodePathToRestore != null && zkServerConnectionStr != null) { + isSpotRestoration = true; + } else if (znodePathToRestore == null && zkServerConnectionStr == null) { + isSpotRestoration = false; + } else { + throw new BackupException( + "Znode path and zk server connection string must be provided in order to do spot restoration. Provided znode path: " + + znodePathToRestore + ", provided zk server connection string: " + + zkServerConnectionStr); } } @@ -300,17 +361,20 @@ public boolean runWithRetries(CommandLine cl) { System.err.println( "Restore attempt failed, could not find all the required backup files to restore. 
" + "Error message: " + re.getMessage()); + re.printStackTrace(); return false; } catch (BackupException be) { System.err.println( - "Restoration attempt failed due to a backup exception, it's usually caused by required" - + "directories not exist or failure of creating directories, etc. Please check the message. " + "Restoration attempt failed due to a backup exception, it's usually caused by required " + + "directories not existing or failure of creating directories, etc. Please check the message. " + "Error message: " + be.getMessage()); + be.printStackTrace(); return false; } catch (Exception e) { tries++; System.err.println("Restore attempt failed; attempting again. " + tries + "/" + MAX_RETRIES + ". Error message: " + e.getMessage()); + e.printStackTrace(); } } @@ -321,54 +385,23 @@ public boolean runWithRetries(CommandLine cl) { /** * Attempts to perform a restore. */ - public void run() throws IOException { + public void run() throws IOException, InterruptedException { try { if (!findFilesToRestore()) { throw new IllegalArgumentException("Failed to find valid snapshot and logs to restore."); } - if (snapLog == null || restoreTempDir == null || storage == null) { - throw new BackupException( - "The FileTxnSnapLog, RestoreTempDir and BackupStorageProvider cannot be null."); + if (restoreTempDir == null || storage == null) { + throw new BackupException("The RestoreTempDir and BackupStorageProvider cannot be null."); } - File dataDir = snapLog.getDataDir(); - File snapDir = snapLog.getSnapDir(); - if (!dataDir.exists() && !dataDir.mkdirs()) { - throw new BackupException("Failed to create a data directory at path: " + dataDir.getPath() - + " to store restored txn logs."); - } - if (!snapDir.exists() && !snapDir.mkdirs()) { - throw new BackupException("Failed to create a snap directory at path: " + snapDir.getPath() - + " to store restored snapshot files."); - } - String[] dataDirFiles = dataDir.list(); - String[] snapDirFiles = snapDir.list(); - if 
(Objects.requireNonNull(dataDirFiles).length > 0 - || Objects.requireNonNull(snapDirFiles).length > 0) { - if (overwrite) { - LOG.warn( - "Overwriting the destination directories for restoration. The files under dataDir: " - + dataDir.getPath() + " are: " + Arrays.toString(dataDirFiles) - + "; and files under snapDir: " + snapDir.getPath() + " are: " + Arrays - .toString(snapDirFiles) + "."); - Arrays.stream(Objects.requireNonNull(dataDir.listFiles())).forEach(File::delete); - Arrays.stream(Objects.requireNonNull(snapDir.listFiles())).forEach(File::delete); - } else { - throw new BackupException( - "The destination directories are not empty, user chose not to overwrite existing files, exiting restoration. " - + "Please check the destination directory dataDir path: " + dataDir.getPath() - + ", and snapDir path" + snapDir.getPath()); - } + if (!restoreTempDir.exists() && !restoreTempDir.mkdirs()) { + throw new BackupException( + "Failed to create a temporary directory at path: " + restoreTempDir.getPath() + + " to store copied backup files."); } if (!dryRun) { - if (!restoreTempDir.exists() && !restoreTempDir.mkdirs()) { - throw new BackupException( - "Failed to create a temporary directory at path: " + restoreTempDir.getPath() - + " to store copied backup files."); - } - // This step will create a "version-2" directory inside restoreTempDir, // all the selected backup files will be copied to version-2 directory FileTxnSnapLog restoreTempSnapLog = @@ -376,7 +409,12 @@ public void run() throws IOException { copyBackupFilesToLocalTempDir(restoreTempSnapLog); processCopiedBackupFiles(restoreTempSnapLog, zxidToRestore); - copyProcessedRestoredFilesToDestination(restoreTempSnapLog); + if (isSpotRestoration) { + performSpotRestoration(restoreTempDir); + } else { + // It is an offline restoration + copyProcessedRestoredFilesToDestination(restoreTempSnapLog); + } } } finally { if (restoreTempDir != null && restoreTempDir.exists()) { @@ -546,6 +584,43 @@ private boolean 
validateLogRange() { return true; } + /** + * Check if the specified snap dir and data dir already have files inside. + * If so, ask user to confirm if they want to overwrite these two directories with restored files, + * which means to wipe out all existing files and the directories be populated with restored files. + */ + private void checkSnapDataDirFileExistence() { + File dataDir = snapLog.getDataDir(); + File snapDir = snapLog.getSnapDir(); + if (!dataDir.exists() && !dataDir.mkdirs()) { + throw new BackupException("Failed to create a data directory at path: " + dataDir.getPath() + + " to store restored txn logs."); + } + if (!snapDir.exists() && !snapDir.mkdirs()) { + throw new BackupException("Failed to create a snap directory at path: " + snapDir.getPath() + + " to store restored snapshot files."); + } + String[] dataDirFiles = dataDir.list(); + String[] snapDirFiles = snapDir.list(); + if (Objects.requireNonNull(dataDirFiles).length > 0 + || Objects.requireNonNull(snapDirFiles).length > 0) { + if (overwrite) { + LOG.warn( + "Overwriting the destination directories for restoration, deleting all existing files. " + + "The files under dataDir: " + dataDir.getPath() + " are: " + Arrays + .toString(dataDirFiles) + "; and files under snapDir: " + snapDir.getPath() + + " are: " + Arrays.toString(snapDirFiles) + "."); + Arrays.stream(Objects.requireNonNull(dataDir.listFiles())).forEach(File::delete); + Arrays.stream(Objects.requireNonNull(snapDir.listFiles())).forEach(File::delete); + } else { + throw new BackupException( + "The destination directories are not empty, user chose not to overwrite the entire directory, " + + "exiting restoration. 
Please check the destination directory dataDir path: " + + dataDir.getPath() + ", and snapDir path" + snapDir.getPath()); + } + } + } + /** * Copy selected backup files from backup storage to a local restore temporary directory for further processing later * @param restoreTempSnapLog A FileTxnSnapLog instance created on the specified local temporary directory path @@ -617,5 +692,25 @@ private void copyProcessedRestoredFilesToDestination(FileTxnSnapLog restoreTempS + finalDestinationBase.getPath() + "."); Files.copy(processedFile.toPath(), new File(finalDestinationBase, fileName).toPath()); } + LOG.info( + "All files were successfully copied to destination directory. Offline restoration was completed."); + } + + /** + * If the CLI command has specified a znode path to perform spot restoration, + * run the spot restoration tool on that path + * @throws IOException + */ + @VisibleForTesting + protected void performSpotRestoration(File restoreTempDir) + throws IOException, InterruptedException { + LOG.info("Starting spot restoration for zk path " + znodePathToRestore); + zk = new ZooKeeper(zkServerConnectionStr, CONNECTION_TIMEOUT, (event) -> { + LOG.info("WATCHER:: client-server connection event received for spot restoration: " + event + .toString()); + }); + spotRestorationTool = + new SpotRestorationTool(restoreTempDir, zk, znodePathToRestore, restoreRecursively); + spotRestorationTool.run(); } } diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/SpotRestorationTool.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/SpotRestorationTool.java index 72b227c583c..6eb73e9c01b 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/SpotRestorationTool.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/backup/SpotRestorationTool.java @@ -67,7 +67,7 @@ public SpotRestorationTool(File dataDir, ZooKeeper zk, String targetZNodePath, * manually delete the node after this spot restoration 
run and run spot restoration on the node path again * @throws IOException */ - public void run() throws IOException { + public void run() throws IOException, InterruptedException { LOG.info( "Starting spot restoration for znode path " + targetZNodePath + ", using data provided in " + snapLog.getDataDir().getPath()); @@ -97,6 +97,8 @@ public void run() throws IOException { "Spot restoration aborted. No change was made to the ZNode path: ." + targetZNodePath; if (getUserConfirmation(requestMsg, yesMsg, noMsg)) { recursiveRestore(zk, dataTree, targetZNodePath, true); + zk.close(); + LOG.info("Spot restoration for " + targetZNodePath + " was successfully done."); printExitMessages(); } } @@ -112,7 +114,7 @@ public void run() throws IOException { * false if the node will be skipped */ private void recursiveRestore(ZooKeeper zk, DataTree dataTree, String path, - boolean shouldOverwrite) { + boolean shouldOverwrite) throws InterruptedException { if (!singleNodeRestore(zk, dataTree, path, shouldOverwrite)) { // This node is skipped, there's no need to traverse its child nodes return; @@ -157,7 +159,7 @@ private void recursiveRestore(ZooKeeper zk, DataTree dataTree, String path, * @return True if the node is successfully restored, false if the node is skipped */ private boolean singleNodeRestore(ZooKeeper zk, DataTree dataTree, String path, - boolean shouldOverwrite) { + boolean shouldOverwrite) throws InterruptedException { DataNode node = dataTree.getNode(path); try { if (zk.exists(path, false) == null) { @@ -186,7 +188,8 @@ private boolean singleNodeRestore(ZooKeeper zk, DataTree dataTree, String path, } @VisibleForTesting - protected boolean getUserConfirmation(String requestMsg, String yesMsg, String noMsg) { + protected boolean getUserConfirmation(String requestMsg, String yesMsg, String noMsg) + throws InterruptedException { Scanner scanner = new Scanner(System.in); int cnt = 3; while (cnt > 0) { @@ -208,11 +211,13 @@ protected boolean getUserConfirmation(String 
requestMsg, String yesMsg, String n System.err.println("Could not recognize user's input for the request: " + requestMsg + ". Exiting spot restoration..."); printExitMessages(); + zk.close(); System.exit(1); return false; } - private void skipNodeOrStopRestoration(String errorNodePath, Exception exception) { + private void skipNodeOrStopRestoration(String errorNodePath, Exception exception) + throws InterruptedException { String errorMsg = exception.toString() + "\n"; String requestMsg = errorMsg + "Do you want to continue the spot restoration? Enter \"yes\" to skip this node " @@ -223,6 +228,7 @@ private void skipNodeOrStopRestoration(String errorNodePath, Exception exception String noMsg = "Spot restoration is stopped. Reason: " + errorMsg; if (!getUserConfirmation(requestMsg, yesMsg, noMsg)) { printExitMessages(); + zk.close(); System.exit(1); } else { messages.add(errorNodePath + ": " + errorMsg); @@ -233,7 +239,6 @@ private void skipNodeOrStopRestoration(String errorNodePath, Exception exception * Print out all the messages about skipped nodes during restoration */ private void printExitMessages() { - LOG.info("Spot restoration for " + targetZNodePath + " was successfully done."); if (!messages.isEmpty()) { LOG.warn("During the spot restoration, the following nodes were skipped."); messages.forEach(System.err::println); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/backup/RestorationToolTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/backup/RestorationToolTest.java index 2bdf1eac8c6..b462b3686d8 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/backup/RestorationToolTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/backup/RestorationToolTest.java @@ -182,7 +182,7 @@ public void teardown() throws Exception { } @Test - public void testSuccessfulRestorationToZxid() throws IOException { + public void testSuccessfulRestorationToZxid() throws IOException, 
InterruptedException { for (int i = 0; i < 5; i++) { int restoreZxid = random.nextInt(txnCnt); File restoreTempDir = ClientBase.createTmpDir(); @@ -207,7 +207,7 @@ public void testSuccessfulRestorationToZxid() throws IOException { } @Test - public void testSuccessfulRestorationToLatest() throws IOException { + public void testSuccessfulRestorationToLatest() throws IOException, InterruptedException { RestoreFromBackupTool restoreTool = new RestoreFromBackupTool(backupStorage, restoreSnapLog, Long.MAX_VALUE, false, restoreTempDir); @@ -409,8 +409,8 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, // Expected: messages printed at the end indicate the node is skipped // Create several znodes in original zk server whose data will be backed up for testing - connection - .create("/testsr", "restoredTarget".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + connection.create("/testsr", "restoredTarget".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); connection.create("/testsr/restore", "restore".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); connection.create("/testsr/restore/node0", "restore0".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, @@ -438,13 +438,12 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, connection = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, DummyWatcher.INSTANCE); // Create several znodes in the running zk server - connection.create("/testsr", "target".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); + connection + .create("/testsr", "target".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); connection.create("/testsr/new", "new".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - connection - .create("/testsr/new/node0", "new0".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); + connection.create("/testsr/new/node0", "new0".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, + 
CreateMode.PERSISTENT); connection.create("/testsr/existing", "existingVal".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); @@ -471,6 +470,8 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, new MockSpotRestorationTool(restoreDir, connection, targetZnodePath, true); tool.run(); + connection = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, DummyWatcher.INSTANCE); + // The target node's value should be updated Assert.assertArrayEquals("restoredTarget".getBytes(), connection.getData("/testsr", false, new Stat())); @@ -492,7 +493,6 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, LOG.info("Please examine the messages printed out. " + "There should be messages for skipping nodes: \"/testsr/new\" and \"/testsr/existing\"."); - //TEST 2. // Test target node's parent nodes do not exist connection.close(); @@ -511,10 +511,11 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, connection = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, DummyWatcher.INSTANCE); targetZnodePath = "/testsr/restore/node0"; - tool = - new MockSpotRestorationTool(restoreDir, connection, targetZnodePath, true); + tool = new MockSpotRestorationTool(restoreDir, connection, targetZnodePath, true); tool.run(); + connection = new ZooKeeper(HOSTPORT, CONNECTION_TIMEOUT, DummyWatcher.INSTANCE); + Assert.assertArrayEquals("restoredTarget".getBytes(), connection.getData("/testsr", false, new Stat())); Assert.assertArrayEquals("restore".getBytes(), @@ -523,6 +524,59 @@ public void testSpotRestorationTool() throws IOException, InterruptedException, connection.getData("/testsr/restore/node0", false, new Stat())); } + @Test + public void testSpotRestorationByCommandLine() throws IOException, InterruptedException { + //Test restoration CLI using a timestamp recorded in the midpoint of the test ZNode creation + backupManager.getTimetableBackup().run(1); + + // Close the original zk server and zk client; + 
//start a new server as the server to be restored + connection.close(); + zks.shutdown(); + LOG.info("ZK server is shut down."); + + dataDir = ClientBase.createTmpDir(); + LOG.info("Starting a new zk server."); + zks = new ZooKeeperServer(dataDir, dataDir, 3000); + SyncRequestProcessor.setSnapCount(100); + serverCnxnFactory.startup(zks); + + LOG.info("Waiting for server startup"); + Assert.assertTrue("waiting for server being up", + ClientBase.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT)); + + // Mock CLI command + CommandLine cl = Mockito.mock(CommandLine.class); + when(cl.hasOption(RestoreCommand.OptionShortForm.RESTORE_ZXID)).thenReturn(false); + when(cl.hasOption(RestoreCommand.OptionShortForm.RESTORE_TIMESTAMP)).thenReturn(true); + when(cl.hasOption(RestoreCommand.OptionShortForm.BACKUP_STORE)).thenReturn(true); + when(cl.hasOption(RestoreCommand.OptionShortForm.SNAP_DESTINATION)).thenReturn(false); + when(cl.hasOption(RestoreCommand.OptionShortForm.LOG_DESTINATION)).thenReturn(false); + when(cl.hasOption(RestoreCommand.OptionShortForm.LOCAL_RESTORE_TEMP_DIR_PATH)).thenReturn(true); + when(cl.hasOption(RestoreCommand.OptionShortForm.TIMETABLE_STORAGE_PATH)).thenReturn(true); + when(cl.hasOption(RestoreCommand.OptionShortForm.ZNODE_PATH_TO_RESTORE)).thenReturn(true); + when(cl.hasOption(RestoreCommand.OptionShortForm.ZK_SERVER_CONNECTION_STRING)).thenReturn(true); + when(cl.getOptionValue(RestoreCommand.OptionShortForm.RESTORE_TIMESTAMP)) + .thenReturn(String.valueOf(timestampInMiddle)); + when(cl.getOptionValue(RestoreCommand.OptionShortForm.BACKUP_STORE)) + .thenReturn("gpfs::" + backupDir.getPath() + ":" + TEST_NAMESPACE); + when(cl.getOptionValue(RestoreCommand.OptionShortForm.LOCAL_RESTORE_TEMP_DIR_PATH)) + .thenReturn(restoreTempDir.getPath()); + when(cl.getOptionValue(RestoreCommand.OptionShortForm.TIMETABLE_STORAGE_PATH)) + .thenReturn(timetableDir.getPath() + "/" + TEST_NAMESPACE); + // Restore the first node created, so we are sure this node exists 
at the moment of timestamp provided + String nodePath = "/node1"; + when(cl.getOptionValue(RestoreCommand.OptionShortForm.ZNODE_PATH_TO_RESTORE)) + .thenReturn(nodePath); + when(cl.getOptionValue(RestoreCommand.OptionShortForm.ZK_SERVER_CONNECTION_STRING)) + .thenReturn(HOSTPORT); + + // Run restoration + RestoreFromBackupTool restoreTool = new MockRestoreFromBackupTool(); + Assert.assertTrue(restoreTool.runWithRetries(cl)); + Assert.assertNotNull(zks.getZKDatabase().getNode(nodePath)); + } + class MockSpotRestorationTool extends SpotRestorationTool { public MockSpotRestorationTool(File dataDir, ZooKeeper zk, String targetZNodePath, boolean restoreRecursively) throws IOException { @@ -534,4 +588,19 @@ protected boolean getUserConfirmation(String requestMsg, String yesMsg, String n return true; } } + + class MockRestoreFromBackupTool extends RestoreFromBackupTool { + @Override + protected void performSpotRestoration(File restoreTempDir) + throws IOException, InterruptedException { + LOG.info("Starting spot restoration for zk path " + znodePathToRestore); + zk = new ZooKeeper(zkServerConnectionStr, CONNECTION_TIMEOUT, (event) -> { + LOG.info("WATCHER:: client-server connection event received for spot restoration: " + event + .toString()); + }); + spotRestorationTool = + new MockSpotRestorationTool(restoreTempDir, zk, znodePathToRestore, restoreRecursively); + spotRestorationTool.run(); + } + } }