diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e0e1bdbcb5cf..f94e5efb22e3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3822,6 +3822,11 @@ public static enum ConfVars { "To cleanup the Hive scratchdir when starting the Hive Server"), HIVE_SCRATCH_DIR_LOCK("hive.scratchdir.lock", false, "To hold a lock file in scratchdir to prevent to be removed by cleardanglingscratchdir"), + HIVE_SCRATCH_DIR_CLEANUP_GRACE_PERIOD("hive.scratchdir.cleanup.grace.period.hours","0h", + new TimeValidator(TimeUnit.HOURS), + "Prevents cleanup of scratch directories that have been modified within the specified time window. " + + "Useful for avoiding premature deletion while queries are still returning results." + + "Not enabled by default."), HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false, "Where to insert into multilevel directories like\n" + "\"insert directory '/HIVEFT25686/china/' from table\""), diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java index 8557946331c3..c336cf0969d6 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java @@ -72,8 +72,14 @@ static public void shutdown() throws Exception { // Need to make sure deleting in correct FS FileSystem fs = customScratchDir.getFileSystem(new Configuration()); - fs.delete(customScratchDir, true); - fs.delete(customLocalTmpDir, true); + + if (customScratchDir != null) { + fs.delete(customScratchDir, true); + } + + if (customLocalTmpDir != null) { + fs.delete(customLocalTmpDir, true); + } } public void redirectStdOutErr() { @@ -206,4 +212,36 @@ public void testLocalDanglingFilesCleaning() throws Exception { Assert.assertTrue("Local .pipeout file '" + localPipeOutFileFailRemove + "' does not exist, should have not been removed!", fs.exists(localPipeOutFileFailRemove)); } + + /** + * Unit test for ClearDanglingScratchDir.isWithinGracePeriod(). + */ + @Test + public void testGracePeriodPreventsRemoval() throws Exception { + // Configuration + HiveConf conf = new HiveConfForTest(getClass()); + conf.set("fs.default.name", "file:///"); + String tmpDir = System.getProperty("test.tmp.dir"); + conf.set("hive.exec.scratchdir", tmpDir + "/scratch-grace-test"); + conf.set(String.valueOf(HiveConf.ConfVars.HIVE_SCRATCH_DIR_CLEANUP_GRACE_PERIOD), "1h"); + + // Simulating session dir and its inuse.lck file to make it eligible for removal normally + FileSystem fs = FileSystem.get(conf); + FsPermission allPermissions = new FsPermission((short)00777); + customScratchDir = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCH_DIR)); + Path rootDir = new Path(customScratchDir, "user"); + Path hdfsDir = new Path(rootDir, "hdfs"); + Path sessionDir = new Path(hdfsDir, "session1"); + Utilities.createDirsWithPermission(conf, sessionDir, allPermissions, true); + Path sessionLock = new Path(sessionDir + "/inuse.lck"); + fs.create(sessionLock); + + // Initialize cleaner and run the full cleanup logic + ClearDanglingScratchDir cleaner = new ClearDanglingScratchDir(false, true, true, + rootDir.toString(), conf); + cleaner.run(); + + // The directory should NOT be removed because it is within grace period + Assert.assertTrue("Directory should still exist due to grace period.", fs.exists(sessionDir)); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java index 305923023800..4a0f791e5475 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.TimeUnit; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; @@ -173,7 +174,7 @@ public void run() { consoleMessage(message); } } - if (removable) { + if (removable && !isWithinGracePeriod(scratchDir, conf, verbose)) { scratchDirToRemove.add(scratchDir.getPath()); } } @@ -264,4 +265,25 @@ private void removeLocalTmpFiles(String sessionName, String localTmpdir) { } } } + + /** + * Returns true if the scratch directory was modified within the grace period, + * meaning it should be skipped from cleanup. + * Hive updates the directory’s last modified time when finalizing output streaming. + */ + private boolean isWithinGracePeriod(FileStatus scratchDir, HiveConf conf, boolean verbose) throws IOException { + long lastModifiedTime = scratchDir.getModificationTime(); + long currentTime = System.currentTimeMillis(); + + long gracePeriodMs = HiveConf.getTimeVar(conf, + HiveConf.ConfVars.HIVE_SCRATCH_DIR_CLEANUP_GRACE_PERIOD, TimeUnit.MILLISECONDS); + + if (gracePeriodMs > 0 && (currentTime - lastModifiedTime) < gracePeriodMs) { + if (verbose) { + consoleMessage("Skipping " + scratchDir.getPath() + " because it was modified within the grace period."); + } + return true; + } + return false; + } } \ No newline at end of file