Skip to content

Commit fc04659

Browse files
gvprathyusha6apurtell
authored andcommitted
HBASE-29662 - Avoid regionDir/tableDir creation as part of .regioninfo file creation in HRegion initialize (#7406)
Signed-off-by: Andrew Purtell <[email protected]> Signed-off-by: Viraj Jasani <[email protected]>
1 parent 7c1c78d commit fc04659

File tree

14 files changed

+317
-10
lines changed

14 files changed

+317
-10
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,39 @@ public static int getDefaultBufferSize(final FileSystem fs) {
187187
*/
188188
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
189189
boolean overwrite) throws IOException {
190+
return create(fs, path, perm, overwrite, true);
191+
}
192+
193+
/**
194+
* Create the specified file on the filesystem. By default, this will:
195+
* <ol>
196+
* <li>apply the umask in the configuration (if it is enabled)</li>
197+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
198+
* <li>use the default replication</li>
199+
* <li>use the default block size</li>
200+
* <li>not track progress</li>
201+
* </ol>
202+
* @param fs {@link FileSystem} on which to write the file
203+
* @param path {@link Path} to the file to write
204+
* @param perm intial permissions
205+
* @param overwrite Whether or not the created file should be overwritten.
206+
* @param isRecursiveCreate recursively create parent directories
207+
* @return output stream to the created file
208+
* @throws IOException if the file cannot be created
209+
*/
210+
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
211+
boolean overwrite, boolean isRecursiveCreate) throws IOException {
190212
if (LOG.isTraceEnabled()) {
191-
LOG.trace("Creating file={} with permission={}, overwrite={}", path, perm, overwrite);
213+
LOG.trace("Creating file={} with permission={}, overwrite={}, recursive={}", path, perm,
214+
overwrite, isRecursiveCreate);
215+
}
216+
if (isRecursiveCreate) {
217+
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
218+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
219+
} else {
220+
return fs.createNonRecursive(path, perm, overwrite, getDefaultBufferSize(fs),
221+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
192222
}
193-
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
194-
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
195223
}
196224

197225
/**

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.apache.hadoop.hbase.client.TestTableSnapshotScanner;
4747
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
4848
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
49+
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
4950
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
5051
import org.apache.hadoop.hbase.testclassification.LargeTests;
5152
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
@@ -582,4 +583,104 @@ public void testCleanRestoreDir() throws Exception {
582583
TableSnapshotInputFormat.cleanRestoreDir(job, snapshotName);
583584
Assert.assertFalse(fs.exists(restorePath));
584585
}
586+
587+
/**
588+
* Test that explicitly restores a snapshot to a temp directory and reads the restored regions via
589+
* ClientSideRegionScanner through a MapReduce job.
590+
* <p>
591+
* This test verifies the full workflow: 1. Create and load a table with data 2. Create a snapshot
592+
* and restore the snapshot to a temporary directory 3. Configure a job to read the restored
593+
* regions via ClientSideRegionScanner using TableSnapshotInputFormat and verify that it succeeds
594+
* 4. Delete restored temporary directory 5. Configure a new job and verify that it fails
595+
*/
596+
@Test
597+
public void testReadFromRestoredSnapshotViaMR() throws Exception {
598+
final TableName tableName = TableName.valueOf(name.getMethodName());
599+
final String snapshotName = tableName + "_snapshot";
600+
try {
601+
if (UTIL.getAdmin().tableExists(tableName)) {
602+
UTIL.deleteTable(tableName);
603+
}
604+
UTIL.createTable(tableName, FAMILIES, new byte[][] { bbb, yyy });
605+
606+
Admin admin = UTIL.getAdmin();
607+
int regionNum = admin.getRegions(tableName).size();
608+
LOG.info("Created table with {} regions", regionNum);
609+
610+
Table table = UTIL.getConnection().getTable(tableName);
611+
UTIL.loadTable(table, FAMILIES);
612+
table.close();
613+
614+
Path rootDir = CommonFSUtils.getRootDir(UTIL.getConfiguration());
615+
FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
616+
SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
617+
null, snapshotName, rootDir, fs, true);
618+
Path tempRestoreDir = UTIL.getDataTestDirOnTestFS("restore_" + snapshotName);
619+
RestoreSnapshotHelper.copySnapshotForScanner(UTIL.getConfiguration(), fs, rootDir,
620+
tempRestoreDir, snapshotName);
621+
Assert.assertTrue("Restore directory should exist", fs.exists(tempRestoreDir));
622+
623+
Job job = Job.getInstance(UTIL.getConfiguration());
624+
job.setJarByClass(TestTableSnapshotInputFormat.class);
625+
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
626+
TestTableSnapshotInputFormat.class);
627+
Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
628+
Configuration conf = job.getConfiguration();
629+
conf.set("hbase.TableSnapshotInputFormat.snapshot.name", snapshotName);
630+
conf.set("hbase.TableSnapshotInputFormat.restore.dir", tempRestoreDir.toString());
631+
conf.setInt("hbase.mapreduce.splits.per.region", 1);
632+
job.setReducerClass(TestTableSnapshotReducer.class);
633+
job.setNumReduceTasks(1);
634+
job.setOutputFormatClass(NullOutputFormat.class);
635+
TableMapReduceUtil.initTableMapperJob(snapshotName, // table name (snapshot name in this case)
636+
scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job,
637+
false, false, TableSnapshotInputFormat.class);
638+
TableMapReduceUtil.resetCacheConfig(conf);
639+
Assert.assertTrue(job.waitForCompletion(true));
640+
Assert.assertTrue(job.isSuccessful());
641+
642+
// Now verify that job fails when restore directory is deleted
643+
Assert.assertTrue(fs.delete(tempRestoreDir, true));
644+
Assert.assertFalse("Restore directory should not exist after deletion",
645+
fs.exists(tempRestoreDir));
646+
Job failureJob = Job.getInstance(UTIL.getConfiguration());
647+
failureJob.setJarByClass(TestTableSnapshotInputFormat.class);
648+
TableMapReduceUtil.addDependencyJarsForClasses(failureJob.getConfiguration(),
649+
TestTableSnapshotInputFormat.class);
650+
Configuration failureConf = failureJob.getConfiguration();
651+
// Configure job to use the deleted restore directory
652+
failureConf.set("hbase.TableSnapshotInputFormat.snapshot.name", snapshotName);
653+
failureConf.set("hbase.TableSnapshotInputFormat.restore.dir", tempRestoreDir.toString());
654+
failureConf.setInt("hbase.mapreduce.splits.per.region", 1);
655+
failureJob.setReducerClass(TestTableSnapshotReducer.class);
656+
failureJob.setNumReduceTasks(1);
657+
failureJob.setOutputFormatClass(NullOutputFormat.class);
658+
659+
TableMapReduceUtil.initTableMapperJob(snapshotName, scan, TestTableSnapshotMapper.class,
660+
ImmutableBytesWritable.class, NullWritable.class, failureJob, false, false,
661+
TableSnapshotInputFormat.class);
662+
TableMapReduceUtil.resetCacheConfig(failureConf);
663+
664+
Assert.assertFalse("Restore directory should not exist before job execution",
665+
fs.exists(tempRestoreDir));
666+
failureJob.waitForCompletion(true);
667+
668+
Assert.assertFalse("Job should fail since the restored snapshot directory is deleted",
669+
failureJob.isSuccessful());
670+
671+
} finally {
672+
try {
673+
if (UTIL.getAdmin().tableExists(tableName)) {
674+
UTIL.deleteTable(tableName);
675+
}
676+
} catch (Exception e) {
677+
LOG.warn("Error deleting table", e);
678+
}
679+
try {
680+
UTIL.getAdmin().deleteSnapshot(snapshotName);
681+
} catch (Exception e) {
682+
LOG.warn("Error deleting snapshot", e);
683+
}
684+
}
685+
}
585686
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/janitor/MetaFixer.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.SortedSet;
3030
import java.util.TreeSet;
3131
import java.util.stream.Collectors;
32+
import org.apache.hadoop.fs.Path;
3233
import org.apache.hadoop.hbase.HConstants;
3334
import org.apache.hadoop.hbase.MetaTableAccessor;
3435
import org.apache.hadoop.hbase.TableName;
@@ -37,9 +38,12 @@
3738
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3839
import org.apache.hadoop.hbase.client.TableDescriptor;
3940
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
41+
import org.apache.hadoop.hbase.master.MasterFileSystem;
4042
import org.apache.hadoop.hbase.master.MasterServices;
4143
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
44+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
4245
import org.apache.hadoop.hbase.util.Bytes;
46+
import org.apache.hadoop.hbase.util.CommonFSUtils;
4347
import org.apache.hadoop.hbase.util.Pair;
4448
import org.apache.yetus.audience.InterfaceAudience;
4549
import org.slf4j.Logger;
@@ -102,6 +106,7 @@ void fixHoles(CatalogJanitorReport report) {
102106

103107
final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
104108
final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
109+
createRegionDirectories(masterServices, newMetaEntries);
105110
final TransitRegionStateProcedure[] assignProcedures =
106111
masterServices.getAssignmentManager().createRoundRobinAssignProcedures(newMetaEntries);
107112

@@ -217,6 +222,27 @@ private static List<RegionInfo> createMetaEntries(final MasterServices masterSer
217222
return createMetaEntriesSuccesses;
218223
}
219224

225+
private static void createRegionDirectories(final MasterServices masterServices,
226+
final List<RegionInfo> regions) {
227+
if (regions.isEmpty()) {
228+
return;
229+
}
230+
final MasterFileSystem mfs = masterServices.getMasterFileSystem();
231+
final Path rootDir = mfs.getRootDir();
232+
for (RegionInfo regionInfo : regions) {
233+
if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
234+
try {
235+
Path tableDir = CommonFSUtils.getTableDir(rootDir, regionInfo.getTable());
236+
HRegionFileSystem.createRegionOnFileSystem(masterServices.getConfiguration(),
237+
mfs.getFileSystem(), tableDir, regionInfo);
238+
} catch (IOException e) {
239+
LOG.warn("Failed to create region directory for {}: {}",
240+
regionInfo.getRegionNameAsString(), e.getMessage(), e);
241+
}
242+
}
243+
}
244+
}
245+
220246
/**
221247
* Fix overlaps noted in CJ consistency report.
222248
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
109109
setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_ONLINE);
110110
break;
111111
case TRUNCATE_REGION_MAKE_ONLINE:
112+
createRegionOnFileSystem(env);
112113
addChildProcedure(createAssignProcedures(env));
113114
setNextState(TruncateRegionState.TRUNCATE_REGION_POST_OPERATION);
114115
break;
@@ -130,6 +131,20 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
130131
return Flow.HAS_MORE_STATE;
131132
}
132133

134+
private void createRegionOnFileSystem(final MasterProcedureEnv env) throws IOException {
135+
RegionStateNode regionNode =
136+
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
137+
regionNode.lock();
138+
try {
139+
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
140+
final Path tableDir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
141+
HRegionFileSystem.createRegionOnFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
142+
tableDir, getRegion());
143+
} finally {
144+
regionNode.unlock();
145+
}
146+
}
147+
133148
private void deleteRegionFromFileSystem(final MasterProcedureEnv env) throws IOException {
134149
RegionStateNode regionNode =
135150
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,10 @@ private static void writeRegionInfoFileContent(final Configuration conf, final F
787787
// First check to get the permissions
788788
FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
789789
// Write the RegionInfo file content
790-
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
790+
// HBASE-29662: Fail .regioninfo file creation, if the region directory doesn't exist,
791+
// avoiding silent masking of missing region directories during region initialization.
792+
// The region directory should already exist when this method is called.
793+
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null, false)) {
791794
out.write(content);
792795
}
793796
}
@@ -871,6 +874,14 @@ private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final b
871874
CommonFSUtils.delete(fs, tmpPath, true);
872875
}
873876

877+
// Check parent (region) directory exists first to maintain HBASE-29662 protection
878+
if (!fs.exists(getRegionDir())) {
879+
throw new IOException("Region directory does not exist: " + getRegionDir());
880+
}
881+
if (!fs.exists(getTempDir())) {
882+
fs.mkdirs(getTempDir());
883+
}
884+
874885
// Write HRI to a file in case we need to recover hbase:meta
875886
writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
876887

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,32 @@ public static boolean deleteRegionDir(final Configuration conf, final RegionInfo
212212
*/
213213
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
214214
FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException {
215+
return create(conf, fs, path, perm, favoredNodes, true);
216+
}
217+
218+
/**
219+
* Create the specified file on the filesystem. By default, this will:
220+
* <ol>
221+
* <li>overwrite the file if it exists</li>
222+
* <li>apply the umask in the configuration (if it is enabled)</li>
223+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
224+
* <li>use the configured column family replication or default replication if
225+
* {@link ColumnFamilyDescriptorBuilder#DEFAULT_DFS_REPLICATION}</li>
226+
* <li>use the default block size</li>
227+
* <li>not track progress</li>
228+
* </ol>
229+
* @param conf configurations
230+
* @param fs {@link FileSystem} on which to write the file
231+
* @param path {@link Path} to the file to write
232+
* @param perm permissions
233+
* @param favoredNodes favored data nodes
234+
* @param isRecursiveCreate recursively create parent directories
235+
* @return output stream to the created file
236+
* @throws IOException if the file cannot be created
237+
*/
238+
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
239+
FsPermission perm, InetSocketAddress[] favoredNodes, boolean isRecursiveCreate)
240+
throws IOException {
215241
if (fs instanceof HFileSystem) {
216242
FileSystem backingFs = ((HFileSystem) fs).getBackingFs();
217243
if (backingFs instanceof DistributedFileSystem) {
@@ -230,7 +256,7 @@ public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path
230256
}
231257

232258
}
233-
return CommonFSUtils.create(fs, path, perm, true);
259+
return CommonFSUtils.create(fs, path, perm, true, isRecursiveCreate);
234260
}
235261

236262
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@
9999
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
100100
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
101101
import org.apache.hadoop.hbase.master.HMaster;
102+
import org.apache.hadoop.hbase.master.MasterFileSystem;
102103
import org.apache.hadoop.hbase.master.RegionState;
103104
import org.apache.hadoop.hbase.master.ServerManager;
104105
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
@@ -109,6 +110,7 @@
109110
import org.apache.hadoop.hbase.regionserver.BloomType;
110111
import org.apache.hadoop.hbase.regionserver.ChunkCreator;
111112
import org.apache.hadoop.hbase.regionserver.HRegion;
113+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
112114
import org.apache.hadoop.hbase.regionserver.HRegionServer;
113115
import org.apache.hadoop.hbase.regionserver.HStore;
114116
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@@ -3724,4 +3726,22 @@ public static void await(final long sleepMillis, final BooleanSupplier condition
37243726
throw e;
37253727
}
37263728
}
3729+
3730+
public void createRegionDir(RegionInfo hri) throws IOException {
3731+
Path rootDir = getDataTestDir();
3732+
Path tableDir = CommonFSUtils.getTableDir(rootDir, hri.getTable());
3733+
Path regionDir = new Path(tableDir, hri.getEncodedName());
3734+
FileSystem fs = getTestFileSystem();
3735+
if (!fs.exists(regionDir)) {
3736+
fs.mkdirs(regionDir);
3737+
}
3738+
}
3739+
3740+
public void createRegionDir(RegionInfo regionInfo, MasterFileSystem masterFileSystem)
3741+
throws IOException {
3742+
Path tableDir =
3743+
CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), regionInfo.getTable());
3744+
HRegionFileSystem.createRegionOnFileSystem(conf, masterFileSystem.getFileSystem(), tableDir,
3745+
regionInfo);
3746+
}
37273747
}

hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoreRegionCoprocessor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,14 @@ public void before() throws IOException {
7676
this.rss = new MockRegionServerServices(HTU.getConfiguration());
7777
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
7878
MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
79+
HTU.createRegionDir(ri);
7980
this.region = HRegion.openHRegion(ri, td, null, HTU.getConfiguration(), this.rss, null);
8081
}
8182

8283
@After
8384
public void after() throws IOException {
8485
this.region.close();
86+
HTU.cleanupTestDir();
8587
}
8688

8789
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestMetaFixer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, Reg
172172
throws IOException {
173173
RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
174174
.setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
175+
TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem());
175176
MetaTableAccessor.putsToMetaTable(services.getConnection(),
176177
Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
177178
EnvironmentEdgeManager.currentTime())));

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveConcurrentClose.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,11 @@ private HRegion initHRegion(TableDescriptor htd, RegionInfo info) throws IOExcep
178178
CommonFSUtils.setRootDir(walConf, tableDir);
179179
final WALFactory wals = new WALFactory(walConf, "log_" + info.getEncodedName());
180180
HRegion region = new HRegion(fs, wals.getWAL(info), conf, htd, null);
181-
181+
Path regionDir = new Path(tableDir, info.getEncodedName());
182+
if (!fs.getFileSystem().exists(regionDir)) {
183+
fs.getFileSystem().mkdirs(regionDir);
184+
}
182185
region.initialize();
183-
184186
return region;
185187
}
186188

0 commit comments

Comments
 (0)