Skip to content

Commit ee74894

Browse files
gvprathyusha6apurtell
authored andcommitted
HBASE-29662 - Avoid regionDir/tableDir creation as part of .regioninfo file creation in HRegion initialize (#7406)
Signed-off-by: Andrew Purtell <[email protected]> Signed-off-by: Viraj Jasani <[email protected]> Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/master/janitor/MetaFixer.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
1 parent 635f0f4 commit ee74894

File tree

14 files changed

+323
-16
lines changed

14 files changed

+323
-16
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,39 @@ public static int getDefaultBufferSize(final FileSystem fs) {
188188
*/
189189
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
190190
boolean overwrite) throws IOException {
191+
return create(fs, path, perm, overwrite, true);
192+
}
193+
194+
/**
195+
* Create the specified file on the filesystem. By default, this will:
196+
* <ol>
197+
* <li>apply the umask in the configuration (if it is enabled)</li>
198+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
199+
* <li>use the default replication</li>
200+
* <li>use the default block size</li>
201+
* <li>not track progress</li>
202+
* </ol>
203+
* @param fs {@link FileSystem} on which to write the file
204+
* @param path {@link Path} to the file to write
205+
* @param perm intial permissions
206+
* @param overwrite Whether or not the created file should be overwritten.
207+
* @param isRecursiveCreate recursively create parent directories
208+
* @return output stream to the created file
209+
* @throws IOException if the file cannot be created
210+
*/
211+
public static FSDataOutputStream create(FileSystem fs, Path path, FsPermission perm,
212+
boolean overwrite, boolean isRecursiveCreate) throws IOException {
191213
if (LOG.isTraceEnabled()) {
192-
LOG.trace("Creating file={} with permission={}, overwrite={}", path, perm, overwrite);
214+
LOG.trace("Creating file={} with permission={}, overwrite={}, recursive={}", path, perm,
215+
overwrite, isRecursiveCreate);
216+
}
217+
if (isRecursiveCreate) {
218+
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
219+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
220+
} else {
221+
return fs.createNonRecursive(path, perm, overwrite, getDefaultBufferSize(fs),
222+
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
193223
}
194-
return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
195-
getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
196224
}
197225

198226
/**

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.apache.hadoop.hbase.client.TestTableSnapshotScanner;
4747
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
4848
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
49+
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
4950
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
5051
import org.apache.hadoop.hbase.testclassification.LargeTests;
5152
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
@@ -583,4 +584,104 @@ public void testCleanRestoreDir() throws Exception {
583584
TableSnapshotInputFormat.cleanRestoreDir(job, snapshotName);
584585
Assert.assertFalse(fs.exists(restorePath));
585586
}
587+
588+
/**
589+
* Test that explicitly restores a snapshot to a temp directory and reads the restored regions via
590+
* ClientSideRegionScanner through a MapReduce job.
591+
* <p>
592+
* This test verifies the full workflow: 1. Create and load a table with data 2. Create a snapshot
593+
* and restore the snapshot to a temporary directory 3. Configure a job to read the restored
594+
* regions via ClientSideRegionScanner using TableSnapshotInputFormat and verify that it succeeds
595+
* 4. Delete restored temporary directory 5. Configure a new job and verify that it fails
596+
*/
597+
@Test
598+
public void testReadFromRestoredSnapshotViaMR() throws Exception {
599+
final TableName tableName = TableName.valueOf(name.getMethodName());
600+
final String snapshotName = tableName + "_snapshot";
601+
try {
602+
if (UTIL.getAdmin().tableExists(tableName)) {
603+
UTIL.deleteTable(tableName);
604+
}
605+
UTIL.createTable(tableName, FAMILIES, new byte[][] { bbb, yyy });
606+
607+
Admin admin = UTIL.getAdmin();
608+
int regionNum = admin.getRegions(tableName).size();
609+
LOG.info("Created table with {} regions", regionNum);
610+
611+
Table table = UTIL.getConnection().getTable(tableName);
612+
UTIL.loadTable(table, FAMILIES);
613+
table.close();
614+
615+
Path rootDir = CommonFSUtils.getRootDir(UTIL.getConfiguration());
616+
FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
617+
SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
618+
null, snapshotName, rootDir, fs, true);
619+
Path tempRestoreDir = UTIL.getDataTestDirOnTestFS("restore_" + snapshotName);
620+
RestoreSnapshotHelper.copySnapshotForScanner(UTIL.getConfiguration(), fs, rootDir,
621+
tempRestoreDir, snapshotName);
622+
Assert.assertTrue("Restore directory should exist", fs.exists(tempRestoreDir));
623+
624+
Job job = Job.getInstance(UTIL.getConfiguration());
625+
job.setJarByClass(TestTableSnapshotInputFormat.class);
626+
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
627+
TestTableSnapshotInputFormat.class);
628+
Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());
629+
Configuration conf = job.getConfiguration();
630+
conf.set("hbase.TableSnapshotInputFormat.snapshot.name", snapshotName);
631+
conf.set("hbase.TableSnapshotInputFormat.restore.dir", tempRestoreDir.toString());
632+
conf.setInt("hbase.mapreduce.splits.per.region", 1);
633+
job.setReducerClass(TestTableSnapshotReducer.class);
634+
job.setNumReduceTasks(1);
635+
job.setOutputFormatClass(NullOutputFormat.class);
636+
TableMapReduceUtil.initTableMapperJob(snapshotName, // table name (snapshot name in this case)
637+
scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job,
638+
false, false, TableSnapshotInputFormat.class);
639+
TableMapReduceUtil.resetCacheConfig(conf);
640+
Assert.assertTrue(job.waitForCompletion(true));
641+
Assert.assertTrue(job.isSuccessful());
642+
643+
// Now verify that job fails when restore directory is deleted
644+
Assert.assertTrue(fs.delete(tempRestoreDir, true));
645+
Assert.assertFalse("Restore directory should not exist after deletion",
646+
fs.exists(tempRestoreDir));
647+
Job failureJob = Job.getInstance(UTIL.getConfiguration());
648+
failureJob.setJarByClass(TestTableSnapshotInputFormat.class);
649+
TableMapReduceUtil.addDependencyJarsForClasses(failureJob.getConfiguration(),
650+
TestTableSnapshotInputFormat.class);
651+
Configuration failureConf = failureJob.getConfiguration();
652+
// Configure job to use the deleted restore directory
653+
failureConf.set("hbase.TableSnapshotInputFormat.snapshot.name", snapshotName);
654+
failureConf.set("hbase.TableSnapshotInputFormat.restore.dir", tempRestoreDir.toString());
655+
failureConf.setInt("hbase.mapreduce.splits.per.region", 1);
656+
failureJob.setReducerClass(TestTableSnapshotReducer.class);
657+
failureJob.setNumReduceTasks(1);
658+
failureJob.setOutputFormatClass(NullOutputFormat.class);
659+
660+
TableMapReduceUtil.initTableMapperJob(snapshotName, scan, TestTableSnapshotMapper.class,
661+
ImmutableBytesWritable.class, NullWritable.class, failureJob, false, false,
662+
TableSnapshotInputFormat.class);
663+
TableMapReduceUtil.resetCacheConfig(failureConf);
664+
665+
Assert.assertFalse("Restore directory should not exist before job execution",
666+
fs.exists(tempRestoreDir));
667+
failureJob.waitForCompletion(true);
668+
669+
Assert.assertFalse("Job should fail since the restored snapshot directory is deleted",
670+
failureJob.isSuccessful());
671+
672+
} finally {
673+
try {
674+
if (UTIL.getAdmin().tableExists(tableName)) {
675+
UTIL.deleteTable(tableName);
676+
}
677+
} catch (Exception e) {
678+
LOG.warn("Error deleting table", e);
679+
}
680+
try {
681+
UTIL.getAdmin().deleteSnapshot(snapshotName);
682+
} catch (Exception e) {
683+
LOG.warn("Error deleting snapshot", e);
684+
}
685+
}
686+
}
586687
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/janitor/MetaFixer.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.SortedSet;
3030
import java.util.TreeSet;
3131
import java.util.stream.Collectors;
32+
import org.apache.hadoop.fs.Path;
3233
import org.apache.hadoop.hbase.HBaseIOException;
3334
import org.apache.hadoop.hbase.HConstants;
3435
import org.apache.hadoop.hbase.MetaTableAccessor;
@@ -38,10 +39,13 @@
3839
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3940
import org.apache.hadoop.hbase.client.TableDescriptor;
4041
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
42+
import org.apache.hadoop.hbase.master.MasterFileSystem;
4143
import org.apache.hadoop.hbase.master.MasterServices;
4244
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
45+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
4346
import org.apache.hadoop.hbase.replication.ReplicationException;
4447
import org.apache.hadoop.hbase.util.Bytes;
48+
import org.apache.hadoop.hbase.util.CommonFSUtils;
4549
import org.apache.hadoop.hbase.util.Pair;
4650
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
4751
import org.apache.yetus.audience.InterfaceAudience;
@@ -105,6 +109,7 @@ void fixHoles(CatalogJanitorReport report) {
105109

106110
final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
107111
final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
112+
createRegionDirectories(masterServices, newMetaEntries);
108113
final TransitRegionStateProcedure[] assignProcedures =
109114
masterServices.getAssignmentManager().createRoundRobinAssignProcedures(newMetaEntries);
110115

@@ -226,6 +231,27 @@ private static List<RegionInfo> createMetaEntries(final MasterServices masterSer
226231
return createMetaEntriesSuccesses;
227232
}
228233

234+
private static void createRegionDirectories(final MasterServices masterServices,
235+
final List<RegionInfo> regions) {
236+
if (regions.isEmpty()) {
237+
return;
238+
}
239+
final MasterFileSystem mfs = masterServices.getMasterFileSystem();
240+
final Path rootDir = mfs.getRootDir();
241+
for (RegionInfo regionInfo : regions) {
242+
if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
243+
try {
244+
Path tableDir = CommonFSUtils.getTableDir(rootDir, regionInfo.getTable());
245+
HRegionFileSystem.createRegionOnFileSystem(masterServices.getConfiguration(),
246+
mfs.getFileSystem(), tableDir, regionInfo);
247+
} catch (IOException e) {
248+
LOG.warn("Failed to create region directory for {}: {}",
249+
regionInfo.getRegionNameAsString(), e.getMessage(), e);
250+
}
251+
}
252+
}
253+
}
254+
229255
/**
230256
* Fix overlaps noted in CJ consistency report.
231257
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
109109
setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_ONLINE);
110110
break;
111111
case TRUNCATE_REGION_MAKE_ONLINE:
112+
createRegionOnFileSystem(env);
112113
addChildProcedure(createAssignProcedures(env));
113114
setNextState(TruncateRegionState.TRUNCATE_REGION_POST_OPERATION);
114115
break;
@@ -130,6 +131,20 @@ assert getRegion().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID || isFailed()
130131
return Flow.HAS_MORE_STATE;
131132
}
132133

134+
private void createRegionOnFileSystem(final MasterProcedureEnv env) throws IOException {
135+
RegionStateNode regionNode =
136+
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
137+
regionNode.lock();
138+
try {
139+
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
140+
final Path tableDir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName());
141+
HRegionFileSystem.createRegionOnFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(),
142+
tableDir, getRegion());
143+
} finally {
144+
regionNode.unlock();
145+
}
146+
}
147+
133148
private void deleteRegionFromFileSystem(final MasterProcedureEnv env) throws IOException {
134149
RegionStateNode regionNode =
135150
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,10 @@ private static void writeRegionInfoFileContent(final Configuration conf, final F
810810
// First check to get the permissions
811811
FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
812812
// Write the RegionInfo file content
813-
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
813+
// HBASE-29662: Fail .regioninfo file creation, if the region directory doesn't exist,
814+
// avoiding silent masking of missing region directories during region initialization.
815+
// The region directory should already exist when this method is called.
816+
try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null, false)) {
814817
out.write(content);
815818
}
816819
}
@@ -894,6 +897,14 @@ private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final b
894897
CommonFSUtils.delete(fs, tmpPath, true);
895898
}
896899

900+
// Check parent (region) directory exists first to maintain HBASE-29662 protection
901+
if (!fs.exists(getRegionDir())) {
902+
throw new IOException("Region directory does not exist: " + getRegionDir());
903+
}
904+
if (!fs.exists(getTempDir())) {
905+
fs.mkdirs(getTempDir());
906+
}
907+
897908
// Write HRI to a file in case we need to recover hbase:meta
898909
writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
899910

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,32 @@ public static boolean deleteRegionDir(final Configuration conf, final RegionInfo
213213
*/
214214
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
215215
FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException {
216+
return create(conf, fs, path, perm, favoredNodes, true);
217+
}
218+
219+
/**
220+
* Create the specified file on the filesystem. By default, this will:
221+
* <ol>
222+
* <li>overwrite the file if it exists</li>
223+
* <li>apply the umask in the configuration (if it is enabled)</li>
224+
* <li>use the fs configured buffer size (or 4096 if not set)</li>
225+
* <li>use the configured column family replication or default replication if
226+
* {@link ColumnFamilyDescriptorBuilder#DEFAULT_DFS_REPLICATION}</li>
227+
* <li>use the default block size</li>
228+
* <li>not track progress</li>
229+
* </ol>
230+
* @param conf configurations
231+
* @param fs {@link FileSystem} on which to write the file
232+
* @param path {@link Path} to the file to write
233+
* @param perm permissions
234+
* @param favoredNodes favored data nodes
235+
* @param isRecursiveCreate recursively create parent directories
236+
* @return output stream to the created file
237+
* @throws IOException if the file cannot be created
238+
*/
239+
public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path path,
240+
FsPermission perm, InetSocketAddress[] favoredNodes, boolean isRecursiveCreate)
241+
throws IOException {
216242
if (fs instanceof HFileSystem) {
217243
FileSystem backingFs = ((HFileSystem) fs).getBackingFs();
218244
if (backingFs instanceof DistributedFileSystem) {
@@ -231,7 +257,7 @@ public static FSDataOutputStream create(Configuration conf, FileSystem fs, Path
231257
}
232258

233259
}
234-
return CommonFSUtils.create(fs, path, perm, true);
260+
return CommonFSUtils.create(fs, path, perm, true, isRecursiveCreate);
235261
}
236262

237263
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
import org.apache.hadoop.hbase.logging.Log4jUtils;
103103
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
104104
import org.apache.hadoop.hbase.master.HMaster;
105+
import org.apache.hadoop.hbase.master.MasterFileSystem;
105106
import org.apache.hadoop.hbase.master.RegionState;
106107
import org.apache.hadoop.hbase.master.ServerManager;
107108
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
@@ -112,6 +113,7 @@
112113
import org.apache.hadoop.hbase.regionserver.BloomType;
113114
import org.apache.hadoop.hbase.regionserver.ChunkCreator;
114115
import org.apache.hadoop.hbase.regionserver.HRegion;
116+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
115117
import org.apache.hadoop.hbase.regionserver.HRegionServer;
116118
import org.apache.hadoop.hbase.regionserver.HStore;
117119
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@@ -4321,4 +4323,22 @@ public static void await(final long sleepMillis, final BooleanSupplier condition
43214323
throw e;
43224324
}
43234325
}
4326+
4327+
public void createRegionDir(RegionInfo hri) throws IOException {
4328+
Path rootDir = getDataTestDir();
4329+
Path tableDir = CommonFSUtils.getTableDir(rootDir, hri.getTable());
4330+
Path regionDir = new Path(tableDir, hri.getEncodedName());
4331+
FileSystem fs = getTestFileSystem();
4332+
if (!fs.exists(regionDir)) {
4333+
fs.mkdirs(regionDir);
4334+
}
4335+
}
4336+
4337+
public void createRegionDir(RegionInfo regionInfo, MasterFileSystem masterFileSystem)
4338+
throws IOException {
4339+
Path tableDir =
4340+
CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), regionInfo.getTable());
4341+
HRegionFileSystem.createRegionOnFileSystem(conf, masterFileSystem.getFileSystem(), tableDir,
4342+
regionInfo);
4343+
}
43244344
}

hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoreRegionCoprocessor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,14 @@ public void before() throws IOException {
7676
this.rss = new MockRegionServerServices(HTU.getConfiguration());
7777
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
7878
MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
79+
HTU.createRegionDir(ri);
7980
this.region = HRegion.openHRegion(ri, td, null, HTU.getConfiguration(), this.rss, null);
8081
}
8182

8283
@After
8384
public void after() throws IOException {
8485
this.region.close();
86+
HTU.cleanupTestDir();
8587
}
8688

8789
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestMetaFixer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, Reg
170170
throws IOException {
171171
RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
172172
.setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
173+
TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem());
173174
MetaTableAccessor.putsToMetaTable(services.getConnection(),
174175
Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
175176
EnvironmentEdgeManager.currentTime())));

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveConcurrentClose.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,11 @@ private HRegion initHRegion(TableDescriptor htd, RegionInfo info) throws IOExcep
178178
CommonFSUtils.setRootDir(walConf, tableDir);
179179
final WALFactory wals = new WALFactory(walConf, "log_" + info.getEncodedName());
180180
HRegion region = new HRegion(fs, wals.getWAL(info), conf, htd, null);
181-
181+
Path regionDir = new Path(tableDir, info.getEncodedName());
182+
if (!fs.getFileSystem().exists(regionDir)) {
183+
fs.getFileSystem().mkdirs(regionDir);
184+
}
182185
region.initialize();
183-
184186
return region;
185187
}
186188

0 commit comments

Comments
 (0)