1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master.procedure;
20
21 import java.io.IOException;
22 import java.util.concurrent.CountDownLatch;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.fs.FileSystem;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.hbase.HBaseTestingUtility;
30 import org.apache.hadoop.hbase.HRegionInfo;
31 import org.apache.hadoop.hbase.HTableDescriptor;
32 import org.apache.hadoop.hbase.MiniHBaseCluster;
33 import org.apache.hadoop.hbase.TableName;
34 import org.apache.hadoop.hbase.master.HMaster;
35 import org.apache.hadoop.hbase.procedure2.Procedure;
36 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
37 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
38 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
39 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
40 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
41 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
42 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
43 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
44 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
45 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.apache.hadoop.hbase.util.FSUtils;
49 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
50 import org.junit.After;
51 import org.junit.Before;
52 import org.junit.Test;
53 import org.junit.experimental.categories.Category;
54 import org.mockito.Mockito;
55
56 import static org.junit.Assert.assertEquals;
57 import static org.junit.Assert.assertTrue;
58 import static org.junit.Assert.fail;
59
60 @Category(LargeTests.class)
61 public class TestMasterFailoverWithProcedures {
62 private static final Log LOG = LogFactory.getLog(TestMasterFailoverWithProcedures.class);
63
64 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
65
66 private static void setupConf(Configuration conf) {
67
68 conf.setInt("hbase.procedure.store.wal.max.retries.before.roll", 1);
69 conf.setInt("hbase.procedure.store.wal.wait.before.roll", 0);
70 conf.setInt("hbase.procedure.store.wal.max.roll.retries", 1);
71 conf.setInt("hbase.procedure.store.wal.sync.failure.roll.max", 1);
72 }
73
74 @Before
75 public void setup() throws Exception {
76 setupConf(UTIL.getConfiguration());
77 UTIL.startMiniCluster(2, 1);
78
79 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
80 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false);
81 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false);
82 }
83
84 @After
85 public void tearDown() throws Exception {
86 try {
87 UTIL.shutdownMiniCluster();
88 } catch (Exception e) {
89 LOG.warn("failure shutting down cluster", e);
90 }
91 }
92
93 @Test(timeout=60000)
94 public void testWalRecoverLease() throws Exception {
95 final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
96 assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);
97
98 HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
99
100 final CountDownLatch masterStoreAbort = new CountDownLatch(1);
101 masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
102 @Override
103 public void postSync() {}
104
105 @Override
106 public void abortProcess() {
107 LOG.debug("Abort store of Master");
108 masterStoreAbort.countDown();
109 }
110 });
111
112
113
114 HMaster backupMaster3 = Mockito.mock(HMaster.class);
115 Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
116 Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
117 final WALProcedureStore backupStore3 = new WALProcedureStore(firstMaster.getConfiguration(),
118 firstMaster.getMasterFileSystem().getFileSystem(),
119 ((WALProcedureStore)masterStore).getLogDir(),
120 new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
121
122 final CountDownLatch backupStore3Abort = new CountDownLatch(1);
123 backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
124 @Override
125 public void postSync() {}
126
127 @Override
128 public void abortProcess() {
129 LOG.debug("Abort store of backupMaster3");
130 backupStore3Abort.countDown();
131 backupStore3.stop(true);
132 }
133 });
134 backupStore3.start(1);
135 backupStore3.recoverLease();
136
137
138 HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(TableName.valueOf("mtb"), "f");
139 HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, null);
140 LOG.debug("submit proc");
141 try {
142 getMasterProcedureExecutor().submitProcedure(
143 new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
144 fail("expected RuntimeException 'sync aborted'");
145 } catch (RuntimeException e) {
146 LOG.info("got " + e.getMessage());
147 }
148 LOG.debug("wait master store abort");
149 masterStoreAbort.await();
150
151
152 LOG.debug("wait backup master to startup");
153 waitBackupMaster(UTIL, firstMaster);
154 assertEquals(true, firstMaster.isStopped());
155
156
157 LOG.debug("wait the store to abort");
158 backupStore3.getStoreTracker().setDeleted(1, false);
159 try {
160 backupStore3.delete(1);
161 fail("expected RuntimeException 'sync aborted'");
162 } catch (RuntimeException e) {
163 LOG.info("got " + e.getMessage());
164 }
165 backupStore3Abort.await();
166 }
167
168
169
170
171 @Test
172 public void testWALfencingWithoutWALRolling() throws IOException {
173 testWALfencing(false);
174 }
175
176
177
178
179
180 @Test
181 public void testWALfencingWithWALRolling() throws IOException {
182 testWALfencing(true);
183 }
184
185 public void testWALfencing(boolean walRolls) throws IOException {
186 final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
187 assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);
188
189 HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
190
191
192 firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold", 1);
193
194 HMaster backupMaster3 = Mockito.mock(HMaster.class);
195 Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
196 Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
197 final WALProcedureStore procStore2 = new WALProcedureStore(firstMaster.getConfiguration(),
198 firstMaster.getMasterFileSystem().getFileSystem(),
199 ((WALProcedureStore)procStore).getLogDir(),
200 new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
201
202
203 LOG.info("Starting new WALProcedureStore");
204 procStore2.start(1);
205 procStore2.recoverLease();
206
207
208
209 if (walRolls) {
210 LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
211 for (int i = 0; i < 512; i++) {
212
213 Procedure proc2 = new TestProcedure(i);
214 procStore2.insert(proc2, null);
215 procStore2.delete(proc2.getProcId());
216 }
217 }
218
219
220
221
222 LOG.info("Inserting into first WALProcedureStore");
223 try {
224 procStore.insert(new TestProcedure(11), null);
225 fail("Inserting into Procedure Store should have failed");
226 } catch (Exception ex) {
227 LOG.info("Received expected exception", ex);
228 }
229 }
230
231
232
233
234 @Test(timeout=60000)
235 public void testCreateWithFailover() throws Exception {
236
237
238
239
240
241 testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal());
242 }
243
244 private void testCreateWithFailoverAtStep(final int step) throws Exception {
245 final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step);
246
247
248 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
249 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
250 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
251
252
253 byte[][] splitKeys = null;
254 HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2");
255 HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, splitKeys);
256 long procId = procExec.submitProcedure(
257 new CreateTableProcedure(procExec.getEnvironment(), htd, regions));
258 testRecoveryAndDoubleExecution(UTIL, procId, step, CreateTableState.values());
259
260 MasterProcedureTestingUtility.validateTableCreation(
261 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
262 }
263
264
265
266
267 @Test(timeout=60000)
268 public void testDeleteWithFailover() throws Exception {
269
270
271
272
273
274 testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal());
275 }
276
277 private void testDeleteWithFailoverAtStep(final int step) throws Exception {
278 final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step);
279
280
281 byte[][] splitKeys = null;
282 HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
283 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
284 Path tableDir = FSUtils.getTableDir(getRootDir(), tableName);
285 MasterProcedureTestingUtility.validateTableCreation(
286 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
287 UTIL.getHBaseAdmin().disableTable(tableName);
288
289 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
290 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
291 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
292
293
294 long procId = procExec.submitProcedure(
295 new DeleteTableProcedure(procExec.getEnvironment(), tableName));
296 testRecoveryAndDoubleExecution(UTIL, procId, step, DeleteTableState.values());
297
298 MasterProcedureTestingUtility.validateTableDeletion(
299 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
300 }
301
302
303
304
305 @Test(timeout=90000)
306 public void testTruncateWithFailover() throws Exception {
307
308
309
310
311
312 testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal());
313 }
314
315 private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step)
316 throws Exception {
317 final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step);
318
319
320 final String[] families = new String[] { "f1", "f2" };
321 final byte[][] splitKeys = new byte[][] {
322 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
323 };
324 HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
325 getMasterProcedureExecutor(), tableName, splitKeys, families);
326
327 MasterProcedureTestingUtility.loadData(
328 UTIL.getConnection(), tableName, 100, splitKeys, families);
329 assertEquals(100, UTIL.countRows(tableName));
330
331 UTIL.getHBaseAdmin().disableTable(tableName);
332
333 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
334 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
335
336
337 long procId = procExec.submitProcedure(
338 new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
339 testRecoveryAndDoubleExecution(UTIL, procId, step, TruncateTableState.values());
340
341 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
342 UTIL.waitUntilAllRegionsAssigned(tableName);
343
344
345 if (preserveSplits) {
346 assertEquals(1 + splitKeys.length, UTIL.getHBaseAdmin().getTableRegions(tableName).size());
347 } else {
348 regions = UTIL.getHBaseAdmin().getTableRegions(tableName).toArray(new HRegionInfo[1]);
349 assertEquals(1, regions.length);
350 }
351 MasterProcedureTestingUtility.validateTableCreation(
352 UTIL.getHBaseCluster().getMaster(), tableName, regions, families);
353
354
355 assertEquals(0, UTIL.countRows(tableName));
356
357
358 MasterProcedureTestingUtility.loadData(
359 UTIL.getConnection(), tableName, 50, splitKeys, families);
360 assertEquals(50, UTIL.countRows(tableName));
361 }
362
363
364
365
366 @Test(timeout=60000)
367 public void testDisableTableWithFailover() throws Exception {
368
369
370
371
372
373 testDisableTableWithFailoverAtStep(
374 DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal());
375 }
376
377 private void testDisableTableWithFailoverAtStep(final int step) throws Exception {
378 final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step);
379
380
381 final byte[][] splitKeys = new byte[][] {
382 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
383 };
384 MasterProcedureTestingUtility.createTable(
385 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
386
387 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
388 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
389
390
391 long procId = procExec.submitProcedure(
392 new DisableTableProcedure(procExec.getEnvironment(), tableName, false));
393 testRecoveryAndDoubleExecution(UTIL, procId, step, DisableTableState.values());
394
395 MasterProcedureTestingUtility.validateTableIsDisabled(
396 UTIL.getHBaseCluster().getMaster(), tableName);
397 }
398
399
400
401
402 @Test(timeout=60000)
403 public void testEnableTableWithFailover() throws Exception {
404
405
406
407
408
409 testEnableTableWithFailoverAtStep(
410 EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal());
411 }
412
413 private void testEnableTableWithFailoverAtStep(final int step) throws Exception {
414 final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step);
415
416
417 final byte[][] splitKeys = new byte[][] {
418 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
419 };
420 MasterProcedureTestingUtility.createTable(
421 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
422 UTIL.getHBaseAdmin().disableTable(tableName);
423
424 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
425 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
426
427
428 long procId = procExec.submitProcedure(
429 new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
430 testRecoveryAndDoubleExecution(UTIL, procId, step, EnableTableState.values());
431
432 MasterProcedureTestingUtility.validateTableIsEnabled(
433 UTIL.getHBaseCluster().getMaster(), tableName);
434 }
435
436
437
438
439 public static <TState> void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil,
440 final long procId, final int lastStepBeforeFailover, TState[] states) throws Exception {
441 ProcedureExecutor<MasterProcedureEnv> procExec =
442 testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
443 ProcedureTestingUtility.waitProcedure(procExec, procId);
444
445 for (int i = 0; i < lastStepBeforeFailover; ++i) {
446 LOG.info("Restart "+ i +" exec state: " + states[i]);
447 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
448 ProcedureTestingUtility.restart(procExec);
449 ProcedureTestingUtility.waitProcedure(procExec, procId);
450 }
451 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
452
453 LOG.info("Trigger master failover");
454 masterFailover(testUtil);
455
456 procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
457 ProcedureTestingUtility.waitProcedure(procExec, procId);
458 ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
459 }
460
461
462
463
464 public static void masterFailover(final HBaseTestingUtility testUtil)
465 throws Exception {
466 MiniHBaseCluster cluster = testUtil.getMiniHBaseCluster();
467
468
469 HMaster oldMaster = cluster.getMaster();
470 cluster.killMaster(cluster.getMaster().getServerName());
471
472
473 waitBackupMaster(testUtil, oldMaster);
474 }
475
476 public static void waitBackupMaster(final HBaseTestingUtility testUtil,
477 final HMaster oldMaster) throws Exception {
478 MiniHBaseCluster cluster = testUtil.getMiniHBaseCluster();
479
480 HMaster newMaster = cluster.getMaster();
481 while (newMaster == null || newMaster == oldMaster) {
482 Thread.sleep(250);
483 newMaster = cluster.getMaster();
484 }
485
486 while (!(newMaster.isActiveMaster() && newMaster.isInitialized())) {
487 Thread.sleep(250);
488 }
489 }
490
491
492
493
494 private MasterProcedureEnv getMasterProcedureEnv() {
495 return getMasterProcedureExecutor().getEnvironment();
496 }
497
498 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
499 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
500 }
501
502 private FileSystem getFileSystem() {
503 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
504 }
505
506 private Path getRootDir() {
507 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
508 }
509
510 private Path getTempDir() {
511 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getTempDir();
512 }
513 }