1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.TableStateManager;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.executor.EventType;
56 import org.apache.hadoop.hbase.master.RegionState.State;
57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58 import org.apache.hadoop.hbase.protobuf.RequestConverter;
59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60 import org.apache.hadoop.hbase.regionserver.HRegion;
61 import org.apache.hadoop.hbase.regionserver.HRegionServer;
62 import org.apache.hadoop.hbase.regionserver.Region;
63 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.FSTableDescriptors;
67 import org.apache.hadoop.hbase.util.FSUtils;
68 import org.apache.hadoop.hbase.util.JVMClusterUtil;
69 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71 import org.apache.hadoop.hbase.util.Threads;
72 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76 import org.apache.zookeeper.data.Stat;
77 import org.junit.Test;
78 import org.junit.experimental.categories.Category;
79
80 @Category(LargeTests.class)
81 public class TestMasterFailover {
82 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163 @Test (timeout=240000)
164 public void testMasterFailoverWithMockedRIT() throws Exception {
165
166 final int NUM_MASTERS = 1;
167 final int NUM_RS = 3;
168
169
170 Configuration conf = HBaseConfiguration.create();
171 conf.setBoolean("hbase.assignment.usezk", true);
172
173
174 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
175 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
176 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
177 log("Cluster started");
178
179
180 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
181
182
183 List<MasterThread> masterThreads = cluster.getMasterThreads();
184 assertEquals(1, masterThreads.size());
185
186
187 assertTrue(cluster.waitForActiveAndReadyMaster());
188 HMaster master = masterThreads.get(0).getMaster();
189 assertTrue(master.isActiveMaster());
190 assertTrue(master.isInitialized());
191
192
193 master.balanceSwitch(false);
194
195
196 byte [] FAMILY = Bytes.toBytes("family");
197 byte [][] SPLIT_KEYS = new byte [][] {
198 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
199 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
200 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
201 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
202 };
203
204 byte [] enabledTable = Bytes.toBytes("enabledTable");
205 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
206 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
207
208 FileSystem filesystem = FileSystem.get(conf);
209 Path rootdir = FSUtils.getRootDir(conf);
210 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
211
212 fstd.createTableDescriptor(htdEnabled);
213
214 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
215 createRegion(hriEnabled, rootdir, conf, htdEnabled);
216
217 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
218 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
219
220 TableName disabledTable = TableName.valueOf("disabledTable");
221 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
222 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
223
224 fstd.createTableDescriptor(htdDisabled);
225 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
226 createRegion(hriDisabled, rootdir, conf, htdDisabled);
227 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
228 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
229
230 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
231 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
232
233 log("Regions in hbase:meta and namespace have been created");
234
235
236
237 assertTrue(4 <= cluster.countServedRegions());
238
239
240 AssignmentManager am = master.getAssignmentManager();
241 RegionStates regionStates = am.getRegionStates();
242 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
243 assertEquals(2, mergingRegions.size());
244 HRegionInfo a = mergingRegions.get(0);
245 HRegionInfo b = mergingRegions.get(1);
246 HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
247 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
248 ServerName serverB = regionStates.getRegionServerOfRegion(b);
249 if (!serverB.equals(mergingServer)) {
250 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
251 am.balance(plan);
252 assertTrue(am.waitForAssignment(b));
253 }
254
255
256 HRegionServer hrs = cluster.getRegionServer(0);
257 ServerName serverName = hrs.getServerName();
258 HRegionInfo closingRegion = enabledRegions.remove(0);
259
260 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
261 enabledAndAssignedRegions.add(enabledRegions.remove(0));
262 enabledAndAssignedRegions.add(enabledRegions.remove(0));
263 enabledAndAssignedRegions.add(closingRegion);
264
265 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
266 disabledAndAssignedRegions.add(disabledRegions.remove(0));
267 disabledAndAssignedRegions.add(disabledRegions.remove(0));
268
269
270 for (HRegionInfo hri : enabledAndAssignedRegions) {
271 master.assignmentManager.addPlan(hri.getEncodedName(),
272 new RegionPlan(hri, null, serverName));
273 master.assignRegion(hri);
274 }
275
276 for (HRegionInfo hri : disabledAndAssignedRegions) {
277 master.assignmentManager.addPlan(hri.getEncodedName(),
278 new RegionPlan(hri, null, serverName));
279 master.assignRegion(hri);
280 }
281
282
283 log("Waiting for assignment to finish");
284 ZKAssign.blockUntilNoRIT(zkw);
285 log("Assignment completed");
286
287
288 log("Aborting master");
289 cluster.abortMaster(0);
290 cluster.waitOnMaster(0);
291 log("Master has aborted");
292
293
294
295
296
297
298 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
299 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
300
301 log("Beginning to mock scenarios");
302
303
304 TableStateManager zktable = new ZKTableStateManager(zkw);
305 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
306
307
308
309
310
311
312
313
314 HRegionInfo region = enabledRegions.remove(0);
315 regionsThatShouldBeOnline.add(region);
316 ZKAssign.createNodeOffline(zkw, region, serverName);
317
318
319
320
321
322 regionsThatShouldBeOnline.add(closingRegion);
323 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
324
325
326
327
328
329
330
331 region = enabledRegions.remove(0);
332 regionsThatShouldBeOnline.add(region);
333 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
334 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
335
336
337 region = disabledRegions.remove(0);
338 regionsThatShouldBeOffline.add(region);
339 version = ZKAssign.createNodeClosing(zkw, region, serverName);
340 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
341
342
343
344
345
346
347
348 region = enabledRegions.remove(0);
349 regionsThatShouldBeOnline.add(region);
350 ZKAssign.createNodeOffline(zkw, region, serverName);
351 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
352 while (true) {
353 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
354 RegionTransition rt = RegionTransition.parseFrom(bytes);
355 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
356 break;
357 }
358 Thread.sleep(100);
359 }
360
361
362
363 region = disabledRegions.remove(0);
364 regionsThatShouldBeOffline.add(region);
365 ZKAssign.createNodeOffline(zkw, region, serverName);
366 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
367 while (true) {
368 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
369 RegionTransition rt = RegionTransition.parseFrom(bytes);
370 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
371 break;
372 }
373 Thread.sleep(100);
374 }
375
376
377
378
379
380
381
382 hrs.getCoordinatedStateManager().
383 getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
384
385
386
387
388
389
390
391
392
393 log("Done mocking data up in ZK");
394
395
396 log("Starting up a new master");
397 master = cluster.startMaster().getMaster();
398 log("Waiting for master to be ready");
399 cluster.waitForActiveAndReadyMaster();
400 log("Master is ready");
401
402
403 regionStates = master.getAssignmentManager().getRegionStates();
404
405 assertTrue(regionStates.isRegionInState(a, State.MERGING));
406 assertTrue(regionStates.isRegionInState(b, State.MERGING));
407 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
408
409
410 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
411
412
413 log("Waiting for no more RIT");
414 ZKAssign.blockUntilNoRIT(zkw);
415 log("No more RIT in ZK, now doing final test verification");
416
417
418 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
419 for (JVMClusterUtil.RegionServerThread rst :
420 cluster.getRegionServerThreads()) {
421 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
422 rst.getRegionServer().getRSRpcServices()));
423 }
424
425
426 for (HRegionInfo hri : regionsThatShouldBeOnline) {
427 assertTrue(onlineRegions.contains(hri));
428 }
429
430
431 for (HRegionInfo hri : regionsThatShouldBeOffline) {
432 if (onlineRegions.contains(hri)) {
433 LOG.debug(hri);
434 }
435 assertFalse(onlineRegions.contains(hri));
436 }
437
438 log("Done with verification, all passed, shutting down cluster");
439
440
441 TEST_UTIL.shutdownMiniCluster();
442 }
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500 @Test (timeout=180000)
501 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
502
503 final int NUM_MASTERS = 1;
504 final int NUM_RS = 2;
505
506
507 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
508 Configuration conf = TEST_UTIL.getConfiguration();
509 conf.setBoolean("hbase.assignment.usezk", true);
510
511 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
512 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
513 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
514 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
515 log("Cluster started");
516
517
518 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
519 "unittest", new Abortable() {
520
521 @Override
522 public void abort(String why, Throwable e) {
523 LOG.error("Fatal ZK Error: " + why, e);
524 org.junit.Assert.assertFalse("Fatal ZK error", true);
525 }
526
527 @Override
528 public boolean isAborted() {
529 return false;
530 }
531
532 });
533
534
535 List<MasterThread> masterThreads = cluster.getMasterThreads();
536 assertEquals(1, masterThreads.size());
537
538
539 assertTrue(cluster.waitForActiveAndReadyMaster());
540 HMaster master = masterThreads.get(0).getMaster();
541 assertTrue(master.isActiveMaster());
542 assertTrue(master.isInitialized());
543
544
545 master.balanceSwitch(false);
546
547
548 byte [] FAMILY = Bytes.toBytes("family");
549 byte[][] SPLIT_KEYS =
550 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
551
552 byte [] enabledTable = Bytes.toBytes("enabledTable");
553 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
554 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
555 FileSystem filesystem = FileSystem.get(conf);
556 Path rootdir = FSUtils.getRootDir(conf);
557 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
558
559 fstd.createTableDescriptor(htdEnabled);
560 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
561 null, null);
562 createRegion(hriEnabled, rootdir, conf, htdEnabled);
563
564 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
565 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
566
567 TableName disabledTable =
568 TableName.valueOf("disabledTable");
569 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
570 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
571
572 fstd.createTableDescriptor(htdDisabled);
573 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
574 createRegion(hriDisabled, rootdir, conf, htdDisabled);
575
576 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
577 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
578
579 log("Regions in hbase:meta and Namespace have been created");
580
581
582 assertTrue(2 <= cluster.countServedRegions());
583
584
585 List<RegionServerThread> regionservers =
586 cluster.getRegionServerThreads();
587 HRegionServer hrs = regionservers.get(0).getRegionServer();
588
589
590 RegionServerThread hrsDeadThread = regionservers.get(1);
591 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
592 ServerName deadServerName = hrsDead.getServerName();
593
594
595 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
596 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
597 enabledRegions.removeAll(enabledAndAssignedRegions);
598 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
600 disabledRegions.removeAll(disabledAndAssignedRegions);
601
602
603 for (HRegionInfo hri : enabledAndAssignedRegions) {
604 master.assignmentManager.addPlan(hri.getEncodedName(),
605 new RegionPlan(hri, null, hrs.getServerName()));
606 master.assignRegion(hri);
607 }
608 for (HRegionInfo hri : disabledAndAssignedRegions) {
609 master.assignmentManager.addPlan(hri.getEncodedName(),
610 new RegionPlan(hri, null, hrs.getServerName()));
611 master.assignRegion(hri);
612 }
613
614 log("Waiting for assignment to finish");
615 ZKAssign.blockUntilNoRIT(zkw);
616 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
617 log("Assignment completed");
618
619 assertTrue(" Table must be enabled.", master.getAssignmentManager()
620 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
621 ZooKeeperProtos.Table.State.ENABLED));
622
623 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
624 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
625 enabledRegions.removeAll(enabledAndOnDeadRegions);
626 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
628 disabledRegions.removeAll(disabledAndOnDeadRegions);
629
630
631 for (HRegionInfo hri : enabledAndOnDeadRegions) {
632 master.assignmentManager.addPlan(hri.getEncodedName(),
633 new RegionPlan(hri, null, deadServerName));
634 master.assignRegion(hri);
635 }
636 for (HRegionInfo hri : disabledAndOnDeadRegions) {
637 master.assignmentManager.addPlan(hri.getEncodedName(),
638 new RegionPlan(hri, null, deadServerName));
639 master.assignRegion(hri);
640 }
641
642
643 log("Waiting for assignment to finish");
644 ZKAssign.blockUntilNoRIT(zkw);
645 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
646 log("Assignment completed");
647
648
649
650 verifyRegionLocation(hrs, enabledAndAssignedRegions);
651 verifyRegionLocation(hrs, disabledAndAssignedRegions);
652 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
653 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
654
655 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
656 enabledAndAssignedRegions.size() >= 2);
657 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
658 disabledAndAssignedRegions.size() >= 2);
659 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
660 enabledAndOnDeadRegions.size() >= 2);
661 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
662 disabledAndOnDeadRegions.size() >= 2);
663
664
665 log("Aborting master");
666 cluster.abortMaster(0);
667 cluster.waitOnMaster(0);
668 log("Master has aborted");
669
670
671
672
673
674
675 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
676 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
677
678 log("Beginning to mock scenarios");
679
680
681 TableStateManager zktable = new ZKTableStateManager(zkw);
682 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
683
684 assertTrue(" The enabled table should be identified on master fail over.",
685 zktable.isTableState(TableName.valueOf("enabledTable"),
686 ZooKeeperProtos.Table.State.ENABLED));
687
688
689
690
691
692
693 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
694 regionsThatShouldBeOnline.add(region);
695 ZKAssign.createNodeClosing(zkw, region, deadServerName);
696 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
697 region + "\n\n");
698
699
700 region = disabledAndOnDeadRegions.remove(0);
701 regionsThatShouldBeOffline.add(region);
702 ZKAssign.createNodeClosing(zkw, region, deadServerName);
703 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
704 region + "\n\n");
705
706
707
708
709
710
711 region = enabledAndOnDeadRegions.remove(0);
712 regionsThatShouldBeOnline.add(region);
713 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
714 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
715 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
716 region + "\n\n");
717
718
719 region = disabledAndOnDeadRegions.remove(0);
720 regionsThatShouldBeOffline.add(region);
721 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
722 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
723 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
724 region + "\n\n");
725
726
727
728
729
730
731 region = enabledRegions.remove(0);
732 regionsThatShouldBeOnline.add(region);
733 ZKAssign.createNodeOffline(zkw, region, deadServerName);
734 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
735 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
736 region + "\n\n");
737
738
739 region = disabledRegions.remove(0);
740 regionsThatShouldBeOffline.add(region);
741 ZKAssign.createNodeOffline(zkw, region, deadServerName);
742 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
743 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
744 region + "\n\n");
745
746
747
748
749
750
751 region = enabledRegions.remove(0);
752 regionsThatShouldBeOnline.add(region);
753 ZKAssign.createNodeOffline(zkw, region, deadServerName);
754 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
755 hrsDead.getServerName(), region);
756 while (true) {
757 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
758 RegionTransition rt = RegionTransition.parseFrom(bytes);
759 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
760 break;
761 }
762 Thread.sleep(100);
763 }
764 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
765 region + "\n\n");
766
767
768 region = disabledRegions.remove(0);
769 regionsThatShouldBeOffline.add(region);
770 ZKAssign.createNodeOffline(zkw, region, deadServerName);
771 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
772 hrsDead.getServerName(), region);
773 while (true) {
774 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
775 RegionTransition rt = RegionTransition.parseFrom(bytes);
776 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
777 break;
778 }
779 Thread.sleep(100);
780 }
781 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
782 region + "\n\n");
783
784
785
786
787
788
789 region = enabledRegions.remove(0);
790 regionsThatShouldBeOnline.add(region);
791 ZKAssign.createNodeOffline(zkw, region, deadServerName);
792 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
793 hrsDead.getServerName(), region);
794 while (true) {
795 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
796 RegionTransition rt = RegionTransition.parseFrom(bytes);
797 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
798 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
799 LOG.debug("DELETED " + rt);
800 break;
801 }
802 Thread.sleep(100);
803 }
804 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
805 + "\n" + region + "\n\n");
806
807
808 region = disabledRegions.remove(0);
809 regionsThatShouldBeOffline.add(region);
810 ZKAssign.createNodeOffline(zkw, region, deadServerName);
811 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
812 hrsDead.getServerName(), region);
813 while (true) {
814 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
815 RegionTransition rt = RegionTransition.parseFrom(bytes);
816 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
817 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
818 break;
819 }
820 Thread.sleep(100);
821 }
822 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
823 + "\n" + region + "\n\n");
824
825
826
827
828
829 log("Done mocking data up in ZK");
830
831
832 log("Killing RS " + deadServerName);
833 hrsDead.abort("Killing for unit test");
834 log("RS " + deadServerName + " killed");
835
836
837
838 while (hrsDeadThread.isAlive()) {
839 Threads.sleep(10);
840 }
841 log("Starting up a new master");
842 master = cluster.startMaster().getMaster();
843 log("Waiting for master to be ready");
844 assertTrue(cluster.waitForActiveAndReadyMaster());
845 log("Master is ready");
846
847
848 while (master.getServerManager().areDeadServersInProgress()) {
849 Thread.sleep(10);
850 }
851
852
853 log("Waiting for no more RIT");
854 ZKAssign.blockUntilNoRIT(zkw);
855 log("No more RIT in ZK");
856 long now = System.currentTimeMillis();
857 long maxTime = 120000;
858 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
859 if (!done) {
860 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
861 LOG.info("rit=" + regionStates.getRegionsInTransition());
862 }
863 long elapsed = System.currentTimeMillis() - now;
864 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
865 elapsed < maxTime);
866 log("No more RIT in RIT map, doing final test verification");
867
868
869 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
870 now = System.currentTimeMillis();
871 maxTime = 30000;
872 for (JVMClusterUtil.RegionServerThread rst :
873 cluster.getRegionServerThreads()) {
874 try {
875 HRegionServer rs = rst.getRegionServer();
876 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
877 elapsed = System.currentTimeMillis() - now;
878 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
879 if (rs.isAborted() || rs.isStopped()) {
880
881 break;
882 }
883 Thread.sleep(100);
884 }
885 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
886 } catch (RegionServerStoppedException e) {
887 LOG.info("Got RegionServerStoppedException", e);
888 }
889 }
890
891
892 for (HRegionInfo hri : regionsThatShouldBeOnline) {
893 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
894 onlineRegions.contains(hri));
895 }
896
897
898 for (HRegionInfo hri : regionsThatShouldBeOffline) {
899 assertFalse(onlineRegions.contains(hri));
900 }
901
902 log("Done with verification, all passed, shutting down cluster");
903
904
905 TEST_UTIL.shutdownMiniCluster();
906 }
907
908
909
910
911 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
912 throws IOException {
913 List<HRegionInfo> tmpOnlineRegions =
914 ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
915 Iterator<HRegionInfo> itr = regions.iterator();
916 while (itr.hasNext()) {
917 HRegionInfo tmp = itr.next();
918 if (!tmpOnlineRegions.contains(tmp)) {
919 itr.remove();
920 }
921 }
922 }
923
924 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
925 final HTableDescriptor htd)
926 throws IOException {
927 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
928
929
930
931
932
933 HRegion.closeHRegion(r);
934 return r;
935 }
936
937
938
939
940 private void log(String string) {
941 LOG.info("\n\n" + string + " \n\n");
942 }
943
944 @Test (timeout=180000)
945 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
946 throws Exception {
947 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
948 final int NUM_MASTERS = 1;
949 final int NUM_RS = 2;
950
951
952 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
953 Configuration conf = TEST_UTIL.getConfiguration();
954 conf.setInt("hbase.master.info.port", -1);
955 conf.setBoolean("hbase.assignment.usezk", true);
956
957 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
958 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
959
960
961 List<RegionServerThread> regionServerThreads =
962 cluster.getRegionServerThreads();
963 Region metaRegion = null;
964 HRegionServer metaRegionServer = null;
965 for (RegionServerThread regionServerThread : regionServerThreads) {
966 HRegionServer regionServer = regionServerThread.getRegionServer();
967 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
968 regionServer.abort("");
969 if (null != metaRegion) {
970 metaRegionServer = regionServer;
971 break;
972 }
973 }
974
975 TEST_UTIL.shutdownMiniHBaseCluster();
976
977
978 ZooKeeperWatcher zkw =
979 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
980 metaRegion, metaRegionServer.getServerName());
981
982 LOG.info("Staring cluster for second time");
983 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
984
985 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
986 while (!master.isInitialized()) {
987 Thread.sleep(100);
988 }
989
990 log("Waiting for no more RIT");
991 ZKAssign.blockUntilNoRIT(zkw);
992
993 zkw.close();
994
995 TEST_UTIL.shutdownMiniCluster();
996 }
997
998
999
1000
1001 @Test(timeout=240000)
1002 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1003 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1004 final int NUM_MASTERS = 1;
1005 final int NUM_RS = 2;
1006
1007
1008 Configuration conf = HBaseConfiguration.create();
1009 conf.setBoolean("hbase.assignment.usezk", true);
1010
1011
1012 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1013 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1014 log("Cluster started");
1015
1016 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1017 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1018 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1019 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1020 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1021 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1022
1023 ServerName dstName = null;
1024 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1025 if (!tmpServer.equals(serverName)) {
1026 dstName = tmpServer;
1027 break;
1028 }
1029 }
1030
1031 assertTrue(dstName != null);
1032
1033 TEST_UTIL.shutdownMiniHBaseCluster();
1034
1035 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1036 ZKAssign.createNodeOffline(zkw, hri, dstName);
1037 Stat stat = new Stat();
1038 byte[] data =
1039 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1040 assertTrue(data != null);
1041 RegionTransition rt = RegionTransition.parseFrom(data);
1042 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1043
1044 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1045 + " and dst server=" + dstName);
1046
1047
1048 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1049
1050 while (true) {
1051 master = TEST_UTIL.getHBaseCluster().getMaster();
1052 if (master != null && master.isInitialized()) {
1053 ServerManager serverManager = master.getServerManager();
1054 if (!serverManager.areDeadServersInProgress()) {
1055 break;
1056 }
1057 }
1058 Thread.sleep(200);
1059 }
1060
1061
1062 master = TEST_UTIL.getHBaseCluster().getMaster();
1063 master.getAssignmentManager().waitForAssignment(hri);
1064 regionStates = master.getAssignmentManager().getRegionStates();
1065 RegionState newState = regionStates.getRegionState(hri);
1066 assertTrue(newState.isOpened());
1067 }
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077 @Test (timeout=240000)
1078 public void testSimpleMasterFailover() throws Exception {
1079
1080 final int NUM_MASTERS = 3;
1081 final int NUM_RS = 3;
1082
1083
1084 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1085
1086 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1087 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1088
1089
1090 List<MasterThread> masterThreads = cluster.getMasterThreads();
1091
1092
1093 for (MasterThread mt : masterThreads) {
1094 assertTrue(mt.isAlive());
1095 }
1096
1097
1098 int numActive = 0;
1099 int activeIndex = -1;
1100 ServerName activeName = null;
1101 HMaster active = null;
1102 for (int i = 0; i < masterThreads.size(); i++) {
1103 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1104 numActive++;
1105 activeIndex = i;
1106 active = masterThreads.get(activeIndex).getMaster();
1107 activeName = active.getServerName();
1108 }
1109 }
1110 assertEquals(1, numActive);
1111 assertEquals(NUM_MASTERS, masterThreads.size());
1112 LOG.info("Active master " + activeName);
1113
1114
1115 assertNotNull(active);
1116 ClusterStatus status = active.getClusterStatus();
1117 assertTrue(status.getMaster().equals(activeName));
1118 assertEquals(2, status.getBackupMastersSize());
1119 assertEquals(2, status.getBackupMasters().size());
1120
1121
1122 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1123 HMaster master = cluster.getMaster(backupIndex);
1124 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1125 cluster.stopMaster(backupIndex, false);
1126 cluster.waitOnMaster(backupIndex);
1127
1128
1129 for (int i = 0; i < masterThreads.size(); i++) {
1130 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1131 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1132 activeIndex = i;
1133 active = masterThreads.get(activeIndex).getMaster();
1134 }
1135 }
1136 assertEquals(1, numActive);
1137 assertEquals(2, masterThreads.size());
1138 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1139 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1140 assertEquals(3, rsCount);
1141
1142
1143 assertNotNull(active);
1144 status = active.getClusterStatus();
1145 assertTrue(status.getMaster().equals(activeName));
1146 assertEquals(1, status.getBackupMastersSize());
1147 assertEquals(1, status.getBackupMasters().size());
1148
1149
1150 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1151 cluster.stopMaster(activeIndex, false);
1152 cluster.waitOnMaster(activeIndex);
1153
1154
1155 assertTrue(cluster.waitForActiveAndReadyMaster());
1156
1157 LOG.debug("\n\nVerifying backup master is now active\n");
1158
1159 assertEquals(1, masterThreads.size());
1160
1161
1162 active = masterThreads.get(0).getMaster();
1163 assertNotNull(active);
1164 status = active.getClusterStatus();
1165 ServerName mastername = status.getMaster();
1166 assertTrue(mastername.equals(active.getServerName()));
1167 assertTrue(active.isActiveMaster());
1168 assertEquals(0, status.getBackupMastersSize());
1169 assertEquals(0, status.getBackupMasters().size());
1170 int rss = status.getServersSize();
1171 LOG.info("Active master " + mastername.getServerName() + " managing " +
1172 rss + " region servers");
1173 assertEquals(3, rss);
1174
1175
1176 TEST_UTIL.shutdownMiniCluster();
1177 }
1178
1179
1180
1181
1182 @Test (timeout=180000)
1183 @SuppressWarnings("deprecation")
1184 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1185 final int NUM_MASTERS = 1;
1186 final int NUM_RS = 1;
1187
1188
1189 Configuration conf = HBaseConfiguration.create();
1190 conf.setBoolean("hbase.assignment.usezk", false);
1191
1192
1193 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1194 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1195 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1196 log("Cluster started");
1197
1198
1199 List<MasterThread> masterThreads = cluster.getMasterThreads();
1200 assertEquals(1, masterThreads.size());
1201
1202
1203 assertTrue(cluster.waitForActiveAndReadyMaster());
1204 HMaster master = masterThreads.get(0).getMaster();
1205 assertTrue(master.isActiveMaster());
1206 assertTrue(master.isInitialized());
1207
1208
1209 Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1210 onlineTable.close();
1211
1212 HTableDescriptor offlineTable = new HTableDescriptor(
1213 TableName.valueOf(Bytes.toBytes("offlineTable")));
1214 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1215
1216 FileSystem filesystem = FileSystem.get(conf);
1217 Path rootdir = FSUtils.getRootDir(conf);
1218 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1219 fstd.createTableDescriptor(offlineTable);
1220
1221 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1222 createRegion(hriOffline, rootdir, conf, offlineTable);
1223 MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1224
1225 log("Regions in hbase:meta and namespace have been created");
1226
1227
1228
1229 assertTrue(3 <= cluster.countServedRegions());
1230 HRegionInfo hriOnline = null;
1231 try (RegionLocator locator =
1232 TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1233 hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1234 }
1235 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1236 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1237
1238
1239
1240 RegionState oldState = regionStates.getRegionState(hriOnline);
1241 RegionState newState = new RegionState(
1242 hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1243 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1244
1245
1246
1247 oldState = new RegionState(hriOffline, State.OFFLINE);
1248 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1249 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1250
1251 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1252 createRegion(failedClose, rootdir, conf, offlineTable);
1253 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1254
1255 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1256 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1257 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1258
1259
1260 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1261 createRegion(failedOpen, rootdir, conf, offlineTable);
1262 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1263
1264
1265
1266 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1267 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1268 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1269
1270 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1271 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1272 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1273
1274
1275
1276 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1277 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1278 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1279
1280
1281
1282
1283 log("Aborting master");
1284 cluster.abortMaster(0);
1285 cluster.waitOnMaster(0);
1286 log("Master has aborted");
1287
1288
1289 log("Starting up a new master");
1290 master = cluster.startMaster().getMaster();
1291 log("Waiting for master to be ready");
1292 cluster.waitForActiveAndReadyMaster();
1293 log("Master is ready");
1294
1295
1296 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1297
1298
1299 regionStates = master.getAssignmentManager().getRegionStates();
1300
1301
1302 assertTrue(regionStates.isRegionOnline(hriOffline));
1303 assertTrue(regionStates.isRegionOnline(hriOnline));
1304 assertTrue(regionStates.isRegionOnline(failedClose));
1305 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1306 assertTrue(regionStates.isRegionOnline(failedOpen));
1307
1308 log("Done with verification, shutting down cluster");
1309
1310
1311 TEST_UTIL.shutdownMiniCluster();
1312 }
1313
1314
1315
1316
1317 @Test(timeout = 180000)
1318 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1319 final int NUM_MASTERS = 1;
1320 final int NUM_RS = 1;
1321
1322
1323 Configuration conf = HBaseConfiguration.create();
1324 conf.setBoolean("hbase.assignment.usezk", false);
1325 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1326 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1327 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1328 log("Cluster started");
1329
1330 log("Moving meta off the master");
1331 HMaster activeMaster = cluster.getMaster();
1332 HRegionServer rs = cluster.getRegionServer(0);
1333 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1334 .get(0).getRegionServer().getServerName();
1335 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1336 Bytes.toBytes(metaServerName.getServerName()));
1337 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1338 assertEquals("Meta should be assigned on expected regionserver",
1339 metaServerName, activeMaster.getMetaTableLocator()
1340 .getMetaRegionLocation(activeMaster.getZooKeeper()));
1341
1342
1343 log("Aborting master");
1344 activeMaster.abort("test-kill");
1345 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1346 log("Master has aborted");
1347
1348
1349 RegionState metaState =
1350 MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1351 assertEquals("hbase:meta should be onlined on RS",
1352 metaState.getServerName(), rs.getServerName());
1353 assertEquals("hbase:meta should be onlined on RS",
1354 metaState.getState(), State.OPEN);
1355
1356
1357 log("Starting up a new master");
1358 activeMaster = cluster.startMaster().getMaster();
1359 log("Waiting for master to be ready");
1360 cluster.waitForActiveAndReadyMaster();
1361 log("Master is ready");
1362
1363
1364 metaState =
1365 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1366 assertEquals("hbase:meta should be onlined on RS",
1367 metaState.getServerName(), rs.getServerName());
1368 assertEquals("hbase:meta should be onlined on RS",
1369 metaState.getState(), State.OPEN);
1370
1371
1372
1373
1374
1375 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1376 rs.getServerName(), State.PENDING_OPEN);
1377 Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1378 rs.removeFromOnlineRegions(meta, null);
1379 ((HRegion)meta).close();
1380
1381 log("Aborting master");
1382 activeMaster.abort("test-kill");
1383 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1384 log("Master has aborted");
1385
1386
1387 log("Starting up a new master");
1388 activeMaster = cluster.startMaster().getMaster();
1389 log("Waiting for master to be ready");
1390 cluster.waitForActiveAndReadyMaster();
1391 log("Master is ready");
1392
1393 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1394 log("Meta was assigned");
1395
1396 metaState =
1397 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1398 assertEquals("hbase:meta should be onlined on RS",
1399 metaState.getServerName(), rs.getServerName());
1400 assertEquals("hbase:meta should be onlined on RS",
1401 metaState.getState(), State.OPEN);
1402
1403
1404
1405
1406
1407 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1408 rs.getServerName(), State.PENDING_CLOSE);
1409
1410 log("Aborting master");
1411 activeMaster.abort("test-kill");
1412 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1413 log("Master has aborted");
1414
1415 rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1416 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1417
1418
1419 log("Starting up a new master");
1420 activeMaster = cluster.startMaster().getMaster();
1421 log("Waiting for master to be ready");
1422 cluster.waitForActiveAndReadyMaster();
1423 log("Master is ready");
1424
1425 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1426 log("Meta was assigned");
1427
1428 rs.getRSRpcServices().closeRegion(
1429 null,
1430 RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1431 HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1432
1433
1434 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1435 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1436
1437 log("Aborting master");
1438 activeMaster.stop("test-kill");
1439
1440 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1441 log("Master has aborted");
1442
1443
1444 log("Starting up a new master");
1445 activeMaster = cluster.startMaster().getMaster();
1446 log("Waiting for master to be ready");
1447 cluster.waitForActiveAndReadyMaster();
1448 log("Master is ready");
1449
1450 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1451 log("Meta was assigned");
1452
1453
1454 TEST_UTIL.shutdownMiniCluster();
1455 }
1456 }
1457