1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.TreeSet;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.Abortable;
39  import org.apache.hadoop.hbase.ClusterStatus;
40  import org.apache.hadoop.hbase.HBaseConfiguration;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.testclassification.LargeTests;
47  import org.apache.hadoop.hbase.MetaTableAccessor;
48  import org.apache.hadoop.hbase.MiniHBaseCluster;
49  import org.apache.hadoop.hbase.RegionTransition;
50  import org.apache.hadoop.hbase.ServerName;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.TableStateManager;
53  import org.apache.hadoop.hbase.client.RegionLocator;
54  import org.apache.hadoop.hbase.client.Table;
55  import org.apache.hadoop.hbase.executor.EventType;
56  import org.apache.hadoop.hbase.master.RegionState.State;
57  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58  import org.apache.hadoop.hbase.protobuf.RequestConverter;
59  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60  import org.apache.hadoop.hbase.regionserver.HRegion;
61  import org.apache.hadoop.hbase.regionserver.HRegionServer;
62  import org.apache.hadoop.hbase.regionserver.Region;
63  import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65  import org.apache.hadoop.hbase.util.Bytes;
66  import org.apache.hadoop.hbase.util.FSTableDescriptors;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.util.JVMClusterUtil;
69  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71  import org.apache.hadoop.hbase.util.Threads;
72  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74  import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76  import org.apache.zookeeper.data.Stat;
77  import org.junit.Test;
78  import org.junit.experimental.categories.Category;
79  
80  @Category(LargeTests.class)
81  public class TestMasterFailover {
82    private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
83  
84    /**
85     * Complex test of master failover that tests as many permutations of the
86     * different possible states that regions in transition could be in within ZK.
87     * <p>
88     * This tests the proper handling of these states by the failed-over master
89     * and includes a thorough testing of the timeout code as well.
90     * <p>
91     * Starts with a single master and three regionservers.
92     * <p>
93     * Creates two tables, enabledTable and disabledTable, each containing 5
94     * regions.  The disabledTable is then disabled.
95     * <p>
96     * After reaching steady-state, the master is killed.  We then mock several
97     * states in ZK.
98     * <p>
99     * After mocking them, we will startup a new master which should become the
100    * active master and also detect that it is a failover.  The primary test
101    * passing condition will be that all regions of the enabled table are
102    * assigned and all the regions of the disabled table are not assigned.
103    * <p>
104    * The different scenarios to be tested are below:
105    * <p>
106    * <b>ZK State:  OFFLINE</b>
107    * <p>A node can get into OFFLINE state if</p>
108    * <ul>
109    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
110    * <li>The Master is assigning the region to a RS before it sends RPC
111    * </ul>
112    * <p>We will mock the scenarios</p>
113    * <ul>
114    * <li>Master has assigned an enabled region but RS failed so a region is
115    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
116  * <li>This single mock covers both causes above: either way the region ends up
117  *     unassigned and sitting in ZK as OFFLINE</li>
117    * </ul>
118    * <p>
119    * <b>ZK State:  CLOSING</b>
120    * <p>A node can get into CLOSING state if</p>
121    * <ul>
122    * <li>An RS has begun to close a region
123    * </ul>
124    * <p>We will mock the scenarios</p>
125    * <ul>
126    * <li>Region of enabled table was being closed but did not complete
127    * <li>Region of disabled table was being closed but did not complete
128    * </ul>
129    * <p>
130    * <b>ZK State:  CLOSED</b>
131    * <p>A node can get into CLOSED state if</p>
132    * <ul>
133    * <li>An RS has completed closing a region but not acknowledged by master yet
134    * </ul>
135    * <p>We will mock the scenarios</p>
136    * <ul>
137    * <li>Region of a table that should be enabled was closed on an RS
138    * <li>Region of a table that should be disabled was closed on an RS
139    * </ul>
140    * <p>
141    * <b>ZK State:  OPENING</b>
142    * <p>A node can get into OPENING state if</p>
143    * <ul>
144    * <li>An RS has begun to open a region
145    * </ul>
146    * <p>We will mock the scenarios</p>
147    * <ul>
148    * <li>RS was opening a region of enabled table but never finishes
149    * </ul>
150    * <p>
151    * <b>ZK State:  OPENED</b>
152    * <p>A node can get into OPENED state if</p>
153    * <ul>
154    * <li>An RS has finished opening a region but not acknowledged by master yet
155    * </ul>
156    * <p>We will mock the scenarios</p>
157    * <ul>
158    * <li>Region of a table that should be enabled was opened on an RS
159    * <li>Region of a table that should be disabled was opened on an RS
160    * </ul>
161    * @throws Exception
162    */
163   @Test (timeout=240000)
164   public void testMasterFailoverWithMockedRIT() throws Exception {
165 
166     final int NUM_MASTERS = 1;
167     final int NUM_RS = 3;
168 
169     // Create config to use for this cluster
170     Configuration conf = HBaseConfiguration.create();
171     conf.setBoolean("hbase.assignment.usezk", true);
172 
173     // Start the cluster
174     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
175     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
176     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
177     log("Cluster started");
178 
179     // Create a ZKW to use in the test
180     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
181 
182     // get all the master threads
183     List<MasterThread> masterThreads = cluster.getMasterThreads();
184     assertEquals(1, masterThreads.size());
185 
186     // only one master thread, let's wait for it to be initialized
187     assertTrue(cluster.waitForActiveAndReadyMaster());
188     HMaster master = masterThreads.get(0).getMaster();
189     assertTrue(master.isActiveMaster());
190     assertTrue(master.isInitialized());
191 
192     // disable load balancing on this master
193     master.balanceSwitch(false);
194 
195     // create two tables in META, each with 10 regions
196     byte [] FAMILY = Bytes.toBytes("family");
197     byte [][] SPLIT_KEYS = new byte [][] {
198         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
199         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
200         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
201         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
202     };
203 
204     byte [] enabledTable = Bytes.toBytes("enabledTable");
205     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
206     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
207 
208     FileSystem filesystem = FileSystem.get(conf);
209     Path rootdir = FSUtils.getRootDir(conf);
210     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
211     // Write the .tableinfo
212     fstd.createTableDescriptor(htdEnabled);
213 
214     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
215     createRegion(hriEnabled, rootdir, conf, htdEnabled);
216 
217     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
218         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
219 
220     TableName disabledTable = TableName.valueOf("disabledTable");
221     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
222     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
223     // Write the .tableinfo
224     fstd.createTableDescriptor(htdDisabled);
225     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
226     createRegion(hriDisabled, rootdir, conf, htdDisabled);
227     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
228         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
229 
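        // Unlike the two mocked tables above, this table is created through the normal client
        // path with a single split key ("m"), so it comes up with exactly two served regions
        // that are later faked as a merge in progress.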
230     TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
231     TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
232 
233     log("Regions in hbase:meta and namespace have been created");
234 
235     // at this point we expect at least 4 regions to be assigned out
236     // (meta and namespace, + 2 merging regions)
237     assertTrue(4 <= cluster.countServedRegions());
238 
239     // Move merging regions to the same region server
240     AssignmentManager am = master.getAssignmentManager();
241     RegionStates regionStates = am.getRegionStates();
242     List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
243     assertEquals(2, mergingRegions.size());
244     HRegionInfo a = mergingRegions.get(0);
245     HRegionInfo b = mergingRegions.get(1);
246     HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
247     ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
248     ServerName serverB = regionStates.getRegionServerOfRegion(b);
249     if (!serverB.equals(mergingServer)) {
250       RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
251       am.balance(plan);
252       assertTrue(am.waitForAssignment(b));
253     }
254 
255     // Let's just assign everything to first RS
256     HRegionServer hrs = cluster.getRegionServer(0);
257     ServerName serverName = hrs.getServerName();
258     HRegionInfo closingRegion = enabledRegions.remove(0);
259     // we'll need some regions to already be assigned out properly on live RS
260     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
261     enabledAndAssignedRegions.add(enabledRegions.remove(0));
262     enabledAndAssignedRegions.add(enabledRegions.remove(0));
263     enabledAndAssignedRegions.add(closingRegion);
264 
265     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
266     disabledAndAssignedRegions.add(disabledRegions.remove(0));
267     disabledAndAssignedRegions.add(disabledRegions.remove(0));
268 
269     // now actually assign them
270     for (HRegionInfo hri : enabledAndAssignedRegions) {
271       master.assignmentManager.addPlan(hri.getEncodedName(),
272           new RegionPlan(hri, null, serverName));
273       master.assignRegion(hri);
274     }
275 
276     for (HRegionInfo hri : disabledAndAssignedRegions) {
277       master.assignmentManager.addPlan(hri.getEncodedName(),
278           new RegionPlan(hri, null, serverName));
279       master.assignRegion(hri);
280     }
281 
282     // wait for no more RIT
283     log("Waiting for assignment to finish");
284     ZKAssign.blockUntilNoRIT(zkw);
285     log("Assignment completed");
286 
287     // Stop the master
288     log("Aborting master");
289     cluster.abortMaster(0);
290     cluster.waitOnMaster(0);
291     log("Master has aborted");
292 
293     /*
294      * Now, let's start mocking up some weird states as described in the method
295      * javadoc.
296      */
297 
298     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
299     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
300 
301     log("Beginning to mock scenarios");
302 
303     // Disable the disabledTable in ZK
304     TableStateManager zktable = new ZKTableStateManager(zkw);
305     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
306 
307     /*
308      *  ZK = OFFLINE
309      */
310 
311     // Region that should be assigned but is not and is in ZK as OFFLINE
312     // Cause: This can happen if the master crashed after creating the znode but before sending the
313     //  request to the region server
314     HRegionInfo region = enabledRegions.remove(0);
315     regionsThatShouldBeOnline.add(region);
316     ZKAssign.createNodeOffline(zkw, region, serverName);
317 
318     /*
319      * ZK = CLOSING
320      */
321     // Cause: Same as offline.
322     regionsThatShouldBeOnline.add(closingRegion);
323     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
324 
325     /*
326      * ZK = CLOSED
327      */
328 
329     // Region of enabled table closed but not acked by the master
330     // Cause: Master was down while the region server updated the ZK status.
331     region = enabledRegions.remove(0);
332     regionsThatShouldBeOnline.add(region);
333     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
334     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
335 
336     // Region of disabled table closed but not ack
337     region = disabledRegions.remove(0);
338     regionsThatShouldBeOffline.add(region);
339     version = ZKAssign.createNodeClosing(zkw, region, serverName);
340     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
341 
342     /*
343      * ZK = OPENED
344      */
345 
346     // Region of enabled table was opened on RS
347     // Cause: same as the OFFLINE case above
348     region = enabledRegions.remove(0);
349     regionsThatShouldBeOnline.add(region);
350     ZKAssign.createNodeOffline(zkw, region, serverName);
351     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
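        // Poll the region's unassigned znode until the RS has transitioned it to
        // RS_ZK_REGION_OPENED: the open completes on the RS but is never acknowledged by a
        // master, which is exactly the state the failed-over master must handle.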
352     while (true) {
353       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
354       RegionTransition rt = RegionTransition.parseFrom(bytes);
355       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
356         break;
357       }
358       Thread.sleep(100);
359     }
360 
361     // Region of disabled table was opened on RS
362     // Cause: Master failed while updating the status for this region server.
363     region = disabledRegions.remove(0);
364     regionsThatShouldBeOffline.add(region);
365     ZKAssign.createNodeOffline(zkw, region, serverName);
366     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
367     while (true) {
368       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
369       RegionTransition rt = RegionTransition.parseFrom(bytes);
370       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
371         break;
372       }
373       Thread.sleep(100);
374     }
375 
376     /*
377      * ZK = MERGING
378      */
379 
380     // Regions of table of merging regions
381     // Cause: Master was down while merging was going on
382     hrs.getCoordinatedStateManager().
383       getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
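        // This creates a merge transition znode for the new (merged) region in ZK, leaving the
        // merge half-done; the failed-over master must recognize it as MERGING/MERGING_NEW
        // (verified below) rather than reassigning the parent regions.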
384 
385     /*
386      * ZK = NONE
387      */
388 
389     /*
390      * DONE MOCKING
391      */
392 
393     log("Done mocking data up in ZK");
394 
395     // Start up a new master
396     log("Starting up a new master");
397     master = cluster.startMaster().getMaster();
398     log("Waiting for master to be ready");
399     cluster.waitForActiveAndReadyMaster();
400     log("Master is ready");
401 
402     // Get new region states since master restarted
403     regionStates = master.getAssignmentManager().getRegionStates();
404     // Merging region should remain merging
405     assertTrue(regionStates.isRegionInState(a, State.MERGING));
406     assertTrue(regionStates.isRegionInState(b, State.MERGING));
407     assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
408     // Now remove the faked merging znode, merging regions should be
409     // offlined automatically, otherwise it is a bug in AM.
410     ZKAssign.deleteNodeFailSilent(zkw, newRegion);
411 
412     // Failover should be completed, now wait for no RIT
413     log("Waiting for no more RIT");
414     ZKAssign.blockUntilNoRIT(zkw);
415     log("No more RIT in ZK, now doing final test verification");
416 
417     // Grab all the regions that are online across RSs
418     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
419     for (JVMClusterUtil.RegionServerThread rst :
420       cluster.getRegionServerThreads()) {
421       onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
422         rst.getRegionServer().getRSRpcServices()));
423     }
424 
425     // Now, everything that should be online should be online
426     for (HRegionInfo hri : regionsThatShouldBeOnline) {
427       assertTrue(onlineRegions.contains(hri));
428     }
429 
430     // Everything that should be offline should not be online
431     for (HRegionInfo hri : regionsThatShouldBeOffline) {
432       if (onlineRegions.contains(hri)) {
433        LOG.debug(hri);
434       }
435       assertFalse(onlineRegions.contains(hri));
436     }
437 
438     log("Done with verification, all passed, shutting down cluster");
439 
440     // Done, shutdown the cluster
441     TEST_UTIL.shutdownMiniCluster();
442   }
443 
444   /**
445    * Complex test of master failover that tests as many permutations of the
446    * different possible states that regions in transition could be in within ZK
447    * pointing to an RS that has died while no master is around to process it.
448    * <p>
449    * This tests the proper handling of these states by the failed-over master
450    * and includes a thorough testing of the timeout code as well.
451    * <p>
452    * Starts with a single master and two regionservers.
453    * <p>
454    * Creates two tables, enabledTable and disabledTable, each containing 5
455    * regions.  The disabledTable is then disabled.
456    * <p>
457    * After reaching steady-state, the master is killed.  We then mock several
458    * states in ZK.  And one of the RS will be killed.
459    * <p>
460    * After mocking them and killing an RS, we will startup a new master which
461    * should become the active master and also detect that it is a failover.  The
462    * primary test passing condition will be that all regions of the enabled
463    * table are assigned and all the regions of the disabled table are not
464    * assigned.
465    * <p>
466    * The different scenarios to be tested are below:
467    * <p>
468    * <b>ZK State:  CLOSING</b>
469    * <p>A node can get into CLOSING state if</p>
470    * <ul>
471    * <li>An RS has begun to close a region
472    * </ul>
473    * <p>We will mock the scenarios</p>
474    * <ul>
475    * <li>Region was being closed but the RS died before finishing the close
476    * </ul>
477    * <b>ZK State:  OPENED</b>
478    * <p>A node can get into OPENED state if</p>
479    * <ul>
480    * <li>An RS has finished opening a region but not acknowledged by master yet
481    * </ul>
482    * <p>We will mock the scenarios</p>
483    * <ul>
484    * <li>Region of a table that should be enabled was opened by a now-dead RS
485    * <li>Region of a table that should be disabled was opened by a now-dead RS
486    * </ul>
487    * <p>
488    * <b>ZK State:  NONE</b>
489    * <p>A region could not have a transition node if</p>
490    * <ul>
491    * <li>The server hosting the region died and no master processed it
492    * </ul>
493    * <p>We will mock the scenarios</p>
494    * <ul>
495    * <li>Region of enabled table was on a dead RS that was not yet processed
496    * <li>Region of disabled table was on a dead RS that was not yet processed
497    * </ul>
498    * @throws Exception
499    */
500   @Test (timeout=180000)
501   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
502 
503     final int NUM_MASTERS = 1;
504     final int NUM_RS = 2;
505 
506     // Create and start the cluster
507     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
508     Configuration conf = TEST_UTIL.getConfiguration();
509     conf.setBoolean("hbase.assignment.usezk", true);
510 
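        // Let the master finish startup once a single RS has checked in; one of the two
        // region servers is hard-killed later in this test, so a restarted master cannot
        // count on both reporting in.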
511     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
512     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
513     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
514     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
515     log("Cluster started");
516 
517     // Create a ZKW to use in the test
518     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
519         "unittest", new Abortable() {
520 
521           @Override
522           public void abort(String why, Throwable e) {
523             LOG.error("Fatal ZK Error: " + why, e);
524             org.junit.Assert.fail("Fatal ZK error: " + why);
525           }
526 
527           @Override
528           public boolean isAborted() {
529             return false;
530           }
531 
532     });
533 
534     // get all the master threads
535     List<MasterThread> masterThreads = cluster.getMasterThreads();
536     assertEquals(1, masterThreads.size());
537 
538     // only one master thread, let's wait for it to be initialized
539     assertTrue(cluster.waitForActiveAndReadyMaster());
540     HMaster master = masterThreads.get(0).getMaster();
541     assertTrue(master.isActiveMaster());
542     assertTrue(master.isInitialized());
543 
544     // disable load balancing on this master
545     master.balanceSwitch(false);
546 
547     // create two tables in META, each with 30 regions
548     byte [] FAMILY = Bytes.toBytes("family");
549     byte[][] SPLIT_KEYS =
550         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
551 
552     byte [] enabledTable = Bytes.toBytes("enabledTable");
553     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
554     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
555     FileSystem filesystem = FileSystem.get(conf);
556     Path rootdir = FSUtils.getRootDir(conf);
557     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
558     // Write the .tableinfo
559     fstd.createTableDescriptor(htdEnabled);
560     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
561         null, null);
562     createRegion(hriEnabled, rootdir, conf, htdEnabled);
563 
564     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
565         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
566 
567     TableName disabledTable =
568         TableName.valueOf("disabledTable");
569     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
570     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
571     // Write the .tableinfo
572     fstd.createTableDescriptor(htdDisabled);
573     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
574     createRegion(hriDisabled, rootdir, conf, htdDisabled);
575 
576     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
577         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
578 
579     log("Regions in hbase:meta and namespace have been created");
580 
581     // at this point we expect at least 2 regions to be assigned out (meta and namespace)
582     assertTrue(2 <= cluster.countServedRegions());
583 
584     // The first RS will stay online
585     List<RegionServerThread> regionservers =
586       cluster.getRegionServerThreads();
587     HRegionServer hrs = regionservers.get(0).getRegionServer();
588 
589     // The second RS is going to be hard-killed
590     RegionServerThread hrsDeadThread = regionservers.get(1);
591     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
592     ServerName deadServerName = hrsDead.getServerName();
593 
594     // we'll need some regions to already be assigned out properly on live RS
595     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
596     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
597     enabledRegions.removeAll(enabledAndAssignedRegions);
598     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
600     disabledRegions.removeAll(disabledAndAssignedRegions);
601 
602     // now actually assign them
603     for (HRegionInfo hri : enabledAndAssignedRegions) {
604       master.assignmentManager.addPlan(hri.getEncodedName(),
605           new RegionPlan(hri, null, hrs.getServerName()));
606       master.assignRegion(hri);
607     }
608     for (HRegionInfo hri : disabledAndAssignedRegions) {
609       master.assignmentManager.addPlan(hri.getEncodedName(),
610           new RegionPlan(hri, null, hrs.getServerName()));
611       master.assignRegion(hri);
612     }
613 
614     log("Waiting for assignment to finish");
615     ZKAssign.blockUntilNoRIT(zkw);
616     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
617     log("Assignment completed");
618 
619     assertTrue(" Table must be enabled.", master.getAssignmentManager()
620         .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
621         ZooKeeperProtos.Table.State.ENABLED));
622     // we also need regions assigned out on the dead server
623     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
624     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
625     enabledRegions.removeAll(enabledAndOnDeadRegions);
626     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
628     disabledRegions.removeAll(disabledAndOnDeadRegions);
629 
630     // set region plan to server to be killed and trigger assign
631     for (HRegionInfo hri : enabledAndOnDeadRegions) {
632       master.assignmentManager.addPlan(hri.getEncodedName(),
633           new RegionPlan(hri, null, deadServerName));
634       master.assignRegion(hri);
635     }
636     for (HRegionInfo hri : disabledAndOnDeadRegions) {
637       master.assignmentManager.addPlan(hri.getEncodedName(),
638           new RegionPlan(hri, null, deadServerName));
639       master.assignRegion(hri);
640     }
641 
642     // wait for no more RIT
643     log("Waiting for assignment to finish");
644     ZKAssign.blockUntilNoRIT(zkw);
645     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
646     log("Assignment completed");
647 
648     // Because master.assignRegion(hri) could fail to assign a region to the specified RS,
649     // we need to make sure the regions actually ended up on the expected RS
650     verifyRegionLocation(hrs, enabledAndAssignedRegions);
651     verifyRegionLocation(hrs, disabledAndAssignedRegions);
652     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
653     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
654 
655     assertTrue("Didn't get enough regions of enabledTable on live RS.",
656       enabledAndAssignedRegions.size() >= 2);
657     assertTrue("Didn't get enough regions of disabledTable on live RS.",
658       disabledAndAssignedRegions.size() >= 2);
659     assertTrue("Didn't get enough regions of enabledTable on dead RS.",
660       enabledAndOnDeadRegions.size() >= 2);
661     assertTrue("Didn't get enough regions of disabledTable on dead RS.",
662       disabledAndOnDeadRegions.size() >= 2);
663 
664     // Stop the master
665     log("Aborting master");
666     cluster.abortMaster(0);
667     cluster.waitOnMaster(0);
668     log("Master has aborted");
669 
670     /*
671      * Now, let's start mocking up some weird states as described in the method
672      * javadoc.
673      */
674 
675     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
676     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
677 
678     log("Beginning to mock scenarios");
679 
680     // Disable the disabledTable in ZK
681     TableStateManager zktable = new ZKTableStateManager(zkw);
682     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
683 
684     assertTrue("The enabled table should be identified on master failover.",
685         zktable.isTableState(TableName.valueOf("enabledTable"),
686           ZooKeeperProtos.Table.State.ENABLED));
687 
688     /*
689      * ZK = CLOSING
690      */
691 
692     // Region of enabled table being closed on dead RS but not finished
693     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
694     regionsThatShouldBeOnline.add(region);
695     ZKAssign.createNodeClosing(zkw, region, deadServerName);
696     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
697         region + "\n\n");
698 
699     // Region of disabled table being closed on dead RS but not finished
700     region = disabledAndOnDeadRegions.remove(0);
701     regionsThatShouldBeOffline.add(region);
702     ZKAssign.createNodeClosing(zkw, region, deadServerName);
703     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
704         region + "\n\n");
705 
706     /*
707      * ZK = CLOSED
708      */
709 
710     // Region of enabled on dead server gets closed but not ack'd by master
711     region = enabledAndOnDeadRegions.remove(0);
712     regionsThatShouldBeOnline.add(region);
713     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
714     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
715     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
716         region + "\n\n");
717 
718     // Region of disabled on dead server gets closed but not ack'd by master
719     region = disabledAndOnDeadRegions.remove(0);
720     regionsThatShouldBeOffline.add(region);
721     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
722     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
723     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
724         region + "\n\n");
725 
726     /*
727      * ZK = OPENING
728      */
729 
730     // RS was opening a region of enabled table then died
731     region = enabledRegions.remove(0);
732     regionsThatShouldBeOnline.add(region);
733     ZKAssign.createNodeOffline(zkw, region, deadServerName);
734     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
735     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
736         region + "\n\n");
737 
738     // RS was opening a region of disabled table then died
739     region = disabledRegions.remove(0);
740     regionsThatShouldBeOffline.add(region);
741     ZKAssign.createNodeOffline(zkw, region, deadServerName);
742     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
743     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
744         region + "\n\n");
745 
746     /*
747      * ZK = OPENED
748      */
749 
750     // Region of enabled table was opened on dead RS
751     region = enabledRegions.remove(0);
752     regionsThatShouldBeOnline.add(region);
753     ZKAssign.createNodeOffline(zkw, region, deadServerName);
754     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
755       hrsDead.getServerName(), region);
756     while (true) {
757       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
758       RegionTransition rt = RegionTransition.parseFrom(bytes);
759       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
760         break;
761       }
762       Thread.sleep(100);
763     }
764     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
765         region + "\n\n");
766 
767     // Region of disabled table was opened on dead RS
768     region = disabledRegions.remove(0);
769     regionsThatShouldBeOffline.add(region);
770     ZKAssign.createNodeOffline(zkw, region, deadServerName);
771     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
772       hrsDead.getServerName(), region);
773     while (true) {
774       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
775       RegionTransition rt = RegionTransition.parseFrom(bytes);
776       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
777         break;
778       }
779       Thread.sleep(100);
780     }
781     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
782         region + "\n\n");
783 
784     /*
785      * ZK = NONE
786      */
787 
788     // Region of enabled table was open at steady-state on dead RS
789     region = enabledRegions.remove(0);
790     regionsThatShouldBeOnline.add(region);
791     ZKAssign.createNodeOffline(zkw, region, deadServerName);
792     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
793       hrsDead.getServerName(), region);
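        // Wait for the RS to report OPENED, then delete the znode so the region looks like
        // it was open at steady state (no transition node at all) on the soon-to-die RS.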
794     while (true) {
795       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
796       RegionTransition rt = RegionTransition.parseFrom(bytes);
797       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
798         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
799         LOG.debug("DELETED " + rt);
800         break;
801       }
802       Thread.sleep(100);
803     }
804     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
805         + "\n" + region + "\n\n");
806 
807     // Region of disabled table was open at steady-state on dead RS
808     region = disabledRegions.remove(0);
809     regionsThatShouldBeOffline.add(region);
810     ZKAssign.createNodeOffline(zkw, region, deadServerName);
811     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
812       hrsDead.getServerName(), region);
813     while (true) {
814       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
815       RegionTransition rt = RegionTransition.parseFrom(bytes);
816       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
817         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
818         break;
819       }
820       Thread.sleep(100);
821     }
822     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
823       + "\n" + region + "\n\n");
824 
825     /*
826      * DONE MOCKING
827      */
828 
829     log("Done mocking data up in ZK");
830 
831     // Kill the RS that had a hard death
832     log("Killing RS " + deadServerName);
833     hrsDead.abort("Killing for unit test");
834     log("RS " + deadServerName + " killed");
835 
836     // Start up a new master.  Wait until regionserver is completely down
837     // before starting new master because of hbase-4511.
838     while (hrsDeadThread.isAlive()) {
839       Threads.sleep(10);
840     }
841     log("Starting up a new master");
842     master = cluster.startMaster().getMaster();
843     log("Waiting for master to be ready");
844     assertTrue(cluster.waitForActiveAndReadyMaster());
845     log("Master is ready");
846 
847     // Wait until SSH processing completed for dead server.
848     while (master.getServerManager().areDeadServersInProgress()) {
849       Thread.sleep(10);
850     }
851 
852     // Failover should be completed, now wait for no RIT
853     log("Waiting for no more RIT");
854     ZKAssign.blockUntilNoRIT(zkw);
855     log("No more RIT in ZK");
856     long now = System.currentTimeMillis();
857     long maxTime = 120000;
858     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
859     if (!done) {
860       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
861       LOG.info("rit=" + regionStates.getRegionsInTransition());
862     }
863     long elapsed = System.currentTimeMillis() - now;
864     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
865       elapsed < maxTime);
866     log("No more RIT in RIT map, doing final test verification");
867 
868     // Grab all the regions that are online across RSs
869     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
870     now = System.currentTimeMillis();
871     maxTime = 30000;
872     for (JVMClusterUtil.RegionServerThread rst :
873         cluster.getRegionServerThreads()) {
874       try {
875         HRegionServer rs = rst.getRegionServer();
876         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
877           elapsed = System.currentTimeMillis() - now;
878           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
879           if (rs.isAborted() || rs.isStopped()) {
880             // This region server is stopped, skip it.
881             break;
882           }
883           Thread.sleep(100);
884         }
885         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
886       } catch (RegionServerStoppedException e) {
887         LOG.info("Got RegionServerStoppedException", e);
888       }
889     }
890 
891     // Now, everything that should be online should be online
892     for (HRegionInfo hri : regionsThatShouldBeOnline) {
893       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
894         onlineRegions.contains(hri));
895     }
896 
897     // Everything that should be offline should not be online
898     for (HRegionInfo hri : regionsThatShouldBeOffline) {
899       assertFalse(onlineRegions.contains(hri));
900     }
901 
902     log("Done with verification, all passed, shutting down cluster");
903 
904     // Done, shutdown the cluster
905     TEST_UTIL.shutdownMiniCluster();
906   }
907 
908   /**
909    * Remove from the given list any regions that are not online on the expected region server.
910    */
911   private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
912       throws IOException {
913     List<HRegionInfo> tmpOnlineRegions =
914       ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
915     Iterator<HRegionInfo> itr = regions.iterator();
916     while (itr.hasNext()) {
917       HRegionInfo tmp = itr.next();
918       if (!tmpOnlineRegions.contains(tmp)) {
919         itr.remove();
920       }
921     }
922   }
923 
924   HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
925       final HTableDescriptor htd)
926   throws IOException {
927     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
928     // The above call to create a region also creates a WAL file.  Each
929     // log file creation also starts a thread that does syncing.  We need
930     // to close out this log, otherwise a running thread keeps trying to sync
931     // the file system continuously, which is ugly when DFS is taken away at the
932     // end of the test.
933     HRegion.closeHRegion(r);
934     return r;
935   }
936 
937   // TODO: Next test to add covers permutations where the regions in transition, or the
938   //       killed RS, are hosting ROOT and hbase:meta regions.
939 
940   private void log(String string) {
941     LOG.info("\n\n" + string + " \n\n");
942   }
943 
944   @Test (timeout=180000)
945   public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
946       throws Exception {
947     LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
948     final int NUM_MASTERS = 1;
949     final int NUM_RS = 2;
950 
951     // Start the cluster
952     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
953     Configuration conf = TEST_UTIL.getConfiguration();
954     conf.setInt("hbase.master.info.port", -1);
955     conf.setBoolean("hbase.assignment.usezk", true);
956 
957     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
958     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
959 
960     // Find regionserver carrying meta.
961     List<RegionServerThread> regionServerThreads =
962       cluster.getRegionServerThreads();
963     Region metaRegion = null;
964     HRegionServer metaRegionServer = null;
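        // Abort every regionserver as we walk the list, remembering the one that was
        // carrying hbase:meta; its location is forced back to OPENED in ZK below so the
        // restarted master sees meta as already open.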
965     for (RegionServerThread regionServerThread : regionServerThreads) {
966       HRegionServer regionServer = regionServerThread.getRegionServer();
967       metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
968       regionServer.abort("");
969       if (null != metaRegion) {
970         metaRegionServer = regionServer;
971         break;
972       }
973     }
974 
975     TEST_UTIL.shutdownMiniHBaseCluster();
976 
977     // Create a ZKW to use in the test
978     ZooKeeperWatcher zkw =
979       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
980           metaRegion, metaRegionServer.getServerName());
981 
982     LOG.info("Starting cluster for the second time");
983     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
984 
985     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
986     while (!master.isInitialized()) {
987       Thread.sleep(100);
988     }
989     // Failover should be completed, now wait for no RIT
990     log("Waiting for no more RIT");
991     ZKAssign.blockUntilNoRIT(zkw);
992 
993     zkw.close();
994     // Stop the cluster
995     TEST_UTIL.shutdownMiniCluster();
996   }
997 
998   /**
999    * Tests that a RIT in offline state gets re-assigned after a master restart
1000    */
1001   @Test(timeout=240000)
1002   public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1003     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1004     final int NUM_MASTERS = 1;
1005     final int NUM_RS = 2;
1006 
1007     // Create config to use for this cluster
1008     Configuration conf = HBaseConfiguration.create();
1009     conf.setBoolean("hbase.assignment.usezk", true);
1010 
1011     // Start the cluster
1012     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1013     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1014     log("Cluster started");
1015 
1016     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1017     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1018     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1019     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1020     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1021     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1022 
1023     ServerName dstName = null;
1024     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1025       if (!tmpServer.equals(serverName)) {
1026         dstName = tmpServer;
1027         break;
1028       }
1029     }
1030     // find a different server
1031     assertTrue(dstName != null);
1032     // shutdown HBase cluster
1033     TEST_UTIL.shutdownMiniHBaseCluster();
1034     // create a RIT node in offline state
1035     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1036     ZKAssign.createNodeOffline(zkw, hri, dstName);
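         // The offline RIT node points at dstName, a server other than the one that was hosting
         // the region, mimicking an assignment that was planned but never completed before shutdown.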
1037     Stat stat = new Stat();
1038     byte[] data =
1039         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1040     assertTrue(data != null);
1041     RegionTransition rt = RegionTransition.parseFrom(data);
1042     assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1043 
1044     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1045         + " and dst server=" + dstName);
1046 
1047     // start HBase cluster
1048     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1049 
1050     while (true) {
1051       master = TEST_UTIL.getHBaseCluster().getMaster();
1052       if (master != null && master.isInitialized()) {
1053         ServerManager serverManager = master.getServerManager();
1054         if (!serverManager.areDeadServersInProgress()) {
1055           break;
1056         }
1057       }
1058       Thread.sleep(200);
1059     }
1060 
1061     // verify the region is assigned
1062     master = TEST_UTIL.getHBaseCluster().getMaster();
1063     master.getAssignmentManager().waitForAssignment(hri);
1064     regionStates = master.getAssignmentManager().getRegionStates();
1065     RegionState newState = regionStates.getRegionState(hri);
1066     assertTrue(newState.isOpened());
1067   }
1068   
1069  /**
1070    * Simple test of master failover.
1071    * <p>
1072    * Starts with three masters.  Kills a backup master.  Then kills the active
1073    * master.  Ensures the final master becomes active and we can still contact
1074    * the cluster.
1075    * @throws Exception
1076    */
1077   @Test (timeout=240000)
1078   public void testSimpleMasterFailover() throws Exception {
1079 
1080     final int NUM_MASTERS = 3;
1081     final int NUM_RS = 3;
1082 
1083     // Start the cluster
1084     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1085 
1086     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1087     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1088 
1089     // get all the master threads
1090     List<MasterThread> masterThreads = cluster.getMasterThreads();
1091 
1092     // wait for each to come online
1093     for (MasterThread mt : masterThreads) {
1094       assertTrue(mt.isAlive());
1095     }
1096 
1097     // verify only one is the active master and we have right number
1098     int numActive = 0;
1099     int activeIndex = -1;
1100     ServerName activeName = null;
1101     HMaster active = null;
1102     for (int i = 0; i < masterThreads.size(); i++) {
1103       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1104         numActive++;
1105         activeIndex = i;
1106         active = masterThreads.get(activeIndex).getMaster();
1107         activeName = active.getServerName();
1108       }
1109     }
1110     assertEquals(1, numActive);
1111     assertEquals(NUM_MASTERS, masterThreads.size());
1112     LOG.info("Active master " + activeName);
1113 
1114     // Check that ClusterStatus reports the correct active and backup masters
1115     assertNotNull(active);
1116     ClusterStatus status = active.getClusterStatus();
1117     assertTrue(status.getMaster().equals(activeName));
1118     assertEquals(2, status.getBackupMastersSize());
1119     assertEquals(2, status.getBackupMasters().size());
1120 
1121     // attempt to stop one of the inactive masters
1122     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1123     HMaster master = cluster.getMaster(backupIndex);
1124     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1125     cluster.stopMaster(backupIndex, false);
1126     cluster.waitOnMaster(backupIndex);
1127 
1128     // Verify still one active master and it's the same
1129     for (int i = 0; i < masterThreads.size(); i++) {
1130       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1131         assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1132         activeIndex = i;
1133         active = masterThreads.get(activeIndex).getMaster();
1134       }
1135     }
1136     assertEquals(1, numActive);
1137     assertEquals(2, masterThreads.size());
1138     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1139     LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " region servers");
1140     assertEquals(3, rsCount);
1141 
1142     // Check that ClusterStatus reports the correct active and backup masters
1143     assertNotNull(active);
1144     status = active.getClusterStatus();
1145     assertTrue(status.getMaster().equals(activeName));
1146     assertEquals(1, status.getBackupMastersSize());
1147     assertEquals(1, status.getBackupMasters().size());
1148 
1149     // kill the active master
1150     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1151     cluster.stopMaster(activeIndex, false);
1152     cluster.waitOnMaster(activeIndex);
1153 
1154     // wait for an active master to show up and be ready
1155     assertTrue(cluster.waitForActiveAndReadyMaster());
1156 
1157     LOG.debug("\n\nVerifying backup master is now active\n");
1158     // should only have one master now
1159     assertEquals(1, masterThreads.size());
1160 
1161     // and it should be active
1162     active = masterThreads.get(0).getMaster();
1163     assertNotNull(active);
1164     status = active.getClusterStatus();
1165     ServerName mastername = status.getMaster();
1166     assertTrue(mastername.equals(active.getServerName()));
1167     assertTrue(active.isActiveMaster());
1168     assertEquals(0, status.getBackupMastersSize());
1169     assertEquals(0, status.getBackupMasters().size());
1170     int rss = status.getServersSize();
1171     LOG.info("Active master " + mastername.getServerName() + " managing " +
1172       rss +  " region servers");
1173     assertEquals(3, rss);
1174 
1175     // Stop the cluster
1176     TEST_UTIL.shutdownMiniCluster();
1177   }
1178 
1179   /**
1180    * Test region in pending_open/close and failed_open/close when master failover
1181    */
1182   @Test (timeout=180000)
1183   @SuppressWarnings("deprecation")
1184   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1185     final int NUM_MASTERS = 1;
1186     final int NUM_RS = 1;
1187 
1188     // Create config to use for this cluster
1189     Configuration conf = HBaseConfiguration.create();
1190     conf.setBoolean("hbase.assignment.usezk", false);
1191 
1192     // Start the cluster
1193     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1194     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1195     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1196     log("Cluster started");
1197 
1198     // get all the master threads
1199     List<MasterThread> masterThreads = cluster.getMasterThreads();
1200     assertEquals(1, masterThreads.size());
1201 
1202     // only one master thread, let's wait for it to be initialized
1203     assertTrue(cluster.waitForActiveAndReadyMaster());
1204     HMaster master = masterThreads.get(0).getMaster();
1205     assertTrue(master.isActiveMaster());
1206     assertTrue(master.isInitialized());
1207 
1208     // Create a table with a region online
1209     Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1210     onlineTable.close();
1211     // Create a table in META, so it has a region offline
1212     HTableDescriptor offlineTable = new HTableDescriptor(
1213       TableName.valueOf(Bytes.toBytes("offlineTable")));
1214     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1215 
1216     FileSystem filesystem = FileSystem.get(conf);
1217     Path rootdir = FSUtils.getRootDir(conf);
1218     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1219     fstd.createTableDescriptor(offlineTable);
1220 
1221     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1222     createRegion(hriOffline, rootdir, conf, offlineTable);
1223     MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1224 
1225     log("Regions in hbase:meta and namespace have been created");
1226 
1227     // at this point we expect at least 3 regions to be assigned out
1228     // (meta and namespace, + 1 online region)
1229     assertTrue(3 <= cluster.countServedRegions());
1230     HRegionInfo hriOnline = null;
1231     try (RegionLocator locator =
1232         TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1233       hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1234     }
1235     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1236     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1237 
1238     // Put the online region in pending_close. It is actually already opened.
1239     // This is to simulate that the region close RPC is not sent out before failover
1240     RegionState oldState = regionStates.getRegionState(hriOnline);
1241     RegionState newState = new RegionState(
1242       hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1243     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1244 
1245     // Put the offline region in pending_open. It is actually not opened yet.
1246     // This is to simulate that the region open RPC is not sent out before failover
1247     oldState = new RegionState(hriOffline, State.OFFLINE);
1248     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1249     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1250     
1251     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1252     createRegion(failedClose, rootdir, conf, offlineTable);
1253     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1254     
1255     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1256     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1257     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1258     
1259    
1260     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1261     createRegion(failedOpen, rootdir, conf, offlineTable);
1262     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1263     
1264     // Simulate a region transitioning to failed open when the region server reports the
1265     // transition as FAILED_OPEN
1266     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1267     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1268     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1269     
1270     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1271     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1272     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1273     
1274     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1275     // the region
1276     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1277     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1278     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1279     
1280     
1281 
1282     // Stop the master
1283     log("Aborting master");
1284     cluster.abortMaster(0);
1285     cluster.waitOnMaster(0);
1286     log("Master has aborted");
1287 
1288     // Start up a new master
1289     log("Starting up a new master");
1290     master = cluster.startMaster().getMaster();
1291     log("Waiting for master to be ready");
1292     cluster.waitForActiveAndReadyMaster();
1293     log("Master is ready");
1294 
1295     // Wait until there are no more regions in transition
1296     master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1297 
1298     // Re-fetch the region states from the new master instance
1299     regionStates = master.getAssignmentManager().getRegionStates();
1300 
1301     // All the regions we put into transition above (pending and failed open/close) should be online now
1302     assertTrue(regionStates.isRegionOnline(hriOffline));
1303     assertTrue(regionStates.isRegionOnline(hriOnline));
1304     assertTrue(regionStates.isRegionOnline(failedClose));
1305     assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1306     assertTrue(regionStates.isRegionOnline(failedOpen));
1307     
1308     log("Done with verification, shutting down cluster");
1309 
1310     // Done, shutdown the cluster
1311     TEST_UTIL.shutdownMiniCluster();
1312   }
1313 
1314   /**
1315    * Test that an hbase:meta region in transition is handled correctly across master failover
1316    */
1317   @Test(timeout = 180000)
1318   public void testMetaInTransitionWhenMasterFailover() throws Exception {
1319     final int NUM_MASTERS = 1;
1320     final int NUM_RS = 1;
1321 
1322     // Start the cluster
1323     Configuration conf = HBaseConfiguration.create();
1324     conf.setBoolean("hbase.assignment.usezk", false);
1325     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1326     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1327     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1328     log("Cluster started");
1329 
1330     log("Moving meta off the master");
1331     HMaster activeMaster = cluster.getMaster();
1332     HRegionServer rs = cluster.getRegionServer(0);
1333     ServerName metaServerName = cluster.getLiveRegionServerThreads()
1334       .get(0).getRegionServer().getServerName();
1335     activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1336       Bytes.toBytes(metaServerName.getServerName()));
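         // move() takes the encoded region name and the destination server name as bytes;
         // this moves hbase:meta off the master onto the first region server.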
1337     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1338     assertEquals("Meta should be assigned on expected regionserver",
1339       metaServerName, activeMaster.getMetaTableLocator()
1340         .getMetaRegionLocation(activeMaster.getZooKeeper()));
1341 
1342     // Now kill the master; meta should remain on the RS where we placed it.
1343     log("Aborting master");
1344     activeMaster.abort("test-kill");
1345     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1346     log("Master has aborted");
1347 
1348     // meta should remain where it was
1349     RegionState metaState =
1350       MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1351     assertEquals("hbase:meta should be hosted on the RS",
1352       rs.getServerName(), metaState.getServerName());
1353     assertEquals("hbase:meta should be in the OPEN state",
1354       State.OPEN, metaState.getState());
1355 
1356     // Start up a new master
1357     log("Starting up a new master");
1358     activeMaster = cluster.startMaster().getMaster();
1359     log("Waiting for master to be ready");
1360     cluster.waitForActiveAndReadyMaster();
1361     log("Master is ready");
1362 
1363     // ensure meta is still deployed on RS
1364     metaState =
1365       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1366     assertEquals("hbase:meta should be hosted on the RS",
1367       rs.getServerName(), metaState.getServerName());
1368     assertEquals("hbase:meta should be in the OPEN state",
1369       State.OPEN, metaState.getState());
1370 
1371     // Set the meta state in ZK to PENDING_OPEN and close the region on the RS,
1372     // then kill the master. This simulates an open RPC that was lost right before
1373     // the master failure: ZK claims meta is being opened on the RS, but the region
1374     // is not actually deployed anywhere, so the new master has to reassign it.
1375     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1376       rs.getServerName(), State.PENDING_OPEN);
1377     Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1378     rs.removeFromOnlineRegions(meta, null);
1379     ((HRegion)meta).close();
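         // Meta is now removed from the RS's online set and closed without going through the
         // master, so ZK still reports PENDING_OPEN while no server actually hosts meta.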
1380 
1381     log("Aborting master");
1382     activeMaster.abort("test-kill");
1383     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1384     log("Master has aborted");
1385 
1386     // Start up a new master
1387     log("Starting up a new master");
1388     activeMaster = cluster.startMaster().getMaster();
1389     log("Waiting for master to be ready");
1390     cluster.waitForActiveAndReadyMaster();
1391     log("Master is ready");
1392 
1393     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1394     log("Meta was assigned");
1395 
1396     metaState =
1397       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1398     assertEquals("hbase:meta should be hosted on the RS",
1399       rs.getServerName(), metaState.getServerName());
1400     assertEquals("hbase:meta should be in the OPEN state",
1401       State.OPEN, metaState.getState());
1402 
1403     // Set the meta state in ZK to PENDING_CLOSE, then kill the master. This simulates
1404     // a close RPC that was lost right before the master failure: meta is still deployed
1405     // on the RS even though ZK says it is being closed. It is then closed directly on
1406     // the RS below, so the new master has to notice the stale state and reassign meta.
1407     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1408       rs.getServerName(), State.PENDING_CLOSE);
1409 
1410     log("Aborting master");
1411     activeMaster.abort("test-kill");
1412     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1413     log("Master has aborted");
1414 
1415     rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1416       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
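         // With no master running, close meta directly on the RS so that the PENDING_CLOSE
         // state left in ZK no longer matches a deployed region; the new master must reassign it.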
1417 
1418     // Start up a new master
1419     log("Starting up a new master");
1420     activeMaster = cluster.startMaster().getMaster();
1421     log("Waiting for master to be ready");
1422     cluster.waitForActiveAndReadyMaster();
1423     log("Master is ready");
1424 
1425     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1426     log("Meta was assigned");
1427 
1428     rs.getRSRpcServices().closeRegion(
1429       null,
1430       RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1431         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1432 
1433     // Point the meta location at a dummy server to check that the master reassigns meta on restart
1434     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1435       ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
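         // The dummy server is not a live region server, so the recorded meta location is
         // stale; the restarted master should detect this and reassign meta, which the
         // no-regions-in-transition wait below relies on.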
1436 
1437     log("Stopping master");
1438     activeMaster.stop("test-kill");
1439 
1440     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1441     log("Master has stopped");
1442 
1443     // Start up a new master
1444     log("Starting up a new master");
1445     activeMaster = cluster.startMaster().getMaster();
1446     log("Waiting for master to be ready");
1447     cluster.waitForActiveAndReadyMaster();
1448     log("Master is ready");
1449 
1450     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1451     log("Meta was assigned");
1452 
1453     // Done, shutdown the cluster
1454     TEST_UTIL.shutdownMiniCluster();
1455   }
1456 }
1457