View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicInteger;
23  import java.util.concurrent.CountDownLatch;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.HTableDescriptor;
33  import org.apache.hadoop.hbase.MiniHBaseCluster;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.master.HMaster;
36  import org.apache.hadoop.hbase.procedure2.Procedure;
37  import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
38  import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
39  import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
40  import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
41  import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
42  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
43  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
44  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
45  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
46  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
47  import org.apache.hadoop.hbase.testclassification.LargeTests;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.FSUtils;
50  import org.apache.hadoop.hbase.util.ModifyRegionUtils;
51  import org.apache.hadoop.hbase.util.Threads;
52  import org.apache.hadoop.hdfs.MiniDFSCluster;
53  import org.apache.hadoop.hdfs.server.datanode.DataNode;
54  
55  import org.junit.After;
56  import org.junit.Before;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  import org.mockito.Mockito;
60  
61  import static org.junit.Assert.assertEquals;
62  import static org.junit.Assert.assertFalse;
63  import static org.junit.Assert.assertTrue;
64  import static org.junit.Assert.fail;
65  
66  @Category(LargeTests.class)
67  public class TestWALProcedureStoreOnHDFS {
68    private static final Log LOG = LogFactory.getLog(TestWALProcedureStoreOnHDFS.class);
69  
70    protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
71  
72    private WALProcedureStore store;
73  
74    private static void setupConf(Configuration conf) {
75      conf.setInt("dfs.replication", 3);
76      conf.setInt("dfs.namenode.replication.min", 3);
77  
78      // increase the value for slow test-env
79      conf.setInt("hbase.procedure.store.wal.wait.before.roll", 1000);
80      conf.setInt("hbase.procedure.store.wal.max.roll.retries", 5);
81      conf.setInt("hbase.procedure.store.wal.sync.failure.roll.max", 5);
82    }
83  
84    @Before
85    public void setup() throws Exception {
86      setupConf(UTIL.getConfiguration());
87      MiniDFSCluster dfs = UTIL.startMiniDFSCluster(3);
88  
89      Path logDir = new Path(new Path(dfs.getFileSystem().getUri()), "/test-logs");
90      store = ProcedureTestingUtility.createWalStore(
91        UTIL.getConfiguration(), dfs.getFileSystem(), logDir);
92      store.registerListener(new ProcedureStore.ProcedureStoreListener() {
93        @Override
94        public void postSync() {}
95  
96        @Override
97        public void abortProcess() {
98          LOG.fatal("Abort the Procedure Store");
99          store.stop(true);
100       }
101     });
102     store.start(8);
103     store.recoverLease();
104   }
105 
106   @After
107   public void tearDown() throws Exception {
108     store.stop(false);
109     UTIL.getDFSCluster().getFileSystem().delete(store.getLogDir(), true);
110 
111     try {
112       UTIL.shutdownMiniCluster();
113     } catch (Exception e) {
114       LOG.warn("failure shutting down cluster", e);
115     }
116   }
117 
118   @Test(timeout=60000, expected=RuntimeException.class)
119   public void testWalAbortOnLowReplication() throws Exception {
120     assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
121 
122     LOG.info("Stop DataNode");
123     UTIL.getDFSCluster().stopDataNode(0);
124     assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
125 
126     store.insert(new TestProcedure(1, -1), null);
127     for (long i = 2; store.isRunning(); ++i) {
128       assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
129       store.insert(new TestProcedure(i, -1), null);
130       Thread.sleep(100);
131     }
132     assertFalse(store.isRunning());
133     fail("The store.insert() should throw an exeption");
134   }
135 
136   @Test(timeout=60000)
137   public void testWalAbortOnLowReplicationWithQueuedWriters() throws Exception {
138     assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
139 
140     store.registerListener(new ProcedureStore.ProcedureStoreListener() {
141       @Override
142       public void postSync() {
143         Threads.sleepWithoutInterrupt(2000);
144       }
145 
146       @Override
147       public void abortProcess() {}
148     });
149 
150     final AtomicInteger reCount = new AtomicInteger(0);
151     Thread[] thread = new Thread[store.getNumThreads() * 2 + 1];
152     for (int i = 0; i < thread.length; ++i) {
153       final long procId = i + 1;
154       thread[i] = new Thread() {
155         public void run() {
156           try {
157             LOG.debug("[S] INSERT " + procId);
158             store.insert(new TestProcedure(procId, -1), null);
159             LOG.debug("[E] INSERT " + procId);
160           } catch (RuntimeException e) {
161             reCount.incrementAndGet();
162             LOG.debug("[F] INSERT " + procId + ": " + e.getMessage());
163           }
164         }
165       };
166       thread[i].start();
167     }
168 
169     Thread.sleep(1000);
170     LOG.info("Stop DataNode");
171     UTIL.getDFSCluster().stopDataNode(0);
172     assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
173 
174     for (int i = 0; i < thread.length; ++i) {
175       thread[i].join();
176     }
177 
178     assertFalse(store.isRunning());
179     assertTrue(reCount.toString(), reCount.get() >= store.getNumThreads() &&
180                                    reCount.get() < thread.length);
181   }
182 
183   @Test(timeout=60000)
184   public void testWalRollOnLowReplication() throws Exception {
185     int dnCount = 0;
186     store.insert(new TestProcedure(1, -1), null);
187     UTIL.getDFSCluster().restartDataNode(dnCount);
188     for (long i = 2; i < 100; ++i) {
189       store.insert(new TestProcedure(i, -1), null);
190       waitForNumReplicas(3);
191       Thread.sleep(100);
192       if ((i % 30) == 0) {
193         LOG.info("Restart Data Node");
194         UTIL.getDFSCluster().restartDataNode(++dnCount % 3);
195       }
196     }
197     assertTrue(store.isRunning());
198   }
199 
200   public void waitForNumReplicas(int numReplicas) throws Exception {
201     while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) {
202       Thread.sleep(100);
203     }
204 
205     for (int i = 0; i < numReplicas; ++i) {
206       for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) {
207         while (!dn.isDatanodeFullyStarted()) {
208           Thread.sleep(100);
209         }
210       }
211     }
212   }
213 }