1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master.procedure;
20
21 import java.io.IOException;
22 import java.util.concurrent.atomic.AtomicInteger;
23 import java.util.concurrent.CountDownLatch;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.fs.FileSystem;
29 import org.apache.hadoop.fs.Path;
30 import org.apache.hadoop.hbase.HBaseTestingUtility;
31 import org.apache.hadoop.hbase.HRegionInfo;
32 import org.apache.hadoop.hbase.HTableDescriptor;
33 import org.apache.hadoop.hbase.MiniHBaseCluster;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.master.HMaster;
36 import org.apache.hadoop.hbase.procedure2.Procedure;
37 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
38 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
39 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
40 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
41 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
42 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
43 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
44 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
45 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
46 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
47 import org.apache.hadoop.hbase.testclassification.LargeTests;
48 import org.apache.hadoop.hbase.util.Bytes;
49 import org.apache.hadoop.hbase.util.FSUtils;
50 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
51 import org.apache.hadoop.hbase.util.Threads;
52 import org.apache.hadoop.hdfs.MiniDFSCluster;
53 import org.apache.hadoop.hdfs.server.datanode.DataNode;
54
55 import org.junit.After;
56 import org.junit.Before;
57 import org.junit.Test;
58 import org.junit.experimental.categories.Category;
59 import org.mockito.Mockito;
60
61 import static org.junit.Assert.assertEquals;
62 import static org.junit.Assert.assertFalse;
63 import static org.junit.Assert.assertTrue;
64 import static org.junit.Assert.fail;
65
66 @Category(LargeTests.class)
67 public class TestWALProcedureStoreOnHDFS {
68 private static final Log LOG = LogFactory.getLog(TestWALProcedureStoreOnHDFS.class);
69
70 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
71
72 private WALProcedureStore store;
73
74 private static void setupConf(Configuration conf) {
75 conf.setInt("dfs.replication", 3);
76 conf.setInt("dfs.namenode.replication.min", 3);
77
78
79 conf.setInt("hbase.procedure.store.wal.wait.before.roll", 1000);
80 conf.setInt("hbase.procedure.store.wal.max.roll.retries", 5);
81 conf.setInt("hbase.procedure.store.wal.sync.failure.roll.max", 5);
82 }
83
84 @Before
85 public void setup() throws Exception {
86 setupConf(UTIL.getConfiguration());
87 MiniDFSCluster dfs = UTIL.startMiniDFSCluster(3);
88
89 Path logDir = new Path(new Path(dfs.getFileSystem().getUri()), "/test-logs");
90 store = ProcedureTestingUtility.createWalStore(
91 UTIL.getConfiguration(), dfs.getFileSystem(), logDir);
92 store.registerListener(new ProcedureStore.ProcedureStoreListener() {
93 @Override
94 public void postSync() {}
95
96 @Override
97 public void abortProcess() {
98 LOG.fatal("Abort the Procedure Store");
99 store.stop(true);
100 }
101 });
102 store.start(8);
103 store.recoverLease();
104 }
105
106 @After
107 public void tearDown() throws Exception {
108 store.stop(false);
109 UTIL.getDFSCluster().getFileSystem().delete(store.getLogDir(), true);
110
111 try {
112 UTIL.shutdownMiniCluster();
113 } catch (Exception e) {
114 LOG.warn("failure shutting down cluster", e);
115 }
116 }
117
118 @Test(timeout=60000, expected=RuntimeException.class)
119 public void testWalAbortOnLowReplication() throws Exception {
120 assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
121
122 LOG.info("Stop DataNode");
123 UTIL.getDFSCluster().stopDataNode(0);
124 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
125
126 store.insert(new TestProcedure(1, -1), null);
127 for (long i = 2; store.isRunning(); ++i) {
128 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
129 store.insert(new TestProcedure(i, -1), null);
130 Thread.sleep(100);
131 }
132 assertFalse(store.isRunning());
133 fail("The store.insert() should throw an exeption");
134 }
135
136 @Test(timeout=60000)
137 public void testWalAbortOnLowReplicationWithQueuedWriters() throws Exception {
138 assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
139
140 store.registerListener(new ProcedureStore.ProcedureStoreListener() {
141 @Override
142 public void postSync() {
143 Threads.sleepWithoutInterrupt(2000);
144 }
145
146 @Override
147 public void abortProcess() {}
148 });
149
150 final AtomicInteger reCount = new AtomicInteger(0);
151 Thread[] thread = new Thread[store.getNumThreads() * 2 + 1];
152 for (int i = 0; i < thread.length; ++i) {
153 final long procId = i + 1;
154 thread[i] = new Thread() {
155 public void run() {
156 try {
157 LOG.debug("[S] INSERT " + procId);
158 store.insert(new TestProcedure(procId, -1), null);
159 LOG.debug("[E] INSERT " + procId);
160 } catch (RuntimeException e) {
161 reCount.incrementAndGet();
162 LOG.debug("[F] INSERT " + procId + ": " + e.getMessage());
163 }
164 }
165 };
166 thread[i].start();
167 }
168
169 Thread.sleep(1000);
170 LOG.info("Stop DataNode");
171 UTIL.getDFSCluster().stopDataNode(0);
172 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
173
174 for (int i = 0; i < thread.length; ++i) {
175 thread[i].join();
176 }
177
178 assertFalse(store.isRunning());
179 assertTrue(reCount.toString(), reCount.get() >= store.getNumThreads() &&
180 reCount.get() < thread.length);
181 }
182
183 @Test(timeout=60000)
184 public void testWalRollOnLowReplication() throws Exception {
185 int dnCount = 0;
186 store.insert(new TestProcedure(1, -1), null);
187 UTIL.getDFSCluster().restartDataNode(dnCount);
188 for (long i = 2; i < 100; ++i) {
189 store.insert(new TestProcedure(i, -1), null);
190 waitForNumReplicas(3);
191 Thread.sleep(100);
192 if ((i % 30) == 0) {
193 LOG.info("Restart Data Node");
194 UTIL.getDFSCluster().restartDataNode(++dnCount % 3);
195 }
196 }
197 assertTrue(store.isRunning());
198 }
199
200 public void waitForNumReplicas(int numReplicas) throws Exception {
201 while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) {
202 Thread.sleep(100);
203 }
204
205 for (int i = 0; i < numReplicas; ++i) {
206 for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) {
207 while (!dn.isDatanodeFullyStarted()) {
208 Thread.sleep(100);
209 }
210 }
211 }
212 }
213 }