/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.log4j.Level;
import org.junit.*;
import org.junit.experimental.categories.Category;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;

import static org.junit.Assert.*;

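/**
 * Test log rolling: verifies that old log files are cleaned up as the log
 * rolls, and that the WAL continues rolling, without losing edits, when
 * datanodes die or the whole HDFS pipeline is restarted.
 */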
@Category(LargeTests.class)
public class TestLogRolling {
  private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
  private HRegionServer server;
  private HLog log;
  private String tableName;
  private byte[] value;
  private FileSystem fs;
  private MiniDFSCluster dfsCluster;
  private HBaseAdmin admin;
  private MiniHBaseCluster cluster;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  // Verbose logging on the DFS and HBase classes these tests exercise.
  {
    ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
        .getLogger().setLevel(Level.ALL);
    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HRegionServer.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HRegion.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
  }

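  /**
   * Constructor: builds a row value of at least 1000 bytes by repeating the
   * class name, so that writes fill the log quickly.
   */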
  public TestLogRolling() {
    this.server = null;
    this.log = null;
    this.tableName = null;

    String className = this.getClass().getName();
    StringBuilder v = new StringBuilder(className);
    while (v.length() < 1000) {
      v.append(className);
    }
    this.value = Bytes.toBytes(v.toString());
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Force a region split after every 768KB of data written.
    TEST_UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 768L * 1024L);

    // Roll the log after every 32 writes.
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.maxlogentries", 32);

    // Tolerate up to 2 log-roll errors, and keep RPC timeouts short so
    // failures surface quickly.
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.logroll.errors.tolerated", 2);
    TEST_UTIL.getConfiguration().setInt("ipc.ping.interval", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("ipc.socket.timeout", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);

    // For less frequently updated regions, flush after every 2 flushes.
    TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2);

    // Flush the memstore once it reaches 8192 bytes.
    TEST_UTIL.getConfiguration().setInt(
        HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192);

    // Increase the amount of time between client retries.
    TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000);

    // Reduce thread wake frequency so that other threads can get
    // a chance to run.
    TEST_UTIL.getConfiguration().setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);

    // The WAL durability guarantees these tests rely on need HDFS append
    // support.
    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);

    // Lower the namenode and datanode heartbeat intervals so dead datanodes
    // are detected quickly.
    TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
    TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);

    // The namenode may still offer a recently dead datanode for a pipeline,
    // so retry block writes generously, and bound low-replication log rolls.
    TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 30);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.tolerable.lowreplication", 2);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 3);
  }

  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    fs = TEST_UTIL.getTestFileSystem();
    admin = TEST_UTIL.getHBaseAdmin();

    // Disable region rebalancing so regions stay where the tests put them.
    cluster.getMaster().balanceSwitch(false);
  }

  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

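  /**
   * Waits for the region servers to come up, creates the test table, and
   * writes 256 rows, pausing every 32 rows so flushes and log rolls can
   * catch up.
   */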
  private void startAndWriteData() throws IOException {
    // When the META table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
    this.server = cluster.getRegionServerThreads().get(0).getRegionServer();
    this.log = server.getWAL();

    // Create the test table.
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    for (int i = 1; i <= 256; i++) {
      Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", i)));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
      if (i % 32 == 0) {
        // Pause to give the flusher and log roller a chance to run.
        try {
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // continue
        }
      }
    }
  }

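  /**
   * Tests that, after all regions are flushed and the writer is rolled, old
   * log files are cleaned up and at most two remain.
   */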
  @Test
  public void testLogRolling() throws FailedLogCloseException, IOException {
    this.tableName = getName();
    startAndWriteData();
    LOG.info("after writing there are " + log.getNumLogFiles() + " log files");

    // Flush all regions so the edits in every log file are persisted and the
    // old logs become eligible for cleanup.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    // Now roll the log; everything before the roll should be cleaned up.
    log.rollWriter();

    int count = log.getNumLogFiles();
    LOG.info("after flushing all regions and rolling logs there are " +
        log.getNumLogFiles() + " log files");
    assertTrue(("actual count: " + count), count <= 2);
  }

  private static String getName() {
    return "TestLogRolling";
  }

  void writeData(HTable table, int rownum) throws IOException {
    Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", rownum)));
    put.add(HConstants.CATALOG_FAMILY, null, value);
    table.put(put);

    // Pause to give the log roller a chance to run.
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // continue
    }
  }

  void validateData(HTable table, int rownum) throws IOException {
    String row = "row" + String.format("%1$04d", rownum);
    Get get = new Get(Bytes.toBytes(row));
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result result = table.get(get);
    assertTrue(result.size() == 1);
    assertTrue(Bytes.equals(value,
        result.getValue(HConstants.CATALOG_FAMILY, null)));
    LOG.info("Validated row " + row);
  }

  void batchWriteAndWait(HTable table, int start, boolean expect, int timeout)
      throws IOException {
    for (int i = 0; i < 10; i++) {
      Put put = new Put(Bytes.toBytes("row"
          + String.format("%1$04d", (start + i))));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
    }
    Put tmpPut = new Put(Bytes.toBytes("tmprow"));
    tmpPut.add(HConstants.CATALOG_FAMILY, null, value);
    long startTime = System.currentTimeMillis();
    long remaining = timeout;
    while (remaining > 0) {
      if (log.isLowReplicationRollEnabled() == expect) {
        break;
      } else {
        // Keep writing so the WAL re-checks its replication level, until the
        // roller reaches the expected state or the timeout expires.
        table.put(tmpPut);
        try {
          Thread.sleep(200);
        } catch (InterruptedException e) {
          // continue
        }
        remaining = timeout - (System.currentTimeMillis() - startTime);
      }
    }
  }

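  /**
   * Returns the current HDFS pipeline of the log's output stream. The
   * DFSOutputStream.getPipeline() method is not public API, so it is located
   * and invoked via reflection.
   */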
  DatanodeInfo[] getPipeline(HLog log) throws IllegalArgumentException,
      IllegalAccessException, InvocationTargetException {
    OutputStream stm = log.getOutputStream();
    Method getPipeline = null;
    for (Method m : stm.getClass().getDeclaredMethods()) {
      if (m.getName().endsWith("getPipeline")) {
        getPipeline = m;
        getPipeline.setAccessible(true);
        break;
      }
    }

    assertTrue("Need DFSOutputStream.getPipeline() for this test",
        null != getPipeline);
    Object repl = getPipeline.invoke(stm, new Object[] {});
    return (DatanodeInfo[]) repl;
  }

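  /**
   * Tests that the log rolls when a datanode in the WAL pipeline dies, that
   * low-replication rolling is disabled once replication falls below the
   * tolerable level, and that it is re-enabled after a datanode comes back.
   * Requires HDFS-826 (querying the current replica count) and append support.
   */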
  @Test
  public void testLogRollOnDatanodeDeath() throws Exception {
    assertTrue("This test requires HLog file replication set to 2.",
        fs.getDefaultReplication() == 2);
    LOG.info("Replication=" + fs.getDefaultReplication());

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    assertTrue(table.isAutoFlush());

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    // Start three more datanodes, then stop the original ones, so that the
    // WAL pipeline can lose datanodes while replacements remain available.
    List<DataNode> existingNodes = dfsCluster.getDataNodes();
    int numDataNodes = 3;
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true,
        null, null);
    List<DataNode> allNodes = dfsCluster.getDataNodes();
    for (int i = allNodes.size() - 1; i >= 0; i--) {
      if (existingNodes.contains(allNodes.get(i))) {
        dfsCluster.stopDataNode(i);
      }
    }

    assertTrue("DataNodes " + dfsCluster.getDataNodes().size() +
        " default replication " + fs.getDefaultReplication(),
        dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);

    writeData(table, 2);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == log.getFilenum());
    final DatanodeInfo[] pipeline = getPipeline(log);
    assertTrue(pipeline.length == fs.getDefaultReplication());

    // Kill a datanode in the pipeline to force a log roll on the next sync.
    // stopDataNode is synchronous: when it returns, the node is dead.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);

    // This write should succeed, but trigger a log roll.
    writeData(table, 2);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);

    // Write some more log data; the log should not roll again.
    writeData(table, 3);
    assertTrue("The log should not roll again.",
        log.getFilenum() == newFilenum);

    // Kill another datanode in the pipeline, dropping replication below the
    // tolerable level; the low-replication roller should disable itself.
    assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);

    batchWriteAndWait(table, 3, false, 10000);
    assertTrue("LowReplication Roller should've been disabled, current replication="
        + log.getLogReplication(),
        !log.isLowReplicationRollEnabled());

    dfsCluster
        .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);

    // Force a log roll: with a datanode back, the new log file should get the
    // default replication and the low-replication roller should be re-enabled.
    log.rollWriter(true);
    batchWriteAndWait(table, 13, true, 10000);
    assertTrue("New log file should have the default replication instead of " +
        log.getLogReplication(),
        log.getLogReplication() == fs.getDefaultReplication());
    assertTrue("LowReplication Roller should've been enabled",
        log.isLowReplicationRollEnabled());
  }

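  /**
   * Tests that the WAL is rolled, and that no edits are lost, when all the
   * datanodes in the pipeline are restarted, twice in a row.
   */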
  @Test
  public void testLogRollOnPipelineRestart() throws Exception {
    LOG.info("Starting testLogRollOnPipelineRestart");
    assertTrue("This test requires HLog file replication.",
        fs.getDefaultReplication() > 1);
    LOG.info("Replication=" + fs.getDefaultReplication());
    // When the META table can be opened, the region servers are running.
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it.
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();

    // Track every log file this WAL uses, and how often preLogRoll fires.
    final List<Path> paths = new ArrayList<Path>();
    final List<Integer> preLogRolledCalled = new ArrayList<Integer>();
    paths.add(log.computeFilename());
    log.registerWALActionsListener(new WALActionsListener() {
      @Override
      public void preLogRoll(Path oldFile, Path newFile) {
        LOG.debug("preLogRoll: oldFile=" + oldFile + " newFile=" + newFile);
        preLogRolledCalled.add(new Integer(1));
      }
      @Override
      public void postLogRoll(Path oldFile, Path newFile) {
        paths.add(newFile);
      }
      @Override
      public void preLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void postLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void logRollRequested() {}
      @Override
      public void logCloseRequested() {}
      @Override
      public void visitLogEntryBeforeWrite(HRegionInfo info, HLogKey logKey,
          WALEdit logEdit) {}
      @Override
      public void visitLogEntryBeforeWrite(HTableDescriptor htd, HLogKey logKey,
          WALEdit logEdit) {}
    });

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());

    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    writeData(table, 1002);

    table.setAutoFlush(true);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == log.getFilenum());

    // Restart the datanodes, breaking the WAL pipeline; the previously
    // written row must still be readable.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1002);

    // This write should succeed, but trigger a log roll.
    writeData(table, 1003);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);
    validateData(table, 1003);

    writeData(table, 1004);

    // Restart the datanodes a second time and confirm nothing was lost.
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1004);

    // Write one more row before forcing a roll.
    writeData(table, 1005);

    // Force a log roll; the listener should have seen at least one roll.
    log.rollWriter(true);
    assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
        preLogRolledCalled.size() >= 1);

    // Read back every log file and collect the rows that were logged.
    Set<String> loggedRows = new HashSet<String>();
    for (Path p : paths) {
      LOG.debug("Reading HLog " + FSUtils.getPath(p));
      HLog.Reader reader = null;
      try {
        reader = HLog.getReader(fs, p, TEST_UTIL.getConfiguration());
        HLog.Entry entry;
        while ((entry = reader.next()) != null) {
          LOG.debug("#" + entry.getKey().getLogSeqNum() + ": " +
              entry.getEdit().getKeyValues());
          for (KeyValue kv : entry.getEdit().getKeyValues()) {
            loggedRows.add(Bytes.toStringBinary(kv.getRow()));
          }
        }
      } catch (EOFException e) {
        LOG.debug("EOF reading file " + FSUtils.getPath(p));
      } finally {
        if (reader != null) reader.close();
      }
    }

    // Every row we wrote should appear in some log file.
    assertTrue(loggedRows.contains("row1002"));
    assertTrue(loggedRows.contains("row1003"));
    assertTrue(loggedRows.contains("row1004"));
    assertTrue(loggedRows.contains("row1005"));

    // Flush all regions so the rows are persisted to store files, then scan.
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r : regions) {
      r.flushcache();
    }

    ResultScanner scanner = table.getScanner(new Scan());
    try {
      for (int i = 2; i <= 5; i++) {
        Result r = scanner.next();
        assertNotNull(r);
        assertFalse(r.isEmpty());
        assertEquals("row100" + i, Bytes.toString(r.getRow()));
      }
    } finally {
      scanner.close();
    }

    // Verify that no region server aborted during the test.
    for (JVMClusterUtil.RegionServerThread rsThread :
        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
      assertFalse(rsThread.getRegionServer().isAborted());
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
      new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}