/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.log4j.Level;
import org.junit.*;
import org.junit.experimental.categories.Category;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;

import static org.junit.Assert.*;

/**
 * Test log deletion as logs are rolled.
 */
@Category(LargeTests.class)
public class TestLogRolling {
  private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
  private HRegionServer server;
  private HLog log;
  private String tableName;
  private byte[] value;
  private FileSystem fs;
  private MiniDFSCluster dfsCluster;
  private HBaseAdmin admin;
  private MiniHBaseCluster cluster;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  // verbose logging on classes that are touched in these tests
  {
    ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
        .getLogger().setLevel(Level.ALL);
    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HRegionServer.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HRegion.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
  }

  /**
   * Constructor. Builds a payload value at least 1,000 bytes long by
   * repeating the test class name.
   */
  public TestLogRolling() {
    this.server = null;
    this.log = null;
    this.tableName = null;

    String className = this.getClass().getName();
    StringBuilder v = new StringBuilder(className);
    while (v.length() < 1000) {
      v.append(className);
    }
    this.value = Bytes.toBytes(v.toString());
  }

  // Need to override this setup so we can edit the config before it gets sent
  // to the HDFS & HBase cluster startup.
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    /**** configuration for testLogRolling ****/
    // Force a region split after every 768KB
    TEST_UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 768L * 1024L);

    // We roll the log after every 32 writes
    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.maxlogentries", 32);

    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.logroll.errors.tolerated", 2);
    TEST_UTIL.getConfiguration().setInt("ipc.ping.interval", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("ipc.socket.timeout", 10 * 1000);
    TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);

    // For less frequently updated regions, flush after every 2 flushes
    TEST_UTIL.getConfiguration().setInt("hbase.hregion.memstore.optionalflushcount", 2);

    // We flush the cache after every 8192 bytes
    TEST_UTIL.getConfiguration().setInt(
        HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 8192);

    // Increase the amount of time between client retries
    TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 10 * 1000);

    // Reduce thread wake frequency so that other threads can get
    // a chance to run.
    TEST_UTIL.getConfiguration().setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);

    /**** configuration for testLogRollOnDatanodeDeath ****/
    // make sure log.hflush() calls syncFs() to open a pipeline
    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
    // lower the namenode & datanode heartbeat so the namenode
    // quickly detects datanode failures
    TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
    TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
    // the namenode might still try to choose the recently-dead datanode
    // for a pipeline, so retry establishing the pipeline multiple times
    TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 30);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.tolerable.lowreplication", 2);
    TEST_UTIL.getConfiguration().setInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 3);
  }

  @Before
  public void setUp() throws Exception {
    TEST_UTIL.startMiniCluster(2);

    cluster = TEST_UTIL.getHBaseCluster();
    dfsCluster = TEST_UTIL.getDFSCluster();
    fs = TEST_UTIL.getTestFileSystem();
    admin = TEST_UTIL.getHBaseAdmin();

    // disable region rebalancing (interferes with log watching)
    cluster.getMaster().balanceSwitch(false);
  }

  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  private void startAndWriteData() throws IOException {
    // When the META table can be opened, the region servers are running
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
    this.server = cluster.getRegionServerThreads().get(0).getRegionServer();
    this.log = server.getWAL();

    // Create the test table and open it
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    for (int i = 1; i <= 256; i++) {    // 256 writes should cause 8 log rolls
      Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", i)));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
      if (i % 32 == 0) {
        // After every 32 writes sleep to let the log roller run
        try {
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // continue
        }
      }
    }
  }

  /**
   * Tests that logs are deleted
   * @throws IOException
   * @throws FailedLogCloseException
   */
  @Test
  public void testLogRolling() throws FailedLogCloseException, IOException {
    this.tableName = getName();
    startAndWriteData();
    LOG.info("after writing there are " + log.getNumLogFiles() + " log files");

    // flush all regions
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r: regions) {
      r.flushcache();
    }

    // Now roll the log
    log.rollWriter();

    int count = log.getNumLogFiles();
    LOG.info("after flushing all regions and rolling logs there are " +
        log.getNumLogFiles() + " log files");
    assertTrue("actual count: " + count, count <= 2);
  }

  private static String getName() {
    return "TestLogRolling";
  }

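  /**
   * Puts a single row carrying the test payload, then sleeps for two seconds
   * to give the log roller a chance to run.
   */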
  void writeData(HTable table, int rownum) throws IOException {
    Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", rownum)));
    put.add(HConstants.CATALOG_FAMILY, null, value);
    table.put(put);

    // sleep to let the log roller run (if it needs to)
    try {
      Thread.sleep(2000);
    } catch (InterruptedException e) {
      // continue
    }
  }

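  /**
   * Reads the given row back and asserts it contains exactly one cell whose
   * value matches the test payload.
   */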
  void validateData(HTable table, int rownum) throws IOException {
    String row = "row" + String.format("%1$04d", rownum);
    Get get = new Get(Bytes.toBytes(row));
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result result = table.get(get);
    assertTrue(result.size() == 1);
    assertTrue(Bytes.equals(value,
        result.getValue(HConstants.CATALOG_FAMILY, null)));
    LOG.info("Validated row " + row);
  }

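  /**
   * Writes a batch of ten rows starting at {@code start}, then keeps issuing
   * puts until the WAL's low-replication roll flag equals {@code expect} or
   * the timeout (in milliseconds) expires.
   */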
  void batchWriteAndWait(HTable table, int start, boolean expect, int timeout)
      throws IOException {
    for (int i = 0; i < 10; i++) {
      Put put = new Put(Bytes.toBytes("row"
          + String.format("%1$04d", (start + i))));
      put.add(HConstants.CATALOG_FAMILY, null, value);
      table.put(put);
    }
    Put tmpPut = new Put(Bytes.toBytes("tmprow"));
    tmpPut.add(HConstants.CATALOG_FAMILY, null, value);
    long startTime = System.currentTimeMillis();
    long remaining = timeout;
    while (remaining > 0) {
      if (log.isLowReplicationRollEnabled() == expect) {
        break;
      } else {
        // Trigger a call to HLog#checkLowReplication()
        table.put(tmpPut);
        try {
          Thread.sleep(200);
        } catch (InterruptedException e) {
          // continue
        }
        remaining = timeout - (System.currentTimeMillis() - startTime);
      }
    }
  }

  /**
   * Returns the HDFS datanode pipeline for the WAL's current output stream,
   * looked up reflectively via DFSOutputStream#getPipeline().
   */
  DatanodeInfo[] getPipeline(HLog log) throws IllegalArgumentException,
      IllegalAccessException, InvocationTargetException {
    OutputStream stm = log.getOutputStream();
    Method getPipeline = null;
    for (Method m : stm.getClass().getDeclaredMethods()) {
      if (m.getName().endsWith("getPipeline")) {
        getPipeline = m;
        getPipeline.setAccessible(true);
        break;
      }
    }

    assertTrue("Need DFSOutputStream.getPipeline() for this test",
        null != getPipeline);
    Object repl = getPipeline.invoke(stm, new Object[] {} /* NO_ARGS */);
    return (DatanodeInfo[]) repl;
  }


  /**
   * Tests that logs are rolled upon detecting datanode death.
   * Requires an HDFS jar with HDFS-826 & syncFs() support (HDFS-200).
   * @throws IOException
   * @throws InterruptedException
   * @throws InvocationTargetException
   * @throws IllegalAccessException
   * @throws IllegalArgumentException
   */
  @Test
  public void testLogRollOnDatanodeDeath() throws Exception {
    assertTrue("This test requires HLog file replication set to 2.",
        fs.getDefaultReplication() == 2);
    LOG.info("Replication=" + fs.getDefaultReplication());

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
    assertTrue(table.isAutoFlush());

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());
    // don't run this test without append support (HDFS-200 & HDFS-142)
    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

    // Add more datanodes to ensure proper replication when we kill one.
    // This function is synchronous; when it returns, the dfs cluster is active.
    // We start 3 servers and then stop 2 to avoid a directory naming conflict
    // when we stop/start a namenode later, as mentioned in HBASE-5163.
    List<DataNode> existingNodes = dfsCluster.getDataNodes();
    int numDataNodes = 3;
    dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true,
        null, null);
    List<DataNode> allNodes = dfsCluster.getDataNodes();
    for (int i = allNodes.size()-1; i >= 0; i--) {
      if (existingNodes.contains(allNodes.get(i))) {
        dfsCluster.stopDataNode(i);
      }
    }

    assertTrue("DataNodes " + dfsCluster.getDataNodes().size() +
        " default replication " + fs.getDefaultReplication(),
        dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);

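    // Write an edit so the current WAL file has an active pipeline to inspect
    // below via getPipeline().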
    writeData(table, 2);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet",
        oldFilenum == log.getFilenum());
    final DatanodeInfo[] pipeline = getPipeline(log);
    assertTrue(pipeline.length == fs.getDefaultReplication());

    // kill a datanode in the pipeline to force a log roll on the next sync()
    // This function is synchronous; when it returns, the node is killed.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);

    // this write should succeed, but trigger a log roll
    writeData(table, 2);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);

    // write some more log data (this should use a new hdfs_out)
    writeData(table, 3);
    assertTrue("The log should not roll again.",
        log.getFilenum() == newFilenum);
    // kill another datanode in the pipeline so the live replica count drops
    // below the tolerable low-replication value of 2
    assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);

    batchWriteAndWait(table, 3, false, 10000);
    assertTrue("LowReplication Roller should've been disabled, current replication="
            + log.getLogReplication(),
        !log.isLowReplicationRollEnabled());

    dfsCluster
        .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);

    // Force a writer roll. The new log file will have the default replication,
    // and the low-replication roller will be re-enabled.
    log.rollWriter(true);
    batchWriteAndWait(table, 13, true, 10000);
    assertTrue("New log file should have the default replication instead of " +
        log.getLogReplication(),
        log.getLogReplication() == fs.getDefaultReplication());
    assertTrue("LowReplication Roller should've been enabled",
        log.isLowReplicationRollEnabled());
  }


  /**
   * Test that HLog is rolled when all data nodes in the pipeline have been
   * restarted.
   * @throws Exception
   */
  //DISABLED BECAUSE FLAKEY @Test
  public void testLogRollOnPipelineRestart() throws Exception {
    LOG.info("Starting testLogRollOnPipelineRestart");
    assertTrue("This test requires HLog file replication.",
        fs.getDefaultReplication() > 1);
    LOG.info("Replication=" + fs.getDefaultReplication());
    // When the META table can be opened, the region servers are running
    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);

    this.server = cluster.getRegionServer(0);
    this.log = server.getWAL();

    // Create the test table and open it
    String tableName = getName();
    HTableDescriptor desc = new HTableDescriptor(tableName);
    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));

    admin.createTable(desc);
    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);

    server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
    this.log = server.getWAL();
    final List<Path> paths = new ArrayList<Path>();
    final List<Integer> preLogRolledCalled = new ArrayList<Integer>();
    paths.add(log.computeFilename());
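    // The listener below records every WAL file created by a roll so its edits
    // can be read back and verified at the end of the test, and counts
    // preLogRoll invocations.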
    log.registerWALActionsListener(new WALActionsListener() {
      @Override
      public void preLogRoll(Path oldFile, Path newFile) {
        LOG.debug("preLogRoll: oldFile="+oldFile+" newFile="+newFile);
        preLogRolledCalled.add(new Integer(1));
      }
      @Override
      public void postLogRoll(Path oldFile, Path newFile) {
        paths.add(newFile);
      }
      @Override
      public void preLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void postLogArchive(Path oldFile, Path newFile) {}
      @Override
      public void logRollRequested() {}
      @Override
      public void logCloseRequested() {}
      @Override
      public void visitLogEntryBeforeWrite(HRegionInfo info, HLogKey logKey,
          WALEdit logEdit) {}
      @Override
      public void visitLogEntryBeforeWrite(HTableDescriptor htd, HLogKey logKey,
          WALEdit logEdit) {}
    });

    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());
    // don't run this test without append support (HDFS-200 & HDFS-142)
    assertTrue("Need append support for this test", FSUtils
        .isAppendSupported(TEST_UTIL.getConfiguration()));

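    // Write an initial edit; it is re-validated after each datanode restart
    // and again when the WAL files are read back below.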
    writeData(table, 1002);

    table.setAutoFlush(true);

    long curTime = System.currentTimeMillis();
    long oldFilenum = log.getFilenum();
    assertTrue("Log should have a timestamp older than now",
        curTime > oldFilenum && oldFilenum != -1);

    assertTrue("The log shouldn't have rolled yet", oldFilenum == log.getFilenum());

    // roll all datanodes in the pipeline
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1002);

    // this write should succeed, but trigger a log roll
    writeData(table, 1003);
    long newFilenum = log.getFilenum();

    assertTrue("Missing datanode should've triggered a log roll",
        newFilenum > oldFilenum && newFilenum > curTime);
    validateData(table, 1003);

    writeData(table, 1004);

    // roll all datanodes again
    dfsCluster.restartDataNodes();
    Thread.sleep(1000);
    dfsCluster.waitActive();
    LOG.info("Data Nodes restarted");
    validateData(table, 1004);

    // this write should succeed, but trigger a log roll
    writeData(table, 1005);

    // force a log roll to read back and verify previously written logs
    log.rollWriter(true);
    assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
        preLogRolledCalled.size() >= 1);

    // read back the data written
    Set<String> loggedRows = new HashSet<String>();
    for (Path p : paths) {
      LOG.debug("Reading HLog "+FSUtils.getPath(p));
      HLog.Reader reader = null;
      try {
        reader = HLog.getReader(fs, p, TEST_UTIL.getConfiguration());
        HLog.Entry entry;
        while ((entry = reader.next()) != null) {
          LOG.debug("#"+entry.getKey().getLogSeqNum()+": "+entry.getEdit().getKeyValues());
          for (KeyValue kv : entry.getEdit().getKeyValues()) {
            loggedRows.add(Bytes.toStringBinary(kv.getRow()));
          }
        }
      } catch (EOFException e) {
        LOG.debug("EOF reading file "+FSUtils.getPath(p));
      } finally {
        if (reader != null) reader.close();
      }
    }

    // verify the written rows are there
    assertTrue(loggedRows.contains("row1002"));
    assertTrue(loggedRows.contains("row1003"));
    assertTrue(loggedRows.contains("row1004"));
    assertTrue(loggedRows.contains("row1005"));

    // flush all regions
    List<HRegion> regions =
        new ArrayList<HRegion>(server.getOnlineRegionsLocalContext());
    for (HRegion r: regions) {
      r.flushcache();
    }

    ResultScanner scanner = table.getScanner(new Scan());
    try {
      for (int i=2; i<=5; i++) {
        Result r = scanner.next();
        assertNotNull(r);
        assertFalse(r.isEmpty());
        assertEquals("row100"+i, Bytes.toString(r.getRow()));
      }
    } finally {
      scanner.close();
    }

    // verify that no region servers aborted
    for (JVMClusterUtil.RegionServerThread rsThread:
        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
      assertFalse(rsThread.getRegionServer().isAborted());
    }
  }

  @org.junit.Rule
  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
      new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}