
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.google.protobuf.ServiceException;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.CoordinatedStateException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnectable;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.KeeperException;

/**
 * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
 * table integrity problems in a corrupted HBase.
 * <p>
 * Region consistency checks verify that hbase:meta, region deployment on region
 * servers, and the state of data in HDFS (.regioninfo files) are all in
 * accordance.
 * <p>
 * Table integrity checks verify that all possible row keys resolve to exactly
 * one region of a table.  This means there are no individual degenerate
 * or backwards regions; no holes between regions; and that there are no
 * overlapping regions.
 * <p>
 * The general repair strategy works in two phases:
 * <ol>
 * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
 * <li> Repair Region Consistency with hbase:meta and assignments
 * </ol>
 * <p>
 * For table integrity repairs, the tables' region directories are scanned
 * for .regioninfo files.  Each table's integrity is then verified.  If there
 * are any orphan regions (regions with no .regioninfo files) or holes, new
 * regions are fabricated.  Backwards regions are sidelined, as are empty
 * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
 * a new region is created and all data is merged into the new region.
 * <p>
 * Table integrity repairs deal solely with HDFS and could potentially be done
 * offline -- the hbase region servers or master do not need to be running.
 * This phase can eventually be used to completely reconstruct the hbase:meta table in
 * an offline fashion.
 * <p>
 * Region consistency requires three conditions -- 1) a valid .regioninfo file
 * present in the HDFS region dir, 2) a valid row with .regioninfo data in META,
 * and 3) a region deployed only on the regionserver that it was assigned to,
 * with the proper state recorded in the master.
 * <p>
 * Region consistency repairs require hbase to be online so that hbck can
 * contact the HBase master and region servers.  The hbck#connect() method must
 * first be called successfully.  Much of the region consistency information
 * is transient and less risky to repair.
 * <p>
 * If hbck is run from the command line, there are a handful of arguments that
 * can be used to limit the kinds of repairs hbck will do.  See the code in
 * {@link #printUsageAndExit()} for more details.
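 * <p>
 * A minimal sketch of programmatic use (illustrative only; the command line is
 * the usual entry point, and all fix options default to off):
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * HBaseFsck fsck = new HBaseFsck(conf);
 * fsck.connect();              // required before online checks/repairs
 * int ret = fsck.onlineHbck(); // check-only run when no fix flags are set
 * fsck.close();
 * }</pre>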
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In Hadoop 2.6 and later, the NameNode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on these operations of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
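  // The wait above is configurable; a minimal sketch of overriding it in code
  // (illustrative value): conf.setInt("hbase.hbck.lockfile.maxwaittime", 120);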

  /**********************
   * Internal resources
   **********************/
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private ClusterConnection connection;
  private Admin admin;
  private Table meta;
  // threads to do parallelizable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = EnvironmentEdgeManager.currentTime();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  private Path HBCK_LOCK_PATH;
  private FSDataOutputStream hbckOutFd;
  // This lock is to prevent cleanup of the hbck lock file twice between the
  // ShutdownHook and the main code. We clean up only if connect() was
  // successful.
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // only check tables whose modtime is older than this lag (ms)
  private static boolean disableSplitAndMerge = false; // disable split and merge
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs orphans (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix orphan tables (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store files
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixTableLocks = false; // fix table locks which are expired
  private boolean fixTableZNodes = false; // fix table ZNodes which are orphaned
  private boolean fixAny = false; // Set to true if any fix option is enabled.

  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta is always checked
  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean summary = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if true, skip the filesystem permission pre-check

  /*********
   * State
   *********/
  final private ErrorReporter errors;
  int fixes = 0;

  /**
   * This map contains the state of all hbck items.  It maps from encoded region
   * name to HbckInfo structure.  The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables =
    new TreeSet<TableName>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  /**
   * This map from TableName -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case it contains only
   * the meta table.
   */
  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();

  /**
   * List of orphaned table ZNodes
   */
  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
  private final RetryCounterFactory lockFileRetryCounterFactory;

  /**
   * Constructor
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    this(conf, createThreadPool(conf));
  }

  private static ExecutorService createThreadPool(Configuration conf) {
    int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
    return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
  }
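
  // A sketch of sizing the pool before constructing hbck (illustrative values):
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hbasefsck.numthreads", 25);
  //   HBaseFsck fsck = new HBaseFsck(conf);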

  /**
   * Constructor
   *
   * @param conf
   *          Configuration object
   * @param exec
   *          executor service for parallelizable tasks
   * @throws MasterNotRunningException
   *           if the master is not running
   * @throws ZooKeeperConnectionException
   *           if unable to connect to ZooKeeper
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    lockFileRetryCounterFactory = new RetryCounterFactory(
      getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
      getConf().getInt(
        "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
      getConf().getInt(
        "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
  }
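
  // The three lock-file retry knobs above are plain Configuration integers;
  // e.g. (illustrative): conf.setInt("hbase.hbck.lockfile.attempts", 10);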

  private class FileLockCallable implements Callable<FSDataOutputStream> {
    RetryCounter retryCounter;

    public FileLockCallable(RetryCounter retryCounter) {
      this.retryCounter = retryCounter;
    }

    @Override
    public FSDataOutputStream call() throws IOException {
      try {
        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
            HConstants.DATA_FILE_UMASK_KEY);
        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
        fs.mkdirs(tmpDir);
        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
        out.writeBytes(InetAddress.getLocalHost().toString());
        out.flush();
        return out;
      } catch (RemoteException e) {
        if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) {
          return null;
        } else {
          throw e;
        }
      }
    }

    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
        final Path hbckLockFilePath, final FsPermission defaultPerms)
        throws IOException {
      IOException exception = null;
      do {
        try {
          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
        } catch (IOException ioe) {
          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
              ioe);
          try {
            exception = ioe;
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            throw (InterruptedIOException) new InterruptedIOException(
                "Can't create lock file " + hbckLockFilePath.getName())
            .initCause(ie);
          }
        }
      } while (retryCounter.shouldRetry());

      throw exception;
    }
  }

  /**
   * This method maintains a lock using a file. If the creation fails, we return null.
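   * The lock file is created under the cluster's temporary directory, typically
   * {@code ${hbase.rootdir}/.tmp/hbase-hbck.lock}.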
   *
   * @return FSDataOutputStream object corresponding to the newly opened lock file
   * @throws IOException if an IO failure occurs
   */
  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
    RetryCounter retryCounter = lockFileRetryCounterFactory.create();
    FileLockCallable callable = new FileLockCallable(retryCounter);
    ExecutorService executor = Executors.newFixedThreadPool(1);
    FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
    executor.execute(futureTask);
    final int timeoutInSeconds = getConf().getInt(
      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
    FSDataOutputStream stream = null;
    try {
      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
    } catch (ExecutionException ee) {
      LOG.warn("Encountered exception when opening lock file", ee);
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted when opening lock file", ie);
      Thread.currentThread().interrupt();
    } catch (TimeoutException exception) {
      // took too long to obtain lock
      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
      futureTask.cancel(true);
    } finally {
      executor.shutdownNow();
    }
    return stream;
  }

  private void unlockHbck() {
    if (hbckLockCleanup.compareAndSet(true, false)) {
      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
      do {
        try {
          IOUtils.closeStream(hbckOutFd);
          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
              HBCK_LOCK_PATH, true);
          LOG.info("Finishing hbck");
          return;
        } catch (IOException ioe) {
          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
              + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
          try {
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            LOG.warn("Interrupted while deleting lock file " +
                HBCK_LOCK_PATH);
            return;
          }
        }
      } while (retryCounter.shouldRetry());
    }
  }

  /**
   * To repair region consistency, one must call connect() first to set up the
   * online state that the repairs operate on.
   */
  public void connect() throws IOException {

    // Check if another instance of hbck is running
    hbckOutFd = checkAndMarkRunningHbck();
    if (hbckOutFd == null) {
      setRetCode(-1);
      LOG.error("Another instance of hbck is running, exiting this instance. [If you are sure" +
          " no other instance is running, delete the lock file " +
          HBCK_LOCK_PATH + " and rerun the tool]");
      throw new IOException("Duplicate hbck - Abort");
    }

    // Make sure to cleanup the lock
    hbckLockCleanup.set(true);

    // Add a shutdown hook to this thread, in case user tries to
    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
    // it is available for further calls
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        IOUtils.closeStream(HBaseFsck.this);
        unlockHbck();
      }
    });

    LOG.info("Launching hbck");

    connection = (ClusterConnection) ConnectionFactory.createConnection(getConf());
    admin = connection.getAdmin();
    meta = connection.getTable(TableName.META_TABLE_NAME);
    status = admin.getClusterStatus();
  }

  /**
   * Get deployed regions according to the region servers.
   */
  private void loadDeployedRegions() throws IOException, InterruptedException {
    // From the master, get a list of all known live region servers
    Collection<ServerName> regionServers = status.getServers();
    errors.print("Number of live region servers: " + regionServers.size());
    if (details) {
      for (ServerName rsinfo: regionServers) {
        errors.print("  " + rsinfo.getServerName());
      }
    }

    // From the master, get a list of all dead region servers
    Collection<ServerName> deadRegionServers = status.getDeadServerNames();
    errors.print("Number of dead region servers: " + deadRegionServers.size());
    if (details) {
      for (ServerName name: deadRegionServers) {
        errors.print("  " + name);
      }
    }

    // Print the current master name and state
    errors.print("Master: " + status.getMaster());

    // Print the list of all backup masters
    Collection<ServerName> backupMasters = status.getBackupMasters();
    errors.print("Number of backup masters: " + backupMasters.size());
    if (details) {
      for (ServerName name: backupMasters) {
        errors.print("  " + name);
      }
    }

    errors.print("Average load: " + status.getAverageLoad());
    errors.print("Number of requests: " + status.getRequestsCount());
    errors.print("Number of regions: " + status.getRegionsCount());

    Map<String, RegionState> rits = status.getRegionsInTransition();
    errors.print("Number of regions in transition: " + rits.size());
    if (details) {
      for (RegionState state: rits.values()) {
        errors.print("  " + state.toDescriptiveString());
      }
    }

    // Determine what's deployed
    processRegionServers(regionServers);
  }

  /**
   * Clear the current state of hbck.
   */
  private void clearState() {
    // Make sure regionInfo is empty before starting
    fixes = 0;
    regionInfoMap.clear();
    emptyRegionInfoQualifiers.clear();
    disabledTables.clear();
    errors.clear();
    tablesInfo.clear();
    orphanHdfsDirs.clear();
    skippedRegions.clear();
  }

  /**
   * This repair method analyzes hbase data in hdfs and repairs it to satisfy
   * the table integrity rules.  HBase doesn't need to be online for this
   * operation to work.
   */
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
      LOG.info("Loading regioninfos from HDFS");
      // If nothing is wrong, this should always complete in two iterations.
      int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
      int curIter = 0;
      do {
        clearState(); // clears hbck state and resets fixes to 0.
        // repair what's on HDFS
        restoreHdfsIntegrity();
        curIter++; // limit the number of iterations.
      } while (fixes > 0 && curIter <= maxIterations);

      // Repairs should be done in the first iteration and verification in the second.
      // If there are more than 2 passes, something funny has happened.
      if (curIter > 2) {
        if (curIter == maxIterations) {
          LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
              + "Table integrity may not be fully repaired!");
        } else {
          LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
        }
      }
    }
  }

  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and deployments consistent.
   *
   * @return If > 0, the number of errors detected; if < 0, an unrecoverable
   * error occurred.  If 0, we have a clean hbase.
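   * <p>
   * A sketch of caller-side handling (illustrative; assumes {@link #connect()}
   * has already succeeded):
   * <pre>{@code
   * int rc = fsck.onlineConsistencyRepair();
   * if (rc < 0) {
   *   // unrecoverable, e.g. hbase:meta is inconsistent
   * } else if (rc > 0) {
   *   // rc consistency errors were detected
   * }
   * }</pre>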
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
    InterruptedException {
    clearState();

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Do not proceed with further consistency checks for tables when hbase:meta itself is
    // not consistent.
    LOG.info("Loading regioninfos from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // Get disabled tables from ZooKeeper
    loadDisabledTables();

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");

    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }

  /**
   * Contacts the master and prints out cluster-wide information.
   * @return 0 on success, non-zero on failure
   */
  public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
    // print hbase server version
    errors.print("Version: " + status.getHBaseVersion());
    offlineHdfsIntegrityRepair();

    // turn the balancer off
    boolean oldBalancer = admin.setBalancerRunning(false, true);
    boolean[] oldSplitAndMerge = null;
    if (shouldDisableSplitAndMerge()) {
      oldSplitAndMerge = admin.setSplitOrMergeEnabled(false, false,
        Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE);
    }

    try {
      onlineConsistencyRepair();
    }
    finally {
      admin.setBalancerRunning(oldBalancer, false);

      if (shouldDisableSplitAndMerge()) {
        if (oldSplitAndMerge != null) {
          if (oldSplitAndMerge[0] && oldSplitAndMerge[1]) {
            admin.setSplitOrMergeEnabled(true, false,
              Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE);
          } else if (oldSplitAndMerge[0]) {
            admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.SPLIT);
          } else if (oldSplitAndMerge[1]) {
            admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.MERGE);
          }
        }
      }
    }

    if (checkRegionBoundaries) {
      checkRegionBoundaries();
    }

    offlineReferenceFileRepair();

    checkAndFixTableLocks();

    // Check (and fix if requested) orphaned table ZNodes
    checkAndFixOrphanedTableZNodes();

    // Remove the hbck lock
    unlockHbck();

    // Print table summary
    printTableSummary(tablesInfo);
    return errors.summarize();
  }

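  /**
   * Extracts just the row from a serialized key: the first two bytes hold the
   * row length, followed by the row bytes themselves.
   */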
  public static byte[] keyOnly(byte[] b) {
    if (b == null)
      return b;
    int rowlength = Bytes.toShort(b, 0);
    byte[] result = new byte[rowlength];
    System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
    return result;
  }

  @Override
  public void close() throws IOException {
    IOUtils.cleanup(null, admin, meta, connection);
  }

  private static class RegionBoundariesInformation {
    public byte[] regionName;
    public byte[] metaFirstKey;
    public byte[] metaLastKey;
    public byte[] storesFirstKey;
    public byte[] storesLastKey;
    @Override
    public String toString() {
      return "regionName=" + Bytes.toStringBinary(regionName) +
             "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
             "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
             "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
             "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
    }
  }

  public void checkRegionBoundaries() {
    try {
      ByteArrayComparator comparator = new ByteArrayComparator();
      List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false);
      final RegionBoundariesInformation currentRegionBoundariesInformation =
          new RegionBoundariesInformation();
      Path hbaseRoot = FSUtils.getRootDir(getConf());
      for (HRegionInfo regionInfo : regions) {
        Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
        currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
        // For each region, get the start and stop key from META and compare them to the
        // same information from the Stores.
        Path path = new Path(tableDir, regionInfo.getEncodedName());
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] files = fs.listStatus(path);
        // For all the column families in this region...
        byte[] storeFirstKey = null;
        byte[] storeLastKey = null;
        for (FileStatus file : files) {
          String fileName = file.getPath().toString();
          fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
          if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
            FileStatus[] storeFiles = fs.listStatus(file.getPath());
            // For all the store files in this column family.
            for (FileStatus storeFile : storeFiles) {
              HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
                  getConf()), getConf());
              if ((reader.getFirstKey() != null)
                  && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
                      reader.getFirstKey()) > 0))) {
                storeFirstKey = reader.getFirstKey();
              }
              if ((reader.getLastKey() != null)
                  && ((storeLastKey == null) || (comparator.compare(storeLastKey,
                      reader.getLastKey())) < 0)) {
                storeLastKey = reader.getLastKey();
              }
              reader.close();
            }
          }
        }
        currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
        currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
        currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
        currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
        if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
          currentRegionBoundariesInformation.metaFirstKey = null;
        if (currentRegionBoundariesInformation.metaLastKey.length == 0)
          currentRegionBoundariesInformation.metaLastKey = null;

        // For a region to be correct, we need the META start key to be smaller or equal to the
        // smallest start key from all the stores, and the start key from the next META entry to
        // be bigger than the last key from all the current stores. First region start key is null;
        // Last region end key is null; some regions can be empty and not have any store.
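        // e.g. (illustrative): a region [b, f) whose store row keys span [b, e] is
        // valid, since b >= b and e < f; a store row key of f or beyond would be
        // flagged below.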

        boolean valid = true;
        // Checking start key.
        if ((currentRegionBoundariesInformation.storesFirstKey != null)
            && (currentRegionBoundariesInformation.metaFirstKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
                currentRegionBoundariesInformation.metaFirstKey) >= 0;
        }
        // Checking stop key.
        if ((currentRegionBoundariesInformation.storesLastKey != null)
            && (currentRegionBoundariesInformation.metaLastKey != null)) {
          valid = valid
              && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
                currentRegionBoundariesInformation.metaLastKey) < 0;
        }
        if (!valid) {
          errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with region boundaries",
            tablesInfo.get(regionInfo.getTable()));
          LOG.warn("Region's boundaries are not aligned between the stores and META for:");
          LOG.warn(currentRegionBoundariesInformation);
        }
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }

  /**
   * Iterates through the list of all orphan/invalid regiondirs.
   */
  private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
    for (HbckInfo hi : orphanHdfsDirs) {
      LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
      adoptHdfsOrphan(hi);
    }
  }

  /**
   * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
   * these orphans by creating a new region, and moving the column families,
   * recovered edits, and WALs into the new region dir.  We determine the region
   * startkey and endkeys by looking at all of the hfiles inside the column
   * families to identify the min and max keys. The resulting region will
   * likely violate table integrity but will be dealt with by merging
   * overlapping regions.
   */
  @SuppressWarnings("deprecation")
  private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
      LOG.warn("Attempt to adopt orphan hdfs region skipped because no files are present in " +
          p + ". This dir could probably be deleted.");
      return;
    }

    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();

    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
      String cfName = cf.getPath().getName();
      // TODO Figure out what the special dirs are
      if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;

      FileStatus[] hfiles = fs.listStatus(cf.getPath());
      for (FileStatus hfile : hfiles) {
        byte[] start, end;
        HFile.Reader hf = null;
        try {
          CacheConfig cacheConf = new CacheConfig(getConf());
          hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
          hf.loadFileInfo();
          KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
          start = startKv.getRow();
          KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
          end = endKv.getRow();
        } catch (IOException ioe) {
          LOG.warn("Problem reading orphan file " + hfile + ", skipping");
          continue;
        } catch (NullPointerException npe) {
          LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping");
          continue;
        } finally {
          if (hf != null) {
            hf.close();
          }
        }

        // expand the range to include the range of all hfiles
        if (orphanRegionRange == null) {
          // first range
          orphanRegionRange = new Pair<byte[], byte[]>(start, end);
        } else {
          // TODO add test

          // expand range only if the hfile is wider.
          if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
            orphanRegionRange.setFirst(start);
          }
          if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
            orphanRegionRange.setSecond(end);
          }
        }
      }
    }
    if (orphanRegionRange == null) {
      LOG.warn("No data in dir " + p + ", sidelining data");
      fixes++;
      sidelineRegionDir(fs, hi);
      return;
    }
    LOG.info("Min/max keys are: [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
        Bytes.toString(orphanRegionRange.getSecond()) + ")");

    // create new region on hdfs.  move data into place.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
      Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();

    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
  }

  /**
   * This method determines if there are table integrity errors in HDFS.  If
   * there are errors and the appropriate "fix" options are enabled, the method
   * will first correct orphan regions making them into legit regiondirs, and
   * then reload to merge potentially overlapping regions.
   *
   * @return number of table integrity errors found
   */
  private int restoreHdfsIntegrity() throws IOException, InterruptedException {
    // Determine what's on HDFS
    LOG.info("Loading HBase regioninfo from HDFS...");
    loadHdfsRegionDirs(); // populating regioninfo table.

    int errs = errors.getErrorList().size();
    // First time just get suggestions.
    tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
    checkHdfsIntegrity(false, false);

    if (errors.getErrorList().size() == errs) {
      LOG.info("No integrity errors.  We are done with this phase. Glorious.");
      return 0;
    }

    if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
      adoptHdfsOrphans(orphanHdfsDirs);
      // TODO optimize by incrementally adding instead of reloading.
    }

    // Make sure there are no holes now.
    if (shouldFixHdfsHoles()) {
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
    }

    // Now we fix overlaps
    if (shouldFixHdfsOverlaps()) {
      // second pass we fix overlaps.
      clearState(); // this also resets # fixes.
      loadHdfsRegionDirs();
      tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
      tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
    }

    return errors.getErrorList().size();
  }

  /**
   * Scan all the store file names to find any lingering reference files,
   * which refer to files that no longer exist.  If the "fix" option is enabled,
   * any lingering reference file found will be sidelined.
   * <p>
   * A lingering reference file prevents a region from opening.  It has to
   * be fixed before a cluster can start properly.
   */
  private void offlineReferenceFileRepair() throws IOException, InterruptedException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all store files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
      new FSUtils.ReferenceFileFilter(fs), executor, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path: allFiles.values()) {
      Path referredToFile = StoreFileInfo.getReferredToFile(path);
      if (fs.exists(referredToFile)) continue;  // good, expected

      // Found a lingering reference file
      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
        "Found lingering reference file " + path);
      if (!shouldFixReferenceFiles()) continue;

      // Now, trying to fix it since requested
      boolean success = false;
      String pathStr = path.toString();

      // A reference file path should be like
      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
      // Up 5 directories to get the root folder.
      // So the file will be sidelined to a similar folder structure.
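      // e.g. (illustrative, hypothetical names):
      //   data/default/t1/aaa111/cf1/referredfile.bbb222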
      int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
      for (int i = 0; index > 0 && i < 5; i++) {
        index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
      }
      if (index > 0) {
        Path rootDir = getSidelineDir();
        Path dst = new Path(rootDir, pathStr.substring(index + 1));
        fs.mkdirs(dst.getParent());
        LOG.info("Trying to sideline reference file "
          + path + " to " + dst);
        setShouldRerun();

        success = fs.rename(path, dst);
      }
      if (!success) {
        LOG.error("Failed to sideline reference file " + path);
      }
    }
  }

  /**
   * TODO -- need to add tests for this.
   */
  private void reportEmptyMetaCells() {
    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
      emptyRegionInfoQualifiers.size());
    if (details) {
      for (Result r: emptyRegionInfoQualifiers) {
        errors.print("  " + r);
      }
    }
  }

  /**
   * TODO -- need to add tests for this.
   */
  private void reportTablesInFlux() {
    AtomicInteger numSkipped = new AtomicInteger(0);
    HTableDescriptor[] allTables = getTables(numSkipped);
    errors.print("Number of Tables: " + allTables.length);
    if (details) {
      if (numSkipped.get() > 0) {
        errors.detail("Number of Tables in flux: " + numSkipped.get());
      }
      for (HTableDescriptor td : allTables) {
        errors.detail("  Table: " + td.getTableName() + "\t" +
                           (td.isReadOnly() ? "ro" : "rw") + "\t" +
                            (td.isMetaRegion() ? "META" : "    ") + "\t" +
                           " families: " + td.getFamilies().size());
      }
    }
  }

  public ErrorReporter getErrors() {
    return errors;
  }

  /**
   * Read the .regioninfo file from the file system.  If there is no
   * .regioninfo, add it to the orphan hdfs region list.
   */
  private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    if (regionDir == null) {
      LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
      return;
    }

    if (hbi.hdfsEntry.hri != null) {
      // already loaded data
      return;
    }

    FileSystem fs = FileSystem.get(getConf());
    HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
    LOG.debug("HRegionInfo read: " + hri.toString());
    hbi.hdfsEntry.hri = hri;
  }

  /**
   * Exception thrown when an integrity repair operation fails in an
   * unresolvable way.
   */
  public static class RegionRepairException extends IOException {
    private static final long serialVersionUID = 1L;
    final IOException ioe;
    public RegionRepairException(String s, IOException ioe) {
      super(s);
      this.ioe = ioe;
    }
  }

  /**
   * Populate hbi's from regionInfos loaded from file system.
   */
  private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
      throws IOException, InterruptedException {
    tablesInfo.clear(); // regenerating the data
    // generate region split structure
    Collection<HbckInfo> hbckInfos = regionInfoMap.values();

    // Parallelized read of .regioninfo files.
    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
    List<Future<Void>> hbiFutures;

    for (HbckInfo hbi : hbckInfos) {
      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
      hbis.add(work);
    }

    // Submit and wait for completion
    hbiFutures = executor.invokeAll(hbis);

    for (int i = 0; i < hbiFutures.size(); i++) {
      WorkItemHdfsRegionInfo work = hbis.get(i);
      Future<Void> f = hbiFutures.get(i);
      try {
        f.get();
      } catch (ExecutionException e) {
        LOG.warn("Failed to read .regioninfo file for region " +
              work.hbi.getRegionNameAsString(), e.getCause());
      }
    }

    Path hbaseRoot = FSUtils.getRootDir(getConf());
    FileSystem fs = hbaseRoot.getFileSystem(getConf());
    // Serialized table info gathering.
    for (HbckInfo hbi: hbckInfos) {

      if (hbi.getHdfsHRI() == null) {
        // was an orphan
        continue;
      }

      // get table name from hdfs, populate various HBaseFsck tables.
      TableName tableName = hbi.getTableName();
      if (tableName == null) {
        // There was an entry in hbase:meta not in the HDFS?
        LOG.warn("tableName was null for: " + hbi);
        continue;
      }

      TableInfo modTInfo = tablesInfo.get(tableName);
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
              FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
          if (!orphanTableDirs.containsKey(tableName)) {
            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
            // should only report once for each table
            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
            Set<String> columns = new HashSet<String>();
            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
          }
        }
      }
      if (!hbi.isSkipChecks()) {
        modTInfo.addRegionInfo(hbi);
      }
    }

    loadTableInfosForTablesWithNoRegion();
    errors.print("");

    return tablesInfo;
  }

  /**
   * Get the column family list according to the column family dirs.
   * @param columns set that the column family names are added to
   * @param hbi hbck info for the region whose family dirs should be listed
   * @return a set of column families
   * @throws IOException if the region dir cannot be listed
   */
  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
    Path regionDir = hbi.getHdfsRegionDir();
    FileSystem fs = regionDir.getFileSystem(getConf());
    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
    for (FileStatus subdir : subDirs) {
      String columnfamily = subdir.getPath().getName();
      columns.add(columnfamily);
    }
    return columns;
  }

  /**
   * Fabricate a .tableinfo file with the following contents:<br>
   * 1. the correct table name <br>
   * 2. the correct column family list<br>
   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
   * @throws IOException if the table descriptor cannot be written
   */
  private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
      Set<String> columns) throws IOException {
    if (columns == null || columns.isEmpty()) return false;
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (String columnfamily : columns) {
      htd.addFamily(new HColumnDescriptor(columnfamily));
    }
    fstd.createTableDescriptor(htd, true);
    return true;
  }
1242 
1243   /**
1244    * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1245    * @throws IOException
1246    */
1247   public void fixEmptyMetaCells() throws IOException {
1248     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1249       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1250       for (Result region : emptyRegionInfoQualifiers) {
1251         deleteMetaRegion(region.getRow());
1252         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1253       }
1254       emptyRegionInfoQualifiers.clear();
1255     }
1256   }
1257 
1258   /**
1259    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1260    * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1261    * 2. else create a default .tableinfo file with following items<br>
1262    * &nbsp;2.1 the correct tablename <br>
1263    * &nbsp;2.2 the correct colfamily list<br>
1264    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1265    * @throws IOException
1266    */
1267   public void fixOrphanTables() throws IOException {
1268     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1269 
1270       List<TableName> tmpList = new ArrayList<TableName>();
1271       tmpList.addAll(orphanTableDirs.keySet());
1272       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1273       Iterator<Entry<TableName, Set<String>>> iter =
1274           orphanTableDirs.entrySet().iterator();
1275       int j = 0;
1276       int numFailedCase = 0;
1277       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1278       while (iter.hasNext()) {
1279         Entry<TableName, Set<String>> entry =
1280             iter.next();
1281         TableName tableName = entry.getKey();
1282         LOG.info("Trying to fix orphan table error: " + tableName);
1283         if (j < htds.length) {
1284           if (tableName.equals(htds[j].getTableName())) {
1285             HTableDescriptor htd = htds[j];
1286             LOG.info("fixing orphan table: " + tableName + " from cache");
1287             fstd.createTableDescriptor(htd, true);
1288             j++;
1289             iter.remove();
1290           }
1291         } else {
1292           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1293             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1294             LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
1295             iter.remove();
1296           } else {
1297             LOG.error("Unable to create default .tableinfo for " + tableName + " because column family information is missing");
1298             numFailedCase++;
1299           }
1300         }
1301         fixes++;
1302       }
1303 
1304       if (orphanTableDirs.isEmpty()) {
1305         // all orphanTableDirs were recovered
1306         // re-run doFsck after recovering the .tableinfo file
1307         setShouldRerun();
1308         LOG.warn("Strongly recommend re-running hbck manually after all orphanTableDirs have been fixed");
1309       } else if (numFailedCase > 0) {
1310         LOG.error("Failed to fix " + numFailedCase
1311             + " OrphanTables with default .tableinfo files");
1312       }
1313 
1314     }
1315     //cleanup the list
1316     orphanTableDirs.clear();
1317 
1318   }
1319 
1320   /**
1321    * This borrows code from MasterFileSystem.bootstrap()
1322    *
1323    * @return an open hbase:meta HRegion
1324    */
1325   private HRegion createNewMeta() throws IOException {
1326     Path rootdir = FSUtils.getRootDir(getConf());
1327     Configuration c = getConf();
1328     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1329     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1330     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1331     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1332     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1333     return meta;
1334   }
1335 
1336   /**
1337    * Generate a set of puts to add to new meta.  This expects the tables to be
1338    * clean with no overlaps or holes.  If there are any problems it returns null.
1339    *
1340    * @return An array list of puts to do in bulk, null if tables have problems
1341    */
1342   private ArrayList<Put> generatePuts(
1343       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1344     ArrayList<Put> puts = new ArrayList<Put>();
1345     boolean hasProblems = false;
1346     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1347       TableName name = e.getKey();
1348 
1349       // skip "hbase:meta"
1350       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1351         continue;
1352       }
1353 
1354       TableInfo ti = e.getValue();
1355       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1356           .entrySet()) {
1357         Collection<HbckInfo> his = spl.getValue();
1358         int sz = his.size();
1359         if (sz != 1) {
1360           // problem
1361           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1362               + " had " + sz + " regions instead of exactly 1.");
1363           hasProblems = true;
1364           continue;
1365         }
1366 
1367         // add the row directly to meta.
1368         HbckInfo hi = his.iterator().next();
1369         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1370         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1371         puts.add(p);
1372       }
1373     }
1374     return hasProblems ? null : puts;
1375   }
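
       // For reference, each put generated above carries the serialized HRegionInfo
       // under the catalog family of the region's meta row (a sketch, not the
       // complete set of columns meta may hold):
       //
       //   Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
       //   // row key = region name, column = HConstants.CATALOG_FAMILY:REGIONINFO_QUALIFIER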
1376 
1377   /**
1378    * Suggest fixes for each table
1379    */
1380   private void suggestFixes(
1381       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1382     logParallelMerge();
1383     for (TableInfo tInfo : tablesInfo.values()) {
1384       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1385       tInfo.checkRegionChain(handler);
1386     }
1387   }
1388 
1389   /**
1390    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1391    * settings passed into hbck constructor to point to a particular fs/dir.
1392    *
1393    * @param fix flag that determines if method should attempt to fix holes
1394    * @return true if successful, false if attempt failed.
1395    */
1396   public boolean rebuildMeta(boolean fix) throws IOException,
1397       InterruptedException {
1398 
1399     // TODO check to make sure hbase is offline. (or at least the table
1400     // currently being worked on is off line)
1401 
1402     // Determine what's on HDFS
1403     LOG.info("Loading HBase regioninfo from HDFS...");
1404     loadHdfsRegionDirs(); // populating regioninfo table.
1405 
1406     int errs = errors.getErrorList().size();
1407     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1408     checkHdfsIntegrity(false, false);
1409 
1410     // make sure ok.
1411     if (errors.getErrorList().size() != errs) {
1412       // While in error state, iterate until no more fixes possible
1413       while(true) {
1414         fixes = 0;
1415         suggestFixes(tablesInfo);
1416         errors.clear();
1417         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1418         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1419 
1420         int errCount = errors.getErrorList().size();
1421 
1422         if (fixes == 0) {
1423           if (errCount > 0) {
1424             return false; // failed to fix problems.
1425           } else {
1426             break; // no fixes and no problems? drop out and fix stuff!
1427           }
1428         }
1429       }
1430     }
1431 
1432     // we can rebuild, move old meta out of the way and start
1433     LOG.info("HDFS regioninfos seem good.  Sidelining old hbase:meta");
1434     Path backupDir = sidelineOldMeta();
1435 
1436     LOG.info("Creating new hbase:meta");
1437     HRegion meta = createNewMeta();
1438 
1439     // populate meta
1440     List<Put> puts = generatePuts(tablesInfo);
1441     if (puts == null) {
1442       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1443         "You may need to restore the previously sidelined hbase:meta");
1444       return false;
1445     }
1446     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1447     HRegion.closeHRegion(meta);
1448     LOG.info("Success! hbase:meta table rebuilt.");
1449     LOG.info("Old hbase:meta is moved into " + backupDir);
1450     return true;
1451   }
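
       // rebuildMeta is meant to run against an offline cluster; a hedged sketch of
       // driving it directly (OfflineMetaRepair is the usual command-line entry point):
       //
       //   HBaseFsck fsck = new HBaseFsck(conf);
       //   boolean ok = fsck.rebuildMeta(false); // false: do not attempt hole fixing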
1452 
1453   /**
1454    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1455    */
1456   private void logParallelMerge() {
1457     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1458       LOG.info("Handling overlap merges in parallel. Set hbasefsck.overlap.merge.parallel to" +
1459           " false to run serially.");
1460     } else {
1461       LOG.info("Handling overlap merges serially.  Set hbasefsck.overlap.merge.parallel to" +
1462           " true to run in parallel.");
1463     }
1464   }
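
       // The flag read above is an ordinary boolean key; a sketch of forcing serial
       // overlap merges for a run:
       //
       //   Configuration conf = HBaseConfiguration.create();
       //   conf.setBoolean("hbasefsck.overlap.merge.parallel", false);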
1465 
1466   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1467       boolean fixOverlaps) throws IOException {
1468     LOG.info("Checking HBase region split map from HDFS data...");
1469     logParallelMerge();
1470     for (TableInfo tInfo : tablesInfo.values()) {
1471       TableIntegrityErrorHandler handler;
1472       if (fixHoles || fixOverlaps) {
1473         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1474           fixHoles, fixOverlaps);
1475       } else {
1476         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1477       }
1478       if (!tInfo.checkRegionChain(handler)) {
1479         // should dump info as well.
1480         errors.report("Found inconsistency in table " + tInfo.getName());
1481       }
1482     }
1483     return tablesInfo;
1484   }
1485 
1486   private Path getSidelineDir() throws IOException {
1487     if (sidelineDir == null) {
1488       Path hbaseDir = FSUtils.getRootDir(getConf());
1489       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1490       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1491           + startMillis);
1492     }
1493     return sidelineDir;
1494   }
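
       // Resulting layout (sketch): with a root dir of hdfs://ns/hbase and a start
       // time of 1400000000000, the dir computed above would be
       //   hdfs://ns/hbase/<HBCK_SIDELINEDIR_NAME>/hbase-1400000000000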
1495 
1496   /**
1497    * Sideline a region dir (instead of deleting it)
1498    */
1499   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1500     return sidelineRegionDir(fs, null, hi);
1501   }
1502 
1503   /**
1504    * Sideline a region dir (instead of deleting it)
1505    *
1506    * @param parentDir if specified, the region will be sidelined to
1507    * folder like .../parentDir/<table name>/<region name>. The purpose
1508    * is to group together sidelined regions that are similar, for example,
1509    * regions that should be bulk loaded back later on. If null, it is ignored.
1510    */
1511   Path sidelineRegionDir(FileSystem fs,
1512       String parentDir, HbckInfo hi) throws IOException {
1513     TableName tableName = hi.getTableName();
1514     Path regionDir = hi.getHdfsRegionDir();
1515 
1516     if (!fs.exists(regionDir)) {
1517       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1518       return null;
1519     }
1520 
1521     Path rootDir = getSidelineDir();
1522     if (parentDir != null) {
1523       rootDir = new Path(rootDir, parentDir);
1524     }
1525     Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName);
1526     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1527     fs.mkdirs(sidelineRegionDir);
1528     boolean success = false;
1529     FileStatus[] cfs =  fs.listStatus(regionDir);
1530     if (cfs == null) {
1531       LOG.info("Region dir is empty: " + regionDir);
1532     } else {
1533       for (FileStatus cf : cfs) {
1534         Path src = cf.getPath();
1535         Path dst = new Path(sidelineRegionDir, src.getName());
1536         if (fs.isFile(src)) {
1537           // simple file
1538           success = fs.rename(src, dst);
1539           if (!success) {
1540             String msg = "Unable to rename file " + src + " to " + dst;
1541             LOG.error(msg);
1542             throw new IOException(msg);
1543           }
1544           continue;
1545         }
1546 
1547         // is a directory.
1548         fs.mkdirs(dst);
1549 
1550         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1551         // FileSystem.rename is inconsistent with directories -- if the
1552         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1553         // it moves the src into the dst dir resulting in (foo/a/b).  If
1554         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1555         FileStatus[] hfiles = fs.listStatus(src);
1556         if (hfiles != null && hfiles.length > 0) {
1557           for (FileStatus hfile : hfiles) {
1558             success = fs.rename(hfile.getPath(), dst);
1559             if (!success) {
1560               String msg = "Unable to rename file " + hfile.getPath() + " to " + dst;
1561               LOG.error(msg);
1562               throw new IOException(msg);
1563             }
1564           }
1565         }
1566         LOG.debug("Sideline directory contents:");
1567         debugLsr(sidelineRegionDir);
1568       }
1569     }
1570 
1571     LOG.info("Removing old region dir: " + regionDir);
1572     success = fs.delete(regionDir, true);
1573     if (!success) {
1574       String msg = "Unable to delete dir " + regionDir;
1575       LOG.error(msg);
1576       throw new IOException(msg);
1577     }
1578     return sidelineRegionDir;
1579   }
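
       // The FileSystem.rename caveat handled above, as a two-line sketch
       // (paths hypothetical):
       //
       //   fs.rename(new Path("foo/b"), new Path("foo/a")); // foo/a is an existing dir -> foo/a/b
       //   fs.rename(new Path("foo/b"), new Path("foo/c")); // foo/c absent -> src becomes foo/c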
1580 
1581   /**
1582    * Sideline an entire table.
1583    */
1584   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1585       Path backupHbaseDir) throws IOException {
1586     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1587     if (fs.exists(tableDir)) {
1588       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1589       fs.mkdirs(backupTableDir.getParent());
1590       boolean success = fs.rename(tableDir, backupTableDir);
1591       if (!success) {
1592         throw new IOException("Failed to move " + tableName + " from "
1593             + tableDir + " to " + backupTableDir);
1594       }
1595     } else {
1596       LOG.info("No previous " + tableName + " exists.  Continuing.");
1597     }
1598   }
1599 
1600   /**
1601    * @return Path to backup of original directory
1602    */
1603   Path sidelineOldMeta() throws IOException {
1604     // put current hbase:meta aside.
1605     Path hbaseDir = FSUtils.getRootDir(getConf());
1606     FileSystem fs = hbaseDir.getFileSystem(getConf());
1607     Path backupDir = getSidelineDir();
1608     fs.mkdirs(backupDir);
1609 
1610     try {
1611       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1612     } catch (IOException e) {
1613       LOG.fatal("Failed to sideline meta. Currently in an inconsistent state.  To restore, "
1614           + "try to rename hbase:meta in " + backupDir.getName() + " to "
1615           + hbaseDir.getName() + ".", e);
1616       throw e; // throw original exception
1617     }
1618     return backupDir;
1619   }
1620 
1621   /**
1622    * Load the list of disabled tables in ZK into local set.
1623    * @throws ZooKeeperConnectionException
1624    * @throws IOException
1625    */
1626   private void loadDisabledTables()
1627   throws ZooKeeperConnectionException, IOException {
1628     HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1629       @Override
1630       public Void connect(HConnection connection) throws IOException {
1631         ZooKeeperWatcher zkw = createZooKeeperWatcher();
1632         try {
1633           for (TableName tableName :
1634               ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
1635             disabledTables.add(tableName);
1636           }
1637         } catch (KeeperException ke) {
1638           throw new IOException(ke);
1639         } catch (InterruptedException e) {
1640           throw new InterruptedIOException();
1641         } finally {
1642           zkw.close();
1643         }
1644         return null;
1645       }
1646     });
1647   }
1648 
1649   /**
1650    * Check if the specified region's table is disabled.
1651    */
1652   private boolean isTableDisabled(HRegionInfo regionInfo) {
1653     return disabledTables.contains(regionInfo.getTable());
1654   }
1655 
1656   /**
1657    * Scan HDFS for all regions, recording their information into
1658    * regionInfoMap
1659    */
1660   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1661     Path rootDir = FSUtils.getRootDir(getConf());
1662     FileSystem fs = rootDir.getFileSystem(getConf());
1663 
1664     // list all tables from HDFS
1665     List<FileStatus> tableDirs = Lists.newArrayList();
1666 
1667     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1668 
1669     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1670     for (Path path : paths) {
1671       TableName tableName = FSUtils.getTableName(path);
1672       if ((!checkMetaOnly &&
1673           isTableIncluded(tableName)) ||
1674           tableName.equals(TableName.META_TABLE_NAME)) {
1675         tableDirs.add(fs.getFileStatus(path));
1676       }
1677     }
1678 
1679     // verify that version file exists
1680     if (!foundVersionFile) {
1681       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1682           "Version file does not exist in root dir " + rootDir);
1683       if (shouldFixVersionFile()) {
1684         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1685             + " file.");
1686         setShouldRerun();
1687         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1688             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1689             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1690             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1691       }
1692     }
1693 
1694     // Avoid multithreading at table-level because already multithreaded internally at
1695     // region-level.  Additionally multithreading at table-level can lead to deadlock
1696     // if there are many tables in the cluster.  Since there are a limited # of threads
1697     // in the executor's thread pool and if we multithread at the table-level by putting
1698     // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1699     // executor tied up solely in waiting for the tables' region-level calls to complete.
1700     // If there are enough tables then there will be no actual threads in the pool left
1701     // for the region-level callables to be serviced.
1702     for (FileStatus tableDir : tableDirs) {
1703       LOG.debug("Loading region dirs from " + tableDir.getPath());
1704       WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1705       try {
1706         item.call();
1707       } catch (ExecutionException e) {
1708         LOG.warn("Could not completely load table dir " +
1709             tableDir.getPath(), e.getCause());
1710       }
1711     }
1712     errors.print("");
1713   }
1714 
1715   /**
1716    * Record the location of the hbase:meta region as found in ZooKeeper.
1717    */
1718   private boolean recordMetaRegion() throws IOException {
1719     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1720         HConstants.EMPTY_START_ROW, false, false);
1721     if (rl == null) {
1722       errors.reportError(ERROR_CODE.NULL_META_REGION,
1723           "META region or some of its attributes are null.");
1724       return false;
1725     }
1726     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1727       // Check if Meta region is valid and existing
1728       if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1729           metaLocation.getHostname() == null) {
1730         errors.reportError(ERROR_CODE.NULL_META_REGION,
1731             "META region or some of its attributes are null.");
1732         return false;
1733       }
1734       ServerName sn = metaLocation.getServerName();
1735       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1736       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1737       if (hbckInfo == null) {
1738         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1739       } else {
1740         hbckInfo.metaEntry = m;
1741       }
1742     }
1743     return true;
1744   }
1745 
1746   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1747     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1748       @Override
1749       public void abort(String why, Throwable e) {
1750         LOG.error(why, e);
1751         System.exit(1);
1752       }
1753 
1754       @Override
1755       public boolean isAborted() {
1756         return false;
1757       }
1758 
1759     });
1760   }
1761 
1762   private ServerName getMetaRegionServerName(int replicaId)
1763   throws IOException, KeeperException {
1764     ZooKeeperWatcher zkw = createZooKeeperWatcher();
1765     ServerName sn = null;
1766     try {
1767       sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
1768     } finally {
1769       zkw.close();
1770     }
1771     return sn;
1772   }
1773 
1774   /**
1775    * Contacts each regionserver and fetches metadata about regions.
1776    * @param regionServerList - the list of region servers to connect to
1777    * @throws IOException if a remote or network exception occurs
1778    */
1779   void processRegionServers(Collection<ServerName> regionServerList)
1780     throws IOException, InterruptedException {
1781 
1782     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1783     List<Future<Void>> workFutures;
1784 
1785     // loop to contact each region server in parallel
1786     for (ServerName rsinfo: regionServerList) {
1787       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1788     }
1789 
1790     workFutures = executor.invokeAll(workItems);
1791 
1792     for(int i=0; i<workFutures.size(); i++) {
1793       WorkItemRegion item = workItems.get(i);
1794       Future<Void> f = workFutures.get(i);
1795       try {
1796         f.get();
1797       } catch(ExecutionException e) {
1798         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1799             e.getCause());
1800       }
1801     }
1802   }
1803 
1804   /**
1805    * Check consistency of all regions that have been found in previous phases.
1806    */
1807   private void checkAndFixConsistency()
1808   throws IOException, KeeperException, InterruptedException {
1809     // Divide the checks into two phases. One for default/primary replicas and another
1810     // for the non-primary ones. Keeps code cleaner this way.
1811     List<CheckRegionConsistencyWorkItem> workItems =
1812         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1813     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1814       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1815         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1816       }
1817     }
1818     checkRegionConsistencyConcurrently(workItems);
1819 
1820     boolean prevHdfsCheck = shouldCheckHdfs();
1821     setCheckHdfs(false); //replicas don't have any hdfs data
1822     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1823     // deployed/undeployed replicas.
1824     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1825         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1826     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1827       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1828         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1829       }
1830     }
1831     checkRegionConsistencyConcurrently(replicaWorkItems);
1832     setCheckHdfs(prevHdfsCheck);
1833 
1834     // If some regions are skipped during the checkRegionConsistencyConcurrently() phase, we might
1835     // not get an accurate state of HBase if we continue. The config here allows users to tune
1836     // the tolerated number of skipped regions.
1837     // TODO: evaluate the consequence to continue the hbck operation without config.
1838     int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1839     int numOfSkippedRegions = skippedRegions.size();
1840     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1841       throw new IOException(numOfSkippedRegions
1842         + " region(s) could not be checked or repaired.  See logs for detail.");
1843     }
1844   }
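
       // The threshold read above defaults to 0, i.e. abort if any region was
       // skipped; a sketch of tolerating up to 10 skipped regions per run:
       //
       //   getConf().setInt("hbase.hbck.skipped.regions.limit", 10);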
1845 
1846   /**
1847    * Check consistency of all regions using multiple threads concurrently.
1848    */
1849   private void checkRegionConsistencyConcurrently(
1850     final List<CheckRegionConsistencyWorkItem> workItems)
1851     throws IOException, KeeperException, InterruptedException {
1852     if (workItems.isEmpty()) {
1853       return;  // nothing to check
1854     }
1855 
1856     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1857     for(Future<Void> f: workFutures) {
1858       try {
1859         f.get();
1860       } catch(ExecutionException e1) {
1861         LOG.warn("Could not check region consistency", e1.getCause());
1862         if (e1.getCause() instanceof IOException) {
1863           throw (IOException)e1.getCause();
1864         } else if (e1.getCause() instanceof KeeperException) {
1865           throw (KeeperException)e1.getCause();
1866         } else if (e1.getCause() instanceof InterruptedException) {
1867           throw (InterruptedException)e1.getCause();
1868         } else {
1869           throw new IOException(e1.getCause());
1870         }
1871       }
1872     }
1873   }
1874 
1875   class CheckRegionConsistencyWorkItem implements Callable<Void> {
1876     private final String key;
1877     private final HbckInfo hbi;
1878 
1879     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
1880       this.key = key;
1881       this.hbi = hbi;
1882     }
1883 
1884     @Override
1885     public synchronized Void call() throws Exception {
1886       try {
1887         checkRegionConsistency(key, hbi);
1888       } catch (Exception e) {
1889         // If the region is a non-META region, skip it and send a warning/error message; if
1890         // the region is a META region, we should not continue.
1891         LOG.warn("Unable to complete the check or repair of region '" + hbi.getRegionNameAsString()
1892           + "'.", e);
1893         if (hbi.getHdfsHRI().isMetaRegion()) {
1894           throw e;
1895         }
1896         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
1897         addSkippedRegion(hbi);
1898       }
1899       return null;
1900     }
1901   }
1902 
1903   private void addSkippedRegion(final HbckInfo hbi) {
1904     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
1905     if (skippedRegionNames == null) {
1906       skippedRegionNames = new HashSet<String>();
1907     }
1908     skippedRegionNames.add(hbi.getRegionNameAsString());
1909     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
1910   }
1911 
1912   private void preCheckPermission() throws IOException, AccessDeniedException {
1913     if (shouldIgnorePreCheckPermission()) {
1914       return;
1915     }
1916 
1917     Path hbaseDir = FSUtils.getRootDir(getConf());
1918     FileSystem fs = hbaseDir.getFileSystem(getConf());
1919     UserProvider userProvider = UserProvider.instantiate(getConf());
1920     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
1921     FileStatus[] files = fs.listStatus(hbaseDir);
1922     for (FileStatus file : files) {
1923       try {
1924         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
1925       } catch (AccessDeniedException ace) {
1926         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
1927         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
1928           + " does not have write perms to " + file.getPath()
1929           + ". Please rerun hbck as hdfs user " + file.getOwner());
1930         throw ace;
1931       }
1932     }
1933   }
1934 
1935   /**
1936    * Deletes region from meta table
1937    */
1938   private void deleteMetaRegion(HbckInfo hi) throws IOException {
1939     deleteMetaRegion(hi.metaEntry.getRegionName());
1940   }
1941 
1942   /**
1943    * Deletes region from meta table
1944    */
1945   private void deleteMetaRegion(byte[] metaKey) throws IOException {
1946     Delete d = new Delete(metaKey);
1947     meta.delete(d);
1948     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
1949   }
1950 
1951   /**
1952    * Reset the split parent region info in meta table
1953    */
1954   private void resetSplitParent(HbckInfo hi) throws IOException {
1955     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
1956     Delete d = new Delete(hi.metaEntry.getRegionName());
1957     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
1958     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
1959     mutations.add(d);
1960 
1961     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
1962     hri.setOffline(false);
1963     hri.setSplit(false);
1964     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1965     mutations.add(p);
1966 
1967     meta.mutateRow(mutations);
1968     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
1969   }
1970 
1971   /**
1972    * This is a backwards-compatibility wrapper for permanently offlining a region
1973    * that should not be alive.  If the region server does not support the
1974    * "offline" method, it will use the closest unassign method instead.  This
1975    * will basically work until one attempts to disable or delete the affected
1976    * table.  The problem has to do with in-memory only master state, so
1977    * restarting the HMaster or failing over to another should fix this.
1978    */
1979   private void offline(byte[] regionName) throws IOException {
1980     String regionString = Bytes.toStringBinary(regionName);
1981     if (!rsSupportsOffline) {
1982       LOG.warn("Using unassign region " + regionString
1983           + " instead of using offline method, you should"
1984           + " restart HMaster after these repairs");
1985       admin.unassign(regionName, true);
1986       return;
1987     }
1988 
1989     // first time we assume the rs's supports #offline.
1990     try {
1991       LOG.info("Offlining region " + regionString);
1992       admin.offline(regionName);
1993     } catch (IOException ioe) {
1994       String notFoundMsg = "java.lang.NoSuchMethodException: " +
1995         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
1996       if (ioe.getMessage().contains(notFoundMsg)) {
1997         LOG.warn("Using unassign region " + regionString
1998             + " instead of using offline method, you should"
1999             + " restart HMaster after these repairs");
2000         rsSupportsOffline = false; // in the future just use unassign
2001         admin.unassign(regionName, true);
2002         return;
2003       }
2004       throw ioe;
2005     }
2006   }
2007 
2008   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2009     undeployRegionsForHbi(hi);
2010     // undeploy replicas of the region (but only if the method is invoked for the primary)
2011     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2012       return;
2013     }
2014     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2015     for (int i = 1; i < numReplicas; i++) {
2016       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2017       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2018           hi.getPrimaryHRIForDeployedReplica(), i);
2019       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2020       if (h != null) {
2021         undeployRegionsForHbi(h);
2022         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2023         //in consistency checks
2024         h.setSkipChecks(true);
2025       }
2026     }
2027   }
2028 
2029   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2030     for (OnlineEntry rse : hi.deployedEntries) {
2031       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2032       try {
2033         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2034         offline(rse.hri.getRegionName());
2035       } catch (IOException ioe) {
2036         LOG.warn("Got exception when attempting to offline region "
2037             + Bytes.toString(rse.hri.getRegionName()), ioe);
2038       }
2039     }
2040   }
2041 
2042   /**
2043    * Attempts to undeploy a region from a region server based on information in
2044    * META.  Any operations that modify the file system should make sure that
2045    * its corresponding region is not deployed to prevent data races.
2046    *
2047    * A separate call is required to update the master in-memory region state
2048    * kept in the AssignmentManager.  Because disable uses this state instead of
2049    * that found in META, we can't seem to cleanly disable/delete tables that
2050    * have been hbck fixed.  When used on a version of HBase that does not have
2051    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
2052    * restart or failover may be required.
2053    */
2054   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2055     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2056       undeployRegions(hi);
2057       return;
2058     }
2059 
2060     // get assignment info and hregioninfo from meta.
2061     Get get = new Get(hi.getRegionName());
2062     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2063     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2064     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2065     // also get the locations of the replicas to close if the primary region is being closed
2066     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2067       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2068       for (int i = 0; i < numReplicas; i++) {
2069         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2070         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2071       }
2072     }
2073     Result r = meta.get(get);
2074     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2075     if (rl == null) {
2076       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2077           " since meta does not have a handle to reach it");
2078       return;
2079     }
2080     for (HRegionLocation h : rl.getRegionLocations()) {
2081       ServerName serverName = h.getServerName();
2082       if (serverName == null) {
2083         errors.reportError("Unable to close region "
2084             + hi.getRegionNameAsString() + " because meta does not "
2085             + "have a handle to reach it.");
2086         continue;
2087       }
2088       HRegionInfo hri = h.getRegionInfo();
2089       if (hri == null) {
2090         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2091             + " because hbase:meta had invalid or missing "
2092             + HConstants.CATALOG_FAMILY_STR + ":"
2093             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2094             + " qualifier value.");
2095         continue;
2096       }
2097       // close the region -- close files and remove assignment
2098       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2099     }
2100   }
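
       // For reference, the meta columns consulted above all live in the catalog
       // family; per replica i the server/startcode columns come from
       // MetaTableAccessor.getServerColumn(i) and getStartCodeColumn(i). A sketch
       // of reading just the primary assignment of a (hypothetical) region row:
       //
       //   Get g = new Get(regionName);
       //   g.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
       //   Result r = meta.get(g);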
2101 
2102   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2103     KeeperException, InterruptedException {
2104     // If we are trying to fix the errors
2105     if (shouldFixAssignments()) {
2106       errors.print(msg);
2107       undeployRegions(hbi);
2108       setShouldRerun();
2109       HRegionInfo hri = hbi.getHdfsHRI();
2110       if (hri == null) {
2111         hri = hbi.metaEntry;
2112       }
2113       HBaseFsckRepair.fixUnassigned(admin, hri);
2114       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2115 
2116       // also assign replicas if needed (do it only when this call operates on a primary replica)
2117       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2118       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2119       for (int i = 1; i < replicationCount; i++) {
2120         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2121         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2122         if (h != null) {
2123           undeployRegions(h);
2124           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2125           //in consistency checks
2126           h.setSkipChecks(true);
2127         }
2128         HBaseFsckRepair.fixUnassigned(admin, hri);
2129         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2130       }
2131 
2132     }
2133   }
2134 
2135   /**
2136    * Check a single region for consistency and correct deployment.
2137    */
2138   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2139   throws IOException, KeeperException, InterruptedException {
2140 
2141     if (hbi.isSkipChecks()) return;
2142     String descriptiveName = hbi.toString();
2143     boolean inMeta = hbi.metaEntry != null;
2144     // In case not checking HDFS, assume the region is on HDFS
2145     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2146     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2147     boolean isDeployed = !hbi.deployedOn.isEmpty();
2148     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2149     boolean deploymentMatchesMeta =
2150       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2151       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2152     boolean splitParent =
2153       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2154     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
2155     boolean recentlyModified = inHdfs &&
2156       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2157 
2158     // ========== First the healthy cases =============
2159     if (hbi.containsOnlyHdfsEdits()) {
2160       return;
2161     }
2162     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2163       return;
2164     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2165       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2166         "table that is not deployed");
2167       return;
2168     } else if (recentlyModified) {
2169       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2170       return;
2171     }
2172     // ========== Cases where the region is not in hbase:meta =============
2173     else if (!inMeta && !inHdfs && !isDeployed) {
2174       // We shouldn't have record of this region at all then!
2175       assert false : "Entry for region with no data";
2176     } else if (!inMeta && !inHdfs && isDeployed) {
2177       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2178           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2179           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2180       if (shouldFixAssignments()) {
2181         undeployRegions(hbi);
2182       }
2183 
2184     } else if (!inMeta && inHdfs && !isDeployed) {
2185       if (hbi.isMerged()) {
2186         // This region has already been merged, the remaining hdfs file will be
2187         // cleaned by CatalogJanitor later
2188         hbi.setSkipChecks(true);
2189         LOG.info("Region " + descriptiveName
2190             + " was merged recently; its file(s) will be cleaned by CatalogJanitor later");
2191         return;
2192       }
2193       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2194           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2195           "or deployed on any region server");
2196       // restore region consistency of an adopted orphan
2197       if (shouldFixMeta()) {
2198         if (!hbi.isHdfsRegioninfoPresent()) {
2199           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2200               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2201               " used.");
2202           return;
2203         }
2204 
2205         HRegionInfo hri = hbi.getHdfsHRI();
2206         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2207 
2208         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2209           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2210               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2211                 hri.getEndKey()) >= 0)
2212               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2213             if(region.isSplit() || region.isOffline()) continue;
2214             Path regionDir = hbi.getHdfsRegionDir();
2215             FileSystem fs = regionDir.getFileSystem(getConf());
2216             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2217             for (Path familyDir : familyDirs) {
2218               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2219               for (Path referenceFilePath : referenceFilePaths) {
2220                 Path parentRegionDir =
2221                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2222                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2223                   LOG.warn(hri + " start and stop keys are in the range of " + region
2224                       + ". The region might not have been cleaned up from hdfs when the split of region "
2225                       + region + " failed. Hence deleting it from hdfs.");
2226                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2227                     regionDir.getParent(), hri);
2228                   return;
2229                 }
2230               }
2231             }
2232           }
2233         }
2234 
2235         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2236         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2237         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2238             admin.getClusterStatus().getServers(), numReplicas);
2239 
2240         tryAssignmentRepair(hbi, "Trying to reassign region...");
2241       }
2242 
2243     } else if (!inMeta && inHdfs && isDeployed) {
2244       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2245           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2246       debugLsr(hbi.getHdfsRegionDir());
2247       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2248         // for replicas, this means that we should undeploy the region (we would have
2249         // gone over the primaries and fixed meta holes in first phase under
2250         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2251         // this stage unless unwanted replica)
2252         if (shouldFixAssignments()) {
2253           undeployRegionsForHbi(hbi);
2254         }
2255       }
2256       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2257         if (!hbi.isHdfsRegioninfoPresent()) {
2258           LOG.error("This should have been repaired in table integrity repair phase");
2259           return;
2260         }
2261 
2262         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2263         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2264         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2265             admin.getClusterStatus().getServers(), numReplicas);
2266         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2267       }
2268 
2269     // ========== Cases where the region is in hbase:meta =============
2270     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2271       // check whether this is an actual error, or just transient state where parent
2272       // is not cleaned
2273       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2274         // check that split daughters are there
2275         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2276         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2277         if (infoA != null && infoB != null) {
2278           // we already processed or will process daughters. Move on, nothing to see here.
2279           hbi.setSkipChecks(true);
2280           return;
2281         }
2282       }
2283       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2284           + descriptiveName + " is a split parent in META, in HDFS, "
2285           + "and not deployed on any region server. This could be transient.");
2286       if (shouldFixSplitParents()) {
2287         setShouldRerun();
2288         resetSplitParent(hbi);
2289       }
2290     } else if (inMeta && !inHdfs && !isDeployed) {
2291       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2292           + descriptiveName + " found in META, but not in HDFS "
2293           + "or deployed on any region server.");
2294       if (shouldFixMeta()) {
2295         deleteMetaRegion(hbi);
2296       }
2297     } else if (inMeta && !inHdfs && isDeployed) {
2298       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2299           + " found in META, but not in HDFS, " +
2300           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2301       // We treat HDFS as ground truth.  Any information in meta is transient
2302       // and equivalent data can be regenerated.  So, lets unassign and remove
2303       // and equivalent data can be regenerated.  So, let's unassign and remove
2304       if (shouldFixAssignments()) {
2305         errors.print("Trying to fix unassigned region...");
2306         undeployRegions(hbi);
2307       }
2308       if (shouldFixMeta()) {
2309         // wait for it to complete
2310         deleteMetaRegion(hbi);
2311       }
2312     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2313       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2314           + " not deployed on any region server.");
2315       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2316     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2317       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2318           "Region " + descriptiveName + " should not be deployed according " +
2319           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2320       if (shouldFixAssignments()) {
2321         errors.print("Trying to close the region " + descriptiveName);
2322         setShouldRerun();
2323         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2324       }
2325     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2326       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2327           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2328           + " but is multiply assigned to region servers " +
2329           Joiner.on(", ").join(hbi.deployedOn));
2330       // If we are trying to fix the errors
2331       if (shouldFixAssignments()) {
2332         errors.print("Trying to fix assignment error...");
2333         setShouldRerun();
2334         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2335       }
2336     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2337       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2338           + descriptiveName + " listed in hbase:meta on region server " +
2339           hbi.metaEntry.regionServer + " but found on region server " +
2340           hbi.deployedOn.get(0));
2341       // If we are trying to fix the errors
2342       if (shouldFixAssignments()) {
2343         errors.print("Trying to fix assignment error...");
2344         setShouldRerun();
2345         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2346         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2347       }
2348     } else {
2349       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2350           " is in an unforeseen state:" +
2351           " inMeta=" + inMeta +
2352           " inHdfs=" + inHdfs +
2353           " isDeployed=" + isDeployed +
2354           " isMultiplyDeployed=" + isMultiplyDeployed +
2355           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2356           " shouldBeDeployed=" + shouldBeDeployed);
2357     }
2358   }
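
       // Partial summary of the decision table above (D = isDeployed):
       //   inMeta inHdfs  D      outcome
       //   yes    yes     match  healthy, nothing to do
       //   no     yes     no     NOT_IN_META_OR_DEPLOYED: patch meta, maybe reassign
       //   no     yes     yes    NOT_IN_META: patch meta / undeploy stray replica
       //   yes    no      no     NOT_IN_HDFS_OR_DEPLOYED: delete meta row
       //   yes    no      yes    NOT_IN_HDFS: unassign, then delete meta row
       //   yes    yes     no     NOT_DEPLOYED: assign
       //   yes    yes     multi  MULTI_DEPLOYED: fix multi-assignment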
2359 
2360   /**
2361    * Checks tables integrity. Goes over all regions and scans the tables.
2362    * Collects all the pieces for each table and checks if there are missing,
2363    * repeated or overlapping ones.
2364    * @throws IOException
2365    */
2366   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2367     tablesInfo = new TreeMap<TableName, TableInfo>();
2368     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2369     for (HbckInfo hbi : regionInfoMap.values()) {
2370       // Check only valid, working regions
2371       if (hbi.metaEntry == null) {
2372         // this assumes that consistency check has run loadMetaEntry
2373         Path p = hbi.getHdfsRegionDir();
2374         if (p == null) {
2375           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2376         }
2377 
2378         // TODO test.
2379         continue;
2380       }
2381       if (hbi.metaEntry.regionServer == null) {
2382         errors.detail("Skipping region because no region server: " + hbi);
2383         continue;
2384       }
2385       if (hbi.metaEntry.isOffline()) {
2386         errors.detail("Skipping region because it is offline: " + hbi);
2387         continue;
2388       }
2389       if (hbi.containsOnlyHdfsEdits()) {
2390         errors.detail("Skipping region because it only contains edits: " + hbi);
2391         continue;
2392       }
2393 
2394       // Missing regionDir or over-deployment is checked elsewhere. Include
2395       // these cases in modTInfo, so we can evaluate those regions as part of
2396       // the region chain in META
2397       //if (hbi.foundRegionDir == null) continue;
2398       //if (hbi.deployedOn.size() != 1) continue;
2399       if (hbi.deployedOn.size() == 0) continue;
2400 
2401       // We should be safe here
2402       TableName tableName = hbi.metaEntry.getTable();
2403       TableInfo modTInfo = tablesInfo.get(tableName);
2404       if (modTInfo == null) {
2405         modTInfo = new TableInfo(tableName);
2406       }
2407       for (ServerName server : hbi.deployedOn) {
2408         modTInfo.addServer(server);
2409       }
2410 
2411       if (!hbi.isSkipChecks()) {
2412         modTInfo.addRegionInfo(hbi);
2413       }
2414 
2415       tablesInfo.put(tableName, modTInfo);
2416     }
2417 
2418     loadTableInfosForTablesWithNoRegion();
2419 
2420     logParallelMerge();
2421     for (TableInfo tInfo : tablesInfo.values()) {
2422       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2423       if (!tInfo.checkRegionChain(handler)) {
2424         errors.report("Found inconsistency in table " + tInfo.getName());
2425       }
2426     }
2427     return tablesInfo;
2428   }
2429 
2430   /** Loads table infos for tables that may not have been included because no
2431    * regions were reported for the table, but the table dir exists in hdfs
2432    */
2433   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2434     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2435     for (HTableDescriptor htd : allTables.values()) {
2436       if (checkMetaOnly && !htd.isMetaTable()) {
2437         continue;
2438       }
2439 
2440       TableName tableName = htd.getTableName();
2441       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2442         TableInfo tableInfo = new TableInfo(tableName);
2443         tableInfo.htds.add(htd);
2444         tablesInfo.put(htd.getTableName(), tableInfo);
2445       }
2446     }
2447   }
2448 
2449   /**
2450    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2451    * @return number of file move fixes done to merge regions.
2452    */
2453   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2454     int fileMoves = 0;
2455     String thread = Thread.currentThread().getName();
2456     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2457     debugLsr(contained.getHdfsRegionDir());
2458 
2459     // rename the contained into the container.
2460     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2461     FileStatus[] dirs = null;
2462     try {
2463       dirs = fs.listStatus(contained.getHdfsRegionDir());
2464     } catch (FileNotFoundException fnfe) {
2465       // region we are attempting to merge in is not present!  Since this is a merge, there is
2466       // no harm skipping this region if it does not exist.
2467       if (!fs.exists(contained.getHdfsRegionDir())) {
2468         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2469             + " is missing. Assuming already sidelined or moved.");
2470       } else {
2471         sidelineRegionDir(fs, contained);
2472       }
2473       return fileMoves;
2474     }
2475 
2476     if (dirs == null) {
2477       if (!fs.exists(contained.getHdfsRegionDir())) {
2478         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2479             + " already sidelined.");
2480       } else {
2481         sidelineRegionDir(fs, contained);
2482       }
2483       return fileMoves;
2484     }
2485 
2486     for (FileStatus cf : dirs) {
2487       Path src = cf.getPath();
2488       Path dst = new Path(targetRegionDir, src.getName());
2489 
2490       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2491         // do not copy the old .regioninfo file.
2492         continue;
2493       }
2494 
2495       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2496         // do not copy the .oldlogs files
2497         continue;
2498       }
2499 
2500       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2501       // FileSystem.rename is inconsistent with directories -- if the
2502       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2503       // it moves the src into the dst dir resulting in (foo/a/b).  If
2504       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2505       for (FileStatus hfile : fs.listStatus(src)) {
2506         boolean success = fs.rename(hfile.getPath(), dst);
2507         if (success) {
2508           fileMoves++;
2509         }
2510       }
2511       LOG.debug("[" + thread + "] Sideline directory contents:");
2512       debugLsr(targetRegionDir);
2513     }
2514 
2515     // if all success.
2516     sidelineRegionDir(fs, contained);
2517     LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into " +
2518         getSidelineDir());
2519     debugLsr(contained.getHdfsRegionDir());
2520 
2521     return fileMoves;
2522   }
2523 
2524 
2525   static class WorkItemOverlapMerge implements Callable<Void> {
2526     private TableIntegrityErrorHandler handler;
2527     Collection<HbckInfo> overlapgroup;
2528 
2529     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2530       this.handler = handler;
2531       this.overlapgroup = overlapgroup;
2532     }
2533 
2534     @Override
2535     public Void call() throws Exception {
2536       handler.handleOverlapGroup(overlapgroup);
2537       return null;
2538     }
2539   };
2540 
2541 
2542   /**
2543    * Maintain information about a particular table.
2544    */
2545   public class TableInfo {
2546     TableName tableName;
2547     TreeSet <ServerName> deployedOn;
2548 
2549     // backwards regions
2550     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2551 
2552     // sidelined big overlapped regions
2553     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2554 
2555     // region split calculator
2556     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2557 
2558     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2559     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2560 
2561     // key = start split, values = set of splits in problem group
2562     final Multimap<byte[], HbckInfo> overlapGroups =
2563       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
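    // Hedged example of how this multimap is populated (hypothetical regions):
    // if A = [b,d) and B = [b,e) share start key 'b', checkRegionChain() calls
    //   overlapGroups.putAll(problemKey, ranges);  // problemKey = b, ranges = {A, B}
    // so each problem key collects every region involved in one overlap group.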
2564 
2565     // list of regions derived from meta entries.
2566     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2567 
2568     TableInfo(TableName name) {
2569       this.tableName = name;
2570       deployedOn = new TreeSet<ServerName>();
2571     }
2572 
2573     /**
2574      * @return descriptor common to all regions, or null if there are none or multiple.
2575      */
2576     private HTableDescriptor getHTD() {
2577       if (htds.size() == 1) {
2578         return (HTableDescriptor)htds.toArray()[0];
2579       } else {
2580         LOG.error("None/Multiple table descriptors found for table '"
2581           + tableName + "' regions: " + htds);
2582       }
2583       return null;
2584     }
2585 
2586     public void addRegionInfo(HbckInfo hir) {
2587       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2588         // end key is absolute end key, just add it.
2589         // ignore replicas other than primary for these checks
2590         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2591         return;
2592       }
2593 
2594       // if not the absolute end key, check for cycle
2595       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2596         errors.reportError(
2597             ERROR_CODE.REGION_CYCLE,
2598             String.format("The endkey for this region comes before the "
2599                 + "startkey, startkey=%s, endkey=%s",
2600                 Bytes.toStringBinary(hir.getStartKey()),
2601                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2602         backwards.add(hir);
2603         return;
2604       }
2605 
2606       // main case, add to split calculator
2607       // ignore replicas other than primary for these checks
2608       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2609     }
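    // Hedged example of the cycle check above (hypothetical keys): a region
    // with startKey = "m" and endKey = "a" has its end before its start, so it
    // is reported as ERROR_CODE.REGION_CYCLE and parked in 'backwards' rather
    // than fed to the split calculator.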
2610 
2611     public void addServer(ServerName server) {
2612       this.deployedOn.add(server);
2613     }
2614 
2615     public TableName getName() {
2616       return tableName;
2617     }
2618 
2619     public int getNumRegions() {
2620       return sc.getStarts().size() + backwards.size();
2621     }
2622 
2623     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2624       // lazy loaded, synchronized to ensure a single load
2625       if (regionsFromMeta == null) {
2626         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2627         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2628           if (tableName.equals(h.getTableName())) {
2629             if (h.metaEntry != null) {
2630               regions.add((HRegionInfo) h.metaEntry);
2631             }
2632           }
2633         }
2634         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2635       }
2636       
2637       return regionsFromMeta;
2638     }
2639     
2640 
2641     private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2642       ErrorReporter errors;
2643 
2644       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2645         this.errors = errors;
2646         setTableInfo(ti);
2647       }
2648 
2649       @Override
2650       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2651         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2652             "First region should start with an empty key.  You need to "
2653             + "create a new region and regioninfo in HDFS to plug the hole.",
2654             getTableInfo(), hi);
2655       }
2656 
2657       @Override
2658       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2659         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2660             "Last region should end with an empty key. You need to "
2661                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2662       }
2663 
2664       @Override
2665       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2666         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2667             "Region has the same start and end key.", getTableInfo(), hi);
2668       }
2669 
2670       @Override
2671       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2672         byte[] key = r1.getStartKey();
2673         // dup start key
2674         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2675             "Multiple regions have the same startkey: "
2676             + Bytes.toStringBinary(key), getTableInfo(), r1);
2677         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2678             "Multiple regions have the same startkey: "
2679             + Bytes.toStringBinary(key), getTableInfo(), r2);
2680       }
2681 
2682       @Override
2683       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2684         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2685             "There is an overlap in the region chain.",
2686             getTableInfo(), hi1, hi2);
2687       }
2688 
2689       @Override
2690       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2691         errors.reportError(
2692             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2693             "There is a hole in the region chain between "
2694                 + Bytes.toStringBinary(holeStart) + " and "
2695                 + Bytes.toStringBinary(holeStop)
2696                 + ".  You need to create a new .regioninfo and region "
2697                 + "dir in hdfs to plug the hole.");
2698       }
2699     };
2700 
2701     /**
2702      * This handler fixes integrity errors from hdfs information.  There are
2703      * basically three classes of integrity problems: 1) holes, 2) overlaps, and
2704      * 3) invalid regions.
2705      *
2706      * This class overrides methods that fix holes and the overlap group case.
2707      * Individual cases of particular overlaps are handled by the general
2708      * overlap group merge repair case.
2709      *
2710      * If hbase is online, this forces regions offline before doing merge
2711      * operations.
2712      */
2713     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2714       Configuration conf;
2715 
2716       boolean fixOverlaps = true;
2717 
2718       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2719           boolean fixHoles, boolean fixOverlaps) {
2720         super(ti, errors);
2721         this.conf = conf;
2722         this.fixOverlaps = fixOverlaps;
2723         // TODO properly use fixHoles
2724       }
2725 
2726       /**
2727        * This is a special case hole -- when the first region of a table is
2728        * missing from META, HBase doesn't acknowledge the existence of the
2729        * table.
2730        */
2731       @Override
2732       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2733         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2734             "First region should start with an empty key.  Creating a new " +
2735             "region and regioninfo in HDFS to plug the hole.",
2736             getTableInfo(), next);
2737         HTableDescriptor htd = getTableInfo().getHTD();
2738         // from special EMPTY_START_ROW to next region's startKey
2739         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2740             HConstants.EMPTY_START_ROW, next.getStartKey());
2741 
2742         // TODO test
2743         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2744         LOG.info("Table region start key was not empty.  Created new empty region: "
2745             + newRegion + " " +region);
2746         fixes++;
2747       }
2748 
2749       @Override
2750       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2751         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2752             "Last region should end with an empty key.  Creating a new "
2753                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2754         HTableDescriptor htd = getTableInfo().getHTD();
2755         // from curEndKey to EMPTY_START_ROW
2756         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2757             HConstants.EMPTY_START_ROW);
2758 
2759         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2760         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2761             + " " + region);
2762         fixes++;
2763       }
2764 
2765       /**
2766        * There is a hole in the hdfs regions that violates the table integrity
2767        * rules.  Create a new empty region that patches the hole.
2768        */
2769       @Override
2770       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2771         errors.reportError(
2772             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2773             "There is a hole in the region chain between "
2774                 + Bytes.toStringBinary(holeStartKey) + " and "
2775                 + Bytes.toStringBinary(holeStopKey)
2776                 + ".  Creating a new regioninfo and region "
2777                 + "dir in hdfs to plug the hole.");
2778         HTableDescriptor htd = getTableInfo().getHTD();
2779         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2780         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2781         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2782         fixes++;
2783       }
2784 
2785       /**
2786        * This takes set of overlapping regions and merges them into a single
2787        * region.  This covers cases like degenerate regions, shared start key,
2788        * general overlaps, duplicate ranges, and partial overlapping regions.
2789        *
2790        * Cases:
2791        * - Clean regions that overlap
2792        * - Regions with only .oldlogs files (no start/stop range can be determined)
2793        *
2794        * This is basically threadsafe, except for the fixes counter increment in mergeOverlaps.
2795        */
2796       @Override
2797       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2798           throws IOException {
2799         Preconditions.checkNotNull(overlap);
2800         Preconditions.checkArgument(overlap.size() > 0);
2801 
2802         if (!this.fixOverlaps) {
2803           LOG.warn("Not attempting to repair overlaps.");
2804           return;
2805         }
2806 
2807         if (overlap.size() > maxMerge) {
2808           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2809             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2810           if (sidelineBigOverlaps) {
2811             // we only sideline big overlapped groups that exceed the max number of regions to merge
2812             sidelineBigOverlaps(overlap);
2813           }
2814           return;
2815         }
2816 
2817         mergeOverlaps(overlap);
2818       }
2819 
2820       void mergeOverlaps(Collection<HbckInfo> overlap)
2821           throws IOException {
2822         String thread = Thread.currentThread().getName();
2823         LOG.info("== [" + thread + "] Merging regions into one region: "
2824           + Joiner.on(",").join(overlap));
2825         // get the min / max range and close all concerned regions
2826         Pair<byte[], byte[]> range = null;
2827         for (HbckInfo hi : overlap) {
2828           if (range == null) {
2829             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2830           } else {
2831             if (RegionSplitCalculator.BYTES_COMPARATOR
2832                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2833               range.setFirst(hi.getStartKey());
2834             }
2835             if (RegionSplitCalculator.BYTES_COMPARATOR
2836                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2837               range.setSecond(hi.getEndKey());
2838             }
2839           }
2840           // need to close files so delete can happen.
2841           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2842           LOG.debug("[" + thread + "] Contained region dir before close");
2843           debugLsr(hi.getHdfsRegionDir());
2844           try {
2845             LOG.info("[" + thread + "] Closing region: " + hi);
2846             closeRegion(hi);
2847           } catch (IOException ioe) {
2848             LOG.warn("[" + thread + "] Was unable to close region " + hi
2849               + ".  Just continuing... ", ioe);
2850           } catch (InterruptedException e) {
2851             LOG.warn("[" + thread + "] Was unable to close region " + hi
2852               + ".  Just continuing... ", e);
2853           }
2854 
2855           try {
2856             LOG.info("[" + thread + "] Offlining region: " + hi);
2857             offline(hi.getRegionName());
2858           } catch (IOException ioe) {
2859             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2860               + ".  Just continuing... ", ioe);
2861           }
2862         }
2863 
2864         // create new empty container region.
2865         HTableDescriptor htd = getTableInfo().getHTD();
2866         // from start key to end Key
2867         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2868             range.getSecond());
2869         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2870         LOG.info("[" + thread + "] Created new empty container region: " +
2871             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2872         debugLsr(region.getRegionFileSystem().getRegionDir());
2873 
2874         // all target regions are closed, should be able to safely cleanup.
2875         boolean didFix = false;
2876         Path target = region.getRegionFileSystem().getRegionDir();
2877         for (HbckInfo contained : overlap) {
2878           LOG.info("[" + thread + "] Merging " + contained + " into " + target);
2879           int merges = mergeRegionDirs(target, contained);
2880           if (merges > 0) {
2881             didFix = true;
2882           }
2883         }
2884         if (didFix) {
2885           fixes++;
2886         }
2887       }
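      // Worked example of the range computation above (hypothetical keys):
      // merging the overlap group { [b,d), [a,c), [c,e) } expands 'range' to
      // [a,e); every member is closed and offlined, and its files are moved
      // into a freshly created empty container region spanning [a,e).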
2888 
2889       /**
2890        * Sideline some regions in a big overlap group so that the group
2891        * has fewer regions, making it easier to merge them later on.
2892        *
2893        * @param bigOverlap the overlap group containing more than maxMerge regions
2894        * @throws IOException
2895        */
2896       void sidelineBigOverlaps(
2897           Collection<HbckInfo> bigOverlap) throws IOException {
2898         int overlapsToSideline = bigOverlap.size() - maxMerge;
2899         if (overlapsToSideline > maxOverlapsToSideline) {
2900           overlapsToSideline = maxOverlapsToSideline;
2901         }
2902         List<HbckInfo> regionsToSideline =
2903           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
2904         FileSystem fs = FileSystem.get(conf);
2905         for (HbckInfo regionToSideline: regionsToSideline) {
2906           try {
2907             LOG.info("Closing region: " + regionToSideline);
2908             closeRegion(regionToSideline);
2909           } catch (IOException ioe) {
2910             LOG.warn("Was unable to close region " + regionToSideline
2911               + ".  Just continuing... ", ioe);
2912           } catch (InterruptedException e) {
2913             LOG.warn("Was unable to close region " + regionToSideline
2914               + ".  Just continuing... ", e);
2915           }
2916 
2917           try {
2918             LOG.info("Offlining region: " + regionToSideline);
2919             offline(regionToSideline.getRegionName());
2920           } catch (IOException ioe) {
2921             LOG.warn("Unable to offline region from master: " + regionToSideline
2922               + ".  Just continuing... ", ioe);
2923           }
2924 
2925           LOG.info("Before sidelining big overlapped region: " + regionToSideline.toString());
2926           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
2927           if (sidelineRegionDir != null) {
2928             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
2929             LOG.info("After sidelining big overlapped region: "
2930               + regionToSideline.getRegionNameAsString()
2931               + " to " + sidelineRegionDir.toString());
2932             fixes++;
2933           }
2934         }
2935       }
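      // Hedged arithmetic example (hypothetical values): with bigOverlap.size() = 12,
      // maxMerge = 5 and maxOverlapsToSideline = 2, overlapsToSideline becomes
      // min(12 - 5, 2) = 2, so only the two biggest ranges are sidelined; the ten
      // remaining regions still exceed maxMerge, so a later run may sideline more
      // before the group can finally be merged.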
2936     }
2937 
2938     /**
2939      * Check the region chain (from META) of this table.  We are looking for
2940      * holes, overlaps, and cycles.
2941      * @return false if there are errors
2942      * @throws IOException
2943      */
2944     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
2945       // When the table is disabled there is no need to check the region chain. If some of its
2946       // regions were accidentally deployed, the code below might report issues such as a missing
2947       // start or end region or a hole in the chain, and might try to fix them, which is unwanted.
2948       if (disabledTables.contains(this.tableName)) {
2949         return true;
2950       }
2951       int originalErrorsCount = errors.getErrorList().size();
2952       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
2953       SortedSet<byte[]> splits = sc.getSplits();
2954 
2955       byte[] prevKey = null;
2956       byte[] problemKey = null;
2957 
2958       if (splits.size() == 0) {
2959         // no region for this table
2960         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
2961       }
2962 
2963       for (byte[] key : splits) {
2964         Collection<HbckInfo> ranges = regions.get(key);
2965         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
2966           for (HbckInfo rng : ranges) {
2967             handler.handleRegionStartKeyNotEmpty(rng);
2968           }
2969         }
2970 
2971         // check for degenerate ranges
2972         for (HbckInfo rng : ranges) {
2973           // special endkey case converts '' to null
2974           byte[] endKey = rng.getEndKey();
2975           endKey = (endKey.length == 0) ? null : endKey;
2976           if (Bytes.equals(rng.getStartKey(),endKey)) {
2977             handler.handleDegenerateRegion(rng);
2978           }
2979         }
2980 
2981         if (ranges.size() == 1) {
2982           // this split key is ok -- no overlap, not a hole.
2983           if (problemKey != null) {
2984             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
2985           }
2986           problemKey = null; // fell through, no more problem.
2987         } else if (ranges.size() > 1) {
2988           // start a new problem group at this key; if we already have a
2989           // problem key, just keep using it.
2990           if (problemKey == null) {
2991             // only for overlap regions.
2992             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
2993             problemKey = key;
2994           }
2995           overlapGroups.putAll(problemKey, ranges);
2996 
2997           // record errors
2998           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
2999           // this is dumb and O(n^2), but it shouldn't happen often
3000           for (HbckInfo r1 : ranges) {
3001             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3002             subRange.remove(r1);
3003             for (HbckInfo r2 : subRange) {
3004               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3005               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3006                 handler.handleDuplicateStartKeys(r1,r2);
3007               } else {
3008                 // overlap
3009                 handler.handleOverlapInRegionChain(r1, r2);
3010               }
3011             }
3012           }
3013 
3014         } else if (ranges.size() == 0) {
3015           if (problemKey != null) {
3016             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3017           }
3018           problemKey = null;
3019 
3020           byte[] holeStopKey = sc.getSplits().higher(key);
3021           // if higher key is null we reached the top.
3022           if (holeStopKey != null) {
3023             // hole
3024             handler.handleHoleInRegionChain(key, holeStopKey);
3025           }
3026         }
3027         prevKey = key;
3028       }
3029 
3030       // When the last region of a table is proper and has an empty end key, 'prevKey'
3031       // will be null.
3032       if (prevKey != null) {
3033         handler.handleRegionEndKeyNotEmpty(prevKey);
3034       }
3035 
3036       // TODO fold this into the TableIntegrityHandler
3037       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3038         boolean ok = handleOverlapsParallel(handler, prevKey);
3039         if (!ok) {
3040           return false;
3041         }
3042       } else {
3043         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3044           handler.handleOverlapGroup(overlap);
3045         }
3046       }
3047 
3048       if (details) {
3049         // do full region split map dump
3050         errors.print("---- Table '"  +  this.tableName
3051             + "': region split map");
3052         dump(splits, regions);
3053         errors.print("---- Table '"  +  this.tableName
3054             + "': overlap groups");
3055         dumpOverlapProblems(overlapGroups);
3056         errors.print("There are " + overlapGroups.keySet().size()
3057             + " overlap groups with " + overlapGroups.size()
3058             + " overlapping regions");
3059       }
3060       if (!sidelinedRegions.isEmpty()) {
3061         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3062         errors.print("---- Table '"  +  this.tableName
3063             + "': sidelined big overlapped regions");
3064         dumpSidelinedRegions(sidelinedRegions);
3065       }
3066       return errors.getErrorList().size() == originalErrorsCount;
3067     }
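    // Hedged walk-through of the chain check above (hypothetical keys): given
    // regions R1=["",b), R2=[b,d), R3=[c,e) and R4=[f,""), split point c is
    // covered by both R2 and R3, so handleOverlapInRegionChain(R2, R3) fires;
    // split point e has no covering region and the next split is f, so
    // handleHoleInRegionChain(e, f) fires; the empty start and end keys are
    // present, so no start/end-key errors are reported.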
3068 
3069     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3070         throws IOException {
3071       // we parallelize the overlap handler for the case where we have lots of groups
3072       // to fix.  We can safely assume each group is independent.
3073       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3074       List<Future<Void>> rets;
3075       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3076         //
3077         merges.add(new WorkItemOverlapMerge(overlap, handler));
3078       }
3079       try {
3080         rets = executor.invokeAll(merges);
3081       } catch (InterruptedException e) {
3082         LOG.error("Overlap merges were interrupted", e);
3083         return false;
3084       }
3085       for (int i = 0; i < merges.size(); i++) {
3086         WorkItemOverlapMerge work = merges.get(i);
3087         Future<Void> f = rets.get(i);
3088         try {
3089           f.get();
3090         } catch (ExecutionException e) {
3091           LOG.warn("Failed to merge overlap group " + work, e.getCause());
3092         } catch (InterruptedException e) {
3093           LOG.error("Waiting for overlap merges was interrupted", e);
3094           return false;
3095         }
3096       }
3097       return true;
3098     }
3099 
3100     /**
3101      * Dumps region coverage data in a human-readable form for debugging.
3102      *
3103      * @param splits the ordered set of split points for this table
3104      * @param regions multimap from each split point to the regions covering it
3105      */
3106     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3107       // we display this way because the last end key should be displayed as well.
3108       StringBuilder sb = new StringBuilder();
3109       for (byte[] k : splits) {
3110         sb.setLength(0); // clear out existing buffer, if any.
3111         sb.append(Bytes.toStringBinary(k) + ":\t");
3112         for (HbckInfo r : regions.get(k)) {
3113           sb.append("[ "+ r.toString() + ", "
3114               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3115         }
3116         errors.print(sb.toString());
3117       }
3118     }
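    // Hedged sample of the output format (hypothetical regions A=[a,c), B=[b,d)):
    //   a:   [ A, c]
    //   b:   [ A, c]   [ B, d]
    // Each split point is followed by every region covering it plus that
    // region's end key, so overlaps and holes show up at a glance.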
3119   }
3120 
3121   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3122     // we display this way because the last end key should be displayed as
3123     // well.
3124     for (byte[] k : regions.keySet()) {
3125       errors.print(Bytes.toStringBinary(k) + ":");
3126       for (HbckInfo r : regions.get(k)) {
3127         errors.print("[ " + r.toString() + ", "
3128             + Bytes.toStringBinary(r.getEndKey()) + "]");
3129       }
3130       errors.print("----");
3131     }
3132   }
3133 
3134   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3135     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3136       TableName tableName = entry.getValue().getTableName();
3137       Path path = entry.getKey();
3138       errors.print("This sidelined region dir should be bulk loaded: "
3139         + path.toString());
3140       errors.print("Bulk load command looks like: "
3141         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3142         + path.toUri().getPath() + " "+ tableName);
3143     }
3144   }
3145 
3146   public Multimap<byte[], HbckInfo> getOverlapGroups(
3147       TableName table) {
3148     TableInfo ti = tablesInfo.get(table);
3149     return ti.overlapGroups;
3150   }
3151 
3152   /**
3153    * Return descriptors of user-space tables whose metadata have not been
3154    * modified in the last few milliseconds specified by timelag:
3155    * if none of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3156    * SPLITA_QUALIFIER, or SPLITB_QUALIFIER columns has changed in the last
3157    * milliseconds specified by timelag, then the table is a candidate to be returned.
3158    * @return tables that have not been modified recently
3159    * @throws IOException if an error is encountered
3160    */
3161   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3162     List<TableName> tableNames = new ArrayList<TableName>();
3163     long now = EnvironmentEdgeManager.currentTime();
3164 
3165     for (HbckInfo hbi : regionInfoMap.values()) {
3166       MetaEntry info = hbi.metaEntry;
3167 
3168       // if the start key is zero, then we have found the first region of a table.
3169       // pick only those tables that were not modified in the last few milliseconds.
3170       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3171         if (info.modTime + timelag < now) {
3172           tableNames.add(info.getTable());
3173         } else {
3174           numSkipped.incrementAndGet(); // one more in-flux table
3175         }
3176       }
3177     }
3178     return getHTableDescriptors(tableNames);
3179   }
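  // Hedged example of the timelag filter above (hypothetical values): with
  // timelag = 60000 and now = 1000000, a first region whose meta entry has
  // modTime = 950000 fails the test 950000 + 60000 < 1000000, so its table is
  // counted as in-flux via numSkipped instead of being returned.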
3180 
3181   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3182     HTableDescriptor[] htd = new HTableDescriptor[0];
3183     Admin admin = null;
3184     try {
3185       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3186       admin = new HBaseAdmin(getConf());
3187       htd = admin.getTableDescriptorsByTableName(tableNames);
3188     } catch (IOException e) {
3189       LOG.debug("Exception getting table descriptors", e);
3190     } finally {
3191       if (admin != null) {
3192         try {
3193           admin.close();
3194         } catch (IOException e) {
3195           LOG.debug("Exception closing HBaseAdmin", e);
3196         }
3197       }
3198     }
3199     return htd;
3200   }
3201 
3202   /**
3203    * Gets the entry in regionInfoMap corresponding to the given encoded
3204    * region name. If the region has not been seen yet, a new entry is added
3205    * and returned.
3206    */
3207   private synchronized HbckInfo getOrCreateInfo(String name) {
3208     HbckInfo hbi = regionInfoMap.get(name);
3209     if (hbi == null) {
3210       hbi = new HbckInfo(null);
3211       regionInfoMap.put(name, hbi);
3212     }
3213     return hbi;
3214   }
3215 
3216   private void checkAndFixTableLocks() throws IOException {
3217     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3218 
3219     try {
3220       TableLockChecker checker = new TableLockChecker(zkw, errors);
3221       checker.checkTableLocks();
3222 
3223       if (this.fixTableLocks) {
3224         checker.fixExpiredTableLocks();
3225       }
3226     } finally {
3227       zkw.close();
3228     }
3229   }
3230 
3231   /**
3232    * Check whether an orphaned table ZNode exists and fix it if requested.
3233    * @throws IOException
3234    * @throws KeeperException
3235    * @throws InterruptedException
3236    */
3237   private void checkAndFixOrphanedTableZNodes()
3238       throws IOException, KeeperException, InterruptedException {
3239     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3240 
3241     try {
3242       Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
3243       String msg;
3244       TableInfo tableInfo;
3245 
3246       for (TableName tableName : enablingTables) {
3247         // Check whether the table exists in hbase
3248         tableInfo = tablesInfo.get(tableName);
3249         if (tableInfo != null) {
3250           // Table exists.  This table state is in transit.  No problem for this table.
3251           continue;
3252         }
3253 
3254         msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3255         LOG.warn(msg);
3256         orphanedTableZNodes.add(tableName);
3257         errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3258       }
3259 
3260       if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3261         ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
3262 
3263         for (TableName tableName : orphanedTableZNodes) {
3264           try {
3265             // Set the table state to be disabled so that if we made a mistake, we can
3266             // trace the history and figure it out.
3267             // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
3268             // Both approaches work.
3269             zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
3270           } catch (CoordinatedStateException e) {
3271             // This exception should not happen here
3272             LOG.error(
3273               "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
3274               e);
3275           }
3276         }
3277       }
3278     } finally {
3279       zkw.close();
3280     }
3281   }
3282 
3283   /**
3284    * Check values in regionInfoMap for hbase:meta.
3285    * Check if zero or more than one region claiming to be hbase:meta is found.
3286    * If there are inconsistencies (i.e. zero or more than one region
3287    * pretends to be holding hbase:meta) try to fix that and report an error.
3288    * @throws IOException from HBaseFsckRepair functions
3289    * @throws KeeperException
3290    * @throws InterruptedException
3291    */
3292   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3293     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3294     for (HbckInfo value : regionInfoMap.values()) {
3295       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3296         metaRegions.put(value.getReplicaId(), value);
3297       }
3298     }
3299     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3300         .getRegionReplication();
3301     boolean noProblem = true;
3302     // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas
3303     // Check the deployed servers. It should be exactly one server for each replica.
3304     for (int i = 0; i < metaReplication; i++) {
3305       HbckInfo metaHbckInfo = metaRegions.remove(i);
3306       List<ServerName> servers = new ArrayList<ServerName>();
3307       if (metaHbckInfo != null) {
3308         servers = metaHbckInfo.deployedOn;
3309       }
3310       if (servers.size() != 1) {
3311         noProblem = false;
3312         if (servers.size() == 0) {
3313           assignMetaReplica(i);
3314         } else if (servers.size() > 1) {
3315           errors.reportError(ERROR_CODE.MULTI_META_REGION,
3316               "hbase:meta, replicaId " + metaHbckInfo.getReplicaId() +
3317               " is found on more than one server.");
3318           if (shouldFixAssignments()) {
3319             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3320                          metaHbckInfo.getReplicaId() +"..");
3321             setShouldRerun();
3322             // try to fix it (treat it as a dupe assignment)
3323             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3324           }
3325         }
3326       }
3327     }
3328     // unassign whatever is remaining in metaRegions. They are excess replicas.
3329     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3330       noProblem = false;
3331       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3332           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3333           ", deployed " + metaRegions.size());
3334       if (shouldFixAssignments()) {
3335         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3336             " of hbase:meta..");
3337         setShouldRerun();
3338         unassignMetaReplica(entry.getValue());
3339       }
3340     }
3341     // if noProblem is false, rerun hbck with hopefully fixed META
3342     // if noProblem is true, no errors, so continue normally
3343     return noProblem;
3344   }
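  // Hedged walk-through of the meta checks above (hypothetical deployment):
  // with metaReplication = 3, replica 0 on one server, replica 1 on none and
  // replica 2 on two servers, replica 1 triggers assignMetaReplica(1), replica 2
  // is reported as MULTI_META_REGION (and fixed as a duplicate assignment when
  // fixAssignments is enabled), and any leftover replicaId >= 3 in metaRegions
  // is undeployed as an excess replica.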
3345 
3346   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3347   KeeperException {
3348     undeployRegions(hi);
3349     ZooKeeperWatcher zkw = createZooKeeperWatcher();
3350     try {
3351       ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3352     } finally {
3353       zkw.close();
3354     }
3355   }
3356 
3357   private void assignMetaReplica(int replicaId)
3358       throws IOException, KeeperException, InterruptedException {
3359     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3360         replicaId +" is not found on any region.");
3361     if (shouldFixAssignments()) {
3362       errors.print("Trying to fix a problem with hbase:meta..");
3363       setShouldRerun();
3364       // try to fix it (treat it as unassigned region)
3365       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3366           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3367       HBaseFsckRepair.fixUnassigned(admin, h);
3368       HBaseFsckRepair.waitUntilAssigned(admin, h);
3369     }
3370   }
3371 
3372   /**
3373    * Scan hbase:meta, adding all regions found to the regionInfo map.
3374    * @throws IOException if an error is encountered
3375    */
3376   boolean loadMetaEntries() throws IOException {
3377     MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3378       int countRecord = 1;
3379 
3380       // comparator to sort KeyValues with latest modtime
3381       final Comparator<Cell> comp = new Comparator<Cell>() {
3382         @Override
3383         public int compare(Cell k1, Cell k2) {
3384           return Long.compare(k1.getTimestamp(), k2.getTimestamp()); // avoid int overflow
3385         }
3386       };
3387 
3388       @Override
3389       public boolean processRow(Result result) throws IOException {
3390         try {
3391 
3392           // record the latest modification of this META record
3393           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3394           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3395           if (rl == null) {
3396             emptyRegionInfoQualifiers.add(result);
3397             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3398               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3399             return true;
3400           }
3401           ServerName sn = null;
3402           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3403               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3404             emptyRegionInfoQualifiers.add(result);
3405             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3406               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3407             return true;
3408           }
3409           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3410           if (!(isTableIncluded(hri.getTable())
3411               || hri.isMetaRegion())) {
3412             return true;
3413           }
3414           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3415           for (HRegionLocation h : rl.getRegionLocations()) {
3416             if (h == null || h.getRegionInfo() == null) {
3417               continue;
3418             }
3419             sn = h.getServerName();
3420             hri = h.getRegionInfo();
3421 
3422             MetaEntry m = null;
3423             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3424               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3425             } else {
3426               m = new MetaEntry(hri, sn, ts, null, null);
3427             }
3428             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3429             if (previous == null) {
3430               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3431             } else if (previous.metaEntry == null) {
3432               previous.metaEntry = m;
3433             } else {
3434               throw new IOException("Two entries in hbase:meta are the same " + previous);
3435             }
3436           }
3437           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3438           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3439               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3440             if (mergeRegion != null) {
3441               // This region has already been merged
3442               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3443               hbInfo.setMerged(true);
3444             }
3445           }
3446 
3447           // show proof of progress to the user, once for every 100 records.
3448           if (countRecord % 100 == 0) {
3449             errors.progress();
3450           }
3451           countRecord++;
3452           return true;
3453         } catch (RuntimeException e) {
3454           LOG.error("Result=" + result);
3455           throw e;
3456         }
3457       }
3458     };
3459     if (!checkMetaOnly) {
3460       // Scan hbase:meta to pick up user regions
3461       MetaScanner.metaScan(connection, visitor);
3462     }
3463 
3464     errors.print("");
3465     return true;
3466   }
3467 
3468   /**
3469    * Stores the regioninfo entries scanned from META
3470    */
3471   static class MetaEntry extends HRegionInfo {
3472     ServerName regionServer;   // server hosting this region
3473     long modTime;          // timestamp of the most recent modification of this metadata
3474     HRegionInfo splitA, splitB; //split daughters
3475 
3476     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3477       this(rinfo, regionServer, modTime, null, null);
3478     }
3479 
3480     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3481         HRegionInfo splitA, HRegionInfo splitB) {
3482       super(rinfo);
3483       this.regionServer = regionServer;
3484       this.modTime = modTime;
3485       this.splitA = splitA;
3486       this.splitB = splitB;
3487     }
3488 
3489     @Override
3490     public boolean equals(Object o) {
3491       boolean superEq = super.equals(o);
3492       if (!superEq) {
3493         return superEq;
3494       }
3495 
3496       MetaEntry me = (MetaEntry) o;
3497       if (!regionServer.equals(me.regionServer)) {
3498         return false;
3499       }
3500       return (modTime == me.modTime);
3501     }
3502 
3503     @Override
3504     public int hashCode() {
3505       int hash = Arrays.hashCode(getRegionName());
3506       hash ^= getRegionId();
3507       hash ^= Arrays.hashCode(getStartKey());
3508       hash ^= Arrays.hashCode(getEndKey());
3509       hash ^= Boolean.valueOf(isOffline()).hashCode();
3510       hash ^= getTable().hashCode();
3511       if (regionServer != null) {
3512         hash ^= regionServer.hashCode();
3513       }
3514       hash ^= modTime;
3515       return hash;
3516     }
3517   }
3518 
3519   /**
3520    * Stores the regioninfo entries from HDFS
3521    */
3522   static class HdfsEntry {
3523     HRegionInfo hri;
3524     Path hdfsRegionDir = null;
3525     long hdfsRegionDirModTime  = 0;
3526     boolean hdfsRegioninfoFilePresent = false;
3527     boolean hdfsOnlyEdits = false;
3528   }
3529 
3530   /**
3531    * Stores the regioninfo retrieved from Online region servers.
3532    */
3533   static class OnlineEntry {
3534     HRegionInfo hri;
3535     ServerName hsa;
3536 
3537     @Override
3538     public String toString() {
3539       return hsa.toString() + ";" + hri.getRegionNameAsString();
3540     }
3541   }
3542 
3543   /**
3544    * Maintain information about a particular region.  It gathers information
3545    * from three places -- HDFS, META, and region servers.
3546    */
3547   public static class HbckInfo implements KeyRange {
3548     private MetaEntry metaEntry = null; // info in META
3549     private HdfsEntry hdfsEntry = null; // info in HDFS
3550     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3551     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3552     private boolean skipChecks = false; // whether to skip further checks to this region info.
3553     private boolean isMerged = false;// whether this region has already been merged into another one
3554     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3555     private HRegionInfo primaryHRIForDeployedReplica = null;
3556 
3557     HbckInfo(MetaEntry metaEntry) {
3558       this.metaEntry = metaEntry;
3559     }
3560 
3561     public int getReplicaId() {
3562       if (metaEntry != null) return metaEntry.getReplicaId();
3563       return deployedReplicaId;
3564     }
3565 
3566     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3567       OnlineEntry rse = new OnlineEntry();
3568       rse.hri = hri;
3569       rse.hsa = server;
3570       this.deployedEntries.add(rse);
3571       this.deployedOn.add(server);
3572       // save the replicaId that we see deployed in the cluster
3573       this.deployedReplicaId = hri.getReplicaId();
3574       this.primaryHRIForDeployedReplica =
3575           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3576     }
3577 
3578     @Override
3579     public synchronized String toString() {
3580       StringBuilder sb = new StringBuilder();
3581       sb.append("{ meta => ");
3582       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3583       sb.append( ", hdfs => " + getHdfsRegionDir());
3584       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3585       sb.append( ", replicaId => " + getReplicaId());
3586       sb.append(" }");
3587       return sb.toString();
3588     }
3589 
3590     @Override
3591     public byte[] getStartKey() {
3592       if (this.metaEntry != null) {
3593         return this.metaEntry.getStartKey();
3594       } else if (this.hdfsEntry != null) {
3595         return this.hdfsEntry.hri.getStartKey();
3596       } else {
3597         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3598         return null;
3599       }
3600     }
3601 
3602     @Override
3603     public byte[] getEndKey() {
3604       if (this.metaEntry != null) {
3605         return this.metaEntry.getEndKey();
3606       } else if (this.hdfsEntry != null) {
3607         return this.hdfsEntry.hri.getEndKey();
3608       } else {
3609         LOG.error("Entry " + this + " has no meta or hdfs region end key.");
3610         return null;
3611       }
3612     }
3613 
3614     public TableName getTableName() {
3615       if (this.metaEntry != null) {
3616         return this.metaEntry.getTable();
3617       } else if (this.hdfsEntry != null) {
3618         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3619         // so we get the name from the Path
3620         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3621         return FSUtils.getTableName(tableDir);
3622       } else {
3623         // return the info from the first online/deployed hri
3624         for (OnlineEntry e : deployedEntries) {
3625           return e.hri.getTable();
3626         }
3627         return null;
3628       }
3629     }
3630 
3631     public String getRegionNameAsString() {
3632       if (metaEntry != null) {
3633         return metaEntry.getRegionNameAsString();
3634       } else if (hdfsEntry != null) {
3635         if (hdfsEntry.hri != null) {
3636           return hdfsEntry.hri.getRegionNameAsString();
3637         }
3638       } else {
3639         // return the info from the first online/deployed hri
3640         for (OnlineEntry e : deployedEntries) {
3641           return e.hri.getRegionNameAsString();
3642         }
3643       }
3644       return null;
3645     }
3646 
3647     public byte[] getRegionName() {
3648       if (metaEntry != null) {
3649         return metaEntry.getRegionName();
3650       } else if (hdfsEntry != null) {
3651         return hdfsEntry.hri.getRegionName();
3652       } else {
3653         // return the info from the first online/deployed hri
3654         for (OnlineEntry e : deployedEntries) {
3655           return e.hri.getRegionName();
3656         }
3657         return null;
3658       }
3659     }
3660 
3661     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3662       return primaryHRIForDeployedReplica;
3663     }
3664 
3665     Path getHdfsRegionDir() {
3666       if (hdfsEntry == null) {
3667         return null;
3668       }
3669       return hdfsEntry.hdfsRegionDir;
3670     }
3671 
3672     boolean containsOnlyHdfsEdits() {
3673       if (hdfsEntry == null) {
3674         return false;
3675       }
3676       return hdfsEntry.hdfsOnlyEdits;
3677     }
3678 
3679     boolean isHdfsRegioninfoPresent() {
3680       if (hdfsEntry == null) {
3681         return false;
3682       }
3683       return hdfsEntry.hdfsRegioninfoFilePresent;
3684     }
3685 
3686     long getModTime() {
3687       if (hdfsEntry == null) {
3688         return 0;
3689       }
3690       return hdfsEntry.hdfsRegionDirModTime;
3691     }
3692 
3693     HRegionInfo getHdfsHRI() {
3694       if (hdfsEntry == null) {
3695         return null;
3696       }
3697       return hdfsEntry.hri;
3698     }
3699 
3700     public void setSkipChecks(boolean skipChecks) {
3701       this.skipChecks = skipChecks;
3702     }
3703 
3704     public boolean isSkipChecks() {
3705       return skipChecks;
3706     }
3707 
3708     public void setMerged(boolean isMerged) {
3709       this.isMerged = isMerged;
3710     }
3711 
3712     public boolean isMerged() {
3713       return this.isMerged;
3714     }
3715   }
3716 
3717   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3718     @Override
3719     public int compare(HbckInfo l, HbckInfo r) {
3720       if (l == r) {
3721         // same instance
3722         return 0;
3723       }
3724 
3725       int tableCompare = l.getTableName().compareTo(r.getTableName());
3726       if (tableCompare != 0) {
3727         return tableCompare;
3728       }
3729 
3730       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3731           l.getStartKey(), r.getStartKey());
3732       if (startComparison != 0) {
3733         return startComparison;
3734       }
3735 
3736       // Special case for absolute endkey
3737       byte[] endKey = r.getEndKey();
3738       endKey = (endKey.length == 0) ? null : endKey;
3739       byte[] endKey2 = l.getEndKey();
3740       endKey2 = (endKey2.length == 0) ? null : endKey2;
3741       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3742           endKey2,  endKey);
3743 
3744       if (endComparison != 0) {
3745         return endComparison;
3746       }
3747 
3748       // use regionId as tiebreaker.
3749       // A null hdfsEntry is considered after all possible values, so it sorts last.
3750       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3751         return 0;
3752       }
3753       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3754         return 1;
3755       }
3756       // l.hdfsEntry must not be null
3757       if (r.hdfsEntry == null) {
3758         return -1;
3759       }
3760       // both l.hdfsEntry and r.hdfsEntry must not be null.
3761       return Long.compare(l.hdfsEntry.hri.getRegionId(), r.hdfsEntry.hri.getRegionId());
3762     }
3763   };
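  // Hedged ordering example for cmp (hypothetical entries): within one table,
  // [a,c) sorts before [b,c); for identical [a,c) ranges the entry with the
  // smaller hdfsEntry regionId comes first, and an entry lacking an hdfsEntry
  // altogether sorts after one that has it.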
3764 
3765   /**
3766    * Prints summary of all tables found on the system.
3767    */
3768   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3769     StringBuilder sb = new StringBuilder();
3770     int numOfSkippedRegions;
3771     errors.print("Summary:");
3772     for (TableInfo tInfo : tablesInfo.values()) {
3773       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3774           skippedRegions.get(tInfo.getName()).size() : 0;
3775 
3776       if (errors.tableHasErrors(tInfo)) {
3777         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3778       } else if (numOfSkippedRegions > 0) {
3779         errors.print("Table " + tInfo.getName() + " is okay (with "
3780           + numOfSkippedRegions + " skipped regions).");
3781       } else {
3783         errors.print("Table " + tInfo.getName() + " is okay.");
3784       }
3785       errors.print("    Number of regions: " + tInfo.getNumRegions());
3786       if (numOfSkippedRegions > 0) {
3787         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3788         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3789         System.out.println("      List of skipped regions:");
3790         for(String sr : skippedRegionStrings) {
3791           System.out.println("        " + sr);
3792         }
3793       }
3794       sb.setLength(0); // clear out existing buffer, if any.
3795       sb.append("    Deployed on: ");
3796       for (ServerName server : tInfo.deployedOn) {
3797         sb.append(" " + server.toString());
3798       }
3799       errors.print(sb.toString());
3800     }
3801   }
3802 
3803   static ErrorReporter getErrorReporter(
3804       final Configuration conf) throws ClassNotFoundException {
3805     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3806     return ReflectionUtils.newInstance(reporter, conf);
3807   }
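  // Hedged usage sketch (MyReporter is a hypothetical class): a custom reporter
  // can be plugged in before running hbck via
  //   conf.setClass("hbasefsck.errorreporter", MyReporter.class, ErrorReporter.class);
  // where MyReporter implements the ErrorReporter interface below; otherwise
  // the default PrintingErrorReporter is used.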
3808 
3809   public interface ErrorReporter {
3810     enum ERROR_CODE {
3811       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3812       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3813       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3814       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3815       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3816       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3817       WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3818     }
3819     void clear();
3820     void report(String message);
3821     void reportError(String message);
3822     void reportError(ERROR_CODE errorCode, String message);
3823     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3824     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3825     void reportError(
3826       ERROR_CODE errorCode,
3827       String message,
3828       TableInfo table,
3829       HbckInfo info1,
3830       HbckInfo info2
3831     );
3832     int summarize();
3833     void detail(String details);
3834     ArrayList<ERROR_CODE> getErrorList();
3835     void progress();
3836     void print(String message);
3837     void resetErrors();
3838     boolean tableHasErrors(TableInfo table);
3839   }
3840 
3841   static class PrintingErrorReporter implements ErrorReporter {
3842     public int errorCount = 0;
3843     private int showProgress;
3844     // How frequently calls to progress() will create output
3845     private static final int progressThreshold = 100;
3846 
3847     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3848 
3849     // for use by unit tests to verify which errors were discovered
3850     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3851 
3852     @Override
3853     public void clear() {
3854       errorTables.clear();
3855       errorList.clear();
3856       errorCount = 0;
3857     }
3858 
3859     @Override
3860     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3861       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3862         System.err.println(message);
3863         return;
3864       }
3865 
3866       errorList.add(errorCode);
3867       if (!summary) {
3868         System.out.println("ERROR: " + message);
3869       }
3870       errorCount++;
3871       showProgress = 0;
3872     }
3873 
3874     @Override
3875     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3876       errorTables.add(table);
3877       reportError(errorCode, message);
3878     }
3879 
3880     @Override
3881     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3882                                          HbckInfo info) {
3883       errorTables.add(table);
3884       String reference = "(region " + info.getRegionNameAsString() + ")";
3885       reportError(errorCode, reference + " " + message);
3886     }
3887 
3888     @Override
3889     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3890                                          HbckInfo info1, HbckInfo info2) {
3891       errorTables.add(table);
3892       String reference = "(regions " + info1.getRegionNameAsString()
3893           + " and " + info2.getRegionNameAsString() + ")";
3894       reportError(errorCode, reference + " " + message);
3895     }
3896 
3897     @Override
3898     public synchronized void reportError(String message) {
3899       reportError(ERROR_CODE.UNKNOWN, message);
3900     }
3901 
3902     /**
3903      * Report error information, but do not increment the error count.  Intended for cases
3904      * where the actual error would have been reported previously.
3905      * @param message the detail message to print
3906      */
3907     @Override
3908     public synchronized void report(String message) {
3909       if (! summary) {
3910         System.out.println("ERROR: " + message);
3911       }
3912       showProgress = 0;
3913     }
3914 
3915     @Override
3916     public synchronized int summarize() {
3917       System.out.println(Integer.toString(errorCount) +
3918                          " inconsistencies detected.");
3919       if (errorCount == 0) {
3920         System.out.println("Status: OK");
3921         return 0;
3922       } else {
3923         System.out.println("Status: INCONSISTENT");
3924         return -1;
3925       }
3926     }
3927 
3928     @Override
3929     public ArrayList<ERROR_CODE> getErrorList() {
3930       return errorList;
3931     }
3932 
3933     @Override
3934     public synchronized void print(String message) {
3935       if (!summary) {
3936         System.out.println(message);
3937       }
3938     }
3939 
3940     @Override
3941     public boolean tableHasErrors(TableInfo table) {
3942       return errorTables.contains(table);
3943     }
3944 
3945     @Override
3946     public void resetErrors() {
3947       errorCount = 0;
3948     }
3949 
3950     @Override
3951     public synchronized void detail(String message) {
3952       if (details) {
3953         System.out.println(message);
3954       }
3955       showProgress = 0;
3956     }
3957 
3958     @Override
3959     public synchronized void progress() {
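           // Print a single "." once progress() has been called PROGRESS_THRESHOLD
           // times since the last reset; error/detail output resets the counter.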
3960       if (showProgress++ == PROGRESS_THRESHOLD) {
3961         if (!summary) {
3962           System.out.print(".");
3963         }
3964         showProgress = 0;
3965       }
3966     }
3967   }
3968 
3969   /**
3970    * Contact a region server and get all information from it.
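        * <p>
        * A minimal sketch of how hbck drives these work items; the executor,
        * server name, and connection are assumed to be supplied by the caller:
        *
        * <pre>
        *   WorkItemRegion work = new WorkItemRegion(hbck, serverName, errors, connection);
        *   Future&lt;Void&gt; f = executor.submit(work);
        *   f.get();   // an IOException surfaces here wrapped in ExecutionException
        * </pre>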
3971    */
3972   static class WorkItemRegion implements Callable<Void> {
3973     private HBaseFsck hbck;
3974     private ServerName rsinfo;
3975     private ErrorReporter errors;
3976     private HConnection connection;
3977 
3978     WorkItemRegion(HBaseFsck hbck, ServerName info,
3979                    ErrorReporter errors, HConnection connection) {
3980       this.hbck = hbck;
3981       this.rsinfo = info;
3982       this.errors = errors;
3983       this.connection = connection;
3984     }
3985 
3986     @Override
3987     public synchronized Void call() throws IOException {
3988       errors.progress();
3989       try {
3990         BlockingInterface server = connection.getAdmin(rsinfo);
3991 
3992         // list all online regions from this region server
3993         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
3994         regions = filterRegions(regions);
3995 
3996         if (details) {
3997           errors.detail("RegionServer: " + rsinfo.getServerName() +
3998                            " number of regions: " + regions.size());
3999           for (HRegionInfo rinfo: regions) {
4000             errors.detail("  " + rinfo.getRegionNameAsString() +
4001                              " id: " + rinfo.getRegionId() +
4002                              " encoded_name: " + rinfo.getEncodedName() +
4003                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4004                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4005           }
4006         }
4007 
4008         // check to see if the existence of this region matches the region in META
4009         for (HRegionInfo r:regions) {
4010           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4011           hbi.addServer(r, rsinfo);
4012         }
4013       } catch (IOException e) {          // unable to connect to the region server.
4014         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4015           " Unable to fetch region information. " + e);
4016         throw e;
4017       }
4018       return null;
4019     }
4020 
4021     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
4022       List<HRegionInfo> ret = Lists.newArrayList();
4023       for (HRegionInfo hri : regions) {
4024         if (hri.isMetaTable() || (!hbck.checkMetaOnly
4025             && hbck.isTableIncluded(hri.getTable()))) {
4026           ret.add(hri);
4027         }
4028       }
4029       return ret;
4030     }
4031   }
4032 
4033   /**
4034    * Contact hdfs and get all information about the specified table directory into
4035    * the regioninfo list.
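        * <p>
        * Each region directory is processed as a separate sub-task on the shared
        * executor, so a single WorkItemHdfsDir fans out into one task per region dir.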
4036    */
4037   class WorkItemHdfsDir implements Callable<Void> {
4038     private FileStatus tableDir;
4039     private ErrorReporter errors;
4040     private FileSystem fs;
4041 
4042     WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4043                     FileStatus status) {
4044       this.fs = fs;
4045       this.tableDir = status;
4046       this.errors = errors;
4047     }
4048 
4049     @Override
4050     public synchronized Void call() throws InterruptedException, ExecutionException {
4051       final Vector<Exception> exceptions = new Vector<Exception>();
4052       try {
4053         // level 2: <HBASE_DIR>/<table>/*
4054         final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4055         final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.length);
4056         for (final FileStatus regionDir : regionDirs) {
4057           errors.progress();
4058           final String encodedName = regionDir.getPath().getName();
4059           // ignore directories that aren't hexadecimal
4060           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
4061             continue;
4062           }
4063 
4064           if (!exceptions.isEmpty()) {
4065             break;
4066           }
4067 
4068           futures.add(executor.submit(new Runnable() {
4069             @Override
4070             public void run() {
4071               try {
4072                 LOG.debug("Loading region info from hdfs: " + regionDir.getPath());
4073                 Path regioninfoFile = new Path(regionDir.getPath(),
4074                     HRegionFileSystem.REGION_INFO_FILE);
4075                 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4076 
4077                 if (!regioninfoFileExists) {
4078                   // As tables become larger it is more and more likely that by the time you
4079                 // reach a given region it will be gone due to region splits/merges.
4080                   if (!fs.exists(regionDir.getPath())) {
4081                     LOG.warn("By the time we tried to process this region dir it was already gone: "
4082                         + regionDir.getPath());
4083                     return;
4084                   }
4085                 }
4086                 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4087                 HdfsEntry he = new HdfsEntry();
4088                 synchronized (hbi) {
4089                   if (hbi.getHdfsRegionDir() != null) {
4090                     errors.print("Directory " + encodedName + " duplicate?? " +
4091                         hbi.getHdfsRegionDir());
4092                   }
4093 
4094                   he.hdfsRegionDir = regionDir.getPath();
4095                   he.hdfsRegionDirModTime = regionDir.getModificationTime();
4096                   he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4097                   // we add to orphan list when we attempt to read .regioninfo
4098 
4099                   // Set a flag if this region contains only edits
4100                   // This is a special case: a region left behind after a split may contain only edits.
4101                   he.hdfsOnlyEdits = true;
4102                   FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4103                   Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4104                   for (FileStatus subDir : subDirs) {
4105                     errors.progress();
4106                     String sdName = subDir.getPath().getName();
4107                     if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4108                       he.hdfsOnlyEdits = false;
4109                       break;
4110                     }
4111                   }
4112                   hbi.hdfsEntry = he;
4113                 }
4114               } catch (Exception e) {
4115                 LOG.error("Could not load region dir", e);
4116                 exceptions.add(e);
4117               }
4118             }
4119           }));
4120         }
4121         // Ensure all pending tasks are complete (or that we run into an exception)
4122         for (Future<?> f : futures) {
4123           if (!exceptions.isEmpty()) {
4124             break;
4125           }
4126           try {
4127             f.get();
4128           } catch (ExecutionException e) {
4129             LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4130             // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4131           }
4132         }
4133       } catch (IOException e) {
4134         LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4135         exceptions.add(e);
4136       } finally {
4137         if (!exceptions.isEmpty()) {
4138           errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4139               + tableDir.getPath().getName()
4140               + " Unable to fetch all HDFS region information. ");
4141           // Just throw the first exception as an indication something bad happened
4142           // Don't need to propagate all the exceptions, we already logged them all anyway
4143           throw new ExecutionException("First exception in WorkItemHdfsDir",
4144               exceptions.firstElement());
4145         }
4146       }
4147       return null;
4148     }
4149   }
4150 
4151   /**
4152    * Contact hdfs and load the .regioninfo file for the specified region into
4153    * the regioninfo list, reporting the region dir as an orphan on failure.
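        * <p>
        * A minimal sketch of the intended use; the hbi, hbck, and errors arguments
        * are assumed to be supplied by the caller:
        *
        * <pre>
        *   Future&lt;Void&gt; f = executor.submit(new WorkItemHdfsRegionInfo(hbi, hbck, errors));
        *   f.get();   // failure to load .regioninfo surfaces here
        * </pre>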
4154    */
4155   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4156     private HbckInfo hbi;
4157     private HBaseFsck hbck;
4158     private ErrorReporter errors;
4159 
4160     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4161       this.hbi = hbi;
4162       this.hbck = hbck;
4163       this.errors = errors;
4164     }
4165 
4166     @Override
4167     public synchronized Void call() throws IOException {
4168       // only load entries that haven't been loaded yet.
4169       if (hbi.getHdfsHRI() == null) {
4170         try {
4171           errors.progress();
4172           hbck.loadHdfsRegioninfo(hbi);
4173         } catch (IOException ioe) {
4174           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4175               + hbi.getTableName() + " in hdfs dir "
4176               + hbi.getHdfsRegionDir()
4177               + "!  It may be an invalid format or version file.  Treating as "
4178               + "an orphaned regiondir.";
4179           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4180           try {
4181             hbck.debugLsr(hbi.getHdfsRegionDir());
4182           } catch (IOException ioe2) {
4183             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4184             throw ioe2;
4185           }
4186           hbck.orphanHdfsDirs.add(hbi);
4187           throw ioe;
4188         }
4189       }
4190       return null;
4191     }
4192   }
4193 
4194   /**
4195    * Display the full report from fsck. This displays all live and dead region
4196    * servers, and all known regions.
4197    */
4198   public static void setDisplayFullReport() {
4199     details = true;
4200   }
4201 
4202   /**
4203    * Disable region splits and merges during the hbck run.
4204    */
4205   public static void setDisableSplitAndMerge() {
4206     disableSplitAndMerge = true;
4207   }
4208 
4209   /**
4210    * Splits and merges should be disabled whenever we are modifying HBase;
4211    * they can also be disabled explicitly to prevent region movement from
4212    * causing false positives.
4213    */
4214   public boolean shouldDisableSplitAndMerge() {
4215     return fixAny || disableSplitAndMerge;
4216   }
4217 
4218   /**
4219    * Set summary mode.
4220    * Print only summary of the tables and status (OK or INCONSISTENT)
4221    */
4222   void setSummary() {
4223     summary = true;
4224   }
4225 
4226   /**
4227    * Set hbase:meta check mode.
4228    * Print only info about hbase:meta table deployment/state
4229    */
4230   void setCheckMetaOnly() {
4231     checkMetaOnly = true;
4232   }
4233 
4234   /**
4235    * Set region boundaries check mode.
4236    */
4237   void setRegionBoundariesCheck() {
4238     checkRegionBoundaries = true;
4239   }
4240 
4241   /**
4242    * Set table locks fix mode.
4243    * Delete table locks held for a long time
4244    */
4245   public void setFixTableLocks(boolean shouldFix) {
4246     fixTableLocks = shouldFix;
4247     fixAny |= shouldFix;
4248   }
4249 
4250   /**
4251    * Set orphaned table ZNodes fix mode.
4252    * Set the table state to disabled in the orphaned table ZNode.
4253    */
4254   public void setFixTableZNodes(boolean shouldFix) {
4255     fixTableZNodes = shouldFix;
4256     fixAny |= shouldFix;
4257   }
4258 
4259   /**
4260    * Mark that fsck should be rerun. We set this after attempting a fix so
4261    * that the tool runs once more to verify the fix did not break anything else.
4264    */
4265   void setShouldRerun() {
4266     rerun = true;
4267   }
4268 
4269   boolean shouldRerun() {
4270     return rerun;
4271   }
4272 
4273   /**
4274    * Enable fixing of region assignment inconsistencies (if any) found by
4275    * the fsck utility.
4276    */
4277   public void setFixAssignments(boolean shouldFix) {
4278     fixAssignments = shouldFix;
4279     fixAny |= shouldFix;
4280   }
4281 
4282   boolean shouldFixAssignments() {
4283     return fixAssignments;
4284   }
4285 
4286   public void setFixMeta(boolean shouldFix) {
4287     fixMeta = shouldFix;
4288     fixAny |= shouldFix;
4289   }
4290 
4291   boolean shouldFixMeta() {
4292     return fixMeta;
4293   }
4294 
4295   public void setFixEmptyMetaCells(boolean shouldFix) {
4296     fixEmptyMetaCells = shouldFix;
4297     fixAny |= shouldFix;
4298   }
4299 
4300   boolean shouldFixEmptyMetaCells() {
4301     return fixEmptyMetaCells;
4302   }
4303 
4304   public void setCheckHdfs(boolean checking) {
4305     checkHdfs = checking;
4306   }
4307 
4308   boolean shouldCheckHdfs() {
4309     return checkHdfs;
4310   }
4311 
4312   public void setFixHdfsHoles(boolean shouldFix) {
4313     fixHdfsHoles = shouldFix;
4314     fixAny |= shouldFix;
4315   }
4316 
4317   boolean shouldFixHdfsHoles() {
4318     return fixHdfsHoles;
4319   }
4320 
4321   public void setFixTableOrphans(boolean shouldFix) {
4322     fixTableOrphans = shouldFix;
4323     fixAny |= shouldFix;
4324   }
4325 
4326   boolean shouldFixTableOrphans() {
4327     return fixTableOrphans;
4328   }
4329 
4330   public void setFixHdfsOverlaps(boolean shouldFix) {
4331     fixHdfsOverlaps = shouldFix;
4332     fixAny |= shouldFix;
4333   }
4334 
4335   boolean shouldFixHdfsOverlaps() {
4336     return fixHdfsOverlaps;
4337   }
4338 
4339   public void setFixHdfsOrphans(boolean shouldFix) {
4340     fixHdfsOrphans = shouldFix;
4341     fixAny |= shouldFix;
4342   }
4343 
4344   boolean shouldFixHdfsOrphans() {
4345     return fixHdfsOrphans;
4346   }
4347 
4348   public void setFixVersionFile(boolean shouldFix) {
4349     fixVersionFile = shouldFix;
4350     fixAny |= shouldFix;
4351   }
4352 
4353   public boolean shouldFixVersionFile() {
4354     return fixVersionFile;
4355   }
4356 
4357   public void setSidelineBigOverlaps(boolean sbo) {
4358     this.sidelineBigOverlaps = sbo;
4359   }
4360 
4361   public boolean shouldSidelineBigOverlaps() {
4362     return sidelineBigOverlaps;
4363   }
4364 
4365   public void setFixSplitParents(boolean shouldFix) {
4366     fixSplitParents = shouldFix;
4367     fixAny |= shouldFix;
4368   }
4369 
4370   boolean shouldFixSplitParents() {
4371     return fixSplitParents;
4372   }
4373 
4374   public void setFixReferenceFiles(boolean shouldFix) {
4375     fixReferenceFiles = shouldFix;
4376     fixAny |= shouldFix;
4377   }
4378 
4379   boolean shouldFixReferenceFiles() {
4380     return fixReferenceFiles;
4381   }
4382 
4383   public boolean shouldIgnorePreCheckPermission() {
4384     return !fixAny || ignorePreCheckPermission;
4385   }
4386 
4387   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4388     this.ignorePreCheckPermission = ignorePreCheckPermission;
4389   }
4390 
4391   /**
4392    * @param mm maximum number of regions to merge into a single region.
4393    */
4394   public void setMaxMerge(int mm) {
4395     this.maxMerge = mm;
4396   }
4397 
4398   public int getMaxMerge() {
4399     return maxMerge;
4400   }
4401 
4402   public void setMaxOverlapsToSideline(int mo) {
4403     this.maxOverlapsToSideline = mo;
4404   }
4405 
4406   public int getMaxOverlapsToSideline() {
4407     return maxOverlapsToSideline;
4408   }
4409 
4410   /**
4411    * Only check/fix tables specified by the list;
4412    * an empty list means all tables are included.
4413    */
4414   boolean isTableIncluded(TableName table) {
4415     return tablesIncluded.isEmpty() || tablesIncluded.contains(table);
4416   }
4417 
4418   public void includeTable(TableName table) {
4419     tablesIncluded.add(table);
4420   }
4421 
4422   Set<TableName> getIncludedTables() {
4423     return new HashSet<TableName>(tablesIncluded);
4424   }
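       /*
        * Table filtering sketch (the table names here are illustrative only):
        *
        *   hbck.includeTable(TableName.valueOf("t1"));
        *   hbck.includeTable(TableName.valueOf("t2"));
        *   hbck.isTableIncluded(TableName.valueOf("t1"));   // true
        *   hbck.isTableIncluded(TableName.valueOf("t3"));   // false, list is non-empty
        */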
4425 
4426   /**
4427    * We are interested only in those tables that have not changed their state in
4428    * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag.
4429    * @param seconds - the time lag in seconds (stored internally as milliseconds)
4430    */
4431   public void setTimeLag(long seconds) {
4432     timelag = seconds * 1000; // convert to milliseconds
4433   }
4434 
4435   /**
4437    * @param sidelineDir - HDFS path to sideline data
4438    */
4439   public void setSidelineDir(String sidelineDir) {
4440     this.sidelineDir = new Path(sidelineDir);
4441   }
4442 
4443   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4444     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4445   }
4446 
4447   public HFileCorruptionChecker getHFilecorruptionChecker() {
4448     return hfcc;
4449   }
4450 
4451   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4452     this.hfcc = hfcc;
4453   }
4454 
4455   public void setRetCode(int code) {
4456     this.retcode = code;
4457   }
4458 
4459   public int getRetCode() {
4460     return retcode;
4461   }
4462 
4463   protected HBaseFsck printUsageAndExit() {
4464     StringWriter sw = new StringWriter(2048);
4465     PrintWriter out = new PrintWriter(sw);
4466     out.println("Usage: fsck [opts] {only tables}");
4467     out.println(" where [opts] are:");
4468     out.println("   -help Display help options (this)");
4469     out.println("   -details Display full report of all regions.");
4470     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4471                        "have not experienced any metadata updates in the last " +
4472                        "<timeInSeconds> seconds.");
4473     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4474         " before checking if the fix worked if run with -fix");
4475     out.println("   -summary Print only summary of the tables and status.");
4476     out.println("   -metaonly Only check the state of the hbase:meta table.");
4477     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4478     out.println("   -boundaries Verify that region boundaries are the same between META and store files.");
4479 
4480     out.println("");
4481     out.println("  Metadata Repair options: (expert features, use with caution!)");
4482     out.println("   -fix              Try to fix region assignments.  This is for backwards compatibility.");
4483     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4484     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4485     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4486         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4487     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4488     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4489     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4490     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4491     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4492     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4493     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow sidelining big overlaps");
4494     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4495     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4496     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4497     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4498     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4499         + " (empty REGIONINFO_QUALIFIER rows)");
4500 
4501     out.println("");
4502     out.println("  Datafile Repair options: (expert features, use with caution!)");
4503     out.println("   -checkCorruptHFiles     Check all HFiles by opening them to make sure they are valid");
4504     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  Implies -checkCorruptHFiles");
4505 
4506     out.println("");
4507     out.println("  Metadata Repair shortcuts");
4508     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4509         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4510         "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
4511     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4512 
4513     out.println("");
4514     out.println("  Table lock options");
4515     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4516 
4517     out.println("");
4518     out.println("  Table Znode options");
4519     out.println("   -fixOrphanedTableZnodes    Set table state in ZNode to disabled if table does not exist");
4520 
4521     out.flush();
4522     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4523 
4524     setRetCode(-2);
4525     return this;
4526   }
4527 
4528   /**
4529    * Main program entry point.
4530    *
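        * Typical invocations, assuming the standard HBase launcher script (the
        * table name below is illustrative):
        *
        * <pre>
        *   $ ./bin/hbase hbck -details
        *   $ ./bin/hbase hbck -fixAssignments TableFoo
        * </pre>
        *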
4531    * @param args command-line flags and optional table names
4532    * @throws Exception if hbck fails to run
4533    */
4534   public static void main(String[] args) throws Exception {
4535     // create a fsck object
4536     Configuration conf = HBaseConfiguration.create();
4537     Path hbasedir = FSUtils.getRootDir(conf);
4538     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4539     FSUtils.setFsDefault(conf, new Path(defaultFs));
4540     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4541     System.exit(ret);
4542   }
4543 
4544   /**
4545    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
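        * <p>
        * For example, overriding the table lock expiry for a single run (600000 ms
        * is the documented default of hbase.table.lock.expire.ms):
        *
        * <pre>
        *   $ ./bin/hbase hbck -Dhbase.table.lock.expire.ms=600000 -fixTableLocks
        * </pre>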
4546    */
4547   static class HBaseFsckTool extends Configured implements Tool {
4548     HBaseFsckTool(Configuration conf) { super(conf); }
4549     @Override
4550     public int run(String[] args) throws Exception {
4551       HBaseFsck hbck = new HBaseFsck(getConf());
4552       hbck.exec(hbck.executor, args);
4553       hbck.close();
4554       return hbck.getRetCode();
4555     }
4556   }
4557 
4558 
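       /**
        * Parse the command-line arguments and run hbck once; if any fix was applied,
        * sleep and run one more verification pass with the main repair options
        * turned off. Callers normally go through {@link HBaseFsckTool} rather than
        * invoking this directly.
        *
        * @param exec executor service used for parallel work items
        * @param args the same command-line arguments accepted by {@link #main(String[])}
        */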
4559   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4560     ServiceException, InterruptedException {
4561     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4562 
4563     boolean checkCorruptHFiles = false;
4564     boolean sidelineCorruptHFiles = false;
4565 
4566     // Process command-line args.
4567     for (int i = 0; i < args.length; i++) {
4568       String cmd = args[i];
4569       if (cmd.equals("-help") || cmd.equals("-h")) {
4570         return printUsageAndExit();
4571       } else if (cmd.equals("-details")) {
4572         setDisplayFullReport();
4573       }  else if (cmd.equals("-disableSplitAndMerge")) {
4574         setDisableSplitAndMerge();
4575       } else if (cmd.equals("-timelag")) {
4576         if (i == args.length - 1) {
4577           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4578           return printUsageAndExit();
4579         }
4580         try {
4581           long timelag = Long.parseLong(args[i+1]);
4582           setTimeLag(timelag);
4583         } catch (NumberFormatException e) {
4584           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4585           return printUsageAndExit();
4586         }
4587         i++;
4588       } else if (cmd.equals("-sleepBeforeRerun")) {
4589         if (i == args.length - 1) {
4590           errors.reportError(ERROR_CODE.WRONG_USAGE,
4591             "HBaseFsck: -sleepBeforeRerun needs a value.");
4592           return printUsageAndExit();
4593         }
4594         try {
4595           sleepBeforeRerun = Long.parseLong(args[i+1]);
4596         } catch (NumberFormatException e) {
4597           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4598           return printUsageAndExit();
4599         }
4600         i++;
4601       } else if (cmd.equals("-sidelineDir")) {
4602         if (i == args.length - 1) {
4603           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4604           return printUsageAndExit();
4605         }
4606         i++;
4607         setSidelineDir(args[i]);
4608       } else if (cmd.equals("-fix")) {
4609         errors.reportError(ERROR_CODE.WRONG_USAGE,
4610           "This option is deprecated; please use -fixAssignments instead.");
4611         setFixAssignments(true);
4612       } else if (cmd.equals("-fixAssignments")) {
4613         setFixAssignments(true);
4614       } else if (cmd.equals("-fixMeta")) {
4615         setFixMeta(true);
4616       } else if (cmd.equals("-noHdfsChecking")) {
4617         setCheckHdfs(false);
4618       } else if (cmd.equals("-fixHdfsHoles")) {
4619         setFixHdfsHoles(true);
4620       } else if (cmd.equals("-fixHdfsOrphans")) {
4621         setFixHdfsOrphans(true);
4622       } else if (cmd.equals("-fixTableOrphans")) {
4623         setFixTableOrphans(true);
4624       } else if (cmd.equals("-fixHdfsOverlaps")) {
4625         setFixHdfsOverlaps(true);
4626       } else if (cmd.equals("-fixVersionFile")) {
4627         setFixVersionFile(true);
4628       } else if (cmd.equals("-sidelineBigOverlaps")) {
4629         setSidelineBigOverlaps(true);
4630       } else if (cmd.equals("-fixSplitParents")) {
4631         setFixSplitParents(true);
4632       } else if (cmd.equals("-ignorePreCheckPermission")) {
4633         setIgnorePreCheckPermission(true);
4634       } else if (cmd.equals("-checkCorruptHFiles")) {
4635         checkCorruptHFiles = true;
4636       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4637         sidelineCorruptHFiles = true;
4638       } else if (cmd.equals("-fixReferenceFiles")) {
4639         setFixReferenceFiles(true);
4640       } else if (cmd.equals("-fixEmptyMetaCells")) {
4641         setFixEmptyMetaCells(true);
4642       } else if (cmd.equals("-repair")) {
4643         // this attempts to merge overlapping hdfs regions, needs testing
4644         // under load
4645         setFixHdfsHoles(true);
4646         setFixHdfsOrphans(true);
4647         setFixMeta(true);
4648         setFixAssignments(true);
4649         setFixHdfsOverlaps(true);
4650         setFixVersionFile(true);
4651         setSidelineBigOverlaps(true);
4652         setFixSplitParents(false);
4653         setCheckHdfs(true);
4654         setFixReferenceFiles(true);
4655         setFixTableLocks(true);
4656         setFixTableZNodes(true);
4657       } else if (cmd.equals("-repairHoles")) {
4658         // this will make all missing hdfs regions available but may lose data
4659         setFixHdfsHoles(true);
4660         setFixHdfsOrphans(false);
4661         setFixMeta(true);
4662         setFixAssignments(true);
4663         setFixHdfsOverlaps(false);
4664         setSidelineBigOverlaps(false);
4665         setFixSplitParents(false);
4666         setCheckHdfs(true);
4667       } else if (cmd.equals("-maxOverlapsToSideline")) {
4668         if (i == args.length - 1) {
4669           errors.reportError(ERROR_CODE.WRONG_USAGE,
4670             "-maxOverlapsToSideline needs a numeric value argument.");
4671           return printUsageAndExit();
4672         }
4673         try {
4674           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4675           setMaxOverlapsToSideline(maxOverlapsToSideline);
4676         } catch (NumberFormatException e) {
4677           errors.reportError(ERROR_CODE.WRONG_USAGE,
4678             "-maxOverlapsToSideline needs a numeric value argument.");
4679           return printUsageAndExit();
4680         }
4681         i++;
4682       } else if (cmd.equals("-maxMerge")) {
4683         if (i == args.length - 1) {
4684           errors.reportError(ERROR_CODE.WRONG_USAGE,
4685             "-maxMerge needs a numeric value argument.");
4686           return printUsageAndExit();
4687         }
4688         try {
4689           int maxMerge = Integer.parseInt(args[i+1]);
4690           setMaxMerge(maxMerge);
4691         } catch (NumberFormatException e) {
4692           errors.reportError(ERROR_CODE.WRONG_USAGE,
4693             "-maxMerge needs a numeric value argument.");
4694           return printUsageAndExit();
4695         }
4696         i++;
4697       } else if (cmd.equals("-summary")) {
4698         setSummary();
4699       } else if (cmd.equals("-metaonly")) {
4700         setCheckMetaOnly();
4701       } else if (cmd.equals("-boundaries")) {
4702         setRegionBoundariesCheck();
4703       } else if (cmd.equals("-fixTableLocks")) {
4704         setFixTableLocks(true);
4705       } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4706         setFixTableZNodes(true);
4707       } else if (cmd.startsWith("-")) {
4708         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option: " + cmd);
4709         return printUsageAndExit();
4710       } else {
4711         includeTable(TableName.valueOf(cmd));
4712         errors.print("Allowing checks/fixes for table: " + cmd);
4713       }
4714     }
4715 
4716     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4717 
4718     // pre-check current user has FS write permission or not
4719     try {
4720       preCheckPermission();
4721     } catch (AccessDeniedException ace) {
           LOG.error("Insufficient filesystem permissions for hbck; exiting", ace);
4722       Runtime.getRuntime().exit(-1);
4723     } catch (IOException ioe) {
           LOG.error("Filesystem permission pre-check failed; exiting", ioe);
4724       Runtime.getRuntime().exit(-1);
4725     }
4726 
4727     // do the real work of hbck
4728     connect();
4729 
4730     try {
4731       // if corrupt file mode is on, first fix them since they may be opened later
4732       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4733         LOG.info("Checking all hfiles for corruption");
4734         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4735         setHFileCorruptionChecker(hfcc); // so we can get result
4736         Collection<TableName> tables = getIncludedTables();
4737         Collection<Path> tableDirs = new ArrayList<Path>();
4738         Path rootdir = FSUtils.getRootDir(getConf());
4739         if (tables.size() > 0) {
4740           for (TableName t : tables) {
4741             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4742           }
4743         } else {
4744           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4745         }
4746         hfcc.checkTables(tableDirs);
4747         hfcc.report(errors);
4748       }
4749 
4750       // check and fix table integrity, region consistency.
4751       int code = onlineHbck();
4752       setRetCode(code);
4753       // If we have changed the HBase state it is better to run hbck again
4754       // to see if we haven't broken something else in the process.
4755       // We run it only once more because otherwise we can easily fall into
4756       // an infinite loop.
4757       if (shouldRerun()) {
4758         try {
4759           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4760           Thread.sleep(sleepBeforeRerun);
4761         } catch (InterruptedException ie) {
4762           LOG.warn("Interrupted while sleeping");
4763           return this;
4764         }
4765         // Just report
4766         setFixAssignments(false);
4767         setFixMeta(false);
4768         setFixHdfsHoles(false);
4769         setFixHdfsOverlaps(false);
4770         setFixVersionFile(false);
4771         setFixTableOrphans(false);
4772         errors.resetErrors();
4773         code = onlineHbck();
4774         setRetCode(code);
4775       }
4776     } finally {
4777       IOUtils.cleanup(null, this);
4778     }
4779     return this;
4780   }
4781 
4782   /**
4783    * ls -r for debugging purposes
4784    */
4785   void debugLsr(Path p) throws IOException {
4786     debugLsr(getConf(), p, errors);
4787   }
4788 
4789   /**
4790    * ls -r for debugging purposes
4791    */
4792   public static void debugLsr(Configuration conf,
4793       Path p) throws IOException {
4794     debugLsr(conf, p, new PrintingErrorReporter());
4795   }
4796 
4797   /**
4798    * ls -r for debugging purposes
4799    */
4800   public static void debugLsr(Configuration conf,
4801       Path p, ErrorReporter errors) throws IOException {
4802     if (!LOG.isDebugEnabled() || p == null) {
4803       return;
4804     }
4805     FileSystem fs = p.getFileSystem(conf);
4806 
4807     if (!fs.exists(p)) {
4808       // nothing
4809       return;
4810     }
4811     errors.print(p.toString());
4812 
4813     if (fs.isFile(p)) {
4814       return;
4815     }
4816 
4817     if (fs.getFileStatus(p).isDirectory()) {
4818       FileStatus[] fss = fs.listStatus(p);
4819       for (FileStatus status : fss) {
4820         debugLsr(conf, status.getPath(), errors);
4821       }
4822     }
4823   }
4824 }