
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.DataInputStream;
23  import java.io.EOFException;
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.InterruptedIOException;
28  import java.lang.reflect.InvocationTargetException;
29  import java.lang.reflect.Method;
30  import java.net.InetSocketAddress;
31  import java.net.URI;
32  import java.net.URISyntaxException;
33  import java.util.ArrayList;
34  import java.util.Arrays;
35  import java.util.Collections;
36  import java.util.HashMap;
37  import java.util.Iterator;
38  import java.util.LinkedList;
39  import java.util.List;
40  import java.util.Map;
41  import java.util.Vector;
42  import java.util.concurrent.ArrayBlockingQueue;
43  import java.util.concurrent.ConcurrentHashMap;
44  import java.util.concurrent.ExecutionException;
45  import java.util.concurrent.ExecutorService;
46  import java.util.concurrent.Future;
47  import java.util.concurrent.FutureTask;
48  import java.util.concurrent.ThreadPoolExecutor;
49  import java.util.concurrent.TimeUnit;
50  import java.util.regex.Pattern;
51  
52  import org.apache.commons.logging.Log;
53  import org.apache.commons.logging.LogFactory;
54  import org.apache.hadoop.HadoopIllegalArgumentException;
55  import org.apache.hadoop.conf.Configuration;
56  import org.apache.hadoop.fs.BlockLocation;
57  import org.apache.hadoop.fs.FSDataInputStream;
58  import org.apache.hadoop.fs.FSDataOutputStream;
59  import org.apache.hadoop.fs.FileStatus;
60  import org.apache.hadoop.fs.FileSystem;
61  import org.apache.hadoop.fs.Path;
62  import org.apache.hadoop.fs.PathFilter;
63  import org.apache.hadoop.fs.permission.FsAction;
64  import org.apache.hadoop.fs.permission.FsPermission;
65  import org.apache.hadoop.hbase.ClusterId;
66  import org.apache.hadoop.hbase.HColumnDescriptor;
67  import org.apache.hadoop.hbase.HConstants;
68  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
69  import org.apache.hadoop.hbase.HRegionInfo;
70  import org.apache.hadoop.hbase.RemoteExceptionHandler;
71  import org.apache.hadoop.hbase.TableName;
72  import org.apache.hadoop.hbase.classification.InterfaceAudience;
73  import org.apache.hadoop.hbase.exceptions.DeserializationException;
74  import org.apache.hadoop.hbase.fs.HFileSystem;
75  import org.apache.hadoop.hbase.master.HMaster;
76  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
77  import org.apache.hadoop.hbase.protobuf.generated.FSProtos;
78  import org.apache.hadoop.hbase.regionserver.HRegion;
79  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
80  import org.apache.hadoop.hbase.security.AccessDeniedException;
81  import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
82  import org.apache.hadoop.hdfs.DistributedFileSystem;
83  import org.apache.hadoop.hdfs.protocol.FSConstants;
84  import org.apache.hadoop.io.IOUtils;
85  import org.apache.hadoop.io.SequenceFile;
86  import org.apache.hadoop.ipc.RemoteException;
87  import org.apache.hadoop.security.UserGroupInformation;
88  import org.apache.hadoop.util.Progressable;
89  import org.apache.hadoop.util.ReflectionUtils;
90  import org.apache.hadoop.util.StringUtils;
91  
92  import com.google.common.base.Throwables;
93  import com.google.common.collect.Iterators;
94  import com.google.common.primitives.Ints;
95  
96  import edu.umd.cs.findbugs.annotations.CheckForNull;
97  
98  /**
99   * Utility methods for interacting with the underlying file system.
100  */
101 @InterfaceAudience.Private
102 public abstract class FSUtils {
103   private static final Log LOG = LogFactory.getLog(FSUtils.class);
104 
105   /** Full access permissions (starting point for a umask) */
106   public static final String FULL_RWX_PERMISSIONS = "777";
107   private static final String THREAD_POOLSIZE = "hbase.client.localityCheck.threadPoolSize";
108   private static final int DEFAULT_THREAD_POOLSIZE = 2;
109 
110   /** Set to true on Windows platforms */
111   public static final boolean WINDOWS = System.getProperty("os.name").startsWith("Windows");
112 
113   protected FSUtils() {
114     super();
115   }
116 
117   /**
118    * Sets storage policy for given path according to config setting.
119    * If the passed path is a directory, we'll set the storage policy for all files
120    * created in the future in said directory. Note that this change in storage
121    * policy takes place at the HDFS level; it will persist beyond this RS's lifecycle.
122    * If we're running on a version of HDFS that doesn't support the given storage policy
123    * (or storage policies at all), then we'll issue a log message and continue.
124    *
125    * See http://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html
126    *
127    * @param fs the FileSystem to act on; we only do anything if it is an instance of DistributedFileSystem
128    * @param conf used to look up storage policy with given key; not modified.
129    * @param path the Path whose storage policy is to be set
130    * @param policyKey e.g. HConstants.WAL_STORAGE_POLICY
131    * @param defaultPolicy usually should be the policy NONE to delegate to HDFS
132    */
133   public static void setStoragePolicy(final FileSystem fs, final Configuration conf,
134       final Path path, final String policyKey, final String defaultPolicy) {
135     String storagePolicy = conf.get(policyKey, defaultPolicy).toUpperCase();
136     if (storagePolicy.equals(defaultPolicy)) {
137       if (LOG.isTraceEnabled()) {
138         LOG.trace("default policy of " + defaultPolicy + " requested, exiting early.");
139       }
140       return;
141     }
142     if (fs instanceof DistributedFileSystem) {
143       DistributedFileSystem dfs = (DistributedFileSystem)fs;
144       // Once our minimum supported Hadoop version is 2.6.0 we can remove reflection.
145       Class<? extends DistributedFileSystem> dfsClass = dfs.getClass();
146       Method m = null;
147       try {
148         m = dfsClass.getDeclaredMethod("setStoragePolicy",
149             new Class<?>[] { Path.class, String.class });
150         m.setAccessible(true);
151       } catch (NoSuchMethodException e) {
152         LOG.info("FileSystem doesn't support"
153             + " setStoragePolicy; --HDFS-6584 not available");
154       } catch (SecurityException e) {
155         LOG.info("Doesn't have access to setStoragePolicy on "
156             + "FileSystems --HDFS-6584 not available", e);
157         m = null; // could happen on setAccessible()
158       }
159       if (m != null) {
160         try {
161           m.invoke(dfs, path, storagePolicy);
162           LOG.info("set " + storagePolicy + " for " + path);
163         } catch (Exception e) {
164           // check for lack of HDFS-7228
165           boolean probablyBadPolicy = false;
166           if (e instanceof InvocationTargetException) {
167             final Throwable exception = e.getCause();
168             if (exception instanceof RemoteException &&
169                 HadoopIllegalArgumentException.class.getName().equals(
170                     ((RemoteException)exception).getClassName())) {
171               LOG.warn("Given storage policy, '" + storagePolicy + "', was rejected and probably " +
172                   "isn't a valid policy for the version of Hadoop you're running. I.e. if you're " +
173                   "trying to use SSD related policies then you're likely missing HDFS-7228. For " +
174                   "more information see the 'ArchivalStorage' docs for your Hadoop release.");
175               LOG.debug("More information about the invalid storage policy.", exception);
176               probablyBadPolicy = true;
177             }
178           }
179           if (!probablyBadPolicy) {
180             // This swallows FNFE, should we be throwing it? seems more likely to indicate dev
181             // misuse than a runtime problem with HDFS.
182             LOG.warn("Unable to set " + storagePolicy + " for " + path, e);
183           }
184         }
185       }
186     } else {
187       LOG.info("FileSystem isn't an instance of DistributedFileSystem; presuming it doesn't " +
188           "support setStoragePolicy.");
189     }
190   }
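
  // A minimal usage sketch, assuming an "example-wals" directory name (illustrative only): route
  // a directory to the policy configured under HConstants.WAL_STORAGE_POLICY, with "NONE"
  // delegating the decision to HDFS as described in the javadoc above.
  private static void exampleApplyWalStoragePolicy(final Configuration conf) throws IOException {
    Path walDir = new Path(getRootDir(conf), "example-wals");
    FileSystem fs = walDir.getFileSystem(conf);
    setStoragePolicy(fs, conf, walDir, HConstants.WAL_STORAGE_POLICY, "NONE");
  }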
191 
192   /**
193    * Compares path components only. Does not consider schemes; i.e. if the schemes differ but
194    * <code>path</code> starts with <code>rootPath</code>, then the function returns true.
195    * @param rootPath root path to compare against
196    * @param path path to check
197    * @return True if <code>path</code> starts with <code>rootPath</code>
198    */
199   public static boolean isStartingWithPath(final Path rootPath, final String path) {
200     String uriRootPath = rootPath.toUri().getPath();
201     String tailUriPath = (new Path(path)).toUri().getPath();
202     return tailUriPath.startsWith(uriRootPath);
203   }
204 
205   /**
206    * Compares the path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare
207    * the '/a/b/c' part. Does not consider schemes; i.e. if the schemes differ but the path or a
208    * subpath matches, the two will equate.
209    * @param pathToSearch Path we will be trying to match.
210    * @param pathTail tail path to match against
211    * @return True if <code>pathTail</code> is the tail of the path of <code>pathToSearch</code>
212    */
213   public static boolean isMatchingTail(final Path pathToSearch, String pathTail) {
214     return isMatchingTail(pathToSearch, new Path(pathTail));
215   }
216 
217   /**
218    * Compares the path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare
219    * the '/a/b/c' part. If you passed in 'hdfs://a/b/c' and 'b/c', it would return true.  Does not
220    * consider schemes; i.e. if the schemes differ but the path or a subpath matches, the two will equate.
221    * @param pathToSearch Path we will be trying to match.
222    * @param pathTail tail path to match against
223    * @return True if <code>pathTail</code> is the tail of the path of <code>pathToSearch</code>
224    */
225   public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) {
226     if (pathToSearch.depth() != pathTail.depth()) return false;
227     Path tailPath = pathTail;
228     String tailName;
229     Path toSearch = pathToSearch;
230     String toSearchName;
231     boolean result = false;
232     do {
233       tailName = tailPath.getName();
234       if (tailName == null || tailName.length() <= 0) {
235         result = true;
236         break;
237       }
238       toSearchName = toSearch.getName();
239       if (toSearchName == null || toSearchName.length() <= 0) break;
240       // Move up a parent on each path for next go around.  Path doesn't let us go off the end.
241       tailPath = tailPath.getParent();
242       toSearch = toSearch.getParent();
243     } while(tailName.equals(toSearchName));
244     return result;
245   }
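
  // A minimal sketch of the tail-matching semantics above; both paths are made-up examples.
  // Only path components are compared, so a fully-qualified path and an unqualified path with
  // the same components match.
  private static boolean exampleTailMatch() {
    Path qualified = new Path("hdfs://namenode.example.com:8020/a/b/c");
    Path unqualified = new Path("/a/b/c");
    return isMatchingTail(qualified, unqualified); // true: scheme and authority are ignored
  }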
246 
247   public static FSUtils getInstance(FileSystem fs, Configuration conf) {
248     String scheme = fs.getUri().getScheme();
249     if (scheme == null) {
250       LOG.warn("Could not find scheme for uri " +
251           fs.getUri() + ", default to hdfs");
252       scheme = "hdfs";
253     }
254     Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." +
255         scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl
256     FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf);
257     return fsUtils;
258   }
259 
260   /**
261    * Delete the given directory if it exists.
262    * @param fs filesystem object
263    * @param dir directory to delete
264    * @return True if deleted <code>dir</code>
265    * @throws IOException e
266    */
267   public static boolean deleteDirectory(final FileSystem fs, final Path dir)
268   throws IOException {
269     return fs.exists(dir) && fs.delete(dir, true);
270   }
271 
272   /**
273    * Delete the region directory if it exists.
274    * @param conf configuration used to resolve the root directory and filesystem
275    * @param hri region whose directory is to be deleted
276    * @return True if deleted the region directory.
277    * @throws IOException
278    */
279   public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri)
280   throws IOException {
281     Path rootDir = getRootDir(conf);
282     FileSystem fs = rootDir.getFileSystem(conf);
283     return deleteDirectory(fs,
284       new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName()));
285   }
286 
287   /**
288    * Return the number of bytes that large input files should optimally
289    * be split into to minimize i/o time.
290    *
291    * Uses reflection to search for getDefaultBlockSize(Path f);
292    * if the method doesn't exist, falls back to using getDefaultBlockSize().
293    *
294    * @param fs filesystem object
295    * @return the default block size for the path's filesystem
296    * @throws IOException e
297    */
298   public static long getDefaultBlockSize(final FileSystem fs, final Path path) throws IOException {
299     Method m = null;
300     Class<? extends FileSystem> cls = fs.getClass();
301     try {
302       m = cls.getMethod("getDefaultBlockSize", new Class<?>[] { Path.class });
303     } catch (NoSuchMethodException e) {
304       LOG.info("FileSystem doesn't support getDefaultBlockSize");
305     } catch (SecurityException e) {
306       LOG.info("Doesn't have access to getDefaultBlockSize on FileSystems", e);
307       m = null; // could happen on setAccessible()
308     }
309     if (m == null) {
310       return fs.getDefaultBlockSize(path);
311     } else {
312       try {
313         Object ret = m.invoke(fs, path);
314         return ((Long)ret).longValue();
315       } catch (Exception e) {
316         throw new IOException(e);
317       }
318     }
319   }
320 
321   /**
322    * Get the default replication.
323    *
324    * Uses reflection to search for getDefaultReplication(Path f);
325    * if the method doesn't exist, falls back to using getDefaultReplication().
326    *
327    * @param fs filesystem object
328    * @param path path of file
329    * @return default replication for the path's filesystem
330    * @throws IOException e
331    */
332   public static short getDefaultReplication(final FileSystem fs, final Path path) throws IOException {
333     Method m = null;
334     Class<? extends FileSystem> cls = fs.getClass();
335     try {
336       m = cls.getMethod("getDefaultReplication", new Class<?>[] { Path.class });
337     } catch (NoSuchMethodException e) {
338       LOG.info("FileSystem doesn't support getDefaultReplication");
339     } catch (SecurityException e) {
340       LOG.info("Doesn't have access to getDefaultReplication on FileSystems", e);
341       m = null; // could happen on setAccessible()
342     }
343     if (m == null) {
344       return fs.getDefaultReplication(path);
345     } else {
346       try {
347         Object ret = m.invoke(fs, path);
348         return ((Number)ret).shortValue();
349       } catch (Exception e) {
350         throw new IOException(e);
351       }
352     }
353   }
354 
355   /**
356    * Returns the default buffer size to use during writes.
357    *
358    * The size of the buffer should probably be a multiple of hardware
359    * page size (4096 on Intel x86), and it determines how much data is
360    * buffered during read and write operations.
361    *
362    * @param fs filesystem object
363    * @return default buffer size to use during writes
364    */
365   public static int getDefaultBufferSize(final FileSystem fs) {
366     return fs.getConf().getInt("io.file.buffer.size", 4096);
367   }
368 
369   /**
370    * Create the specified file on the filesystem. By default, this will:
371    * <ol>
372    * <li>overwrite the file if it exists</li>
373    * <li>apply the umask in the configuration (if it is enabled)</li>
374    * <li>use the fs configured buffer size (or 4096 if not set)</li>
375    * <li>use the default replication</li>
376    * <li>use the default block size</li>
377    * <li>not track progress</li>
378    * </ol>
379    *
380    * @param fs {@link FileSystem} on which to write the file
381    * @param path {@link Path} to the file to write
382    * @param perm permissions
383    * @param favoredNodes preferred DataNode locations to hint for the new file's block placement
384    * @return output stream to the created file
385    * @throws IOException if the file cannot be created
386    */
387   public static FSDataOutputStream create(FileSystem fs, Path path,
388       FsPermission perm, InetSocketAddress[] favoredNodes) throws IOException {
389     if (fs instanceof HFileSystem) {
390       FileSystem backingFs = ((HFileSystem)fs).getBackingFs();
391       if (backingFs instanceof DistributedFileSystem) {
392         // Try to use the favoredNodes version via reflection to allow backwards-
393         // compatibility.
394         try {
395           return (FSDataOutputStream) (DistributedFileSystem.class
396               .getDeclaredMethod("create", Path.class, FsPermission.class,
397                   boolean.class, int.class, short.class, long.class,
398                   Progressable.class, InetSocketAddress[].class)
399                   .invoke(backingFs, path, perm, true,
400                       getDefaultBufferSize(backingFs),
401                       getDefaultReplication(backingFs, path),
402                       getDefaultBlockSize(backingFs, path),
403                       null, favoredNodes));
404         } catch (InvocationTargetException ite) {
405           // Function was properly called, but threw its own exception.
406           throw new IOException(ite.getCause());
407         } catch (NoSuchMethodException e) {
408           LOG.debug("DFS Client does not support most favored nodes create; using default create");
409           if (LOG.isTraceEnabled()) LOG.trace("Ignoring; use default create", e);
410         } catch (IllegalArgumentException e) {
411           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
412         } catch (SecurityException e) {
413           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
414         } catch (IllegalAccessException e) {
415           LOG.debug("Ignoring (most likely Reflection related exception) " + e);
416         }
417       }
418     }
419     return create(fs, path, perm, true);
420   }
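
  // A minimal usage sketch, assuming hypothetical DataNode host names and port: create a file
  // while hinting at favored nodes. On filesystems other than HDFS the hint is silently dropped
  // and the plain create(fs, path, perm, true) overload is used instead.
  private static FSDataOutputStream exampleCreateWithFavoredNodes(final FileSystem fs,
      final Path path, final FsPermission perm) throws IOException {
    InetSocketAddress[] favoredNodes = new InetSocketAddress[] {
        new InetSocketAddress("datanode-1.example.com", 50010),
        new InetSocketAddress("datanode-2.example.com", 50010)
    };
    return create(fs, path, perm, favoredNodes);
  }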
421 
422   /**
423    * Create the specified file on the filesystem. By default, this will:
424    * <ol>
425    * <li>apply the umask in the configuration (if it is enabled)</li>
426    * <li>use the fs configured buffer size (or 4096 if not set)</li>
427    * <li>use the default replication</li>
428    * <li>use the default block size</li>
429    * <li>not track progress</li>
430    * </ol>
431    *
432    * @param fs {@link FileSystem} on which to write the file
433    * @param path {@link Path} to the file to write
434    * @param perm permissions to apply to the new file
435    * @param overwrite Whether or not an existing file at this path should be overwritten.
436    * @return output stream to the created file
437    * @throws IOException if the file cannot be created
438    */
439   public static FSDataOutputStream create(FileSystem fs, Path path,
440       FsPermission perm, boolean overwrite) throws IOException {
441     if (LOG.isTraceEnabled()) {
442       LOG.trace("Creating file=" + path + " with permission=" + perm + ", overwrite=" + overwrite);
443     }
444     return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
445         getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
446   }
447 
448   /**
449    * Get the file permissions specified in the configuration, if they are
450    * enabled.
451    *
452    * @param fs filesystem that the file will be created on.
453    * @param conf configuration to read for determining if permissions are
454    *          enabled and which to use
455    * @param permssionConfKey property key in the configuration to use when
456    *          finding the permission
457    * @return the permission to use when creating a new file on the fs. If
458    *         special permissions are not specified in the configuration, then
459    *         the default permissions on the fs will be returned.
460    */
461   public static FsPermission getFilePermissions(final FileSystem fs,
462       final Configuration conf, final String permssionConfKey) {
463     boolean enablePermissions = conf.getBoolean(
464         HConstants.ENABLE_DATA_FILE_UMASK, false);
465 
466     if (enablePermissions) {
467       try {
468         FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
469         // make sure that we have a mask, if not, go default.
470         String mask = conf.get(permssionConfKey);
471         if (mask == null)
472           return FsPermission.getFileDefault();
473         // apply the umask
474         FsPermission umask = new FsPermission(mask);
475         return perm.applyUMask(umask);
476       } catch (IllegalArgumentException e) {
477         LOG.warn(
478             "Incorrect umask attempted to be created: "
479                 + conf.get(permssionConfKey)
480                 + ", using default file permissions.", e);
481         return FsPermission.getFileDefault();
482       }
483     }
484     return FsPermission.getFileDefault();
485   }
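
  // A minimal usage sketch; the "hbase.data.umask" key is an assumption for illustration. If the
  // umask feature is disabled or the key is unset, the filesystem default permission is used.
  private static FSDataOutputStream exampleCreateWithConfiguredPermissions(final FileSystem fs,
      final Configuration conf, final Path path) throws IOException {
    FsPermission perm = getFilePermissions(fs, conf, "hbase.data.umask");
    return create(fs, path, perm, true);
  }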
486 
487   /**
488    * Checks to see if the specified file system is available
489    *
490    * @param fs filesystem
491    * @throws IOException e
492    */
493   public static void checkFileSystemAvailable(final FileSystem fs)
494   throws IOException {
495     if (!(fs instanceof DistributedFileSystem)) {
496       return;
497     }
498     IOException exception = null;
499     DistributedFileSystem dfs = (DistributedFileSystem) fs;
500     try {
501       if (dfs.exists(new Path("/"))) {
502         return;
503       }
504     } catch (IOException e) {
505       exception = RemoteExceptionHandler.checkIOException(e);
506     }
507     try {
508       fs.close();
509     } catch (Exception e) {
510       LOG.error("file system close failed: ", e);
511     }
512     IOException io = new IOException("File system is not available");
513     io.initCause(exception);
514     throw io;
515   }
516 
517   /**
518    * We use reflection because {@link DistributedFileSystem#setSafeMode(
519    * FSConstants.SafeModeAction action, boolean isChecked)} is not in hadoop 1.1
520    *
521    * @param dfs
522    * @return whether we're in safe mode
523    * @throws IOException
524    */
525   private static boolean isInSafeMode(DistributedFileSystem dfs) throws IOException {
526     boolean inSafeMode = false;
527     try {
528       Method m = DistributedFileSystem.class.getMethod("setSafeMode", new Class<?> []{
529           org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.class, boolean.class});
530       inSafeMode = (Boolean) m.invoke(dfs,
531         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET, true);
532     } catch (Exception e) {
533       if (e instanceof IOException) throw (IOException) e;
534 
535       // Check whether dfs is on safemode.
536       inSafeMode = dfs.setSafeMode(
537         org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction.SAFEMODE_GET);
538     }
539     return inSafeMode;
540   }
541 
542   /**
543    * Check whether dfs is in safemode.
544    * @param conf
545    * @throws IOException
546    */
547   public static void checkDfsSafeMode(final Configuration conf)
548   throws IOException {
549     boolean isInSafeMode = false;
550     FileSystem fs = FileSystem.get(conf);
551     if (fs instanceof DistributedFileSystem) {
552       DistributedFileSystem dfs = (DistributedFileSystem)fs;
553       isInSafeMode = isInSafeMode(dfs);
554     }
555     if (isInSafeMode) {
556       throw new IOException("File system is in safemode, it can't be written now");
557     }
558   }
559 
560   /**
561    * Verifies current version of file system
562    *
563    * @param fs filesystem object
564    * @param rootdir root hbase directory
565    * @return null if no version file exists, version string otherwise.
566    * @throws IOException e
567    * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
568    */
569   public static String getVersion(FileSystem fs, Path rootdir)
570   throws IOException, DeserializationException {
571     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
572     FileStatus[] status = null;
573     try {
574       // hadoop 2.0 throws FNFE if directory does not exist.
575       // hadoop 1.0 returns null if directory does not exist.
576       status = fs.listStatus(versionFile);
577     } catch (FileNotFoundException fnfe) {
578       return null;
579     }
580     if (status == null || status.length == 0) return null;
581     String version = null;
582     byte [] content = new byte [(int)status[0].getLen()];
583     FSDataInputStream s = fs.open(versionFile);
584     try {
585       IOUtils.readFully(s, content, 0, content.length);
586       if (ProtobufUtil.isPBMagicPrefix(content)) {
587         version = parseVersionFrom(content);
588       } else {
589         // Presume it is in the pre-pb format.
590         InputStream is = new ByteArrayInputStream(content);
591         DataInputStream dis = new DataInputStream(is);
592         try {
593           version = dis.readUTF();
594         } finally {
595           dis.close();
596         }
597       }
598     } catch (EOFException eof) {
599       LOG.warn("Version file was empty, odd, will try to set it.");
600     } finally {
601       s.close();
602     }
603     return version;
604   }
605 
606   /**
607    * Parse the content of the ${HBASE_ROOTDIR}/hbase.version file.
608    * @param bytes The byte content of the hbase.version file.
609    * @return The version found in the file as a String.
610    * @throws DeserializationException
611    */
612   static String parseVersionFrom(final byte [] bytes)
613   throws DeserializationException {
614     ProtobufUtil.expectPBMagicPrefix(bytes);
615     int pblen = ProtobufUtil.lengthOfPBMagic();
616     FSProtos.HBaseVersionFileContent.Builder builder =
617       FSProtos.HBaseVersionFileContent.newBuilder();
618     try {
619       ProtobufUtil.mergeFrom(builder, bytes, pblen, bytes.length - pblen);
620       return builder.getVersion();
621     } catch (IOException e) {
622       // Convert
623       throw new DeserializationException(e);
624     }
625   }
626 
627   /**
628    * Create the content to write into the ${HBASE_ROOTDIR}/hbase.version file.
629    * @param version Version to persist
630    * @return Serialized protobuf with <code>version</code> content and a bit of pb magic for a prefix.
631    */
632   static byte [] toVersionByteArray(final String version) {
633     FSProtos.HBaseVersionFileContent.Builder builder =
634       FSProtos.HBaseVersionFileContent.newBuilder();
635     return ProtobufUtil.prependPBMagic(builder.setVersion(version).build().toByteArray());
636   }
637 
638   /**
639    * Verifies current version of file system
640    *
641    * @param fs file system
642    * @param rootdir root directory of HBase installation
643    * @param message if true, issues a message on System.out
644    *
645    * @throws IOException e
646    * @throws DeserializationException
647    */
648   public static void checkVersion(FileSystem fs, Path rootdir, boolean message)
649   throws IOException, DeserializationException {
650     checkVersion(fs, rootdir, message, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
651   }
652 
653   /**
654    * Verifies current version of file system
655    *
656    * @param fs file system
657    * @param rootdir root directory of HBase installation
658    * @param message if true, issues a message on System.out
659    * @param wait wait interval
660    * @param retries number of times to retry
661    *
662    * @throws IOException e
663    * @throws DeserializationException
664    */
665   public static void checkVersion(FileSystem fs, Path rootdir,
666       boolean message, int wait, int retries)
667   throws IOException, DeserializationException {
668     String version = getVersion(fs, rootdir);
669     if (version == null) {
670       if (!metaRegionExists(fs, rootdir)) {
671         // rootDir is empty (no version file and no root region)
672         // just create new version file (HBASE-1195)
673         setVersion(fs, rootdir, wait, retries);
674         return;
675       }
676     } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0) return;
677 
678     // version is out of date; require migration
679     // Output on stdout so user sees it in terminal.
680     String msg = "HBase file layout needs to be upgraded."
681       + " You have version " + version
682       + " and I want version " + HConstants.FILE_SYSTEM_VERSION
683       + ". Consult http://hbase.apache.org/book.html for further information about upgrading HBase."
684       + " Is your hbase.rootdir valid? If so, you may need to run "
685       + "'hbase hbck -fixVersionFile'.";
686     if (message) {
687       System.out.println("WARNING! " + msg);
688     }
689     throw new FileSystemVersionException(msg);
690   }
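
  // A minimal sketch of a startup-style layout check; the wait/retry values are arbitrary
  // examples. An empty root dir gets a fresh version file, a stale one raises
  // FileSystemVersionException.
  private static void exampleCheckLayoutVersion(final Configuration conf)
      throws IOException, DeserializationException {
    Path rootdir = getRootDir(conf);
    FileSystem fs = rootdir.getFileSystem(conf);
    checkVersion(fs, rootdir, true, 1000, 3);
  }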
691 
692   /**
693    * Sets version of file system
694    *
695    * @param fs filesystem object
696    * @param rootdir hbase root
697    * @throws IOException e
698    */
699   public static void setVersion(FileSystem fs, Path rootdir)
700   throws IOException {
701     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
702       HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
703   }
704 
705   /**
706    * Sets version of file system
707    *
708    * @param fs filesystem object
709    * @param rootdir hbase root
710    * @param wait time to wait for retry
711    * @param retries number of times to retry before failing
712    * @throws IOException e
713    */
714   public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
715   throws IOException {
716     setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
717   }
718 
719 
720   /**
721    * Sets version of file system
722    *
723    * @param fs filesystem object
724    * @param rootdir hbase root directory
725    * @param version version to set
726    * @param wait time to wait for retry
727    * @param retries number of times to retry before throwing an IOException
728    * @throws IOException e
729    */
730   public static void setVersion(FileSystem fs, Path rootdir, String version,
731       int wait, int retries) throws IOException {
732     Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
733     Path tempVersionFile = new Path(rootdir, HConstants.HBASE_TEMP_DIRECTORY + Path.SEPARATOR +
734       HConstants.VERSION_FILE_NAME);
735     while (true) {
736       try {
737         // Write the version to a temporary file
738         FSDataOutputStream s = fs.create(tempVersionFile);
739         try {
740           s.write(toVersionByteArray(version));
741           s.close();
742           s = null;
743           // Move the temp version file to its normal location. Returns false
744           // if the rename failed. Throw an IOE in that case.
745           if (!fs.rename(tempVersionFile, versionFile)) {
746             throw new IOException("Unable to move temp version file to " + versionFile);
747           }
748         } finally {
749           // Cleaning up the temporary if the rename failed would be trying
750           // too hard. We'll unconditionally create it again the next time
751           // through anyway, files are overwritten by default by create().
752 
753           // Attempt to close the stream on the way out if it is still open.
754           try {
755             if (s != null) s.close();
756           } catch (IOException ignore) { }
757         }
758         LOG.info("Created version file at " + rootdir.toString() + " with version=" + version);
759         return;
760       } catch (IOException e) {
761         if (retries > 0) {
762           LOG.debug("Unable to create version file at " + rootdir.toString() + ", retrying", e);
763           fs.delete(versionFile, false);
764           try {
765             if (wait > 0) {
766               Thread.sleep(wait);
767             }
768           } catch (InterruptedException ie) {
769             throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
770           }
771           retries--;
772         } else {
773           throw e;
774         }
775       }
776     }
777   }
778 
779   /**
780    * Checks that a cluster ID file exists in the HBase root directory
781    * @param fs the root directory FileSystem
782    * @param rootdir the HBase root directory in HDFS
783    * @param wait how long (in milliseconds) to wait between retries
784    * @return <code>true</code> if the file exists, otherwise <code>false</code>
785    * @throws IOException if checking the FileSystem fails
786    */
787   public static boolean checkClusterIdExists(FileSystem fs, Path rootdir,
788       int wait) throws IOException {
789     while (true) {
790       try {
791         Path filePath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
792         return fs.exists(filePath);
793       } catch (IOException ioe) {
794         if (wait > 0) {
795           LOG.warn("Unable to check cluster ID file in " + rootdir.toString() +
796               ", retrying in "+wait+"msec: "+StringUtils.stringifyException(ioe));
797           try {
798             Thread.sleep(wait);
799           } catch (InterruptedException e) {
800             throw (InterruptedIOException)new InterruptedIOException().initCause(e);
801           }
802         } else {
803           throw ioe;
804         }
805       }
806     }
807   }
808 
809   /**
810    * Returns the value of the unique cluster ID stored for this HBase instance.
811    * @param fs the root directory FileSystem
812    * @param rootdir the path to the HBase root directory
813    * @return the unique cluster identifier
814    * @throws IOException if reading the cluster ID file fails
815    */
816   public static ClusterId getClusterId(FileSystem fs, Path rootdir)
817   throws IOException {
818     Path idPath = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
819     ClusterId clusterId = null;
820     FileStatus status = fs.exists(idPath)? fs.getFileStatus(idPath):  null;
821     if (status != null) {
822       int len = Ints.checkedCast(status.getLen());
823       byte [] content = new byte[len];
824       FSDataInputStream in = fs.open(idPath);
825       try {
826         in.readFully(content);
827       } catch (EOFException eof) {
828         LOG.warn("Cluster ID file " + idPath.toString() + " was empty");
829       } finally{
830         in.close();
831       }
832       try {
833         clusterId = ClusterId.parseFrom(content);
834       } catch (DeserializationException e) {
835         throw new IOException("content=" + Bytes.toString(content), e);
836       }
837       // If not pb'd, make it so.
838       if (!ProtobufUtil.isPBMagicPrefix(content)) {
839         String cid = null;
840         in = fs.open(idPath);
841         try {
842           cid = in.readUTF();
843           clusterId = new ClusterId(cid);
844         } catch (EOFException eof) {
845           LOG.warn("Cluster ID file " + idPath.toString() + " was empty");
846         } finally {
847           in.close();
848         }
849         rewriteAsPb(fs, rootdir, idPath, clusterId);
850       }
851       return clusterId;
852     } else {
853       LOG.warn("Cluster ID file does not exist at " + idPath.toString());
854     }
855     return clusterId;
856   }
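
  // A minimal sketch: read the cluster ID, bootstrapping a fresh one if the file is missing.
  // Assumes ClusterId's no-arg constructor generates a new id; the 10s retry wait is an
  // arbitrary example value.
  private static ClusterId exampleGetOrCreateClusterId(final FileSystem fs, final Path rootdir)
      throws IOException {
    ClusterId id = getClusterId(fs, rootdir);
    if (id == null) {
      id = new ClusterId();
      setClusterId(fs, rootdir, id, 10000);
    }
    return id;
  }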
857 
858   /**
859    * @param cid cluster ID to rewrite as a protobuf-serialized hbase.id file
860    * @throws IOException
861    */
862   private static void rewriteAsPb(final FileSystem fs, final Path rootdir, final Path p,
863       final ClusterId cid)
864   throws IOException {
865     // Rewrite the file as pb.  Move aside the old one first, write new
866     // then delete the moved-aside file.
867     Path movedAsideName = new Path(p + "." + System.currentTimeMillis());
868     if (!fs.rename(p, movedAsideName)) throw new IOException("Failed rename of " + p);
869     setClusterId(fs, rootdir, cid, 100);
870     if (!fs.delete(movedAsideName, false)) {
871       throw new IOException("Failed delete of " + movedAsideName);
872     }
873     LOG.debug("Rewrote the hbase.id file as pb");
874   }
875 
876   /**
877    * Writes a new unique identifier for this cluster to the "hbase.id" file
878    * in the HBase root directory
879    * @param fs the root directory FileSystem
880    * @param rootdir the path to the HBase root directory
881    * @param clusterId the unique identifier to store
882    * @param wait how long (in milliseconds) to wait between retries
883    * @throws IOException if writing to the FileSystem fails and no wait value is left for retrying
884    */
885   public static void setClusterId(FileSystem fs, Path rootdir, ClusterId clusterId,
886       int wait) throws IOException {
887     while (true) {
888       try {
889         Path idFile = new Path(rootdir, HConstants.CLUSTER_ID_FILE_NAME);
890         Path tempIdFile = new Path(rootdir, HConstants.HBASE_TEMP_DIRECTORY +
891           Path.SEPARATOR + HConstants.CLUSTER_ID_FILE_NAME);
892         // Write the id file to a temporary location
893         FSDataOutputStream s = fs.create(tempIdFile);
894         try {
895           s.write(clusterId.toByteArray());
896           s.close();
897           s = null;
898           // Move the temporary file to its normal location. Throw an IOE if
899           // the rename failed
900           if (!fs.rename(tempIdFile, idFile)) {
901             throw new IOException("Unable to move temp cluster ID file to " + idFile);
902           }
903         } finally {
904           // Attempt to close the stream if still open on the way out
905           try {
906             if (s != null) s.close();
907           } catch (IOException ignore) { }
908         }
909         if (LOG.isDebugEnabled()) {
910           LOG.debug("Created cluster ID file at " + idFile.toString() + " with ID: " + clusterId);
911         }
912         return;
913       } catch (IOException ioe) {
914         if (wait > 0) {
915           LOG.warn("Unable to create cluster ID file in " + rootdir.toString() +
916               ", retrying in " + wait + "msec: " + StringUtils.stringifyException(ioe));
917           try {
918             Thread.sleep(wait);
919           } catch (InterruptedException e) {
920             throw (InterruptedIOException)new InterruptedIOException().initCause(e);
921           }
922         } else {
923           throw ioe;
924         }
925       }
926     }
927   }
928 
929   /**
930    * Verifies root directory path is a valid URI with a scheme
931    *
932    * @param root root directory path
933    * @return Passed <code>root</code> argument.
934    * @throws IOException if not a valid URI with a scheme
935    */
936   public static Path validateRootPath(Path root) throws IOException {
937     try {
938       URI rootURI = new URI(root.toString());
939       String scheme = rootURI.getScheme();
940       if (scheme == null) {
941         throw new IOException("Root directory does not have a scheme");
942       }
943       return root;
944     } catch (URISyntaxException e) {
945       IOException io = new IOException("Root directory path is not a valid " +
946         "URI -- check your " + HConstants.HBASE_DIR + " configuration");
947       io.initCause(e);
948       throw io;
949     }
950   }
951 
952   /**
953    * Checks for the presence of the root path (using the provided conf object) in the given path. If
954    * it exists, this method removes it and returns the String representation of the remaining relative path.
955    * @param path path to strip the root from
956    * @param conf configuration used to resolve the HBase root directory
957    * @return String representation of the remaining relative path
958    * @throws IOException
959    */
960   public static String removeRootPath(Path path, final Configuration conf) throws IOException {
961     Path root = FSUtils.getRootDir(conf);
962     String pathStr = path.toString();
963     // Check that the path is absolute, i.e. that it contains the root path; if not, return it as is.
964     if (!pathStr.startsWith(root.toString())) return pathStr;
965     // Otherwise strip the root path prefix and return the remainder.
966     return pathStr.substring(root.toString().length() + 1);// remove the "/" too.
967   }
968 
969   /**
970    * If DFS, check safe mode and if so, wait until we clear it.
971    * @param conf configuration
972    * @param wait Sleep between retries
973    * @throws IOException e
974    */
975   public static void waitOnSafeMode(final Configuration conf,
976     final long wait)
977   throws IOException {
978     FileSystem fs = FileSystem.get(conf);
979     if (!(fs instanceof DistributedFileSystem)) return;
980     DistributedFileSystem dfs = (DistributedFileSystem)fs;
981     // Make sure dfs is not in safe mode
982     while (isInSafeMode(dfs)) {
983       LOG.info("Waiting for dfs to exit safe mode...");
984       try {
985         Thread.sleep(wait);
986       } catch (InterruptedException e) {
987         throw (InterruptedIOException)new InterruptedIOException().initCause(e);
988       }
989     }
990   }
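
  // A minimal sketch: block until HDFS leaves safe mode before touching the root directory.
  // The 10-second poll interval is an arbitrary example value.
  private static void exampleWaitOnSafeMode(final Configuration conf) throws IOException {
    waitOnSafeMode(conf, 10 * 1000L);
    checkFileSystemAvailable(FileSystem.get(conf));
  }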
991 
992   /**
993    * Return the 'path' component of a Path.  In Hadoop, a Path is a URI.  This
994    * method returns the 'path' component of a Path's URI: e.g. If a Path is
995    * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
996    * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
997    * This method is useful if you want to print out a Path without the qualifying
998    * Filesystem instance.
999    * @param p Filesystem Path whose 'path' component we are to return.
1000    * @return the 'path' portion of the passed Path
1001    */
1002   public static String getPath(Path p) {
1003     return p.toUri().getPath();
1004   }
1005 
1006   /**
1007    * @param c configuration
1008    * @return Path to hbase root directory: i.e. <code>hbase.rootdir</code> from
1009    * configuration as a qualified Path.
1010    * @throws IOException e
1011    */
1012   public static Path getRootDir(final Configuration c) throws IOException {
1013     Path p = new Path(c.get(HConstants.HBASE_DIR));
1014     FileSystem fs = p.getFileSystem(c);
1015     return p.makeQualified(fs);
1016   }
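
  // A minimal sketch: resolve the configured root directory and reject roots whose URI lacks a
  // scheme, using the helpers defined in this class.
  private static Path exampleResolveRootDir(final Configuration conf) throws IOException {
    Path rootdir = getRootDir(conf);
    return validateRootPath(rootdir);
  }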
1017 
1018   public static void setRootDir(final Configuration c, final Path root) throws IOException {
1019     c.set(HConstants.HBASE_DIR, root.toString());
1020   }
1021 
1022   public static void setFsDefault(final Configuration c, final Path root) throws IOException {
1023     c.set("fs.defaultFS", root.toString());    // for hadoop 0.21+
1024   }
1025 
1026   /**
1027    * Checks if meta region exists
1028    *
1029    * @param fs file system
1030    * @param rootdir root directory of HBase installation
1031    * @return true if exists
1032    * @throws IOException e
1033    */
1034   @SuppressWarnings("deprecation")
1035   public static boolean metaRegionExists(FileSystem fs, Path rootdir)
1036   throws IOException {
1037     Path metaRegionDir =
1038       HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO);
1039     return fs.exists(metaRegionDir);
1040   }
1041 
1042   /**
1043    * Compute HDFS blocks distribution of a given file, or a portion of the file
1044    * @param fs file system
1045    * @param status file status of the file
1046    * @param start start position of the portion
1047    * @param length length of the portion
1048    * @return The HDFS blocks distribution
1049    */
1050   static public HDFSBlocksDistribution computeHDFSBlocksDistribution(
1051     final FileSystem fs, FileStatus status, long start, long length)
1052     throws IOException {
1053     HDFSBlocksDistribution blocksDistribution = new HDFSBlocksDistribution();
1054     BlockLocation [] blockLocations =
1055       fs.getFileBlockLocations(status, start, length);
1056     for(BlockLocation bl : blockLocations) {
1057       String [] hosts = bl.getHosts();
1058       long len = bl.getLength();
1059       blocksDistribution.addHostsAndBlockWeight(hosts, len);
1060     }
1061 
1062     return blocksDistribution;
1063   }
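
  // A minimal sketch: compute the block distribution of an entire file, e.g. to reason about
  // region locality. The store-file path is whatever the caller supplies.
  private static HDFSBlocksDistribution exampleBlockDistribution(final FileSystem fs,
      final Path storeFile) throws IOException {
    FileStatus status = fs.getFileStatus(storeFile);
    return computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
  }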
1064 
1065 
1066 
1067   /**
1068    * Runs through the hbase rootdir and checks all stores have only
1069    * one file in them -- that is, they've been major compacted.  Looks
1070    * at root and meta tables too.
1071    * @param fs filesystem
1072    * @param hbaseRootDir hbase root directory
1073    * @return True if this hbase install is major compacted.
1074    * @throws IOException e
1075    */
1076   public static boolean isMajorCompacted(final FileSystem fs,
1077       final Path hbaseRootDir)
1078   throws IOException {
1079     List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
1080     PathFilter regionFilter = new RegionDirFilter(fs);
1081     PathFilter familyFilter = new FamilyDirFilter(fs);
1082     for (Path d : tableDirs) {
1083       FileStatus[] regionDirs = fs.listStatus(d, regionFilter);
1084       for (FileStatus regionDir : regionDirs) {
1085         Path dd = regionDir.getPath();
1086         // Else its a region name.  Now look in region for families.
1087         FileStatus[] familyDirs = fs.listStatus(dd, familyFilter);
1088         for (FileStatus familyDir : familyDirs) {
1089           Path family = familyDir.getPath();
1090           // Now in family make sure only one file.
1091           FileStatus[] familyStatus = fs.listStatus(family);
1092           if (familyStatus.length > 1) {
1093             LOG.debug(family.toString() + " has " + familyStatus.length +
1094                 " files.");
1095             return false;
1096           }
1097         }
1098       }
1099     }
1100     return true;
1101   }
1102 
1103   // TODO move this method OUT of FSUtils. No dependencies on HMaster
1104   /**
1105    * Returns the total overall fragmentation percentage. Includes hbase:meta and
1106    * -ROOT- as well.
1107    *
1108    * @param master  The master defining the HBase root and file system.
1109    * @return The total fragmentation percentage across all tables, or -1 if it cannot be computed.
1110    * @throws IOException When scanning the directory fails.
1111    */
1112   public static int getTotalTableFragmentation(final HMaster master)
1113   throws IOException {
1114     Map<String, Integer> map = getTableFragmentation(master);
1115     return map != null && map.size() > 0 ? map.get("-TOTAL-") : -1;
1116   }
1117 
1118   /**
1119    * Runs through the HBase rootdir and checks how many stores for each table
1120    * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
1121    * percentage across all tables is stored under the special key "-TOTAL-".
1122    *
1123    * @param master  The master defining the HBase root and file system.
1124    * @return A map for each table and its percentage.
1125    *
1126    * @throws IOException When scanning the directory fails.
1127    */
1128   public static Map<String, Integer> getTableFragmentation(
1129     final HMaster master)
1130   throws IOException {
1131     Path path = getRootDir(master.getConfiguration());
1132     // since HMaster.getFileSystem() is package private
1133     FileSystem fs = path.getFileSystem(master.getConfiguration());
1134     return getTableFragmentation(fs, path);
1135   }
1136 
1137   /**
1138    * Runs through the HBase rootdir and checks how many stores for each table
1139    * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
1140    * percentage across all tables is stored under the special key "-TOTAL-".
1141    *
1142    * @param fs  The file system to use.
1143    * @param hbaseRootDir  The root directory to scan.
1144    * @return A map for each table and its percentage.
1145    * @throws IOException When scanning the directory fails.
1146    */
1147   public static Map<String, Integer> getTableFragmentation(
1148     final FileSystem fs, final Path hbaseRootDir)
1149   throws IOException {
1150     Map<String, Integer> frags = new HashMap<String, Integer>();
1151     int cfCountTotal = 0;
1152     int cfFragTotal = 0;
1153     PathFilter regionFilter = new RegionDirFilter(fs);
1154     PathFilter familyFilter = new FamilyDirFilter(fs);
1155     List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
1156     for (Path d : tableDirs) {
1157       int cfCount = 0;
1158       int cfFrag = 0;
1159       FileStatus[] regionDirs = fs.listStatus(d, regionFilter);
1160       for (FileStatus regionDir : regionDirs) {
1161         Path dd = regionDir.getPath();
1162         // else its a region name, now look in region for families
1163         FileStatus[] familyDirs = fs.listStatus(dd, familyFilter);
1164         for (FileStatus familyDir : familyDirs) {
1165           cfCount++;
1166           cfCountTotal++;
1167           Path family = familyDir.getPath();
1168           // now in family make sure only one file
1169           FileStatus[] familyStatus = fs.listStatus(family);
1170           if (familyStatus.length > 1) {
1171             cfFrag++;
1172             cfFragTotal++;
1173           }
1174         }
1175       }
1176       // compute percentage per table and store in result list
1177       frags.put(FSUtils.getTableName(d).getNameAsString(),
1178         cfCount == 0? 0: Math.round((float) cfFrag / cfCount * 100));
1179     }
1180     // set overall percentage for all tables
1181     frags.put("-TOTAL-",
1182       cfCountTotal == 0? 0: Math.round((float) cfFragTotal / cfCountTotal * 100));
1183     return frags;
1184   }
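
  // A minimal sketch: log the per-table fragmentation percentages, including the "-TOTAL-"
  // summary entry described above.
  private static void exampleLogFragmentation(final FileSystem fs, final Path hbaseRootDir)
      throws IOException {
    Map<String, Integer> frags = getTableFragmentation(fs, hbaseRootDir);
    for (Map.Entry<String, Integer> entry : frags.entrySet()) {
      LOG.info("Fragmentation of " + entry.getKey() + ": " + entry.getValue() + "%");
    }
  }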
1185 
1186   /**
1187    * Returns the {@link org.apache.hadoop.fs.Path} object representing the table directory under
1188    * path rootdir
1189    *
1190    * @param rootdir qualified path of HBase root directory
1191    * @param tableName name of table
1192    * @return {@link org.apache.hadoop.fs.Path} for table
1193    */
1194   public static Path getTableDir(Path rootdir, final TableName tableName) {
1195     return new Path(getNamespaceDir(rootdir, tableName.getNamespaceAsString()),
1196         tableName.getQualifierAsString());
1197   }
1198 
1199   /**
1200    * Returns the {@link org.apache.hadoop.hbase.TableName} object represented by
1201    * the table directory under
1202    * path rootdir
1203    *
1204    * @param tablePath path of table
1205    * @return {@link org.apache.hadoop.hbase.TableName} of the table
1206    */
1207   public static TableName getTableName(Path tablePath) {
1208     return TableName.valueOf(tablePath.getParent().getName(), tablePath.getName());
1209   }
1210 
1211   /**
1212    * Returns the {@link org.apache.hadoop.fs.Path} object representing
1213    * the namespace directory under path rootdir
1214    *
1215    * @param rootdir qualified path of HBase root directory
1216    * @param namespace namespace name
1217    * @return {@link org.apache.hadoop.fs.Path} for the namespace directory
1218    */
1219   public static Path getNamespaceDir(Path rootdir, final String namespace) {
1220     return new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR,
1221         new Path(namespace)));
1222   }
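
  // A minimal sketch of how the directory helpers compose; the namespace and table names are
  // made-up example values.
  private static void exampleTableLayout(final Configuration conf) throws IOException {
    Path rootdir = getRootDir(conf);
    Path nsDir = getNamespaceDir(rootdir, "default");
    Path tableDir = getTableDir(rootdir, TableName.valueOf("default", "example_table"));
    // getTableName() is the inverse of getTableDir(): it recovers the TableName from a table dir.
    TableName recovered = getTableName(tableDir);
    LOG.debug("namespace dir=" + nsDir + ", table dir=" + tableDir + ", table=" + recovered);
  }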
1223 
1224   /**
1225    * A {@link PathFilter} that returns only regular files.
1226    */
1227   static class FileFilter extends AbstractFileStatusFilter {
1228     private final FileSystem fs;
1229 
1230     public FileFilter(final FileSystem fs) {
1231       this.fs = fs;
1232     }
1233 
1234     @Override
1235     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1236       try {
1237         return isFile(fs, isDir, p);
1238       } catch (IOException e) {
1239         LOG.warn("unable to verify if path=" + p + " is a regular file", e);
1240         return false;
1241       }
1242     }
1243   }
1244 
1245   /**
1246    * Directory filter that doesn't include any of the directories in the specified blacklist
1247    */
1248   public static class BlackListDirFilter extends AbstractFileStatusFilter {
1249     private final FileSystem fs;
1250     private List<String> blacklist;
1251 
1252     /**
1253      * Create a filter on the given filesystem with the specified blacklist
1254      * @param fs filesystem to filter
1255      * @param directoryNameBlackList list of the names of the directories to filter. If
1256      *          <tt>null</tt>, all directories are returned
1257      */
1258     @SuppressWarnings("unchecked")
1259     public BlackListDirFilter(final FileSystem fs, final List<String> directoryNameBlackList) {
1260       this.fs = fs;
1261       blacklist =
1262         (List<String>) (directoryNameBlackList == null ? Collections.emptyList()
1263           : directoryNameBlackList);
1264     }
1265 
1266     @Override
1267     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1268       if (!isValidName(p.getName())) {
1269         return false;
1270       }
1271 
1272       try {
1273         return isDirectory(fs, isDir, p);
1274       } catch (IOException e) {
1275         LOG.warn("An error occurred while verifying if [" + p.toString()
1276             + "] is a valid directory. Returning 'not valid' and continuing.", e);
1277         return false;
1278       }
1279     }
1280 
1281     protected boolean isValidName(final String name) {
1282       return !blacklist.contains(name);
1283     }
1284   }
1285 
1286   /**
1287    * A {@link PathFilter} that only allows directories.
1288    */
1289   public static class DirFilter extends BlackListDirFilter {
1290 
1291     public DirFilter(FileSystem fs) {
1292       super(fs, null);
1293     }
1294   }
1295 
1296   /**
1297    * A {@link PathFilter} that returns user table directories. To get all directories use the
1298    * {@link BlackListDirFilter} with a <tt>null</tt> blacklist
1299    */
1300   public static class UserTableDirFilter extends BlackListDirFilter {
1301     public UserTableDirFilter(FileSystem fs) {
1302       super(fs, HConstants.HBASE_NON_TABLE_DIRS);
1303     }
1304 
1305     protected boolean isValidName(final String name) {
1306       if (!super.isValidName(name))
1307         return false;
1308 
1309       try {
1310         TableName.isLegalTableQualifierName(Bytes.toBytes(name));
1311       } catch (IllegalArgumentException e) {
1312         LOG.info("INVALID NAME " + name);
1313         return false;
1314       }
1315       return true;
1316     }
1317   }
1318 
1319   /**
1320    * Heuristic to determine whether it is safe or not to open a file for append.
1321    * Looks both for dfs.support.append and uses reflection to search
1322    * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush().
1323    * @param conf configuration to check
1324    * @return True if append is supported
1325    */
1326   public static boolean isAppendSupported(final Configuration conf) {
1327     boolean append = conf.getBoolean("dfs.support.append", false);
1328     if (append) {
1329       try {
1330         // TODO: The implementation that comes back when we do a createWriter
1331         // may not be using SequenceFile so the below is not a definitive test.
1332         // Will do for now (hdfs-200).
1333         SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
1334         append = true;
1335       } catch (SecurityException e) {
1336       } catch (NoSuchMethodException e) {
1337         append = false;
1338       }
1339     }
1340     if (!append) {
1341       // Look for the 0.21, 0.22, new-style append evidence.
1342       try {
1343         FSDataOutputStream.class.getMethod("hflush", new Class<?> []{});
1344         append = true;
1345       } catch (NoSuchMethodException e) {
1346         append = false;
1347       }
1348     }
1349     return append;
1350   }
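
  // A minimal sketch: guard code that relies on durable sync/append support; the warning text is
  // an example only.
  private static void exampleWarnIfAppendUnsupported(final Configuration conf) {
    if (!isAppendSupported(conf)) {
      LOG.warn("Append/hflush is not available; WAL durability cannot be guaranteed.");
    }
  }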
1351 
1352   /**
1353    * @param conf configuration used to look up the filesystem
1354    * @return True if the configured filesystem's scheme is 'hdfs'.
1355    * @throws IOException
1356    */
1357   public static boolean isHDFS(final Configuration conf) throws IOException {
1358     FileSystem fs = FileSystem.get(conf);
1359     String scheme = fs.getUri().getScheme();
1360     return scheme.equalsIgnoreCase("hdfs");
1361   }
1362 
1363   /**
1364    * Recover file lease. Used when a file might be suspected
1365    * to have been left open by another process.
1366    * @param fs FileSystem handle
1367    * @param p Path of file to recover lease
1368    * @param conf Configuration handle
1369    * @throws IOException
1370    */
1371   public abstract void recoverFileLease(final FileSystem fs, final Path p,
1372       Configuration conf, CancelableProgressable reporter) throws IOException;
1373 
1374   public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
1375       throws IOException {
1376     List<Path> tableDirs = new LinkedList<Path>();
1377 
1378     for(FileStatus status :
1379         fs.globStatus(new Path(rootdir,
1380             new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) {
1381       tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath()));
1382     }
1383     return tableDirs;
1384   }
1385 
1386   /**
1387    * @param fs
1388    * @param rootdir
1389    * @return All the table directories under <code>rootdir</code>, ignoring non-table hbase folders
1390    * such as .logs, .oldlogs, and .corrupt.
1391    * @throws IOException
1392    */
1393   public static List<Path> getLocalTableDirs(final FileSystem fs, final Path rootdir)
1394       throws IOException {
1395     // presumes any directory under hbase.rootdir is a table
1396     FileStatus[] dirs = fs.listStatus(rootdir, new UserTableDirFilter(fs));
1397     List<Path> tabledirs = new ArrayList<Path>(dirs.length);
1398     for (FileStatus dir: dirs) {
1399       tabledirs.add(dir.getPath());
1400     }
1401     return tabledirs;
1402   }
1403 
1404   /**
1405    * Checks if the given path is the one with 'recovered.edits' dir.
1406    * @param path path to check
1407    * @return True if the path is under a 'recovered.edits' dir
1408    */
1409   public static boolean isRecoveredEdits(Path path) {
1410     return path.toString().contains(HConstants.RECOVERED_EDITS_DIR);
1411   }
1412 
1413   /**
1414    * Filter for region directories: accepts only hex-named dirs, so dirs that start with '.' are rejected.
1415    */
1416   public static class RegionDirFilter extends AbstractFileStatusFilter {
1417     // This pattern will accept 0.90+ style hex region dirs and older numeric region dir names.
1418     final public static Pattern regionDirPattern = Pattern.compile("^[0-9a-f]*$");
1419     final FileSystem fs;
1420 
1421     public RegionDirFilter(FileSystem fs) {
1422       this.fs = fs;
1423     }
1424 
1425     @Override
1426     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1427       if (!regionDirPattern.matcher(p.getName()).matches()) {
1428         return false;
1429       }
1430 
1431       try {
1432         return isDirectory(fs, isDir, p);
1433       } catch (IOException ioe) {
1434         // Maybe the file was moved or the fs was disconnected.
1435         LOG.warn("Skipping file " + p +" due to IOException", ioe);
1436         return false;
1437       }
1438     }
1439   }
1440 
1441   /**
1442    * Given a particular table dir, return all the regiondirs inside it, excluding files such as
1443    * .tableinfo
1444    * @param fs A file system for the Path
1445    * @param tableDir Path to a specific table directory <hbase.rootdir>/<tabledir>
1446    * @return List of paths to valid region directories in table dir.
1447    * @throws IOException
1448    */
1449   public static List<Path> getRegionDirs(final FileSystem fs, final Path tableDir) throws IOException {
1450     // assumes we are in a table dir.
1451     List<FileStatus> rds = listStatusWithStatusFilter(fs, tableDir, new RegionDirFilter(fs));
1452     if (rds == null) {
1453       return new ArrayList<Path>();
1454     }
1455     List<Path> regionDirs = new ArrayList<Path>(rds.size());
1456     for (FileStatus rdfs: rds) {
1457       Path rdPath = rdfs.getPath();
1458       regionDirs.add(rdPath);
1459     }
1460     return regionDirs;
1461   }
1462 
1463   /**
1464    * Filter for all dirs that are legal column family names.  This is generally used for colfam
1465    * dirs <hbase.rootdir>/<tabledir>/<regiondir>/<colfamdir>.
1466    */
1467   public static class FamilyDirFilter extends AbstractFileStatusFilter {
1468     final FileSystem fs;
1469 
1470     public FamilyDirFilter(FileSystem fs) {
1471       this.fs = fs;
1472     }
1473 
1474     @Override
1475     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1476       try {
1477         // throws IAE if invalid
1478         HColumnDescriptor.isLegalFamilyName(Bytes.toBytes(p.getName()));
1479       } catch (IllegalArgumentException iae) {
1480         // path name is an invalid family name and thus is excluded.
1481         return false;
1482       }
1483 
1484       try {
1485         return isDirectory(fs, isDir, p);
1486       } catch (IOException ioe) {
1487         // Maybe the file was moved or the fs was disconnected.
1488         LOG.warn("Skipping file " + p +" due to IOException", ioe);
1489         return false;
1490       }
1491     }
1492   }
1493 
1494   /**
1495    * Given a particular region dir, return all the familydirs inside it
1496    *
1497    * @param fs A file system for the Path
1498    * @param regionDir Path to a specific region directory
1499    * @return List of paths to valid family directories in region dir.
1500    * @throws IOException
1501    */
1502   public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException {
1503     // assumes we are in a region dir.
1504     FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs));
1505     List<Path> familyDirs = new ArrayList<Path>(fds.length);
1506     for (FileStatus fdfs: fds) {
1507       Path fdPath = fdfs.getPath();
1508       familyDirs.add(fdPath);
1509     }
1510     return familyDirs;
1511   }
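  /*
   * Illustrative walk (a minimal sketch, assuming a Configuration whose hbase.rootdir is
   * reachable; the variable names are invented for the example): combines getTableDirs,
   * getRegionDirs and getFamilyDirs to enumerate every column family directory.
   *
   *   FileSystem fs = FSUtils.getCurrentFileSystem(conf);
   *   Path rootDir = FSUtils.getRootDir(conf);
   *   for (Path tableDir : FSUtils.getTableDirs(fs, rootDir)) {
   *     for (Path regionDir : FSUtils.getRegionDirs(fs, tableDir)) {
   *       for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
   *         LOG.debug("family dir: " + familyDir);
   *       }
   *     }
   *   }
   */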
1512 
1513   public static List<Path> getReferenceFilePaths(final FileSystem fs, final Path familyDir) throws IOException {
1514     List<FileStatus> fds = listStatusWithStatusFilter(fs, familyDir, new ReferenceFileFilter(fs));
1515     if (fds == null) {
1516       return new ArrayList<Path>();
1517     }
1518     List<Path> referenceFiles = new ArrayList<Path>(fds.size());
1519     for (FileStatus fdfs: fds) {
1520       Path fdPath = fdfs.getPath();
1521       referenceFiles.add(fdPath);
1522     }
1523     return referenceFiles;
1524   }
1525 
1526   /**
1527    * Filter for HFiles that excludes reference files.
1528    */
1529   public static class HFileFilter extends AbstractFileStatusFilter {
1530     final FileSystem fs;
1531 
1532     public HFileFilter(FileSystem fs) {
1533       this.fs = fs;
1534     }
1535 
1536     @Override
1537     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1538       if (!StoreFileInfo.isHFile(p)) {
1539         return false;
1540       }
1541 
1542       try {
1543         return isFile(fs, isDir, p);
1544       } catch (IOException ioe) {
1545         // Maybe the file was moved or the fs was disconnected.
1546         LOG.warn("Skipping file " + p +" due to IOException", ioe);
1547         return false;
1548       }
1549     }
1550   }
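  /*
   * Illustrative usage (a sketch; familyDir is assumed to name an existing column family
   * directory): lists HFiles while skipping reference files, tolerating a vanished directory.
   *
   *   List<FileStatus> hfiles =
   *       FSUtils.listStatusWithStatusFilter(fs, familyDir, new FSUtils.HFileFilter(fs));
   *   if (hfiles != null) {
   *     for (FileStatus hfile : hfiles) {
   *       LOG.debug("hfile: " + hfile.getPath());
   *     }
   *   }
   */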
1551 
1552   public static class ReferenceFileFilter extends AbstractFileStatusFilter {
1553 
1554     private final FileSystem fs;
1555 
1556     public ReferenceFileFilter(FileSystem fs) {
1557       this.fs = fs;
1558     }
1559 
1560     @Override
1561     protected boolean accept(Path p, @CheckForNull Boolean isDir) {
1562       if (!StoreFileInfo.isReference(p)) {
1563         return false;
1564       }
1565 
1566       try {
1567         // only files can be references.
1568         return isFile(fs, isDir, p);
1569       } catch (IOException ioe) {
1570         // Maybe the file was moved or the fs was disconnected.
1571         LOG.warn("Skipping file " + p +" due to IOException", ioe);
1572         return false;
1573       }
1574     }
1575   }
1576 
1577 
1578   /**
1579    * @param conf
1580    * @return The filesystem of the hbase rootdir.
1581    * @throws IOException
1582    */
1583   public static FileSystem getCurrentFileSystem(Configuration conf)
1584   throws IOException {
1585     return getRootDir(conf).getFileSystem(conf);
1586   }
1587 
1588 
1589   /**
1590    * Runs through the HBase rootdir/tablename and creates a reverse lookup map for
1591    * table StoreFile names to the full Path.
1592    * <br>
1593    * Example...<br>
1594    * Key = 3944417774205889744  <br>
1595    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1596    *
1597    * @param map map to add values.  If null, this method will create and populate one to return
1598    * @param fs  The file system to use.
1599    * @param hbaseRootDir  The root directory to scan.
1600    * @param tableName name of the table to scan.
1601    * @return Map keyed by StoreFile name with a value of the full Path.
1602    * @throws IOException When scanning the directory fails.
1603    * @throws InterruptedException
1604    */
1605   public static Map<String, Path> getTableStoreFilePathMap(Map<String, Path> map,
1606   final FileSystem fs, final Path hbaseRootDir, TableName tableName)
1607   throws IOException, InterruptedException {
1608     return getTableStoreFilePathMap(map, fs, hbaseRootDir, tableName, null, null, null);
1609   }
1610 
1611   /**
1612    * Runs through the HBase rootdir/tablename and creates a reverse lookup map for
1613    * table StoreFile names to the full Path.  Note that because this method can be called
1614    * on a 'live' HBase system that we will skip files that no longer exist by the time
1615    * we traverse them and similarly the user of the result needs to consider that some
1616    * entries in this map may not exist by the time this call completes.
1617    * <br>
1618    * Example...<br>
1619    * Key = 3944417774205889744  <br>
1620    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1621    *
1622    * @param resultMap map to add values.  If null, this method will create and populate one to return
1623    * @param fs  The file system to use.
1624    * @param hbaseRootDir  The root directory to scan.
1625    * @param tableName name of the table to scan.
1626    * @param sfFilter optional path filter to apply to store files
1627    * @param executor optional executor service to parallelize this operation
1628    * @param errors ErrorReporter instance or null
1629    * @return Map keyed by StoreFile name with a value of the full Path.
1630    * @throws IOException When scanning the directory fails.
1631    * @throws InterruptedException
1632    */
1633   public static Map<String, Path> getTableStoreFilePathMap(
1634       Map<String, Path> resultMap,
1635       final FileSystem fs, final Path hbaseRootDir, TableName tableName, final PathFilter sfFilter,
1636       ExecutorService executor, final ErrorReporter errors) throws IOException, InterruptedException {
1637 
1638     final Map<String, Path> finalResultMap =
1639         resultMap == null ? new ConcurrentHashMap<String, Path>(128, 0.75f, 32) : resultMap;
1640 
1641     // only include the directory paths to tables
1642     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
1643     // Inside a table, there are compaction.dir directories to skip.  Otherwise, all else
1644     // should be regions.
1645     final FamilyDirFilter familyFilter = new FamilyDirFilter(fs);
1646     final Vector<Exception> exceptions = new Vector<Exception>();
1647 
1648     try {
1649       List<FileStatus> regionDirs = FSUtils.listStatusWithStatusFilter(fs, tableDir, new RegionDirFilter(fs));
1650       if (regionDirs == null) {
1651         return finalResultMap;
1652       }
1653 
1654       final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.size());
1655 
1656       for (FileStatus regionDir : regionDirs) {
1657         if (null != errors) {
1658           errors.progress();
1659         }
1660         final Path dd = regionDir.getPath();
1661 
1662         if (!exceptions.isEmpty()) {
1663           break;
1664         }
1665 
1666         Runnable getRegionStoreFileMapCall = new Runnable() {
1667           @Override
1668           public void run() {
1669             try {
1670               HashMap<String,Path> regionStoreFileMap = new HashMap<String, Path>();
1671               List<FileStatus> familyDirs = FSUtils.listStatusWithStatusFilter(fs, dd, familyFilter);
1672               if (familyDirs == null) {
1673                 if (!fs.exists(dd)) {
1674                   LOG.warn("Skipping region because it no longer exists: " + dd);
1675                 } else {
1676                   LOG.warn("Skipping region because it has no family dirs: " + dd);
1677                 }
1678                 return;
1679               }
1680               for (FileStatus familyDir : familyDirs) {
1681                 if (null != errors) {
1682                   errors.progress();
1683                 }
1684                 Path family = familyDir.getPath();
1685                 if (family.getName().equals(HConstants.RECOVERED_EDITS_DIR)) {
1686                   continue;
1687                 }
1688                 // now in family, iterate over the StoreFiles and
1689                 // put in map
1690                 FileStatus[] familyStatus = fs.listStatus(family);
1691                 for (FileStatus sfStatus : familyStatus) {
1692                   if (null != errors) {
1693                     errors.progress();
1694                   }
1695                   Path sf = sfStatus.getPath();
1696                   if (sfFilter == null || sfFilter.accept(sf)) {
1697                     regionStoreFileMap.put( sf.getName(), sf);
1698                   }
1699                 }
1700               }
1701               finalResultMap.putAll(regionStoreFileMap);
1702             } catch (Exception e) {
1703               LOG.error("Could not get region store file map for region: " + dd, e);
1704               exceptions.add(e);
1705             }
1706           }
1707         };
1708 
1709         // If executor is available, submit async tasks to exec concurrently, otherwise
1710         // just do serial sync execution
1711         if (executor != null) {
1712           Future<?> future = executor.submit(getRegionStoreFileMapCall);
1713           futures.add(future);
1714         } else {
1715           FutureTask<?> future = new FutureTask<Object>(getRegionStoreFileMapCall, null);
1716           future.run();
1717           futures.add(future);
1718         }
1719       }
1720 
1721       // Ensure all pending tasks are complete (or that we run into an exception)
1722       for (Future<?> f : futures) {
1723         if (!exceptions.isEmpty()) {
1724           break;
1725         }
1726         try {
1727           f.get();
1728         } catch (ExecutionException e) {
1729           LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
1730           // Shouldn't happen, we already logged/caught any exceptions in the Runnable
1731         }
1732       }
1733     } catch (IOException e) {
1734       LOG.error("Cannot execute getTableStoreFilePathMap for " + tableName, e);
1735       exceptions.add(e);
1736     } finally {
1737       if (!exceptions.isEmpty()) {
1738         // Just throw the first exception as an indication something bad happened
1739         // Don't need to propagate all the exceptions, we already logged them all anyway
1740         Throwables.propagateIfInstanceOf(exceptions.firstElement(), IOException.class);
1741         throw Throwables.propagate(exceptions.firstElement());
1742       }
1743     }
1744 
1745     return finalResultMap;
1746   }
1747 
1748   public static int getRegionReferenceFileCount(final FileSystem fs, final Path p) {
1749     int result = 0;
1750     try {
1751       for (Path familyDir:getFamilyDirs(fs, p)){
1752         result += getReferenceFilePaths(fs, familyDir).size();
1753       }
1754     } catch (IOException e) {
1755       LOG.warn("Error Counting reference files.", e);
1756     }
1757     return result;
1758   }
1759 
1760   /**
1761    * Runs through the HBase rootdir and creates a reverse lookup map for
1762    * table StoreFile names to the full Path.
1763    * <br>
1764    * Example...<br>
1765    * Key = 3944417774205889744  <br>
1766    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1767    *
1768    * @param fs  The file system to use.
1769    * @param hbaseRootDir  The root directory to scan.
1770    * @return Map keyed by StoreFile name with a value of the full Path.
1771    * @throws IOException When scanning the directory fails.
1772    * @throws InterruptedException
1773    */
1774   public static Map<String, Path> getTableStoreFilePathMap(
1775     final FileSystem fs, final Path hbaseRootDir)
1776   throws IOException, InterruptedException {
1777     return getTableStoreFilePathMap(fs, hbaseRootDir, null, null, null);
1778   }
1779 
1780   /**
1781    * Runs through the HBase rootdir and creates a reverse lookup map for
1782    * table StoreFile names to the full Path.
1783    * <br>
1784    * Example...<br>
1785    * Key = 3944417774205889744  <br>
1786    * Value = hdfs://localhost:51169/user/userid/-ROOT-/70236052/info/3944417774205889744
1787    *
1788    * @param fs  The file system to use.
1789    * @param hbaseRootDir  The root directory to scan.
1790    * @param sfFilter optional path filter to apply to store files
1791    * @param executor optional executor service to parallelize this operation
1792    * @param errors ErrorReporter instance or null
1793    * @return Map keyed by StoreFile name with a value of the full Path.
1794    * @throws IOException When scanning the directory fails.
1795    * @throws InterruptedException
1796    */
1797   public static Map<String, Path> getTableStoreFilePathMap(
1798     final FileSystem fs, final Path hbaseRootDir, PathFilter sfFilter,
1799     ExecutorService executor, ErrorReporter errors)
1800   throws IOException, InterruptedException {
1801     ConcurrentHashMap<String, Path> map = new ConcurrentHashMap<String, Path>(1024, 0.75f, 32);
1802 
1803     // if this method looks similar to 'getTableFragmentation' that is because
1804     // it was borrowed from it.
1805 
1806     // only include the directory paths to tables
1807     for (Path tableDir : FSUtils.getTableDirs(fs, hbaseRootDir)) {
1808       getTableStoreFilePathMap(map, fs, hbaseRootDir,
1809           FSUtils.getTableName(tableDir), sfFilter, executor, errors);
1810     }
1811     return map;
1812   }
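  /*
   * Illustrative usage (a minimal sketch; the pool size and the absence of a filter and
   * ErrorReporter are assumptions): builds the store file reverse lookup for the whole
   * root dir, parallelized per region by passing an ExecutorService.
   *
   *   ExecutorService pool = Executors.newFixedThreadPool(8);
   *   try {
   *     Map<String, Path> storeFiles = FSUtils.getTableStoreFilePathMap(
   *         FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf), null, pool, null);
   *     LOG.info("Found " + storeFiles.size() + " store files");
   *   } finally {
   *     pool.shutdown();
   *   }
   */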
1813 
1814   /**
1815    * Filters FileStatuses in an array and returns a list
1816    *
1817    * @param input   An array of FileStatuses
1818    * @param filter  A required filter to filter the array
1819    * @return        A list of FileStatuses
1820    */
1821   public static List<FileStatus> filterFileStatuses(FileStatus[] input,
1822       FileStatusFilter filter) {
1823     if (input == null) return null;
1824     return filterFileStatuses(Iterators.forArray(input), filter);
1825   }
1826 
1827   /**
1828    * Filters FileStatuses in an iterator and returns a list
1829    *
1830    * @param input   An iterator of FileStatuses
1831    * @param filter  A required filter to filter the iterator
1832    * @return        A list of FileStatuses
1833    */
1834   public static List<FileStatus> filterFileStatuses(Iterator<FileStatus> input,
1835       FileStatusFilter filter) {
1836     if (input == null) return null;
1837     ArrayList<FileStatus> results = new ArrayList<FileStatus>();
1838     while (input.hasNext()) {
1839       FileStatus f = input.next();
1840       if (filter.accept(f)) {
1841         results.add(f);
1842       }
1843     }
1844     return results;
1845   }
1846 
1847   /**
1848    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1849    * This accommodates differences between hadoop versions, where hadoop 1
1850    * does not throw a FileNotFoundException and returns an empty FileStatus[],
1851    * while hadoop 2 will throw a FileNotFoundException.
1852    *
1853    * @param fs file system
1854    * @param dir directory
1855    * @param filter file status filter
1856    * @return null if dir is empty or doesn't exist, otherwise FileStatus list
1857    */
1858   public static List<FileStatus> listStatusWithStatusFilter(final FileSystem fs,
1859       final Path dir, final FileStatusFilter filter) throws IOException {
1860     FileStatus [] status = null;
1861     try {
1862       status = fs.listStatus(dir);
1863     } catch (FileNotFoundException fnfe) {
1864       // if directory doesn't exist, return null
1865       if (LOG.isTraceEnabled()) {
1866         LOG.trace(dir + " doesn't exist");
1867       }
1868     }
1869 
1870     if (status == null || status.length < 1)  {
1871       return null;
1872     }
1873 
1874     if (filter == null) {
1875       return Arrays.asList(status);
1876     } else {
1877       List<FileStatus> status2 = filterFileStatuses(status, filter);
1878       if (status2 == null || status2.isEmpty()) {
1879         return null;
1880       } else {
1881         return status2;
1882       }
1883     }
1884   }
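  /*
   * Illustrative usage (a sketch; tableDir is assumed to exist under hbase.rootdir): a null
   * result means the directory is missing, empty, or nothing passed the filter, so callers
   * should treat null and empty as equivalent.
   *
   *   List<FileStatus> regions =
   *       FSUtils.listStatusWithStatusFilter(fs, tableDir, new FSUtils.RegionDirFilter(fs));
   *   int regionCount = regions == null ? 0 : regions.size();
   */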
1885 
1886   /**
1887    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1888    * This accommodates differences between hadoop versions, where hadoop 1
1889    * does not throw a FileNotFoundException and returns an empty FileStatus[],
1890    * while hadoop 2 will throw a FileNotFoundException.
1891    *
1892    * Where possible, prefer {@link #listStatusWithStatusFilter(FileSystem,
1893    * Path, FileStatusFilter)} instead.
1894    *
1895    * @param fs file system
1896    * @param dir directory
1897    * @param filter path filter
1898    * @return null if dir is empty or doesn't exist, otherwise FileStatus array
1899    */
1900   public static FileStatus [] listStatus(final FileSystem fs,
1901       final Path dir, final PathFilter filter) throws IOException {
1902     FileStatus [] status = null;
1903     try {
1904       status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
1905     } catch (FileNotFoundException fnfe) {
1906       // if directory doesn't exist, return null
1907       if (LOG.isTraceEnabled()) {
1908         LOG.trace(dir + " doesn't exist");
1909       }
1910     }
1911     if (status == null || status.length < 1) return null;
1912     return status;
1913   }
1914 
1915   /**
1916    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal.
1917    * This accommodates differences between hadoop versions.
1918    *
1919    * @param fs file system
1920    * @param dir directory
1921    * @return null if dir is empty or doesn't exist, otherwise FileStatus array
1922    */
1923   public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
1924     return listStatus(fs, dir, null);
1925   }
1926 
1927   /**
1928    * Calls fs.delete() and returns its result.
1929    *
1930    * @param fs filesystem to use
1931    * @param path path to delete
1932    * @param recursive whether to delete recursively
1933    * @return the value returned by fs.delete()
1934    * @throws IOException
1935    */
1936   public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
1937       throws IOException {
1938     return fs.delete(path, recursive);
1939   }
1940 
1941   /**
1942    * Calls fs.exists() to check whether the specified path exists.
1943    *
1944    * @param fs filesystem to use
1945    * @param path path to check
1946    * @return the value returned by fs.exists()
1947    * @throws IOException
1948    */
1949   public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
1950     return fs.exists(path);
1951   }
1952 
1953   /**
1954    * Throw an exception if an action is not permitted by a user on a file.
1955    *
1956    * @param ugi
1957    *          the user
1958    * @param file
1959    *          the file
1960    * @param action
1961    *          the action
1962    */
1963   public static void checkAccess(UserGroupInformation ugi, FileStatus file,
1964       FsAction action) throws AccessDeniedException {
1965     if (ugi.getShortUserName().equals(file.getOwner())) {
1966       if (file.getPermission().getUserAction().implies(action)) {
1967         return;
1968       }
1969     } else if (contains(ugi.getGroupNames(), file.getGroup())) {
1970       if (file.getPermission().getGroupAction().implies(action)) {
1971         return;
1972       }
1973     } else if (file.getPermission().getOtherAction().implies(action)) {
1974       return;
1975     }
1976     throw new AccessDeniedException("Permission denied:" + " action=" + action
1977         + " path=" + file.getPath() + " user=" + ugi.getShortUserName());
1978   }
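  /*
   * Illustrative usage (a sketch; the path variable and the choice of FsAction.READ are
   * assumptions): verifies that the current user may read a file before opening it.
   *
   *   UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
   *   FileStatus status = fs.getFileStatus(path);
   *   FSUtils.checkAccess(ugi, status, FsAction.READ); // throws AccessDeniedException otherwise
   */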
1979 
1980   private static boolean contains(String[] groups, String user) {
1981     for (String group : groups) {
1982       if (group.equals(user)) {
1983         return true;
1984       }
1985     }
1986     return false;
1987   }
1988 
1989   /**
1990    * Log the current state of the filesystem from a certain root directory
1991    * @param fs filesystem to investigate
1992    * @param root root file/directory to start logging from
1993    * @param LOG log to output information
1994    * @throws IOException if an unexpected exception occurs
1995    */
1996   public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
1997       throws IOException {
1998     LOG.debug("Current file system:");
1999     logFSTree(LOG, fs, root, "|-");
2000   }
2001 
2002   /**
2003    * Recursive helper to log the state of the FS
2004    *
2005    * @see #logFileSystemState(FileSystem, Path, Log)
2006    */
2007   private static void logFSTree(Log LOG, final FileSystem fs, final Path root, String prefix)
2008       throws IOException {
2009     FileStatus[] files = FSUtils.listStatus(fs, root, null);
2010     if (files == null) return;
2011 
2012     for (FileStatus file : files) {
2013       if (file.isDirectory()) {
2014         LOG.debug(prefix + file.getPath().getName() + "/");
2015         logFSTree(LOG, fs, file.getPath(), prefix + "---");
2016       } else {
2017         LOG.debug(prefix + file.getPath().getName());
2018       }
2019     }
2020   }
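  /*
   * Illustrative usage (a sketch, typically from a test or a debugging session): dumps the
   * directory tree under hbase.rootdir at DEBUG level.
   *
   *   FSUtils.logFileSystemState(FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf), LOG);
   */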
2021 
2022   public static boolean renameAndSetModifyTime(final FileSystem fs, final Path src, final Path dest)
2023       throws IOException {
2024     // set the modify time for TimeToLive Cleaner
2025     fs.setTimes(src, EnvironmentEdgeManager.currentTime(), -1);
2026     return fs.rename(src, dest);
2027   }
2028 
2029   /**
2030    * Scans the root path of the file system to get the degree of locality
2031    * for each region on each of the servers having at least one block of
2032    * that region.
2033    * This is used by the tool {@link org.apache.hadoop.hbase.master.RegionPlacementMaintainer}
2034    *
2035    * @param conf
2036    *          the configuration to use
2037    * @return the mapping from region encoded name to a map of server names to
2038    *           locality fraction
2039    * @throws IOException
2040    *           in case of file system errors or interrupts
2041    */
2042   public static Map<String, Map<String, Float>> getRegionDegreeLocalityMappingFromFS(
2043       final Configuration conf) throws IOException {
2044     return getRegionDegreeLocalityMappingFromFS(
2045         conf, null,
2046         conf.getInt(THREAD_POOLSIZE, DEFAULT_THREAD_POOLSIZE));
2047 
2048   }
2049 
2050   /**
2051    * Scans the root path of the file system to get the degree of locality
2052    * for each region on each of the servers having at least one block of
2053    * that region.
2054    *
2055    * @param conf
2056    *          the configuration to use
2057    * @param desiredTable
2058    *          the table you wish to scan locality for
2059    * @param threadPoolSize
2060    *          the thread pool size to use
2061    * @return the mapping from region encoded name to a map of server names to
2062    *           locality fraction
2063    * @throws IOException
2064    *           in case of file system errors or interrupts
2065    */
2066   public static Map<String, Map<String, Float>> getRegionDegreeLocalityMappingFromFS(
2067       final Configuration conf, final String desiredTable, int threadPoolSize)
2068       throws IOException {
2069     Map<String, Map<String, Float>> regionDegreeLocalityMapping =
2070         new ConcurrentHashMap<String, Map<String, Float>>();
2071     getRegionLocalityMappingFromFS(conf, desiredTable, threadPoolSize, null,
2072         regionDegreeLocalityMapping);
2073     return regionDegreeLocalityMapping;
2074   }
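  /*
   * Illustrative usage (a sketch; the table name "usertable" and the pool size of 8 are
   * assumptions): fetches per-region locality fractions for one table and logs them.
   *
   *   Map<String, Map<String, Float>> locality =
   *       FSUtils.getRegionDegreeLocalityMappingFromFS(conf, "usertable", 8);
   *   for (Map.Entry<String, Map<String, Float>> e : locality.entrySet()) {
   *     LOG.info("region " + e.getKey() + " locality by server: " + e.getValue());
   *   }
   */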
2075 
2076   /**
2077    * Scans the root path of the file system to get either the mapping between
2078    * the region name and its best locality region server, or the degree of
2079    * locality of each region on each of the servers having at least one block
2080    * of that region. The output map parameters are both optional.
2081    *
2082    * @param conf
2083    *          the configuration to use
2084    * @param desiredTable
2085    *          the table you wish to scan locality for
2086    * @param threadPoolSize
2087    *          the thread pool size to use
2088    * @param regionToBestLocalityRSMapping
2089    *          the map into which to put the best locality mapping or null
2090    * @param regionDegreeLocalityMapping
2091    *          the map into which to put the locality degree mapping or null,
2092    *          must be a thread-safe implementation
2093    * @throws IOException
2094    *           in case of file system errors or interrupts
2095    */
2096   private static void getRegionLocalityMappingFromFS(
2097       final Configuration conf, final String desiredTable,
2098       int threadPoolSize,
2099       Map<String, String> regionToBestLocalityRSMapping,
2100       Map<String, Map<String, Float>> regionDegreeLocalityMapping)
2101       throws IOException {
2102     FileSystem fs =  FileSystem.get(conf);
2103     Path rootPath = FSUtils.getRootDir(conf);
2104     long startTime = EnvironmentEdgeManager.currentTime();
2105     Path queryPath;
2106     // The table files are in ${hbase.rootdir}/data/<namespace>/<table>/*
2107     if (null == desiredTable) {
2108       queryPath = new Path(new Path(rootPath, HConstants.BASE_NAMESPACE_DIR).toString() + "/*/*/*/");
2109     } else {
2110       queryPath = new Path(FSUtils.getTableDir(rootPath, TableName.valueOf(desiredTable)).toString() + "/*/");
2111     }
2112 
2113     // reject all paths that are not appropriate
2114     PathFilter pathFilter = new PathFilter() {
2115       @Override
2116       public boolean accept(Path path) {
2117         // the last path component is the region name; the glob may also match noise
2118         if (null == path) {
2119           return false;
2120         }
2121 
2122         // no parent?
2123         Path parent = path.getParent();
2124         if (null == parent) {
2125           return false;
2126         }
2127 
2128         String regionName = path.getName();
2129         if (null == regionName) {
2130           return false;
2131         }
2132 
2133         if (!regionName.toLowerCase().matches("[0-9a-f]+")) {
2134           return false;
2135         }
2136         return true;
2137       }
2138     };
2139 
2140     FileStatus[] statusList = fs.globStatus(queryPath, pathFilter);
2141 
2142     if (null == statusList) {
2143       return;
2144     } else {
2145       LOG.debug("Query Path: " + queryPath + " ; # list of files: " +
2146           statusList.length);
2147     }
2148 
2149     // lower the number of threads in case we have very few expected regions
2150     threadPoolSize = Math.min(threadPoolSize, statusList.length);
2151 
2152     // run in multiple threads
2153     ThreadPoolExecutor tpe = new ThreadPoolExecutor(threadPoolSize,
2154         threadPoolSize, 60, TimeUnit.SECONDS,
2155         new ArrayBlockingQueue<Runnable>(statusList.length));
2156     try {
2157       // ignore all file status items that are not of interest
2158       for (FileStatus regionStatus : statusList) {
2159         if (null == regionStatus) {
2160           continue;
2161         }
2162 
2163         if (!regionStatus.isDirectory()) {
2164           continue;
2165         }
2166 
2167         Path regionPath = regionStatus.getPath();
2168         if (null == regionPath) {
2169           continue;
2170         }
2171 
2172         tpe.execute(new FSRegionScanner(fs, regionPath,
2173             regionToBestLocalityRSMapping, regionDegreeLocalityMapping));
2174       }
2175     } finally {
2176       tpe.shutdown();
2177       int threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY,
2178           60 * 1000);
2179       try {
2180         // here we wait until TPE terminates, which is either naturally or by
2181         // exceptions in the execution of the threads
2182         while (!tpe.awaitTermination(threadWakeFrequency,
2183             TimeUnit.MILLISECONDS)) {
2184           // printing out rough estimate, so as to not introduce
2185           // AtomicInteger
2186           LOG.info("Locality checking is underway: { Scanned Regions : "
2187               + tpe.getCompletedTaskCount() + "/"
2188               + tpe.getTaskCount() + " }");
2189         }
2190       } catch (InterruptedException e) {
2191         throw (InterruptedIOException)new InterruptedIOException().initCause(e);
2192       }
2193     }
2194 
2195     long overhead = EnvironmentEdgeManager.currentTime() - startTime;
2196     String overheadMsg = "Scanning DFS for locality info took " + overhead + " ms";
2197 
2198     LOG.info(overheadMsg);
2199   }
2200 
2201   /**
2202    * Do our short circuit read setup.
2203    * Checks buffer size to use and whether to do checksumming in hbase or hdfs.
2204    * @param conf
2205    */
2206   public static void setupShortCircuitRead(final Configuration conf) {
2207     // Check that the user has not set the "dfs.client.read.shortcircuit.skip.checksum" property.
2208     boolean shortCircuitSkipChecksum =
2209       conf.getBoolean("dfs.client.read.shortcircuit.skip.checksum", false);
2210     boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
2211     if (shortCircuitSkipChecksum) {
2212       LOG.warn("Configuration \"dfs.client.read.shortcircuit.skip.checksum\" should not " +
2213         "be set to true." + (useHBaseChecksum ? " HBase checksum doesn't require " +
2214         "it, see https://issues.apache.org/jira/browse/HBASE-6868." : ""));
2215       assert !shortCircuitSkipChecksum; //this will fail if assertions are on
2216     }
2217     checkShortCircuitReadBufferSize(conf);
2218   }
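  /*
   * Illustrative configuration sketch (the specific values are assumptions, not defaults
   * enforced by this method): enable HDFS short-circuit reads while leaving checksumming to
   * HBase, then let this helper flag bad combinations and size the read buffer.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setBoolean("dfs.client.read.shortcircuit", true);
   *   conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", false);
   *   FSUtils.setupShortCircuitRead(conf);
   */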
2219 
2220   /**
2221    * Check if short circuit read buffer size is set and if not, set it to hbase value.
2222    * @param conf
2223    */
2224   public static void checkShortCircuitReadBufferSize(final Configuration conf) {
2225     final int defaultSize = HConstants.DEFAULT_BLOCKSIZE * 2;
2226     final int notSet = -1;
2227     // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
2228     final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
2229     int size = conf.getInt(dfsKey, notSet);
2230     // If a size is set, return -- we will use it.
2231     if (size != notSet) return;
2232     // But short circuit buffer size is normally not set.  Put in place the hbase wanted size.
2233     int hbaseSize = conf.getInt("hbase." + dfsKey, defaultSize);
2234     conf.setIfUnset(dfsKey, Integer.toString(hbaseSize));
2235   }
2236   }
2237 }