View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.mob.MobConstants;
35  import org.apache.hadoop.hbase.regionserver.HRegion;
36  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
37  import org.apache.hadoop.hbase.util.FSUtils;
38  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
39  import org.apache.hadoop.hbase.util.Pair;
40  
/**
 * HFileLink describes a link to an hfile.
 *
 * <p>An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it is currently located.
 *
 * <p>Searches for hfiles in the following order and locations:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * <p>The link checks the original path first; if the hfile is not present there,
 * it falls back to the archived path.
 */
56  @InterfaceAudience.Private
57  public class HFileLink extends FileLink {
58    private static final Log LOG = LogFactory.getLog(HFileLink.class);
59  
  /**
   * A non-capture group, for HFileLink, so that this can be embedded.
   * The HFileLink describes a link to an hfile in a different table/region
   * and the name is in the form: table=region-hfile, optionally preceded by
   * a namespace (namespace=table=region-hfile).
   * <p>
   * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?), covering the plain hfiles (uuid)
   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
   */
70    public static final String LINK_NAME_REGEX =
71      String.format("(?:(?:%s=)?)%s=%s-%s",
72        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
73        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
74  
75    /** Define the HFile Link name parser in the form of: table=region-hfile */
76    //made package private for testing
77    static final Pattern LINK_NAME_PATTERN =
78      Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
79        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
80        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
81  
82    /**
83     * The pattern should be used for hfile and reference links
84     * that can be found in /hbase/table/region/family/
85     */
86    private static final Pattern REF_OR_HFILE_LINK_PATTERN =
87      Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
88        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
89        HRegionInfo.ENCODED_REGION_NAME_REGEX));
90  
91    private final Path archivePath;
92    private final Path originPath;
93    private final Path mobPath;
94    private final Path tempPath;
95  
96    /**
97     * Dead simple hfile link constructor
98     */
99    public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
100                    final Path archivePath) {
101     this.tempPath = tempPath;
102     this.originPath = originPath;
103     this.mobPath = mobPath;
104     this.archivePath = archivePath;
105     setLocations(originPath, tempPath, mobPath, archivePath);
106   }
107 
108 
109   /**
110    * @param conf {@link Configuration} from which to extract specific archive locations
111    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
112    * @throws IOException on unexpected error.
113    */
114   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
115           throws IOException {
116     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
117             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
118   }
119 
120 
121 
122   /**
123    * @param rootDir Path to the root directory where hbase files are stored
124    * @param archiveDir Path to the hbase archive directory
125    * @param hFileLinkPattern The path of the HFile Link.
126    */
127   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
128                                                           final Path archiveDir,
129                                                           final Path hFileLinkPattern) {
130     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
131     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
132     Path originPath = new Path(rootDir, hfilePath);
133     Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
134     Path archivePath = new Path(archiveDir, hfilePath);
135     return new HFileLink(originPath, tempPath, mobPath, archivePath);
136   }
137 
138   /**
139    * Create an HFileLink relative path for the table/region/family/hfile location
140    * @param table Table name
141    * @param region Region Name
142    * @param family Family Name
143    * @param hfile HFile Name
144    * @return the relative Path to open the specified table/region/family/hfile link
145    */
146   public static Path createPath(final TableName table, final String region,
147                                 final String family, final String hfile) {
148     if (HFileLink.isHFileLink(hfile)) {
149       return new Path(family, hfile);
150     }
151     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
152   }
153 
154   /**
155    * Create an HFileLink instance from table/region/family/hfile location
156    * @param conf {@link Configuration} from which to extract specific archive locations
157    * @param table Table name
158    * @param region Region Name
159    * @param family Family Name
160    * @param hfile HFile Name
161    * @return Link to the file with the specified table/region/family/hfile location
162    * @throws IOException on unexpected error.
163    */
164   public static HFileLink build(final Configuration conf, final TableName table,
165                                  final String region, final String family, final String hfile)
166           throws IOException {
167     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
168   }
169 
170   /**
171    * @return the origin path of the hfile.
172    */
173   public Path getOriginPath() {
174     return this.originPath;
175   }
176 
177   /**
178    * @return the path of the archived hfile.
179    */
180   public Path getArchivePath() {
181     return this.archivePath;
182   }
183 
184   /**
185    * @return the path of the mob hfiles.
186    */
187   public Path getMobPath() {
188     return this.mobPath;
189   }
190 
191     /**
192    * @param path Path to check.
193    * @return True if the path is a HFileLink.
194    */
195   public static boolean isHFileLink(final Path path) {
196     return isHFileLink(path.getName());
197   }
198 
199 
200   /**
201    * @param fileName File name to check.
202    * @return True if the path is a HFileLink.
203    */
204   public static boolean isHFileLink(String fileName) {
205     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
206     if (!m.matches()) return false;
207     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
208   }
209 
210   /**
211    * Convert a HFileLink path to a table relative path.
212    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
213    *      becomes: /hbase/testtb/4567/cf/abcd
214    *
215    * @param path HFileLink path
216    * @return Relative table path
217    * @throws IOException on unexpected error.
218    */
219   private static Path getHFileLinkPatternRelativePath(final Path path) {
220     // table=region-hfile
221     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
222     if (!m.matches()) {
223       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
224     }
225 
226     // Convert the HFileLink name into a real table/region/cf/hfile path.
227     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
228     String regionName = m.group(3);
229     String hfileName = m.group(4);
230     String familyName = path.getParent().getName();
231     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
232     return new Path(tableDir, new Path(regionName, new Path(familyName,
233         hfileName)));
234   }
235 
236   /**
237    * Get the HFile name of the referenced link
238    *
239    * @param fileName HFileLink file name
240    * @return the name of the referenced HFile
241    */
242   public static String getReferencedHFileName(final String fileName) {
243     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
244     if (!m.matches()) {
245       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
246     }
247     return(m.group(4));
248   }
249 
250   /**
251    * Get the Region name of the referenced link
252    *
253    * @param fileName HFileLink file name
254    * @return the name of the referenced Region
255    */
256   public static String getReferencedRegionName(final String fileName) {
257     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
258     if (!m.matches()) {
259       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
260     }
261     return(m.group(3));
262   }
263 
264   /**
265    * Get the Table name of the referenced link
266    *
267    * @param fileName HFileLink file name
268    * @return the name of the referenced Table
269    */
270   public static TableName getReferencedTableName(final String fileName) {
271     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
272     if (!m.matches()) {
273       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
274     }
275     return(TableName.valueOf(m.group(1), m.group(2)));
276   }
277 
278   /**
279    * Create a new HFileLink name
280    *
281    * @param hfileRegionInfo - Linked HFile Region Info
282    * @param hfileName - Linked HFile name
283    * @return file name of the HFile Link
284    */
285   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
286       final String hfileName) {
287     return createHFileLinkName(hfileRegionInfo.getTable(),
288             hfileRegionInfo.getEncodedName(), hfileName);
289   }
290 
291   /**
292    * Create a new HFileLink name
293    *
294    * @param tableName - Linked HFile table name
295    * @param regionName - Linked HFile region name
296    * @param hfileName - Linked HFile name
297    * @return file name of the HFile Link
298    */
299   public static String createHFileLinkName(final TableName tableName,
300       final String regionName, final String hfileName) {
301     String s = String.format("%s=%s-%s",
302         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
303         regionName, hfileName);
304     return s;
305   }
306 
307   /**
308    * Create a new HFileLink
309    *
310    * <p>It also adds a back-reference to the hfile back-reference directory
311    * to simplify the reference-count and the cleaning process.
312    *
313    * @param conf {@link Configuration} to read for the archive directory name
314    * @param fs {@link FileSystem} on which to write the HFileLink
315    * @param dstFamilyPath - Destination path (table/region/cf/)
316    * @param hfileRegionInfo - Linked HFile Region Info
317    * @param hfileName - Linked HFile name
318    * @return true if the file is created, otherwise the file exists.
319    * @throws IOException on file or parent directory creation failure
320    */
321   public static boolean create(final Configuration conf, final FileSystem fs,
322       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
323       final String hfileName) throws IOException {
324     return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
325   }
326 
327   /**
328    * Create a new HFileLink
329    *
330    * <p>It also adds a back-reference to the hfile back-reference directory
331    * to simplify the reference-count and the cleaning process.
332    *
333    * @param conf {@link Configuration} to read for the archive directory name
334    * @param fs {@link FileSystem} on which to write the HFileLink
335    * @param dstFamilyPath - Destination path (table/region/cf/)
336    * @param hfileRegionInfo - Linked HFile Region Info
337    * @param hfileName - Linked HFile name
338    * @param createBackRef - Whether back reference should be created. Defaults to true.
339    * @return true if the file is created, otherwise the file exists.
340    * @throws IOException on file or parent directory creation failure
341    */
342   public static boolean create(final Configuration conf, final FileSystem fs,
343       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
344       final String hfileName, final boolean createBackRef) throws IOException {
345     TableName linkedTable = hfileRegionInfo.getTable();
346     String linkedRegion = hfileRegionInfo.getEncodedName();
347     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
348   }
349 
350   /**
351    * Create a new HFileLink
352    *
353    * <p>It also adds a back-reference to the hfile back-reference directory
354    * to simplify the reference-count and the cleaning process.
355    *
356    * @param conf {@link Configuration} to read for the archive directory name
357    * @param fs {@link FileSystem} on which to write the HFileLink
358    * @param dstFamilyPath - Destination path (table/region/cf/)
359    * @param linkedTable - Linked Table Name
360    * @param linkedRegion - Linked Region Name
361    * @param hfileName - Linked HFile name
362    * @return true if the file is created, otherwise the file exists.
363    * @throws IOException on file or parent directory creation failure
364    */
365   public static boolean create(final Configuration conf, final FileSystem fs,
366       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
367       final String hfileName) throws IOException {
368     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
369   }
370 
371   /**
372    * Create a new HFileLink
373    *
374    * <p>It also adds a back-reference to the hfile back-reference directory
375    * to simplify the reference-count and the cleaning process.
376    *
377    * @param conf {@link Configuration} to read for the archive directory name
378    * @param fs {@link FileSystem} on which to write the HFileLink
379    * @param dstFamilyPath - Destination path (table/region/cf/)
380    * @param linkedTable - Linked Table Name
381    * @param linkedRegion - Linked Region Name
382    * @param hfileName - Linked HFile name
383    * @param createBackRef - Whether back reference should be created. Defaults to true.
384    * @return true if the file is created, otherwise the file exists.
385    * @throws IOException on file or parent directory creation failure
386    */
387   public static boolean create(final Configuration conf, final FileSystem fs,
388       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
389       final String hfileName, final boolean createBackRef) throws IOException {
390     String familyName = dstFamilyPath.getName();
391     String regionName = dstFamilyPath.getParent().getName();
392     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
393         .getNameAsString();
394 
395     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
396     String refName = createBackReferenceName(tableName, regionName);
397 
398     // Make sure the destination directory exists
399     fs.mkdirs(dstFamilyPath);
400 
401     // Make sure the FileLink reference directory exists
402     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
403           linkedTable, linkedRegion, familyName);
404     Path backRefPath = null;
405     if (createBackRef) {
406       Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
407       fs.mkdirs(backRefssDir);
408 
409       // Create the reference for the link
410       backRefPath = new Path(backRefssDir, refName);
411       fs.createNewFile(backRefPath);
412     }
413     try {
414       // Create the link
415       return fs.createNewFile(new Path(dstFamilyPath, name));
416     } catch (IOException e) {
417       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
418       // Revert the reference if the link creation failed
419       if (createBackRef) {
420         fs.delete(backRefPath, false);
421       }
422       throw e;
423     }
424   }
425 
426   /**
427    * Create a new HFileLink starting from a hfileLink name
428    *
429    * <p>It also adds a back-reference to the hfile back-reference directory
430    * to simplify the reference-count and the cleaning process.
431    *
432    * @param conf {@link Configuration} to read for the archive directory name
433    * @param fs {@link FileSystem} on which to write the HFileLink
434    * @param dstFamilyPath - Destination path (table/region/cf/)
435    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
436    * @return true if the file is created, otherwise the file exists.
437    * @throws IOException on file or parent directory creation failure
438    */
439   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
440       final Path dstFamilyPath, final String hfileLinkName)
441           throws IOException {
442     return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
443   }
444 
445   /**
446    * Create a new HFileLink starting from a hfileLink name
447    *
448    * <p>It also adds a back-reference to the hfile back-reference directory
449    * to simplify the reference-count and the cleaning process.
450    *
451    * @param conf {@link Configuration} to read for the archive directory name
452    * @param fs {@link FileSystem} on which to write the HFileLink
453    * @param dstFamilyPath - Destination path (table/region/cf/)
454    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
455    * @param createBackRef - Whether back reference should be created. Defaults to true.
456    * @return true if the file is created, otherwise the file exists.
457    * @throws IOException on file or parent directory creation failure
458    */
459   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
460       final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
461           throws IOException {
462     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
463     if (!m.matches()) {
464       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
465     }
466     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
467         m.group(3), m.group(4), createBackRef);
468   }
469 
470   /**
471    * Create the back reference name
472    */
473   //package-private for testing
474   static String createBackReferenceName(final String tableNameStr,
475                                         final String regionName) {
476 
477     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
478   }
479 
480   /**
481    * Get the full path of the HFile referenced by the back reference
482    *
483    * @param rootDir root hbase directory
484    * @param linkRefPath Link Back Reference path
485    * @return full path of the referenced hfile
486    * @throws IOException on unexpected error.
487    */
488   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
489     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
490     TableName linkTableName = p.getFirst();
491     String linkRegionName = p.getSecond();
492 
493     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
494     Path familyPath = linkRefPath.getParent().getParent();
495     Path regionPath = familyPath.getParent();
496     Path tablePath = regionPath.getParent();
497 
498     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
499             regionPath.getName(), hfileName);
500     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
501     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
502     return new Path(new Path(regionDir, familyPath.getName()), linkName);
503   }
504 
505   static Pair<TableName, String> parseBackReferenceName(String name) {
506     int separatorIndex = name.indexOf('.');
507     String linkRegionName = name.substring(0, separatorIndex);
508     String tableSubstr = name.substring(separatorIndex + 1)
509         .replace('=', TableName.NAMESPACE_DELIM);
510     TableName linkTableName = TableName.valueOf(tableSubstr);
511     return new Pair<TableName, String>(linkTableName, linkRegionName);
512   }
513 
514   /**
515    * Get the full path of the HFile referenced by the back reference
516    *
517    * @param conf {@link Configuration} to read for the archive directory name
518    * @param linkRefPath Link Back Reference path
519    * @return full path of the referenced hfile
520    * @throws IOException on unexpected error.
521    */
522   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
523       throws IOException {
524     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
525   }
526 
527 }