View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import com.google.protobuf.CodedInputStream;
22  import com.google.protobuf.InvalidProtocolBufferException;
23  
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.ThreadPoolExecutor;
32  import java.util.concurrent.TimeUnit;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataInputStream;
38  import org.apache.hadoop.fs.FSDataOutputStream;
39  import org.apache.hadoop.fs.FileStatus;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.classification.InterfaceAudience;
46  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
47  import org.apache.hadoop.hbase.mob.MobUtils;
48  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
49  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
50  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
51  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
52  import org.apache.hadoop.hbase.regionserver.HRegion;
53  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
54  import org.apache.hadoop.hbase.regionserver.Store;
55  import org.apache.hadoop.hbase.regionserver.StoreFile;
56  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.FSTableDescriptors;
59  import org.apache.hadoop.hbase.util.FSUtils;
60  import org.apache.hadoop.hbase.util.Threads;
61  
62  /**
63   * Utility class to help read/write the Snapshot Manifest.
64   *
65   * The snapshot format is transparent for the users of this class,
66   * once the snapshot is written, it will never be modified.
67   * On open() the snapshot will be loaded to the current in-memory format.
68   */
69  @InterfaceAudience.Private
70  public class SnapshotManifest {
71    private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);
72  
73    public static final String SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY = "snapshot.manifest.size.limit";
74  
75    public static final String DATA_MANIFEST_NAME = "data.manifest";
76  
77    private List<SnapshotRegionManifest> regionManifests;
78    private SnapshotDescription desc;
79    private HTableDescriptor htd;
80  
81    private final ForeignExceptionSnare monitor;
82    private final Configuration conf;
83    private final Path workingDir;
84    private final FileSystem fs;
85    private int manifestSizeLimit;
86  
87    private SnapshotManifest(final Configuration conf, final FileSystem fs,
88        final Path workingDir, final SnapshotDescription desc,
89        final ForeignExceptionSnare monitor) {
90      this.monitor = monitor;
91      this.desc = desc;
92      this.workingDir = workingDir;
93      this.conf = conf;
94      this.fs = fs;
95  
96      this.manifestSizeLimit = conf.getInt(SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 64 * 1024 * 1024);
97    }
98  
99    /**
100    * Return a SnapshotManifest instance, used for writing a snapshot.
101    *
102    * There are two usage pattern:
103    *  - The Master will create a manifest, add the descriptor, offline regions
104    *    and consolidate the snapshot by writing all the pending stuff on-disk.
105    *      manifest = SnapshotManifest.create(...)
106    *      manifest.addRegion(tableDir, hri)
107    *      manifest.consolidate()
108    *  - The RegionServer will create a single region manifest
109    *      manifest = SnapshotManifest.create(...)
110    *      manifest.addRegion(region)
111    */
112   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
113       final Path workingDir, final SnapshotDescription desc,
114       final ForeignExceptionSnare monitor) {
115     return new SnapshotManifest(conf, fs, workingDir, desc, monitor);
116   }
117 
118   /**
119    * Return a SnapshotManifest instance with the information already loaded in-memory.
120    *    SnapshotManifest manifest = SnapshotManifest.open(...)
121    *    HTableDescriptor htd = manifest.getTableDescriptor()
122    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
123    *      hri = regionManifest.getRegionInfo()
124    *      for (regionManifest.getFamilyFiles())
125    *        ...
126    */
127   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
128       final Path workingDir, final SnapshotDescription desc) throws IOException {
129     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null);
130     manifest.load();
131     return manifest;
132   }
133 
134 
135   /**
136    * Add the table descriptor to the snapshot manifest
137    */
138   public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
139     this.htd = htd;
140   }
141 
142   interface RegionVisitor<TRegion, TFamily> {
143     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
144     void regionClose(final TRegion region) throws IOException;
145 
146     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
147     void familyClose(final TRegion region, final TFamily family) throws IOException;
148 
149     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
150       throws IOException;
151   }
152 
153   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
154     switch (getSnapshotFormat(desc)) {
155       case SnapshotManifestV1.DESCRIPTOR_VERSION:
156         return new SnapshotManifestV1.ManifestBuilder(conf, fs, workingDir);
157       case SnapshotManifestV2.DESCRIPTOR_VERSION:
158         return new SnapshotManifestV2.ManifestBuilder(conf, fs, workingDir);
159       default:
160         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
161     }
162   }
163 
164   public void addMobRegion(HRegionInfo regionInfo, HColumnDescriptor[] hcds) throws IOException {
165     // 0. Get the ManifestBuilder/RegionVisitor
166     RegionVisitor visitor = createRegionVisitor(desc);
167 
168     // 1. dump region meta info into the snapshot directory
169     LOG.debug("Storing mob region '" + regionInfo + "' region-info for snapshot.");
170     Object regionData = visitor.regionOpen(regionInfo);
171     monitor.rethrowException();
172 
173     // 2. iterate through all the stores in the region
174     LOG.debug("Creating references for mob files");
175 
176     Path mobRegionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
177     for (HColumnDescriptor hcd : hcds) {
178       // 2.1. build the snapshot reference for the store if it's a mob store
179       if (!hcd.isMobEnabled()) {
180         continue;
181       }
182       Object familyData = visitor.familyOpen(regionData, hcd.getName());
183       monitor.rethrowException();
184 
185       Path storePath = MobUtils.getMobFamilyPath(mobRegionPath, hcd.getNameAsString());
186       if (!fs.exists(storePath)) {
187         continue;
188       }
189       FileStatus[] stats = fs.listStatus(storePath);
190       if (stats == null) {
191         continue;
192       }
193       List<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>();
194       for (FileStatus stat : stats) {
195         storeFiles.add(new StoreFileInfo(conf, fs, stat));
196       }
197       if (LOG.isDebugEnabled()) {
198         LOG.debug("Adding snapshot references for " + storeFiles + " mob files");
199       }
200 
201       // 2.2. iterate through all the mob files and create "references".
202       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
203         StoreFileInfo storeFile = storeFiles.get(i);
204         monitor.rethrowException();
205 
206         // create "reference" to this store file.
207         if (LOG.isDebugEnabled()) {
208           LOG.debug("Adding reference for mob file (" + (i + 1) + "/" + sz + "): "
209             + storeFile.getPath());
210         }
211         visitor.storeFile(regionData, familyData, storeFile);
212       }
213       visitor.familyClose(regionData, familyData);
214     }
215     visitor.regionClose(regionData);
216   }
217 
218   /**
219    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
220    * This is used by the "online snapshot" when the table is enabled.
221    */
222   public void addRegion(final HRegion region) throws IOException {
223     // 0. Get the ManifestBuilder/RegionVisitor
224     RegionVisitor visitor = createRegionVisitor(desc);
225 
226     // 1. dump region meta info into the snapshot directory
227     LOG.debug("Storing '" + region + "' region-info for snapshot.");
228     Object regionData = visitor.regionOpen(region.getRegionInfo());
229     monitor.rethrowException();
230 
231     // 2. iterate through all the stores in the region
232     LOG.debug("Creating references for hfiles");
233 
234     for (Store store : region.getStores()) {
235       // 2.1. build the snapshot reference for the store
236       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
237       monitor.rethrowException();
238 
239       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
240       if (LOG.isDebugEnabled()) {
241         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
242       }
243 
244       // 2.2. iterate through all the store's files and create "references".
245       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
246         StoreFile storeFile = storeFiles.get(i);
247         monitor.rethrowException();
248 
249         // create "reference" to this store file.
250         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
251         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
252       }
253       visitor.familyClose(regionData, familyData);
254     }
255     visitor.regionClose(regionData);
256   }
257 
258   /**
259    * Creates a 'manifest' for the specified region, by reading directly from the disk.
260    * This is used by the "offline snapshot" when the table is disabled.
261    */
262   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
263     // 0. Get the ManifestBuilder/RegionVisitor
264     RegionVisitor visitor = createRegionVisitor(desc);
265 
266     boolean isMobRegion = MobUtils.isMobRegionInfo(regionInfo);
267     try {
268       // Open the RegionFS
269       HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs,
270             tableDir, regionInfo, true);
271       monitor.rethrowException();
272 
273       // 1. dump region meta info into the snapshot directory
274       LOG.debug("Storing region-info for snapshot.");
275       Object regionData = visitor.regionOpen(regionInfo);
276       monitor.rethrowException();
277 
278       // 2. iterate through all the stores in the region
279       LOG.debug("Creating references for hfiles");
280 
281       // This ensures that we have an atomic view of the directory as long as we have < ls limit
282       // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files
283       // in batches and may miss files being added/deleted. This could be more robust (iteratively
284       // checking to see if we have all the files until we are sure), but the limit is currently
285       // 1000 files/batch, far more than the number of store files under a single column family.
286       Collection<String> familyNames = regionFs.getFamilies();
287       if (familyNames != null) {
288         for (String familyName: familyNames) {
289           Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
290           monitor.rethrowException();
291 
292           Collection<StoreFileInfo> storeFiles = null;
293           if (isMobRegion) {
294             Path regionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
295             Path storePath = MobUtils.getMobFamilyPath(regionPath, familyName);
296             if (!fs.exists(storePath)) {
297               continue;
298             }
299             FileStatus[] stats = fs.listStatus(storePath);
300             if (stats == null) {
301               continue;
302             }
303             storeFiles = new ArrayList<StoreFileInfo>();
304             for (FileStatus stat : stats) {
305               storeFiles.add(new StoreFileInfo(conf, fs, stat));
306             }
307           } else {
308             storeFiles = regionFs.getStoreFiles(familyName);
309           }
310           if (storeFiles == null) {
311             if (LOG.isDebugEnabled()) {
312               LOG.debug("No files under family: " + familyName);
313             }
314             continue;
315           }
316 
317           // 2.1. build the snapshot reference for the store
318           if (LOG.isDebugEnabled()) {
319             LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
320           }
321 
322           // 2.2. iterate through all the store's files and create "references".
323           int i = 0;
324           int sz = storeFiles.size();
325           for (StoreFileInfo storeFile: storeFiles) {
326             monitor.rethrowException();
327 
328             // create "reference" to this store file.
329             LOG.debug("Adding reference for file (" + (++i) + "/" + sz + "): "
330                 + storeFile.getPath());
331             visitor.storeFile(regionData, familyData, storeFile);
332           }
333           visitor.familyClose(regionData, familyData);
334         }
335       }
336       visitor.regionClose(regionData);
337     } catch (IOException e) {
338       // the mob directory might not be created yet, so do nothing when it is a mob region
339       if (!isMobRegion) {
340         throw e;
341       }
342     }
343   }
344 
345   /**
346    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
347    *
348    * If the format is v2 and there is no data-manifest, means that we are loading an
349    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
350    * regions format.
351    */
352   private void load() throws IOException {
353     switch (getSnapshotFormat(desc)) {
354       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
355         this.htd = FSTableDescriptors.getTableDescriptorFromFs(fs, workingDir);
356         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
357         try {
358           this.regionManifests =
359             SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
360         } finally {
361           tpool.shutdown();
362         }
363         break;
364       }
365       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
366         SnapshotDataManifest dataManifest = readDataManifest();
367         if (dataManifest != null) {
368           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
369           regionManifests = dataManifest.getRegionManifestsList();
370         } else {
371           // Compatibility, load the v1 regions
372           // This happens only when the snapshot is in-progress and the cache wants to refresh.
373           List<SnapshotRegionManifest> v1Regions, v2Regions;
374           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
375           try {
376             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
377             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
378           } catch (InvalidProtocolBufferException e) {
379             throw new CorruptedSnapshotException("unable to parse region manifest " +
380                 e.getMessage(), e);
381           } finally {
382             tpool.shutdown();
383           }
384           if (v1Regions != null && v2Regions != null) {
385             regionManifests =
386               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
387             regionManifests.addAll(v1Regions);
388             regionManifests.addAll(v2Regions);
389           } else if (v1Regions != null) {
390             regionManifests = v1Regions;
391           } else /* if (v2Regions != null) */ {
392             regionManifests = v2Regions;
393           }
394         }
395         break;
396       }
397       default:
398         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
399     }
400   }
401 
402   /**
403    * Get the current snapshot working dir
404    */
405   public Path getSnapshotDir() {
406     return this.workingDir;
407   }
408 
409   /**
410    * Get the SnapshotDescription
411    */
412   public SnapshotDescription getSnapshotDescription() {
413     return this.desc;
414   }
415 
416   /**
417    * Get the table descriptor from the Snapshot
418    */
419   public HTableDescriptor getTableDescriptor() {
420     return this.htd;
421   }
422 
423   /**
424    * Get all the Region Manifest from the snapshot
425    */
426   public List<SnapshotRegionManifest> getRegionManifests() {
427     return this.regionManifests;
428   }
429 
430   /**
431    * Get all the Region Manifest from the snapshot.
432    * This is an helper to get a map with the region encoded name
433    */
434   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
435     if (regionManifests == null || regionManifests.size() == 0) return null;
436 
437     HashMap<String, SnapshotRegionManifest> regionsMap =
438         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
439     for (SnapshotRegionManifest manifest: regionManifests) {
440       String regionName = getRegionNameFromManifest(manifest);
441       regionsMap.put(regionName, manifest);
442     }
443     return regionsMap;
444   }
445 
446   public void consolidate() throws IOException {
447     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
448       Path rootDir = FSUtils.getRootDir(conf);
449       LOG.info("Using old Snapshot Format");
450       // write a copy of descriptor to the snapshot directory
451       new FSTableDescriptors(conf, fs, rootDir)
452         .createTableDescriptorForTableDirectory(workingDir, htd, false);
453     } else {
454       LOG.debug("Convert to Single Snapshot Manifest");
455       convertToV2SingleManifest();
456     }
457   }
458 
459   /*
460    * In case of rolling-upgrade, we try to read all the formats and build
461    * the snapshot with the latest format.
462    */
463   private void convertToV2SingleManifest() throws IOException {
464     // Try to load v1 and v2 regions
465     List<SnapshotRegionManifest> v1Regions, v2Regions;
466     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
467     try {
468       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc);
469       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc);
470     } finally {
471       tpool.shutdown();
472     }
473 
474     SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
475     dataManifestBuilder.setTableSchema(htd.convert());
476 
477     if (v1Regions != null && v1Regions.size() > 0) {
478       dataManifestBuilder.addAllRegionManifests(v1Regions);
479     }
480     if (v2Regions != null && v2Regions.size() > 0) {
481       dataManifestBuilder.addAllRegionManifests(v2Regions);
482     }
483 
484     // Write the v2 Data Manifest.
485     // Once the data-manifest is written, the snapshot can be considered complete.
486     // Currently snapshots are written in a "temporary" directory and later
487     // moved to the "complated" snapshot directory.
488     SnapshotDataManifest dataManifest = dataManifestBuilder.build();
489     writeDataManifest(dataManifest);
490     this.regionManifests = dataManifest.getRegionManifestsList();
491 
492     // Remove the region manifests. Everything is now in the data-manifest.
493     // The delete operation is "relaxed", unless we get an exception we keep going.
494     // The extra files in the snapshot directory will not give any problem,
495     // since they have the same content as the data manifest, and even by re-reading
496     // them we will get the same information.
497     if (v1Regions != null && v1Regions.size() > 0) {
498       for (SnapshotRegionManifest regionManifest: v1Regions) {
499         SnapshotManifestV1.deleteRegionManifest(fs, workingDir, regionManifest);
500       }
501     }
502     if (v2Regions != null && v2Regions.size() > 0) {
503       for (SnapshotRegionManifest regionManifest: v2Regions) {
504         SnapshotManifestV2.deleteRegionManifest(fs, workingDir, regionManifest);
505       }
506     }
507   }
508 
509   /*
510    * Write the SnapshotDataManifest file
511    */
512   private void writeDataManifest(final SnapshotDataManifest manifest)
513       throws IOException {
514     FSDataOutputStream stream = fs.create(new Path(workingDir, DATA_MANIFEST_NAME));
515     try {
516       manifest.writeTo(stream);
517     } finally {
518       stream.close();
519     }
520   }
521 
522   /*
523    * Read the SnapshotDataManifest file
524    */
525   private SnapshotDataManifest readDataManifest() throws IOException {
526     FSDataInputStream in = null;
527     try {
528       in = fs.open(new Path(workingDir, DATA_MANIFEST_NAME));
529       CodedInputStream cin = CodedInputStream.newInstance(in);
530       cin.setSizeLimit(manifestSizeLimit);
531       return SnapshotDataManifest.parseFrom(cin);
532     } catch (FileNotFoundException e) {
533       return null;
534     } catch (InvalidProtocolBufferException e) {
535       throw new CorruptedSnapshotException("unable to parse data manifest " + e.getMessage(), e);
536     } finally {
537       if (in != null) in.close();
538     }
539   }
540 
541   private ThreadPoolExecutor createExecutor(final String name) {
542     return createExecutor(conf, name);
543   }
544 
545   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
546     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
547     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
548               Threads.getNamedThreadFactory(name));
549   }
550 
551   /**
552    * Extract the region encoded name from the region manifest
553    */
554   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
555     byte[] regionName = HRegionInfo.createRegionName(
556             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
557             manifest.getRegionInfo().getStartKey().toByteArray(),
558             manifest.getRegionInfo().getRegionId(), true);
559     return HRegionInfo.encodeRegionName(regionName);
560   }
561 
562   /*
563    * Return the snapshot format
564    */
565   private static int getSnapshotFormat(final SnapshotDescription desc) {
566     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
567   }
568 }