View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.HashSet;
23  import java.util.List;
24  import java.util.Set;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.catalog.MetaReader;
37  import org.apache.hadoop.hbase.errorhandling.ForeignException;
38  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
40  import org.apache.hadoop.hbase.executor.EventHandler;
41  import org.apache.hadoop.hbase.master.MasterServices;
42  import org.apache.hadoop.hbase.master.SnapshotSentinel;
43  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
44  import org.apache.hadoop.hbase.regionserver.HRegion;
45  import org.apache.hadoop.hbase.snapshot.CopyRecoveredEditsTask;
46  import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask;
47  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
48  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
49  import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask;
50  import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.Pair;
53  import org.apache.zookeeper.KeeperException;
54  
55  /**
56   * A handler for taking snapshots from the master.
57   *
58   * This is not a subclass of TableEventHandler because using that would incur an extra META scan.
59   *
60   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
61   */
62  @InterfaceAudience.Private
63  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
64      ForeignExceptionSnare {
65    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
66  
67    private volatile boolean finished;
68  
69    // none of these should ever be null
70    protected final MasterServices master;
71    protected final SnapshotDescription snapshot;
72    protected final Configuration conf;
73    protected final FileSystem fs;
74    protected final Path rootDir;
75    private final Path snapshotDir;
76    protected final Path workingDir;
77    private final MasterSnapshotVerifier verifier;
78    protected final ForeignExceptionDispatcher monitor;
79  
80    /**
81     * @param snapshot descriptor of the snapshot to take
82     * @param masterServices master services provider
83     * @throws IOException on unexpected error
84     */
85    public TakeSnapshotHandler(SnapshotDescription snapshot,
86        final MasterServices masterServices) throws IOException {
87      super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
88      assert snapshot != null : "SnapshotDescription must not be nul1";
89      assert masterServices != null : "MasterServices must not be nul1";
90  
91      this.master = masterServices;
92      this.snapshot = snapshot;
93      this.conf = this.master.getConfiguration();
94      this.fs = this.master.getMasterFileSystem().getFileSystem();
95      this.rootDir = this.master.getMasterFileSystem().getRootDir();
96      this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
97      this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir);
98      this.monitor =  new ForeignExceptionDispatcher();
99  
100     loadTableDescriptor(); // check that .tableinfo is present
101 
102     // prepare the verify
103     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
104   }
105 
106   private HTableDescriptor loadTableDescriptor()
107       throws FileNotFoundException, IOException {
108     final String name = snapshot.getTable();
109     HTableDescriptor htd =
110       this.master.getTableDescriptors().get(name);
111     if (htd == null) {
112       throw new IOException("HTableDescriptor missing for " + name);
113     }
114     return htd;
115   }
116 
117   /**
118    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
119    * call should get implemented for each snapshot flavor.
120    */
121   @Override
122   public void process() {
123     LOG.info("Running table snapshot operation " + eventType + " on table " + snapshot.getTable());
124     try {
125       // If regions move after this meta scan, the region specific snapshot should fail, triggering
126       // an external exception that gets captured here.
127 
128       // write down the snapshot info in the working directory
129       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs);
130       new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call();
131       monitor.rethrowException();
132 
133       List<Pair<HRegionInfo, ServerName>> regionsAndLocations =
134           MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(),
135             Bytes.toBytes(snapshot.getTable()), false);
136 
137       // run the snapshot
138       snapshotRegions(regionsAndLocations);
139 
140       // extract each pair to separate lists
141       Set<String> serverNames = new HashSet<String>();
142       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
143         if (p != null && p.getFirst() != null && p.getSecond() != null) {
144           HRegionInfo hri = p.getFirst();
145           if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
146           serverNames.add(p.getSecond().toString());
147         }
148       }
149 
150       // verify the snapshot is valid
151       verifier.verifySnapshot(this.workingDir, serverNames);
152 
153       // complete the snapshot, atomically moving from tmp to .snapshot dir.
154       completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
155     } catch (Exception e) {
156       String reason = "Failed taking snapshot " + SnapshotDescriptionUtils.toString(snapshot)
157           + " due to exception:" + e.getMessage();
158       LOG.error(reason, e);
159       ForeignException ee = new ForeignException(reason, e);
160       monitor.receive(ee);
161       // need to mark this completed to close off and allow cleanup to happen.
162       cancel("Failed to take snapshot '" + SnapshotDescriptionUtils.toString(snapshot)
163           + "' due to exception");
164     } finally {
165       LOG.debug("Launching cleanup of working dir:" + workingDir);
166       try {
167         // if the working dir is still present, the snapshot has failed.  it is present we delete
168         // it.
169         if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
170           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
171         }
172       } catch (IOException e) {
173         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
174       }
175     }
176   }
177 
178   /**
179    * Reset the manager to allow another snapshot to proceed
180    *
181    * @param snapshotDir final path of the snapshot
182    * @param workingDir directory where the in progress snapshot was built
183    * @param fs {@link FileSystem} where the snapshot was built
184    * @throws SnapshotCreationException if the snapshot could not be moved
185    * @throws IOException the filesystem could not be reached
186    */
187   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs)
188       throws SnapshotCreationException, IOException {
189     LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to "
190         + snapshotDir);
191     if (!fs.rename(workingDir, snapshotDir)) {
192       throw new SnapshotCreationException("Failed to move working directory(" + workingDir
193           + ") to completed directory(" + snapshotDir + ").");
194     }
195     finished = true;
196   }
197 
198   /**
199    * Take a snapshot of the specified disabled region
200    */
201   protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
202       throws IOException {
203     // 1 copy the regionInfo files to the snapshot
204     Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir,
205       regionInfo.getEncodedName());
206     HRegion.writeRegioninfoOnFilesystem(regionInfo, snapshotRegionDir, fs, conf);
207     // check for error for each region
208     monitor.rethrowException();
209 
210     // 2 for each region, copy over its recovered.edits directory
211     Path regionDir = HRegion.getRegionDir(rootDir, regionInfo);
212     new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call();
213     monitor.rethrowException();
214 
215     // 3 reference all the files in the region
216     new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call();
217     monitor.rethrowException();
218   }
219 
220   /**
221    * Snapshot the specified regions
222    */
223   protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
224       throws IOException, KeeperException;
225 
226   @Override
227   public void cancel(String why) {
228     if (finished) return;
229 
230     this.finished = true;
231     LOG.info("Stop taking snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " because: "
232         + why);
233     CancellationException ce = new CancellationException(why);
234     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
235   }
236 
237   @Override
238   public boolean isFinished() {
239     return finished;
240   }
241 
242   @Override
243   public SnapshotDescription getSnapshot() {
244     return snapshot;
245   }
246 
247   @Override
248   public ForeignException getExceptionIfFailed() {
249     return monitor.getException();
250   }
251 
252   @Override
253   public void rethrowException() throws ForeignException {
254     monitor.rethrowException();
255   }
256 
257   @Override
258   public boolean hasException() {
259     return monitor.hasException();
260   }
261 
262   @Override
263   public ForeignException getException() {
264     return monitor.getException();
265   }
266 
267 }