View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.compactions;
20  
21  import com.google.common.annotations.VisibleForTesting;
22  import com.google.common.base.Predicate;
23  import com.google.common.collect.Iterables;
24  import com.google.common.collect.Iterators;
25  import com.google.common.collect.Lists;
26  import com.google.common.collect.PeekingIterator;
27  import com.google.common.math.LongMath;
28  
29  import java.io.IOException;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Collections;
33  import java.util.List;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
41  import org.apache.hadoop.hbase.classification.InterfaceAudience;
42  import org.apache.hadoop.hbase.regionserver.RSRpcServices;
43  import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
44  import org.apache.hadoop.hbase.regionserver.StoreFile;
45  import org.apache.hadoop.hbase.regionserver.StoreUtils;
46  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
47  import org.apache.hadoop.hbase.util.Pair;
48  import org.apache.hadoop.hbase.util.ReflectionUtils;
49  
50  /**
51   * HBASE-15181 This is a simple implementation of date-based tiered compaction similar to
52   * Cassandra's for the following benefits:
53   * 1. Improve date-range-based scan by structuring store files in date-based tiered layout.
54   * 2. Reduce compaction overhead.
55   * 3. Improve TTL efficiency.
56   * Perfect fit for the use cases that:
57   * 1. has mostly date-based data write and scan and a focus on the most recent data.
58   * Out-of-order writes are handled gracefully. Time range overlapping among store files is
59   * tolerated and the performance impact is minimized. Configuration can be set at hbase-site
60   * or overridden at per-table or per-column-family level by hbase shell. Design spec is at
61   * https://docs.google.com/document/d/1_AmlNb2N8Us1xICsTeGDLKIqL6T-oHoRLZ323MG_uy8/
62   */
63  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
64  public class DateTieredCompactionPolicy extends SortedCompactionPolicy {
65    private static final Log LOG = LogFactory.getLog(DateTieredCompactionPolicy.class);
66  
67    private RatioBasedCompactionPolicy compactionPolicyPerWindow;
68  
69    public DateTieredCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo)
70        throws IOException {
71      super(conf, storeConfigInfo);
72      try {
73        compactionPolicyPerWindow =
74        ReflectionUtils.instantiateWithCustomCtor(comConf.getCompactionPolicyForTieredWindow(),
75          new Class[] { Configuration.class, StoreConfigInformation.class }, new Object[] { conf,
76          storeConfigInfo });
77      } catch (Exception e) {
78        throw new IOException("Unable to load configured compaction policy '"
79            + comConf.getCompactionPolicyForTieredWindow() + "'", e);
80      }
81    }
82  
83    /**
84     * Heuristics for guessing whether we need minor compaction.
85     */
86    @Override
87    @VisibleForTesting
88    public boolean needsCompaction(final Collection<StoreFile> storeFiles,
89        final List<StoreFile> filesCompacting) {
90      ArrayList<StoreFile> candidates = new ArrayList<StoreFile>(storeFiles);
91      try {
92        return selectMinorCompaction(candidates, false, true) != null;
93      } catch (Exception e) {
94        LOG.error("Can not check for compaction: ", e);
95        return false;
96      }
97    }
98  
99    public boolean shouldPerformMajorCompaction(final Collection<StoreFile> filesToCompact)
100       throws IOException {
101     long mcTime = getNextMajorCompactTime(filesToCompact);
102     if (filesToCompact == null || mcTime == 0) {
103       return false;
104     }
105 
106     // TODO: Use better method for determining stamp of last major (HBASE-2990)
107     long lowTimestamp = StoreUtils.getLowestTimestamp(filesToCompact);
108     long now = EnvironmentEdgeManager.currentTime();
109     if (lowTimestamp <= 0L || lowTimestamp >= (now - mcTime)) {
110       return false;
111     }
112 
113     long cfTTL = this.storeConfigInfo.getStoreFileTtl();
114     HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
115     long oldestToCompact = getOldestToCompact(comConf.getMaxStoreFileAgeMillis(), now);
116     List<Long> boundaries = getCompactBoundariesForMajor(filesToCompact, oldestToCompact, now);
117     boolean[] filesInWindow = new boolean[boundaries.size()];
118 
119     for (StoreFile file: filesToCompact) {
120       Long minTimestamp = file.getMinimumTimestamp();
121       long oldest = (minTimestamp == null) ? (Long)Long.MIN_VALUE : now - minTimestamp.longValue();
122       if (cfTTL != HConstants.FOREVER && oldest >= cfTTL) {
123         LOG.debug("Major compaction triggered on store " + this
124           + "; for TTL maintenance");
125         return true;
126       }
127       if (!file.isMajorCompaction() || file.isBulkLoadResult()) {
128         LOG.debug("Major compaction triggered on store " + this
129           + ", because there are new files and time since last major compaction "
130           + (now - lowTimestamp) + "ms");
131         return true;
132       }
133       int lowerWindowIndex = Collections.binarySearch(boundaries,
134         minTimestamp == null ? (Long)Long.MAX_VALUE : minTimestamp);
135       int upperWindowIndex = Collections.binarySearch(boundaries,
136         file.getMaximumTimestamp() == null ? (Long)Long.MAX_VALUE : file.getMaximumTimestamp());
137       if (lowerWindowIndex != upperWindowIndex) {
138         LOG.debug("Major compaction triggered on store " + this + "; because file "
139             + file.getPath() + " has data with timestamps cross window boundaries");
140         return true;
141       } else if (filesInWindow[upperWindowIndex]) {
142         LOG.debug("Major compaction triggered on store " + this +
143             "; because there are more than one file in some windows");
144         return true;
145       } else {
146         filesInWindow[upperWindowIndex] = true;
147       }
148       hdfsBlocksDistribution.add(file.getHDFSBlockDistribution());
149     }
150 
151     float blockLocalityIndex = hdfsBlocksDistribution
152         .getBlockLocalityIndex(RSRpcServices.getHostname(comConf.conf, false));
153     if (blockLocalityIndex < comConf.getMinLocalityToForceCompact()) {
154       LOG.debug("Major compaction triggered on store " + this
155         + "; to make hdfs blocks local, current blockLocalityIndex is "
156         + blockLocalityIndex + " (min " + comConf.getMinLocalityToForceCompact() + ")");
157       return true;
158     }
159 
160     LOG.debug("Skipping major compaction of " + this +
161         ", because the files are already major compacted");
162     return false;
163   }
164 
165   @Override
166   protected CompactionRequest createCompactionRequest(ArrayList<StoreFile> candidateSelection,
167     boolean tryingMajor, boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
168     CompactionRequest result = tryingMajor ? selectMajorCompaction(candidateSelection)
169       : selectMinorCompaction(candidateSelection, mayUseOffPeak, mayBeStuck);
170     LOG.debug("Generated compaction request: " + result);
171     return result;
172   }
173 
174   public CompactionRequest selectMajorCompaction(ArrayList<StoreFile> candidateSelection) {
175     long now = EnvironmentEdgeManager.currentTime();
176     long oldestToCompact = getOldestToCompact(comConf.getMaxStoreFileAgeMillis(), now);
177     return new DateTieredCompactionRequest(candidateSelection,
178       this.getCompactBoundariesForMajor(candidateSelection, oldestToCompact, now));
179   }
180 
181   /**
182    * We receive store files sorted in ascending order by seqId then scan the list of files. If the
183    * current file has a maxTimestamp older than last known maximum, treat this file as it carries
184    * the last known maximum. This way both seqId and timestamp are in the same order. If files carry
185    * the same maxTimestamps, they are ordered by seqId. We then reverse the list so they are ordered
186    * by seqId and maxTimestamp in descending order and build the time windows. All the out-of-order
187    * data into the same compaction windows, guaranteeing contiguous compaction based on sequence id.
188    */
189   public CompactionRequest selectMinorCompaction(ArrayList<StoreFile> candidateSelection,
190       boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
191     long now = EnvironmentEdgeManager.currentTime();
192     long oldestToCompact = getOldestToCompact(comConf.getMaxStoreFileAgeMillis(), now);
193 
194     List<StoreFile> storeFileList = Lists.newArrayList(filterOldStoreFiles(candidateSelection,
195       oldestToCompact));
196 
197     List<Pair<StoreFile, Long>> storefileMaxTimestampPairs =
198         Lists.newArrayListWithCapacity(Iterables.size(storeFileList));
199     long maxTimestampSeen = Long.MIN_VALUE;
200     for (StoreFile storeFile : storeFileList) {
201       // if there is out-of-order data,
202       // we put them in the same window as the last file in increasing order
203       maxTimestampSeen = Math.max(maxTimestampSeen,
204         storeFile.getMaximumTimestamp() == null? Long.MIN_VALUE : storeFile.getMaximumTimestamp());
205       storefileMaxTimestampPairs.add(new Pair<StoreFile, Long>(storeFile, maxTimestampSeen));
206     }
207     Collections.reverse(storefileMaxTimestampPairs);
208 
209     Window window = getIncomingWindow(now, comConf.getBaseWindowMillis());
210     int minThreshold = comConf.getIncomingWindowMin();
211     PeekingIterator<Pair<StoreFile, Long>> it =
212         Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());
213     while (it.hasNext()) {
214       int compResult = window.compareToTimestamp(it.peek().getSecond());
215       if (compResult > 0) {
216         // If the file is too old for the window, switch to the next window
217         window = window.nextWindow(comConf.getWindowsPerTier(),
218           oldestToCompact);
219         minThreshold = comConf.getMinFilesToCompact();
220       } else {
221         // The file is within the target window
222         ArrayList<StoreFile> fileList = Lists.newArrayList();
223         // Add all files in the same window. For incoming window
224         // we tolerate files with future data although it is sub-optimal
225         while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
226           fileList.add(it.next().getFirst());
227         }
228         if (fileList.size() >= minThreshold) {
229           LOG.debug("Processing files: " + fileList + " for window: " + window);
230           DateTieredCompactionRequest request = generateCompactionRequest(fileList, window,
231             mayUseOffPeak, mayBeStuck, minThreshold);
232           if (request != null) {
233             return request;
234           }
235         }
236       }
237     }
238     // A non-null file list is expected by HStore
239     return new CompactionRequest(Collections.<StoreFile> emptyList());
240   }
241 
242   private DateTieredCompactionRequest generateCompactionRequest(ArrayList<StoreFile> storeFiles,
243       Window window, boolean mayUseOffPeak, boolean mayBeStuck, int minThreshold)
244     throws IOException {
245     // The files has to be in ascending order for ratio-based compaction to work right
246     // and removeExcessFile to exclude youngest files.
247     Collections.reverse(storeFiles);
248 
249     // Compact everything in the window if have more files than comConf.maxBlockingFiles
250     compactionPolicyPerWindow.setMinThreshold(minThreshold);
251     ArrayList<StoreFile> storeFileSelection = mayBeStuck ? storeFiles
252       : compactionPolicyPerWindow.applyCompactionPolicy(storeFiles, mayUseOffPeak, false);
253     if (storeFileSelection != null && !storeFileSelection.isEmpty()) {
254       // If there is any file in the window excluded from compaction,
255       // only one file will be output from compaction.
256       boolean singleOutput = storeFiles.size() != storeFileSelection.size() ||
257         comConf.useSingleOutputForMinorCompaction();
258       List<Long> boundaries = getCompactionBoundariesForMinor(window, singleOutput);
259       DateTieredCompactionRequest result = new DateTieredCompactionRequest(storeFileSelection,
260         boundaries);
261       return result;
262     }
263     return null;
264   }
265 
266   /**
267    * Return a list of boundaries for multiple compaction output
268    *   in ascending order.
269    */
270   private List<Long> getCompactBoundariesForMajor(Collection<StoreFile> filesToCompact,
271     long oldestToCompact, long now) {
272     long minTimestamp = Long.MAX_VALUE;
273     for (StoreFile file : filesToCompact) {
274       minTimestamp = Math.min(minTimestamp,
275         file.getMinimumTimestamp() == null? Long.MAX_VALUE : file.getMinimumTimestamp());
276     }
277 
278     List<Long> boundaries = new ArrayList<Long>();
279 
280     // Add startMillis of all windows between now and min timestamp
281     for (Window window = getIncomingWindow(now, comConf.getBaseWindowMillis());
282       window.compareToTimestamp(minTimestamp) > 0;
283       window = window.nextWindow(comConf.getWindowsPerTier(), oldestToCompact)) {
284       boundaries.add(window.startMillis());
285     }
286     boundaries.add(Long.MIN_VALUE);
287     Collections.reverse(boundaries);
288     return boundaries;
289   }
290 
291   /**
292    * @return a list of boundaries for multiple compaction output
293    *   from minTimestamp to maxTimestamp.
294    */
295   private static List<Long> getCompactionBoundariesForMinor(Window window, boolean singleOutput) {
296     List<Long> boundaries = new ArrayList<Long>();
297     boundaries.add(Long.MIN_VALUE);
298     if (!singleOutput) {
299       boundaries.add(window.startMillis());
300     }
301     return boundaries;
302   }
303 
304   /**
305    * Removes all store files with max timestamp older than (current - maxAge).
306    * @param storeFiles all store files to consider
307    * @param maxAge the age in milliseconds when a store file stops participating in compaction.
308    * @return a list of storeFiles with the store file older than maxAge excluded
309    */
310   private static Iterable<StoreFile> filterOldStoreFiles(List<StoreFile> storeFiles,
311     final long cutoff) {
312     return Iterables.filter(storeFiles, new Predicate<StoreFile>() {
313       @Override
314       public boolean apply(StoreFile storeFile) {
315         // Known findbugs issue to guava. SuppressWarning or Nonnull annotation don't work.
316         if (storeFile == null) {
317           return false;
318         }
319         Long maxTimestamp = storeFile.getMaximumTimestamp();
320         return maxTimestamp == null ? true : maxTimestamp >= cutoff;
321       }
322     });
323   }
324 
325   private static Window getIncomingWindow(long now, long baseWindowMillis) {
326     return new Window(baseWindowMillis, now / baseWindowMillis);
327   }
328 
329   private static long getOldestToCompact(long maxAgeMillis, long now) {
330     try {
331       return LongMath.checkedSubtract(now, maxAgeMillis);
332     } catch (ArithmeticException ae) {
333       LOG.warn("Value for " + CompactionConfiguration.MAX_AGE_MILLIS_KEY + ": " + maxAgeMillis
334         + ". All the files will be eligible for minor compaction.");
335       return Long.MIN_VALUE;
336     }
337   }
338 
339   /**
340    * This is the class we use to partition from epoch time to now into tiers of exponential sizes of
341    * windows.
342    */
343   private static final class Window {
344     /**
345      * How big a range of timestamps fit inside the window in milliseconds.
346      */
347     private final long windowMillis;
348 
349     /**
350      * A timestamp t is within the window iff t / size == divPosition.
351      */
352     private final long divPosition;
353 
354     private Window(long baseWindowMillis, long divPosition) {
355       windowMillis = baseWindowMillis;
356       this.divPosition = divPosition;
357     }
358 
359     /**
360      * Compares the window to a timestamp.
361      * @param timestamp the timestamp to compare.
362      * @return a negative integer, zero, or a positive integer as the window lies before, covering,
363      *         or after than the timestamp.
364      */
365     public int compareToTimestamp(long timestamp) {
366       if (timestamp < 0) {
367         try {
368           timestamp = LongMath.checkedSubtract(timestamp, windowMillis - 1);
369         } catch (ArithmeticException ae) {
370           timestamp = Long.MIN_VALUE;
371         }
372       }
373       long pos = timestamp / windowMillis;
374       return divPosition == pos ? 0 : divPosition < pos ? -1 : 1;
375     }
376 
377     /**
378      * Move to the new window of the same tier or of the next tier, which represents an earlier time
379      * span.
380      * @param windowsPerTier The number of contiguous windows that will have the same size. Windows
381      *          following those will be <code>tierBase</code> times as big.
382      * @return The next window
383      */
384     public Window nextWindow(int windowsPerTier, long oldestToCompact) {
385       // Don't promote to the next tier if there is not even 1 window at current tier
386       // or if the next window crosses the max age.
387       if (divPosition % windowsPerTier > 0 ||
388           startMillis() - windowMillis * windowsPerTier < oldestToCompact) {
389         return new Window(windowMillis, divPosition - 1);
390       } else {
391         return new Window(windowMillis * windowsPerTier, divPosition / windowsPerTier - 1);
392       }
393     }
394 
395     /**
396      * Inclusive lower bound
397      */
398     public long startMillis() {
399       try {
400         return LongMath.checkedMultiply(windowMillis, divPosition);
401       } catch (ArithmeticException ae) {
402         return Long.MIN_VALUE;
403       }
404     }
405 
406     /**
407      * Exclusive upper bound
408      */
409     public long endMillis() {
410       try {
411         return LongMath.checkedMultiply(windowMillis, (divPosition + 1));
412       } catch (ArithmeticException ae) {
413         return Long.MAX_VALUE;
414       }
415     }
416 
417     @Override
418     public String toString() {
419       return "[" + startMillis() + ", " + endMillis() + ")";
420     }
421   }
422 }