View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.text.SimpleDateFormat;
22  import java.util.Date;
23  import java.util.Map;
24  import java.util.concurrent.ConcurrentHashMap;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.ScheduledChore;
31  import org.apache.hadoop.hbase.Stoppable;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
34  
35  import com.google.common.annotations.VisibleForTesting;
36  
37  /**
38   * Implementation of nonce manager that stores nonces in a hash map and cleans them up after
39   * some time; if nonce group/client ID is supplied, nonces are stored by client ID.
40   */
41  @InterfaceAudience.Private
42  public class ServerNonceManager {
43    public static final String HASH_NONCE_GRACE_PERIOD_KEY = "hbase.server.hashNonce.gracePeriod";
44    private static final Log LOG = LogFactory.getLog(ServerNonceManager.class);
45  
46    /** The time to wait in an extremely unlikely case of a conflict with a running op.
47     * Only here so that tests could override it and not wait. */
48    private int conflictWaitIterationMs = 30000;
49  
50    private static final SimpleDateFormat tsFormat = new SimpleDateFormat("HH:mm:ss.SSS");
51  
52    // This object is used to synchronize on in case of collisions, and for cleanup.
53    private static class OperationContext {
54      static final int DONT_PROCEED = 0;
55      static final int PROCEED = 1;
56      static final int WAIT = 2;
57  
58      // 0..1 - state, 2..2 - whether anyone is waiting, 3.. - ts of last activity
59      private long data = 0;
60      private static final long STATE_BITS = 3;
61      private static final long WAITING_BIT = 4;
62      private static final long ALL_FLAG_BITS = WAITING_BIT | STATE_BITS;
63  
64      @Override
65      public String toString() {
66        return "[state " + getState() + ", hasWait " + hasWait() + ", activity "
67            + tsFormat.format(new Date(getActivityTime())) + "]";
68      }
69  
70      public OperationContext() {
71        setState(WAIT);
72        reportActivity();
73      }
74  
75      public void setState(int state) {
76        this.data = (this.data & ~STATE_BITS) | state;
77      }
78  
79      public int getState() {
80        return (int)(this.data & STATE_BITS);
81      }
82  
83      public void setHasWait() {
84        this.data = this.data | WAITING_BIT;
85      }
86  
87      public boolean hasWait() {
88        return (this.data & WAITING_BIT) == WAITING_BIT;
89      }
90  
91      public void reportActivity() {
92        long now = EnvironmentEdgeManager.currentTime();
93        this.data = (this.data & ALL_FLAG_BITS) | (now << 3);
94      }
95  
96      public boolean isExpired(long minRelevantTime) {
97        return getActivityTime() < (minRelevantTime & (~0l >>> 3));
98      }
99  
100     private long getActivityTime() {
101       return this.data >>> 3;
102     }
103   }
104 
105   /**
106    * This implementation is not smart and just treats nonce group and nonce as random bits.
107    */
108   // TODO: we could use pure byte arrays, but then we wouldn't be able to use hash map.
109   private static class NonceKey {
110     private long group;
111     private long nonce;
112 
113     public NonceKey(long group, long nonce) {
114       assert nonce != HConstants.NO_NONCE;
115       this.group = group;
116       this.nonce = nonce;
117     }
118 
119     @Override
120     public boolean equals(Object obj) {
121       if (obj == null || !(obj instanceof NonceKey)) return false;
122       NonceKey nk = ((NonceKey)obj);
123       return this.nonce == nk.nonce && this.group == nk.group;
124     }
125 
126     @Override
127     public int hashCode() {
128       return (int)((group >> 32) ^ group ^ (nonce >> 32) ^ nonce);
129     }
130 
131     @Override
132     public String toString() {
133       return "[" + group + ":" + nonce + "]";
134     }
135   }
136 
137   /**
138    * Nonces.
139    * Approximate overhead per nonce: 64 bytes from hashmap, 32 from two objects (k/v),
140    * NK: 16 bytes (2 longs), OC: 8 bytes (1 long) - so, 120 bytes.
141    * With 30min expiration time, 5k increments/appends per sec., we'd use approximately 1Gb,
142    * which is a realistic worst case. If it's much worse, we could use some sort of memory
143    * limit and cleanup.
144    */
145   private ConcurrentHashMap<NonceKey, OperationContext> nonces =
146       new ConcurrentHashMap<NonceKey, OperationContext>();
147 
148   private int deleteNonceGracePeriod;
149 
150   public ServerNonceManager(Configuration conf) {
151     // Default - 30 minutes.
152     deleteNonceGracePeriod = conf.getInt(HASH_NONCE_GRACE_PERIOD_KEY, 30 * 60 * 1000);
153     if (deleteNonceGracePeriod < 60 * 1000) {
154       LOG.warn("Nonce grace period " + deleteNonceGracePeriod
155           + " is less than a minute; might be too small to be useful");
156     }
157   }
158 
159   @VisibleForTesting
160   public void setConflictWaitIterationMs(int conflictWaitIterationMs) {
161     this.conflictWaitIterationMs = conflictWaitIterationMs;
162   }
163 
164   /**
165    * Starts the operation if operation with such nonce has not already succeeded. If the
166    * operation is in progress, waits for it to end and checks whether it has succeeded.
167    * @param group Nonce group.
168    * @param nonce Nonce.
169    * @param stoppable Stoppable that terminates waiting (if any) when the server is stopped.
170    * @return true if the operation has not already succeeded and can proceed; false otherwise.
171    */
172   public boolean startOperation(long group, long nonce, Stoppable stoppable)
173       throws InterruptedException {
174     if (nonce == HConstants.NO_NONCE) return true;
175     NonceKey nk = new NonceKey(group, nonce);
176     OperationContext ctx = new OperationContext();
177     while (true) {
178       OperationContext oldResult = nonces.putIfAbsent(nk, ctx);
179       if (oldResult == null) return true;
180 
181       // Collision with some operation - should be extremely rare.
182       synchronized (oldResult) {
183         int oldState = oldResult.getState();
184         LOG.debug("Conflict detected by nonce: " + nk + ", " + oldResult);
185         if (oldState != OperationContext.WAIT) {
186           return oldState == OperationContext.PROCEED; // operation ended
187         }
188         oldResult.setHasWait();
189         oldResult.wait(this.conflictWaitIterationMs); // operation is still active... wait and loop
190         if (stoppable.isStopped()) {
191           throw new InterruptedException("Server stopped");
192         }
193       }
194     }
195   }
196 
197   /**
198    * Ends the operation started by startOperation.
199    * @param group Nonce group.
200    * @param nonce Nonce.
201    * @param success Whether the operation has succeeded.
202    */
203   public void endOperation(long group, long nonce, boolean success) {
204     if (nonce == HConstants.NO_NONCE) return;
205     NonceKey nk = new NonceKey(group, nonce);
206     OperationContext newResult = nonces.get(nk);
207     assert newResult != null;
208     synchronized (newResult) {
209       assert newResult.getState() == OperationContext.WAIT;
210       // If we failed, other retries can proceed.
211       newResult.setState(success ? OperationContext.DONT_PROCEED : OperationContext.PROCEED);
212       if (success) {
213         newResult.reportActivity(); // Set time to use for cleanup.
214       } else {
215         OperationContext val = nonces.remove(nk);
216         assert val == newResult;
217       }
218       if (newResult.hasWait()) {
219         LOG.debug("Conflict with running op ended: " + nk + ", " + newResult);
220         newResult.notifyAll();
221       }
222     }
223   }
224 
225   /**
226    * Reports the operation from WAL during replay.
227    * @param group Nonce group.
228    * @param nonce Nonce.
229    * @param writeTime Entry write time, used to ignore entries that are too old.
230    */
231   public void reportOperationFromWal(long group, long nonce, long writeTime) {
232     if (nonce == HConstants.NO_NONCE) return;
233     // Give the write time some slack in case the clocks are not synchronized.
234     long now = EnvironmentEdgeManager.currentTime();
235     if (now > writeTime + (deleteNonceGracePeriod * 1.5)) return;
236     OperationContext newResult = new OperationContext();
237     newResult.setState(OperationContext.DONT_PROCEED);
238     NonceKey nk = new NonceKey(group, nonce);
239     OperationContext oldResult = nonces.putIfAbsent(nk, newResult);
240     if (oldResult != null) {
241       // Some schemes can have collisions (for example, expiring hashes), so just log it.
242       // We have no idea about the semantics here, so this is the least of many evils.
243       LOG.warn("Nonce collision during WAL recovery: " + nk
244           + ", " + oldResult + " with " + newResult);
245     }
246   }
247 
248   /**
249    * Creates a scheduled chore that is used to clean up old nonces.
250    * @param stoppable Stoppable for the chore.
251    * @return ScheduledChore; the scheduled chore is not started.
252    */
253   public ScheduledChore createCleanupScheduledChore(Stoppable stoppable) {
254     // By default, it will run every 6 minutes (30 / 5).
255     return new ScheduledChore("nonceCleaner", stoppable, deleteNonceGracePeriod / 5) {
256       @Override
257       protected void chore() {
258         cleanUpOldNonces();
259       }
260     };
261   }
262 
263   private void cleanUpOldNonces() {
264     long cutoff = EnvironmentEdgeManager.currentTime() - deleteNonceGracePeriod;
265     for (Map.Entry<NonceKey, OperationContext> entry : nonces.entrySet()) {
266       OperationContext oc = entry.getValue();
267       if (!oc.isExpired(cutoff)) continue;
268       synchronized (oc) {
269         if (oc.getState() == OperationContext.WAIT || !oc.isExpired(cutoff)) continue;
270         nonces.remove(entry.getKey());
271       }
272     }
273   }
274 }