View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.ipc;
20  
21  import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION;
22  
23  import java.io.ByteArrayInputStream;
24  import java.io.ByteArrayOutputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.net.BindException;
28  import java.net.InetAddress;
29  import java.net.InetSocketAddress;
30  import java.net.ServerSocket;
31  import java.net.Socket;
32  import java.net.SocketException;
33  import java.net.UnknownHostException;
34  import java.nio.ByteBuffer;
35  import java.nio.channels.CancelledKeyException;
36  import java.nio.channels.Channels;
37  import java.nio.channels.ClosedChannelException;
38  import java.nio.channels.GatheringByteChannel;
39  import java.nio.channels.ReadableByteChannel;
40  import java.nio.channels.SelectionKey;
41  import java.nio.channels.Selector;
42  import java.nio.channels.ServerSocketChannel;
43  import java.nio.channels.SocketChannel;
44  import java.nio.channels.WritableByteChannel;
45  import java.security.PrivilegedExceptionAction;
46  import java.util.ArrayList;
47  import java.util.Arrays;
48  import java.util.Collections;
49  import java.util.HashMap;
50  import java.util.Iterator;
51  import java.util.LinkedList;
52  import java.util.List;
53  import java.util.Map;
54  import java.util.Random;
55  import java.util.Set;
56  import java.util.concurrent.ConcurrentHashMap;
57  import java.util.concurrent.ConcurrentLinkedDeque;
58  import java.util.concurrent.ExecutorService;
59  import java.util.concurrent.Executors;
60  import java.util.concurrent.atomic.AtomicInteger;
61  import java.util.concurrent.locks.Lock;
62  import java.util.concurrent.locks.ReentrantLock;
63  
64  import javax.security.sasl.Sasl;
65  import javax.security.sasl.SaslException;
66  import javax.security.sasl.SaslServer;
67  
68  import org.apache.commons.logging.Log;
69  import org.apache.commons.logging.LogFactory;
70  import org.apache.hadoop.hbase.classification.InterfaceAudience;
71  import org.apache.hadoop.hbase.classification.InterfaceStability;
72  import org.apache.hadoop.conf.Configuration;
73  import org.apache.hadoop.hbase.CellScanner;
74  import org.apache.hadoop.hbase.DoNotRetryIOException;
75  import org.apache.hadoop.hbase.HBaseIOException;
76  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
77  import org.apache.hadoop.hbase.HConstants;
78  import org.apache.hadoop.hbase.HRegionInfo;
79  import org.apache.hadoop.hbase.Server;
80  import org.apache.hadoop.hbase.TableName;
81  import org.apache.hadoop.hbase.client.Operation;
82  import org.apache.hadoop.hbase.codec.Codec;
83  import org.apache.hadoop.hbase.exceptions.RegionMovedException;
84  import org.apache.hadoop.hbase.io.ByteBufferOutputStream;
85  import org.apache.hadoop.hbase.io.BoundedByteBufferPool;
86  import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
87  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
88  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
89  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.CellBlockMeta;
90  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ConnectionHeader;
91  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ExceptionResponse;
92  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
93  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ResponseHeader;
94  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.UserInformation;
95  import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.VersionInfo;
96  import org.apache.hadoop.hbase.regionserver.HRegionServer;
97  import org.apache.hadoop.hbase.security.AccessDeniedException;
98  import org.apache.hadoop.hbase.security.AuthMethod;
99  import org.apache.hadoop.hbase.security.HBasePolicyProvider;
100 import org.apache.hadoop.hbase.security.HBaseSaslRpcServer;
101 import org.apache.hadoop.hbase.security.User;
102 import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.SaslDigestCallbackHandler;
103 import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.SaslGssCallbackHandler;
104 import org.apache.hadoop.hbase.security.SaslStatus;
105 import org.apache.hadoop.hbase.security.SaslUtil;
106 import org.apache.hadoop.hbase.security.UserProvider;
107 import org.apache.hadoop.hbase.security.token.AuthenticationTokenSecretManager;
108 import org.apache.hadoop.hbase.util.Bytes;
109 import org.apache.hadoop.hbase.util.Counter;
110 import org.apache.hadoop.hbase.util.Pair;
111 import org.apache.hadoop.io.BytesWritable;
112 import org.apache.hadoop.io.IntWritable;
113 import org.apache.hadoop.io.Writable;
114 import org.apache.hadoop.io.WritableUtils;
115 import org.apache.hadoop.io.compress.CompressionCodec;
116 import org.apache.hadoop.security.UserGroupInformation;
117 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
118 import org.apache.hadoop.security.authorize.AuthorizationException;
119 import org.apache.hadoop.security.authorize.PolicyProvider;
120 import org.apache.hadoop.security.authorize.ProxyUsers;
121 import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
122 import org.apache.hadoop.security.token.SecretManager;
123 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
124 import org.apache.hadoop.security.token.TokenIdentifier;
125 import org.apache.hadoop.util.StringUtils;
126 import org.codehaus.jackson.map.ObjectMapper;
127 import org.apache.htrace.TraceInfo;
128 
129 import com.google.common.util.concurrent.ThreadFactoryBuilder;
130 import com.google.protobuf.BlockingService;
131 import com.google.protobuf.CodedInputStream;
132 import com.google.protobuf.Descriptors.MethodDescriptor;
133 import com.google.protobuf.Message;
134 import com.google.protobuf.ServiceException;
135 import com.google.protobuf.TextFormat;
136 
137 /**
138  * An RPC server that hosts protobuf described Services.
139  *
140  * An RpcServer instance has a Listener that hosts the socket.  Listener has fixed number
141  * of Readers in an ExecutorPool, 10 by default.  The Listener does an accept and then
142  * round robin a Reader is chosen to do the read.  The reader is registered on Selector.  Read does
143  * total read off the channel and the parse from which it makes a Call.  The call is wrapped in a
144  * CallRunner and passed to the scheduler to be run.  Reader goes back to see if more to be done
145  * and loops till done.
146  *
147  * <p>Scheduler can be variously implemented but default simple scheduler has handlers to which it
148  * has given the queues into which calls (i.e. CallRunner instances) are inserted.  Handlers run
149  * taking from the queue.  They run the CallRunner#run method on each item gotten from queue
150  * and keep taking while the server is up.
151  *
152  * CallRunner#run executes the call.  When done, asks the included Call to put itself on new
153  * queue for Responder to pull from and return result to client.
154  *
155  * @see RpcClientImpl
156  */
157 @InterfaceAudience.LimitedPrivate({HBaseInterfaceAudience.COPROC, HBaseInterfaceAudience.PHOENIX})
158 @InterfaceStability.Evolving
159 public class RpcServer implements RpcServerInterface {
160   public static final Log LOG = LogFactory.getLog(RpcServer.class);
161   private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION
162       = new CallQueueTooBigException();
163 
164   private final boolean authorize;
165   private boolean isSecurityEnabled;
166 
167   public static final byte CURRENT_VERSION = 0;
168 
169   /**
170    * How many calls/handler are allowed in the queue.
171    */
172   static final int DEFAULT_MAX_CALLQUEUE_LENGTH_PER_HANDLER = 10;
173 
174   /**
175    * The maximum size that we can hold in the RPC queue
176    */
177   private static final int DEFAULT_MAX_CALLQUEUE_SIZE = 1024 * 1024 * 1024;
178 
179   private static final String WARN_DELAYED_CALLS = "hbase.ipc.warn.delayedrpc.number";
180 
181   private static final int DEFAULT_WARN_DELAYED_CALLS = 1000;
182 
183   private final int warnDelayedCalls;
184 
185   private AtomicInteger delayedCalls;
186   private final IPCUtil ipcUtil;
187 
188   private static final String AUTH_FAILED_FOR = "Auth failed for ";
189   private static final String AUTH_SUCCESSFUL_FOR = "Auth successful for ";
190   private static final Log AUDITLOG = LogFactory.getLog("SecurityLogger." +
191     Server.class.getName());
192   protected SecretManager<TokenIdentifier> secretManager;
193   protected ServiceAuthorizationManager authManager;
194 
  /** This is set to the Call object before a Handler invokes an RPC and is reset
   * after the call returns.
   */
198   protected static final ThreadLocal<Call> CurCall = new ThreadLocal<Call>();
199 
200   /** Keeps MonitoredRPCHandler per handler thread. */
201   static final ThreadLocal<MonitoredRPCHandler> MONITORED_RPC
202       = new ThreadLocal<MonitoredRPCHandler>();
203 
204   protected final InetSocketAddress bindAddress;
205   protected int port;                             // port we listen on
206   private int readThreads;                        // number of read threads
207   protected int maxIdleTime;                      // the maximum idle time after
208                                                   // which a client may be
209                                                   // disconnected
210   protected int thresholdIdleConnections;         // the number of idle
211                                                   // connections after which we
212                                                   // will start cleaning up idle
213                                                   // connections
214   int maxConnectionsToNuke;                       // the max number of
215                                                   // connections to nuke
216                                                   // during a cleanup
217 
218   protected MetricsHBaseServer metrics;
219 
220   protected final Configuration conf;
221 
222   private int maxQueueSize;
223   protected int socketSendBufferSize;
224   protected final boolean tcpNoDelay;   // if T then disable Nagle's Algorithm
225   protected final boolean tcpKeepAlive; // if T then use keepalives
226   protected final long purgeTimeout;    // in milliseconds
227 
228   /**
229    * This flag is used to indicate to sub threads when they should go down.  When we call
230    * {@link #start()}, all threads started will consult this flag on whether they should
231    * keep going.  It is set to false when {@link #stop()} is called.
232    */
233   volatile boolean running = true;
234 
235   /**
236    * This flag is set to true after all threads are up and 'running' and the server is then opened
237    * for business by the call to {@link #start()}.
238    */
239   volatile boolean started = false;
240 
  /**
   * This is a running total of the sizes of all outstanding calls.
   */
244   protected final Counter callQueueSize = new Counter();
245 
246   protected final List<Connection> connectionList =
247     Collections.synchronizedList(new LinkedList<Connection>());
248   //maintain a list
249   //of client connections
250   private Listener listener = null;
251   protected Responder responder = null;
252   protected AuthenticationTokenSecretManager authTokenSecretMgr = null;
253   protected int numConnections = 0;
254 
255   protected HBaseRPCErrorHandler errorHandler = null;
256 
257   private static final String WARN_RESPONSE_TIME = "hbase.ipc.warn.response.time";
258   private static final String WARN_RESPONSE_SIZE = "hbase.ipc.warn.response.size";
259 
260   /** Default value for above params */
261   private static final int DEFAULT_WARN_RESPONSE_TIME = 10000; // milliseconds
262   private static final int DEFAULT_WARN_RESPONSE_SIZE = 100 * 1024 * 1024;
263 
264   private static final ObjectMapper MAPPER = new ObjectMapper();
265 
266   private final int warnResponseTime;
267   private final int warnResponseSize;
268   private final Server server;
269   private final List<BlockingServiceAndInterface> services;
270 
271   private final RpcScheduler scheduler;
272 
273   private UserProvider userProvider;
274 
275   private final BoundedByteBufferPool reservoir;
276 
277 
278   /**
279    * Datastructure that holds all necessary to a method invocation and then afterward, carries
280    * the result.
281    */
282   @InterfaceAudience.LimitedPrivate({HBaseInterfaceAudience.COPROC, HBaseInterfaceAudience.PHOENIX})
283   @InterfaceStability.Evolving
284   public class Call implements RpcCallContext {
285     protected int id;                             // the client's call id
286     protected BlockingService service;
287     protected MethodDescriptor md;
288     protected RequestHeader header;
289     protected Message param;                      // the parameter passed
290     // Optional cell data passed outside of protobufs.
291     protected CellScanner cellScanner;
292     protected Connection connection;              // connection to client
293     protected long timestamp;      // the time received when response is null
294                                    // the time served when response is not null
295     /**
296      * Chain of buffers to send as response.
297      */
298     protected BufferChain response;
299     protected boolean delayResponse;
300     protected Responder responder;
301     protected boolean delayReturnValue;           // if the return value should be
302                                                   // set at call completion
303     protected long size;                          // size of current call
304     protected boolean isError;
305     protected TraceInfo tinfo;
306     private ByteBuffer cellBlock = null;
307 
308     private User user;
309     private InetAddress remoteAddress;
310 
311     Call(int id, final BlockingService service, final MethodDescriptor md, RequestHeader header,
312          Message param, CellScanner cellScanner, Connection connection, Responder responder,
313          long size, TraceInfo tinfo, final InetAddress remoteAddress) {
314       this.id = id;
315       this.service = service;
316       this.md = md;
317       this.header = header;
318       this.param = param;
319       this.cellScanner = cellScanner;
320       this.connection = connection;
321       this.timestamp = System.currentTimeMillis();
322       this.response = null;
323       this.delayResponse = false;
324       this.responder = responder;
325       this.isError = false;
326       this.size = size;
327       this.tinfo = tinfo;
328       this.user = connection.user == null? null: userProvider.create(connection.user);
329       this.remoteAddress = remoteAddress;
330     }
331 
332     /**
333      * Call is done. Execution happened and we returned results to client. It is now safe to
334      * cleanup.
335      */
336     void done() {
337       if (this.cellBlock != null) {
338         // Return buffer to reservoir now we are done with it.
339         reservoir.putBuffer(this.cellBlock);
340         this.cellBlock = null;
341       }
342       this.connection.decRpcCount();  // Say that we're done with this call.
343     }
344 
345     @Override
346     public String toString() {
347       return toShortString() + " param: " +
348         (this.param != null? ProtobufUtil.getShortTextFormat(this.param): "") +
349         " connection: " + connection.toString();
350     }
351 
352     protected RequestHeader getHeader() {
353       return this.header;
354     }
355 
356     public boolean hasPriority() {
357       return this.header.hasPriority();
358     }
359 
360     public int getPriority() {
361       return this.header.getPriority();
362     }
363 
364     /*
365      * Short string representation without param info because param itself could be huge depends on
366      * the payload of a command
367      */
368     String toShortString() {
369       String serviceName = this.connection.service != null ?
370           this.connection.service.getDescriptorForType().getName() : "null";
371       return "callId: " + this.id + " service: " + serviceName +
372           " methodName: " + ((this.md != null) ? this.md.getName() : "n/a") +
373           " size: " + StringUtils.TraditionalBinaryPrefix.long2String(this.size, "", 1) +
374           " connection: " + connection.toString();
375     }
376 
377     String toTraceString() {
378       String serviceName = this.connection.service != null ?
379                            this.connection.service.getDescriptorForType().getName() : "";
380       String methodName = (this.md != null) ? this.md.getName() : "";
381       return serviceName + "." + methodName;
382     }
383 
384     protected synchronized void setSaslTokenResponse(ByteBuffer response) {
385       this.response = new BufferChain(response);
386     }
387 
388     protected synchronized void setResponse(Object m, final CellScanner cells,
389         Throwable t, String errorMsg) {
390       if (this.isError) return;
391       if (t != null) this.isError = true;
392       BufferChain bc = null;
393       try {
394         ResponseHeader.Builder headerBuilder = ResponseHeader.newBuilder();
395         // Presume it a pb Message.  Could be null.
396         Message result = (Message)m;
397         // Call id.
398         headerBuilder.setCallId(this.id);
399         if (t != null) {
400           ExceptionResponse.Builder exceptionBuilder = ExceptionResponse.newBuilder();
401           exceptionBuilder.setExceptionClassName(t.getClass().getName());
402           exceptionBuilder.setStackTrace(errorMsg);
403           exceptionBuilder.setDoNotRetry(t instanceof DoNotRetryIOException);
404           if (t instanceof RegionMovedException) {
405             // Special casing for this exception.  This is only one carrying a payload.
406             // Do this instead of build a generic system for allowing exceptions carry
407             // any kind of payload.
408             RegionMovedException rme = (RegionMovedException)t;
409             exceptionBuilder.setHostname(rme.getHostname());
410             exceptionBuilder.setPort(rme.getPort());
411           }
412           // Set the exception as the result of the method invocation.
413           headerBuilder.setException(exceptionBuilder.build());
414         }
415         // Pass reservoir to buildCellBlock. Keep reference to returne so can add it back to the
416         // reservoir when finished. This is hacky and the hack is not contained but benefits are
417         // high when we can avoid a big buffer allocation on each rpc.
418         this.cellBlock = ipcUtil.buildCellBlock(this.connection.codec,
419           this.connection.compressionCodec, cells, reservoir);
420         if (this.cellBlock != null) {
421           CellBlockMeta.Builder cellBlockBuilder = CellBlockMeta.newBuilder();
422           // Presumes the cellBlock bytebuffer has been flipped so limit has total size in it.
423           cellBlockBuilder.setLength(this.cellBlock.limit());
424           headerBuilder.setCellBlockMeta(cellBlockBuilder.build());
425         }
426         Message header = headerBuilder.build();
427 
428         // Organize the response as a set of bytebuffers rather than collect it all together inside
429         // one big byte array; save on allocations.
430         ByteBuffer bbHeader = IPCUtil.getDelimitedMessageAsByteBuffer(header);
431         ByteBuffer bbResult = IPCUtil.getDelimitedMessageAsByteBuffer(result);
432         int totalSize = bbHeader.capacity() + (bbResult == null? 0: bbResult.limit()) +
433           (this.cellBlock == null? 0: this.cellBlock.limit());
434         ByteBuffer bbTotalSize = ByteBuffer.wrap(Bytes.toBytes(totalSize));
435         bc = new BufferChain(bbTotalSize, bbHeader, bbResult, this.cellBlock);
436         if (connection.useWrap) {
437           bc = wrapWithSasl(bc);
438         }
439       } catch (IOException e) {
440         LOG.warn("Exception while creating response " + e);
441       }
442       this.response = bc;
443     }
444 
445     private BufferChain wrapWithSasl(BufferChain bc)
446         throws IOException {
447       if (!this.connection.useSasl) return bc;
448       // Looks like no way around this; saslserver wants a byte array.  I have to make it one.
449       // THIS IS A BIG UGLY COPY.
450       byte [] responseBytes = bc.getBytes();
451       byte [] token;
452       // synchronization may be needed since there can be multiple Handler
453       // threads using saslServer to wrap responses.
454       synchronized (connection.saslServer) {
455         token = connection.saslServer.wrap(responseBytes, 0, responseBytes.length);
456       }
457       if (LOG.isTraceEnabled()) {
458         LOG.trace("Adding saslServer wrapped token of size " + token.length
459             + " as call response.");
460       }
461 
462       ByteBuffer bbTokenLength = ByteBuffer.wrap(Bytes.toBytes(token.length));
463       ByteBuffer bbTokenBytes = ByteBuffer.wrap(token);
464       return new BufferChain(bbTokenLength, bbTokenBytes);
465     }
466 
467     @Override
468     public synchronized void endDelay(Object result) throws IOException {
469       assert this.delayResponse;
470       assert this.delayReturnValue || result == null;
471       this.delayResponse = false;
472       delayedCalls.decrementAndGet();
473       if (this.delayReturnValue) {
474         this.setResponse(result, null, null, null);
475       }
476       this.responder.doRespond(this);
477     }
478 
479     @Override
480     public synchronized void endDelay() throws IOException {
481       this.endDelay(null);
482     }
483 
484     @Override
485     public synchronized void startDelay(boolean delayReturnValue) {
486       assert !this.delayResponse;
487       this.delayResponse = true;
488       this.delayReturnValue = delayReturnValue;
489       int numDelayed = delayedCalls.incrementAndGet();
490       if (numDelayed > warnDelayedCalls) {
491         LOG.warn("Too many delayed calls: limit " + warnDelayedCalls + " current " + numDelayed);
492       }
493     }
494 
495     @Override
496     public synchronized void endDelayThrowing(Throwable t) throws IOException {
497       this.setResponse(null, null, t, StringUtils.stringifyException(t));
498       this.delayResponse = false;
499       this.sendResponseIfReady();
500     }
501 
502     @Override
503     public synchronized boolean isDelayed() {
504       return this.delayResponse;
505     }
506 
507     @Override
508     public synchronized boolean isReturnValueDelayed() {
509       return this.delayReturnValue;
510     }
511 
512     @Override
513     public boolean isClientCellBlockSupport() {
514       return this.connection != null && this.connection.codec != null;
515     }
516 
517     @Override
518     public long disconnectSince() {
519       if (!connection.channel.isOpen()) {
520         return System.currentTimeMillis() - timestamp;
521       } else {
522         return -1L;
523       }
524     }
525 
526     public long getSize() {
527       return this.size;
528     }
529 
530     /**
531      * If we have a response, and delay is not set, then respond
532      * immediately.  Otherwise, do not respond to client.  This is
533      * called by the RPC code in the context of the Handler thread.
534      */
535     public synchronized void sendResponseIfReady() throws IOException {
536       if (!this.delayResponse) {
537         this.responder.doRespond(this);
538       }
539     }
540 
541     public UserGroupInformation getRemoteUser() {
542       return connection.user;
543     }
544 
545     @Override
546     public User getRequestUser() {
547       return user;
548     }
549 
550     @Override
551     public String getRequestUserName() {
552       User user = getRequestUser();
553       return user == null? null: user.getShortName();
554     }
555 
556     @Override
557     public InetAddress getRemoteAddress() {
558       return remoteAddress;
559     }
560 
561     @Override
562     public VersionInfo getClientVersionInfo() {
563       return connection.getVersionInfo();
564     }
565   }
566 
567   /** Listens on the socket. Creates jobs for the handler threads*/
568   private class Listener extends Thread {
569 
570     private ServerSocketChannel acceptChannel = null; //the accept channel
571     private Selector selector = null; //the selector that we use for the server
572     private Reader[] readers = null;
573     private int currentReader = 0;
574     private Random rand = new Random();
575     private long lastCleanupRunTime = 0; //the last time when a cleanup connec-
576                                          //-tion (for idle connections) ran
577     private long cleanupInterval = 10000; //the minimum interval between
578                                           //two cleanup runs
579     private int backlogLength;
580 
581     private ExecutorService readPool;
582 
583     public Listener(final String name) throws IOException {
584       super(name);
585       backlogLength = conf.getInt("hbase.ipc.server.listen.queue.size", 128);
586       // Create a new server socket and set to non blocking mode
587       acceptChannel = ServerSocketChannel.open();
588       acceptChannel.configureBlocking(false);
589 
590       // Bind the server socket to the binding addrees (can be different from the default interface)
591       bind(acceptChannel.socket(), bindAddress, backlogLength);
592       port = acceptChannel.socket().getLocalPort(); //Could be an ephemeral port
593       // create a selector;
594       selector= Selector.open();
595 
596       readers = new Reader[readThreads];
597       readPool = Executors.newFixedThreadPool(readThreads,
598         new ThreadFactoryBuilder().setNameFormat(
599           "RpcServer.reader=%d,bindAddress=" + bindAddress.getHostName() +
600           ",port=" + port).setDaemon(true).build());
601       for (int i = 0; i < readThreads; ++i) {
602         Reader reader = new Reader();
603         readers[i] = reader;
604         readPool.execute(reader);
605       }
606       LOG.info(getName() + ": started " + readThreads + " reader(s).");
607 
608       // Register accepts on the server socket with the selector.
609       acceptChannel.register(selector, SelectionKey.OP_ACCEPT);
610       this.setName("RpcServer.listener,port=" + port);
611       this.setDaemon(true);
612     }
613 
614 
615     private class Reader implements Runnable {
616       private volatile boolean adding = false;
617       private final Selector readSelector;
618 
619       Reader() throws IOException {
620         this.readSelector = Selector.open();
621       }
622       @Override
623       public void run() {
624         try {
625           doRunLoop();
626         } finally {
627           try {
628             readSelector.close();
629           } catch (IOException ioe) {
630             LOG.error(getName() + ": error closing read selector in " + getName(), ioe);
631           }
632         }
633       }
634 
635       private synchronized void doRunLoop() {
636         while (running) {
637           try {
638             readSelector.select();
639             while (adding) {
640               this.wait(1000);
641             }
642 
643             Iterator<SelectionKey> iter = readSelector.selectedKeys().iterator();
644             while (iter.hasNext()) {
645               SelectionKey key = iter.next();
646               iter.remove();
647               if (key.isValid()) {
648                 if (key.isReadable()) {
649                   doRead(key);
650                 }
651               }
652             }
653           } catch (InterruptedException e) {
654             LOG.debug("Interrupted while sleeping");
655             return;
656           } catch (IOException ex) {
657             LOG.info(getName() + ": IOException in Reader", ex);
658           }
659         }
660       }
661 
662       /**
663        * This gets reader into the state that waits for the new channel
664        * to be registered with readSelector. If it was waiting in select()
665        * the thread will be woken up, otherwise whenever select() is called
666        * it will return even if there is nothing to read and wait
667        * in while(adding) for finishAdd call
668        */
669       public void startAdd() {
670         adding = true;
671         readSelector.wakeup();
672       }
673 
674       public synchronized SelectionKey registerChannel(SocketChannel channel)
675         throws IOException {
676         return channel.register(readSelector, SelectionKey.OP_READ);
677       }
678 
679       public synchronized void finishAdd() {
680         adding = false;
681         this.notify();
682       }
683     }
684 
685     /** cleanup connections from connectionList. Choose a random range
686      * to scan and also have a limit on the number of the connections
687      * that will be cleanedup per run. The criteria for cleanup is the time
688      * for which the connection was idle. If 'force' is true then all
689      * connections will be looked at for the cleanup.
690      * @param force all connections will be looked at for cleanup
691      */
692     private void cleanupConnections(boolean force) {
693       if (force || numConnections > thresholdIdleConnections) {
694         long currentTime = System.currentTimeMillis();
695         if (!force && (currentTime - lastCleanupRunTime) < cleanupInterval) {
696           return;
697         }
698         int start = 0;
699         int end = numConnections - 1;
700         if (!force) {
701           start = rand.nextInt() % numConnections;
702           end = rand.nextInt() % numConnections;
703           int temp;
704           if (end < start) {
705             temp = start;
706             start = end;
707             end = temp;
708           }
709         }
710         int i = start;
711         int numNuked = 0;
712         while (i <= end) {
713           Connection c;
714           synchronized (connectionList) {
715             try {
716               c = connectionList.get(i);
717             } catch (Exception e) {return;}
718           }
719           if (c.timedOut(currentTime)) {
720             if (LOG.isDebugEnabled())
721               LOG.debug(getName() + ": disconnecting client " + c.getHostAddress());
722             closeConnection(c);
723             numNuked++;
724             end--;
725             //noinspection UnusedAssignment
726             c = null;
727             if (!force && numNuked == maxConnectionsToNuke) break;
728           }
729           else i++;
730         }
731         lastCleanupRunTime = System.currentTimeMillis();
732       }
733     }
734 
735     @Override
736     public void run() {
737       LOG.info(getName() + ": starting");
738       while (running) {
739         SelectionKey key = null;
740         try {
741           selector.select(); // FindBugs IS2_INCONSISTENT_SYNC
742           Iterator<SelectionKey> iter = selector.selectedKeys().iterator();
743           while (iter.hasNext()) {
744             key = iter.next();
745             iter.remove();
746             try {
747               if (key.isValid()) {
748                 if (key.isAcceptable())
749                   doAccept(key);
750               }
751             } catch (IOException ignored) {
752               if (LOG.isTraceEnabled()) LOG.trace("ignored", ignored);
753             }
754             key = null;
755           }
756         } catch (OutOfMemoryError e) {
757           if (errorHandler != null) {
758             if (errorHandler.checkOOME(e)) {
759               LOG.info(getName() + ": exiting on OutOfMemoryError");
760               closeCurrentConnection(key, e);
761               cleanupConnections(true);
762               return;
763             }
764           } else {
765             // we can run out of memory if we have too many threads
766             // log the event and sleep for a minute and give
767             // some thread(s) a chance to finish
768             LOG.warn(getName() + ": OutOfMemoryError in server select", e);
769             closeCurrentConnection(key, e);
770             cleanupConnections(true);
771             try {
772               Thread.sleep(60000);
773             } catch (InterruptedException ex) {
774               LOG.debug("Interrupted while sleeping");
775               return;
776             }
777           }
778         } catch (Exception e) {
779           closeCurrentConnection(key, e);
780         }
781         cleanupConnections(false);
782       }
783 
784       LOG.info(getName() + ": stopping");
785 
786       synchronized (this) {
787         try {
788           acceptChannel.close();
789           selector.close();
790         } catch (IOException ignored) {
791           if (LOG.isTraceEnabled()) LOG.trace("ignored", ignored);
792         }
793 
794         selector= null;
795         acceptChannel= null;
796 
797         // clean up all connections
798         while (!connectionList.isEmpty()) {
799           closeConnection(connectionList.remove(0));
800         }
801       }
802     }
803 
804     private void closeCurrentConnection(SelectionKey key, Throwable e) {
805       if (key != null) {
806         Connection c = (Connection)key.attachment();
807         if (c != null) {
808           if (LOG.isDebugEnabled()) {
809             LOG.debug(getName() + ": disconnecting client " + c.getHostAddress() +
810                 (e != null ? " on error " + e.getMessage() : ""));
811           }
812           closeConnection(c);
813           key.attach(null);
814         }
815       }
816     }
817 
818     InetSocketAddress getAddress() {
819       return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress();
820     }
821 
822     void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
823       Connection c;
824       ServerSocketChannel server = (ServerSocketChannel) key.channel();
825 
826       SocketChannel channel;
827       while ((channel = server.accept()) != null) {
828         try {
829           channel.configureBlocking(false);
830           channel.socket().setTcpNoDelay(tcpNoDelay);
831           channel.socket().setKeepAlive(tcpKeepAlive);
832         } catch (IOException ioe) {
833           channel.close();
834           throw ioe;
835         }
836 
837         Reader reader = getReader();
838         try {
839           reader.startAdd();
840           SelectionKey readKey = reader.registerChannel(channel);
841           c = getConnection(channel, System.currentTimeMillis());
842           readKey.attach(c);
843           synchronized (connectionList) {
844             connectionList.add(numConnections, c);
845             numConnections++;
846           }
847           if (LOG.isDebugEnabled())
848             LOG.debug(getName() + ": connection from " + c.toString() +
849                 "; # active connections: " + numConnections);
850         } finally {
851           reader.finishAdd();
852         }
853       }
854     }
855 
856     void doRead(SelectionKey key) throws InterruptedException {
857       int count;
858       Connection c = (Connection) key.attachment();
859       if (c == null) {
860         return;
861       }
862       c.setLastContact(System.currentTimeMillis());
863       try {
864         count = c.readAndProcess();
865 
866         if (count > 0) {
867           c.setLastContact(System.currentTimeMillis());
868         }
869 
870       } catch (InterruptedException ieo) {
871         throw ieo;
872       } catch (Exception e) {
873         if (LOG.isDebugEnabled()) {
874           LOG.debug(getName() + ": Caught exception while reading:" + e.getMessage());
875         }
876         count = -1; //so that the (count < 0) block is executed
877       }
878       if (count < 0) {
879         if (LOG.isDebugEnabled()) {
880           LOG.debug(getName() + ": DISCONNECTING client " + c.toString() +
881               " because read count=" + count +
882               ". Number of active connections: " + numConnections);
883         }
884         closeConnection(c);
885       }
886     }
887 
888     synchronized void doStop() {
889       if (selector != null) {
890         selector.wakeup();
891         Thread.yield();
892       }
893       if (acceptChannel != null) {
894         try {
895           acceptChannel.socket().close();
896         } catch (IOException e) {
897           LOG.info(getName() + ": exception in closing listener socket. " + e);
898         }
899       }
900       readPool.shutdownNow();
901     }
902 
903     // The method that will return the next reader to work with
904     // Simplistic implementation of round robin for now
905     Reader getReader() {
906       currentReader = (currentReader + 1) % readers.length;
907       return readers[currentReader];
908     }
909   }
910 
911   // Sends responses of RPC back to clients.
912   protected class Responder extends Thread {
913     private final Selector writeSelector;
914     private final Set<Connection> writingCons =
915         Collections.newSetFromMap(new ConcurrentHashMap<Connection, Boolean>());
916 
917     Responder() throws IOException {
918       this.setName("RpcServer.responder");
919       this.setDaemon(true);
920       writeSelector = Selector.open(); // create a selector
921     }
922 
923     @Override
924     public void run() {
925       LOG.info(getName() + ": starting");
926       try {
927         doRunLoop();
928       } finally {
929         LOG.info(getName() + ": stopping");
930         try {
931           writeSelector.close();
932         } catch (IOException ioe) {
933           LOG.error(getName() + ": couldn't close write selector", ioe);
934         }
935       }
936     }
937 
938     /**
939      * Take the list of the connections that want to write, and register them
940      * in the selector.
941      */
942     private void registerWrites() {
943       Iterator<Connection> it = writingCons.iterator();
944       while (it.hasNext()) {
945         Connection c = it.next();
946         it.remove();
947         SelectionKey sk = c.channel.keyFor(writeSelector);
948         try {
949           if (sk == null) {
950             try {
951               c.channel.register(writeSelector, SelectionKey.OP_WRITE, c);
952             } catch (ClosedChannelException e) {
953               // ignore: the client went away.
954               if (LOG.isTraceEnabled()) LOG.trace("ignored", e);
955             }
956           } else {
957             sk.interestOps(SelectionKey.OP_WRITE);
958           }
959         } catch (CancelledKeyException e) {
960           // ignore: the client went away.
961           if (LOG.isTraceEnabled()) LOG.trace("ignored", e);
962         }
963       }
964     }
965 
966     /**
967      * Add a connection to the list that want to write,
968      */
969     public void registerForWrite(Connection c) {
970       if (writingCons.add(c)) {
971         writeSelector.wakeup();
972       }
973     }
974 
975     private void doRunLoop() {
976       long lastPurgeTime = 0;   // last check for old calls.
977       while (running) {
978         try {
979           registerWrites();
980           int keyCt = writeSelector.select(purgeTimeout);
981           if (keyCt == 0) {
982             continue;
983           }
984 
985           Set<SelectionKey> keys = writeSelector.selectedKeys();
986           Iterator<SelectionKey> iter = keys.iterator();
987           while (iter.hasNext()) {
988             SelectionKey key = iter.next();
989             iter.remove();
990             try {
991               if (key.isValid() && key.isWritable()) {
992                 doAsyncWrite(key);
993               }
994             } catch (IOException e) {
995               LOG.debug(getName() + ": asyncWrite", e);
996             }
997           }
998 
999           lastPurgeTime = purge(lastPurgeTime);
1000 
1001         } catch (OutOfMemoryError e) {
1002           if (errorHandler != null) {
1003             if (errorHandler.checkOOME(e)) {
1004               LOG.info(getName() + ": exiting on OutOfMemoryError");
1005               return;
1006             }
1007           } else {
1008             //
1009             // we can run out of memory if we have too many threads
1010             // log the event and sleep for a minute and give
1011             // some thread(s) a chance to finish
1012             //
1013             LOG.warn(getName() + ": OutOfMemoryError in server select", e);
1014             try {
1015               Thread.sleep(60000);
1016             } catch (InterruptedException ex) {
1017               LOG.debug("Interrupted while sleeping");
1018               return;
1019             }
1020           }
1021         } catch (Exception e) {
1022           LOG.warn(getName() + ": exception in Responder " +
1023               StringUtils.stringifyException(e), e);
1024         }
1025       }
1026       LOG.info(getName() + ": stopped");
1027     }
1028 
1029     /**
1030      * If there were some calls that have not been sent out for a
1031      * long time, we close the connection.
1032      * @return the time of the purge.
1033      */
1034     private long purge(long lastPurgeTime) {
1035       long now = System.currentTimeMillis();
1036       if (now < lastPurgeTime + purgeTimeout) {
1037         return lastPurgeTime;
1038       }
1039 
1040       ArrayList<Connection> conWithOldCalls = new ArrayList<Connection>();
1041       // get the list of channels from list of keys.
1042       synchronized (writeSelector.keys()) {
1043         for (SelectionKey key : writeSelector.keys()) {
1044           Connection connection = (Connection) key.attachment();
1045           if (connection == null) {
1046             throw new IllegalStateException("Coding error: SelectionKey key without attachment.");
1047           }
1048           Call call = connection.responseQueue.peekFirst();
1049           if (call != null && now > call.timestamp + purgeTimeout) {
1050             conWithOldCalls.add(call.connection);
1051           }
1052         }
1053       }
1054 
1055       // Seems safer to close the connection outside of the synchronized loop...
1056       for (Connection connection : conWithOldCalls) {
1057         closeConnection(connection);
1058       }
1059 
1060       return now;
1061     }
1062 
1063     private void doAsyncWrite(SelectionKey key) throws IOException {
1064       Connection connection = (Connection) key.attachment();
1065       if (connection == null) {
1066         throw new IOException("doAsyncWrite: no connection");
1067       }
1068       if (key.channel() != connection.channel) {
1069         throw new IOException("doAsyncWrite: bad channel");
1070       }
1071 
1072       if (processAllResponses(connection)) {
1073         try {
1074           // We wrote everything, so we don't need to be told when the socket is ready for
1075           //  write anymore.
1076          key.interestOps(0);
1077         } catch (CancelledKeyException e) {
1078           /* The Listener/reader might have closed the socket.
1079            * We don't explicitly cancel the key, so not sure if this will
1080            * ever fire.
1081            * This warning could be removed.
1082            */
1083           LOG.warn("Exception while changing ops : " + e);
1084         }
1085       }
1086     }
1087 
1088     /**
1089      * Process the response for this call. You need to have the lock on
1090      * {@link org.apache.hadoop.hbase.ipc.RpcServer.Connection#responseWriteLock}
1091      *
1092      * @param call the call
1093      * @return true if we proceed the call fully, false otherwise.
1094      * @throws IOException
1095      */
1096     private boolean processResponse(final Call call) throws IOException {
1097       boolean error = true;
1098       try {
1099         // Send as much data as we can in the non-blocking fashion
1100         long numBytes = channelWrite(call.connection.channel, call.response);
1101         if (numBytes < 0) {
1102           throw new HBaseIOException("Error writing on the socket " +
1103             "for the call:" + call.toShortString());
1104         }
1105         error = false;
1106       } finally {
1107         if (error) {
1108           LOG.debug(getName() + call.toShortString() + ": output error -- closing");
1109           closeConnection(call.connection);
1110         }
1111       }
1112 
1113       if (!call.response.hasRemaining()) {
1114         call.done();
1115         return true;
1116       } else {
1117         return false; // Socket can't take more, we will have to come back.
1118       }
1119     }
1120 
1121     /**
1122      * Process all the responses for this connection
1123      *
1124      * @return true if all the calls were processed or that someone else is doing it.
1125      * false if there * is still some work to do. In this case, we expect the caller to
1126      * delay us.
1127      * @throws IOException
1128      */
1129     private boolean processAllResponses(final Connection connection) throws IOException {
1130       // We want only one writer on the channel for a connection at a time.
1131       connection.responseWriteLock.lock();
1132       try {
1133         for (int i = 0; i < 20; i++) {
1134           // protection if some handlers manage to need all the responder
1135           Call call = connection.responseQueue.pollFirst();
1136           if (call == null) {
1137             return true;
1138           }
1139           if (!processResponse(call)) {
1140             connection.responseQueue.addFirst(call);
1141             return false;
1142           }
1143         }
1144       } finally {
1145         connection.responseWriteLock.unlock();
1146       }
1147 
1148       return connection.responseQueue.isEmpty();
1149     }
1150 
1151     //
1152     // Enqueue a response from the application.
1153     //
1154     void doRespond(Call call) throws IOException {
1155       boolean added = false;
1156 
1157       // If there is already a write in progress, we don't wait. This allows to free the handlers
1158       //  immediately for other tasks.
1159       if (call.connection.responseQueue.isEmpty() && call.connection.responseWriteLock.tryLock()) {
1160         try {
1161           if (call.connection.responseQueue.isEmpty()) {
1162             // If we're alone, we can try to do a direct call to the socket. It's
1163             //  an optimisation to save on context switches and data transfer between cores..
1164             if (processResponse(call)) {
1165               return; // we're done.
1166             }
1167             // Too big to fit, putting ahead.
1168             call.connection.responseQueue.addFirst(call);
1169             added = true; // We will register to the selector later, outside of the lock.
1170           }
1171         } finally {
1172           call.connection.responseWriteLock.unlock();
1173         }
1174       }
1175 
1176       if (!added) {
1177         call.connection.responseQueue.addLast(call);
1178       }
1179       call.responder.registerForWrite(call.connection);
1180 
1181       // set the serve time when the response has to be sent later
1182       call.timestamp = System.currentTimeMillis();
1183     }
1184   }
1185 
1186   @SuppressWarnings("serial")
1187   public static class CallQueueTooBigException extends IOException {
1188     CallQueueTooBigException() {
1189       super();
1190     }
1191   }
1192 
1193   /** Reads calls from a connection and queues them for handling. */
1194   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
1195       value="VO_VOLATILE_INCREMENT",
1196       justification="False positive according to http://sourceforge.net/p/findbugs/bugs/1032/")
1197   public class Connection {
    // If initial preamble with version and magic has been read or not.
    private boolean connectionPreambleRead = false;
    // If the connection header has been read or not.
    private boolean connectionHeaderRead = false;
    protected SocketChannel channel;
    // Body of the request currently being read; allocated once its length is known.
    private ByteBuffer data;
    // 4-byte buffer that first receives the preamble magic and afterwards each
    // request's length prefix.
    private ByteBuffer dataLengthBuffer;
    // Responses waiting to be written to the channel.
    protected final ConcurrentLinkedDeque<Call> responseQueue = new ConcurrentLinkedDeque<Call>();
    // Held while writing so that only one thread writes to the channel at a time.
    private final Lock responseWriteLock = new ReentrantLock();
    private Counter rpcCount = new Counter(); // number of outstanding rpcs
    // Time of the last contact; compared against maxIdleTime in timedOut().
    private long lastContact;
    private InetAddress addr;
    protected Socket socket;
    // Cache the remote host & port info so that even if the socket is
    // disconnected, we can say where it used to connect to.
    protected String hostAddress;
    protected int remotePort;
    ConnectionHeader connectionHeader;
    /**
     * Codec the client asked us to use.
     */
    private Codec codec;
    /**
     * Compression codec the client asked us to use.
     */
    private CompressionCodec compressionCodec;
    BlockingService service;
    // Set once the SASL negotiation completes.
    protected UserGroupInformation user = null;
    // Authentication method announced by the client in the preamble.
    private AuthMethod authMethod;
    private boolean saslContextEstablished;
    // Set when the server falls back to simple auth although the client asked for SASL.
    private boolean skipInitialSaslHandshake;
    private ByteBuffer unwrappedData;
    // When is this set?  FindBugs wants to know!  Says NP
    private ByteBuffer unwrappedDataLengthBuffer = ByteBuffer.allocate(4);
    // True when the connection authenticates with anything other than SIMPLE.
    boolean useSasl;
    SaslServer saslServer;
    // True when the negotiated SASL QOP is something other than "auth".
    private boolean useWrap = false;
    // Fake 'call' for failed authorization response
    private static final int AUTHORIZATION_FAILED_CALLID = -1;
    private final Call authFailedCall =
      new Call(AUTHORIZATION_FAILED_CALLID, null, null, null, null, null, this, null, 0, null,
        null);
    private ByteArrayOutputStream authFailedResponse =
        new ByteArrayOutputStream();
    // Fake 'call' for SASL context setup
    private static final int SASL_CALLID = -33;
    // NOTE(review): 'service' has no initializer, so this.service is still null
    // when this field initializer runs.
    private final Call saslCall =
      new Call(SASL_CALLID, this.service, null, null, null, null, this, null, 0, null, null);

    public UserGroupInformation attemptingUser = null; // user name before auth
1248 
1249     public Connection(SocketChannel channel, long lastContact) {
1250       this.channel = channel;
1251       this.lastContact = lastContact;
1252       this.data = null;
1253       this.dataLengthBuffer = ByteBuffer.allocate(4);
1254       this.socket = channel.socket();
1255       this.addr = socket.getInetAddress();
1256       if (addr == null) {
1257         this.hostAddress = "*Unknown*";
1258       } else {
1259         this.hostAddress = addr.getHostAddress();
1260       }
1261       this.remotePort = socket.getPort();
1262       if (socketSendBufferSize != 0) {
1263         try {
1264           socket.setSendBufferSize(socketSendBufferSize);
1265         } catch (IOException e) {
1266           LOG.warn("Connection: unable to set socket send buffer size to " +
1267                    socketSendBufferSize);
1268         }
1269       }
1270     }
1271 
1272       @Override
1273     public String toString() {
1274       return getHostAddress() + ":" + remotePort;
1275     }
1276 
1277     public String getHostAddress() {
1278       return hostAddress;
1279     }
1280 
1281     public InetAddress getHostInetAddress() {
1282       return addr;
1283     }
1284 
1285     public int getRemotePort() {
1286       return remotePort;
1287     }
1288 
1289     public void setLastContact(long lastContact) {
1290       this.lastContact = lastContact;
1291     }
1292 
1293     public VersionInfo getVersionInfo() {
1294       if (connectionHeader.hasVersionInfo()) {
1295         return connectionHeader.getVersionInfo();
1296       }
1297       return null;
1298     }
1299 
1300     /* Return true if the connection has no outstanding rpc */
1301     private boolean isIdle() {
1302       return rpcCount.get() == 0;
1303     }
1304 
1305     /* Decrement the outstanding RPC count */
1306     protected void decRpcCount() {
1307       rpcCount.decrement();
1308     }
1309 
1310     /* Increment the outstanding RPC count */
1311     protected void incRpcCount() {
1312       rpcCount.increment();
1313     }
1314 
1315     protected boolean timedOut(long currentTime) {
1316       return isIdle() && currentTime - lastContact > maxIdleTime;
1317     }
1318 
1319     private UserGroupInformation getAuthorizedUgi(String authorizedId)
1320         throws IOException {
1321       if (authMethod == AuthMethod.DIGEST) {
1322         TokenIdentifier tokenId = HBaseSaslRpcServer.getIdentifier(authorizedId,
1323             secretManager);
1324         UserGroupInformation ugi = tokenId.getUser();
1325         if (ugi == null) {
1326           throw new AccessDeniedException(
1327               "Can't retrieve username from tokenIdentifier.");
1328         }
1329         ugi.addTokenIdentifier(tokenId);
1330         return ugi;
1331       } else {
1332         return UserGroupInformation.createRemoteUser(authorizedId);
1333       }
1334     }
1335 
1336     private void saslReadAndProcess(byte[] saslToken) throws IOException,
1337         InterruptedException {
1338       if (saslContextEstablished) {
1339         if (LOG.isTraceEnabled())
1340           LOG.trace("Have read input token of size " + saslToken.length
1341               + " for processing by saslServer.unwrap()");
1342 
1343         if (!useWrap) {
1344           processOneRpc(saslToken);
1345         } else {
1346           byte [] plaintextData = saslServer.unwrap(saslToken, 0, saslToken.length);
1347           processUnwrappedData(plaintextData);
1348         }
1349       } else {
1350         byte[] replyToken;
1351         try {
1352           if (saslServer == null) {
1353             switch (authMethod) {
1354             case DIGEST:
1355               if (secretManager == null) {
1356                 throw new AccessDeniedException(
1357                     "Server is not configured to do DIGEST authentication.");
1358               }
1359               saslServer = Sasl.createSaslServer(AuthMethod.DIGEST
1360                   .getMechanismName(), null, SaslUtil.SASL_DEFAULT_REALM,
1361                   SaslUtil.SASL_PROPS, new SaslDigestCallbackHandler(
1362                       secretManager, this));
1363               break;
1364             default:
1365               UserGroupInformation current = UserGroupInformation.getCurrentUser();
1366               String fullName = current.getUserName();
1367               if (LOG.isDebugEnabled()) {
1368                 LOG.debug("Kerberos principal name is " + fullName);
1369               }
1370               final String names[] = SaslUtil.splitKerberosName(fullName);
1371               if (names.length != 3) {
1372                 throw new AccessDeniedException(
1373                     "Kerberos principal name does NOT have the expected "
1374                         + "hostname part: " + fullName);
1375               }
1376               current.doAs(new PrivilegedExceptionAction<Object>() {
1377                 @Override
1378                 public Object run() throws SaslException {
1379                   saslServer = Sasl.createSaslServer(AuthMethod.KERBEROS
1380                       .getMechanismName(), names[0], names[1],
1381                       SaslUtil.SASL_PROPS, new SaslGssCallbackHandler());
1382                   return null;
1383                 }
1384               });
1385             }
1386             if (saslServer == null)
1387               throw new AccessDeniedException(
1388                   "Unable to find SASL server implementation for "
1389                       + authMethod.getMechanismName());
1390             if (LOG.isDebugEnabled()) {
1391               LOG.debug("Created SASL server with mechanism = " + authMethod.getMechanismName());
1392             }
1393           }
1394           if (LOG.isDebugEnabled()) {
1395             LOG.debug("Have read input token of size " + saslToken.length
1396                 + " for processing by saslServer.evaluateResponse()");
1397           }
1398           replyToken = saslServer.evaluateResponse(saslToken);
1399         } catch (IOException e) {
1400           IOException sendToClient = e;
1401           Throwable cause = e;
1402           while (cause != null) {
1403             if (cause instanceof InvalidToken) {
1404               sendToClient = (InvalidToken) cause;
1405               break;
1406             }
1407             cause = cause.getCause();
1408           }
1409           doRawSaslReply(SaslStatus.ERROR, null, sendToClient.getClass().getName(),
1410             sendToClient.getLocalizedMessage());
1411           metrics.authenticationFailure();
1412           String clientIP = this.toString();
1413           // attempting user could be null
1414           AUDITLOG.warn(AUTH_FAILED_FOR + clientIP + ":" + attemptingUser);
1415           throw e;
1416         }
1417         if (replyToken != null) {
1418           if (LOG.isDebugEnabled()) {
1419             LOG.debug("Will send token of size " + replyToken.length
1420                 + " from saslServer.");
1421           }
1422           doRawSaslReply(SaslStatus.SUCCESS, new BytesWritable(replyToken), null,
1423               null);
1424         }
1425         if (saslServer.isComplete()) {
1426           String qop = (String) saslServer.getNegotiatedProperty(Sasl.QOP);
1427           useWrap = qop != null && !"auth".equalsIgnoreCase(qop);
1428           user = getAuthorizedUgi(saslServer.getAuthorizationID());
1429           if (LOG.isDebugEnabled()) {
1430             LOG.debug("SASL server context established. Authenticated client: "
1431               + user + ". Negotiated QoP is "
1432               + saslServer.getNegotiatedProperty(Sasl.QOP));
1433           }
1434           metrics.authenticationSuccess();
1435           AUDITLOG.info(AUTH_SUCCESSFUL_FOR + user);
1436           saslContextEstablished = true;
1437         }
1438       }
1439     }
1440 
1441     /**
1442      * No protobuf encoding of raw sasl messages
1443      */
1444     private void doRawSaslReply(SaslStatus status, Writable rv,
1445         String errorClass, String error) throws IOException {
1446       ByteBufferOutputStream saslResponse = null;
1447       DataOutputStream out = null;
1448       try {
1449         // In my testing, have noticed that sasl messages are usually
1450         // in the ballpark of 100-200. That's why the initial capacity is 256.
1451         saslResponse = new ByteBufferOutputStream(256);
1452         out = new DataOutputStream(saslResponse);
1453         out.writeInt(status.state); // write status
1454         if (status == SaslStatus.SUCCESS) {
1455           rv.write(out);
1456         } else {
1457           WritableUtils.writeString(out, errorClass);
1458           WritableUtils.writeString(out, error);
1459         }
1460         saslCall.setSaslTokenResponse(saslResponse.getByteBuffer());
1461         saslCall.responder = responder;
1462         saslCall.sendResponseIfReady();
1463       } finally {
1464         if (saslResponse != null) {
1465           saslResponse.close();
1466         }
1467         if (out != null) {
1468           out.close();
1469         }
1470       }
1471     }
1472 
1473     private void disposeSasl() {
1474       if (saslServer != null) {
1475         try {
1476           saslServer.dispose();
1477           saslServer = null;
1478         } catch (SaslException ignored) {
1479           // Ignored. This is being disposed of anyway.
1480         }
1481       }
1482     }
1483 
1484     private int readPreamble() throws IOException {
1485       int count;
1486       // Check for 'HBas' magic.
1487       this.dataLengthBuffer.flip();
1488       if (!Arrays.equals(HConstants.RPC_HEADER, dataLengthBuffer.array())) {
1489         return doBadPreambleHandling("Expected HEADER=" +
1490             Bytes.toStringBinary(HConstants.RPC_HEADER) +
1491             " but received HEADER=" + Bytes.toStringBinary(dataLengthBuffer.array()) +
1492             " from " + toString());
1493       }
1494       // Now read the next two bytes, the version and the auth to use.
1495       ByteBuffer versionAndAuthBytes = ByteBuffer.allocate(2);
1496       count = channelRead(channel, versionAndAuthBytes);
1497       if (count < 0 || versionAndAuthBytes.remaining() > 0) {
1498         return count;
1499       }
1500       int version = versionAndAuthBytes.get(0);
1501       byte authbyte = versionAndAuthBytes.get(1);
1502       this.authMethod = AuthMethod.valueOf(authbyte);
1503       if (version != CURRENT_VERSION) {
1504         String msg = getFatalConnectionString(version, authbyte);
1505         return doBadPreambleHandling(msg, new WrongVersionException(msg));
1506       }
1507       if (authMethod == null) {
1508         String msg = getFatalConnectionString(version, authbyte);
1509         return doBadPreambleHandling(msg, new BadAuthException(msg));
1510       }
1511       if (isSecurityEnabled && authMethod == AuthMethod.SIMPLE) {
1512         AccessDeniedException ae = new AccessDeniedException("Authentication is required");
1513         setupResponse(authFailedResponse, authFailedCall, ae, ae.getMessage());
1514         responder.doRespond(authFailedCall);
1515         throw ae;
1516       }
1517       if (!isSecurityEnabled && authMethod != AuthMethod.SIMPLE) {
1518         doRawSaslReply(SaslStatus.SUCCESS, new IntWritable(
1519             SaslUtil.SWITCH_TO_SIMPLE_AUTH), null, null);
1520         authMethod = AuthMethod.SIMPLE;
1521         // client has already sent the initial Sasl message and we
1522         // should ignore it. Both client and server should fall back
1523         // to simple auth from now on.
1524         skipInitialSaslHandshake = true;
1525       }
1526       if (authMethod != AuthMethod.SIMPLE) {
1527         useSasl = true;
1528       }
1529 
1530       dataLengthBuffer.clear();
1531       connectionPreambleRead = true;
1532       return count;
1533     }
1534 
1535     private int read4Bytes() throws IOException {
1536       if (this.dataLengthBuffer.remaining() > 0) {
1537         return channelRead(channel, this.dataLengthBuffer);
1538       } else {
1539         return 0;
1540       }
1541     }
1542 
1543 
1544     /**
1545      * Read off the wire. If there is not enough data to read, update the connection state with
1546      *  what we have and returns.
1547      * @return Returns -1 if failure (and caller will close connection), else zero or more.
1548      * @throws IOException
1549      * @throws InterruptedException
1550      */
1551     public int readAndProcess() throws IOException, InterruptedException {
1552       // Try and read in an int.  If new connection, the int will hold the 'HBas' HEADER.  If it
1553       // does, read in the rest of the connection preamble, the version and the auth method.
1554       // Else it will be length of the data to read (or -1 if a ping).  We catch the integer
1555       // length into the 4-byte this.dataLengthBuffer.
1556       int count = read4Bytes();
1557       if (count < 0 || dataLengthBuffer.remaining() > 0) {
1558         return count;
1559       }
1560 
1561       // If we have not read the connection setup preamble, look to see if that is on the wire.
1562       if (!connectionPreambleRead) {
1563         count = readPreamble();
1564         if (!connectionPreambleRead) {
1565           return count;
1566         }
1567 
1568         count = read4Bytes();
1569         if (count < 0 || dataLengthBuffer.remaining() > 0) {
1570           return count;
1571         }
1572       }
1573 
1574       // We have read a length and we have read the preamble.  It is either the connection header
1575       // or it is a request.
1576       if (data == null) {
1577         dataLengthBuffer.flip();
1578         int dataLength = dataLengthBuffer.getInt();
1579         if (dataLength == RpcClient.PING_CALL_ID) {
1580           if (!useWrap) { //covers the !useSasl too
1581             dataLengthBuffer.clear();
1582             return 0;  //ping message
1583           }
1584         }
1585         if (dataLength < 0) { // A data length of zero is legal.
1586           throw new IllegalArgumentException("Unexpected data length "
1587               + dataLength + "!! from " + getHostAddress());
1588         }
1589         data = ByteBuffer.allocate(dataLength);
1590 
1591         // Increment the rpc count. This counter will be decreased when we write
1592         //  the response.  If we want the connection to be detected as idle properly, we
1593         //  need to keep the inc / dec correct.
1594         incRpcCount();
1595       }
1596 
1597       count = channelRead(channel, data);
1598 
1599       if (count >= 0 && data.remaining() == 0) { // count==0 if dataLength == 0
1600         process();
1601       }
1602 
1603       return count;
1604     }
1605 
1606     /**
1607      * Process the data buffer and clean the connection state for the next call.
1608      */
1609     private void process() throws IOException, InterruptedException {
1610       data.flip();
1611       try {
1612         if (skipInitialSaslHandshake) {
1613           skipInitialSaslHandshake = false;
1614           return;
1615         }
1616 
1617         if (useSasl) {
1618           saslReadAndProcess(data.array());
1619         } else {
1620           processOneRpc(data.array());
1621         }
1622 
1623       } finally {
1624         dataLengthBuffer.clear(); // Clean for the next call
1625         data = null; // For the GC
1626       }
1627     }
1628 
1629     private String getFatalConnectionString(final int version, final byte authByte) {
1630       return "serverVersion=" + CURRENT_VERSION +
1631       ", clientVersion=" + version + ", authMethod=" + authByte +
1632       ", authSupported=" + (authMethod != null) + " from " + toString();
1633     }
1634 
1635     private int doBadPreambleHandling(final String msg) throws IOException {
1636       return doBadPreambleHandling(msg, new FatalConnectionException(msg));
1637     }
1638 
1639     private int doBadPreambleHandling(final String msg, final Exception e) throws IOException {
1640       LOG.warn(msg);
1641       Call fakeCall = new Call(-1, null, null, null, null, null, this, responder, -1, null, null);
1642       setupResponse(null, fakeCall, e, msg);
1643       responder.doRespond(fakeCall);
1644       // Returning -1 closes out the connection.
1645       return -1;
1646     }
1647 
1648     // Reads the connection header following version
1649     private void processConnectionHeader(byte[] buf) throws IOException {
1650       this.connectionHeader = ConnectionHeader.parseFrom(buf);
1651       String serviceName = connectionHeader.getServiceName();
1652       if (serviceName == null) throw new EmptyServiceNameException();
1653       this.service = getService(services, serviceName);
1654       if (this.service == null) throw new UnknownServiceException(serviceName);
1655       setupCellBlockCodecs(this.connectionHeader);
1656       UserGroupInformation protocolUser = createUser(connectionHeader);
1657       if (!useSasl) {
1658         user = protocolUser;
1659         if (user != null) {
1660           user.setAuthenticationMethod(AuthMethod.SIMPLE.authenticationMethod);
1661         }
1662       } else {
1663         // user is authenticated
1664         user.setAuthenticationMethod(authMethod.authenticationMethod);
1665         //Now we check if this is a proxy user case. If the protocol user is
1666         //different from the 'user', it is a proxy user scenario. However,
1667         //this is not allowed if user authenticated with DIGEST.
1668         if ((protocolUser != null)
1669             && (!protocolUser.getUserName().equals(user.getUserName()))) {
1670           if (authMethod == AuthMethod.DIGEST) {
1671             // Not allowed to doAs if token authentication is used
1672             throw new AccessDeniedException("Authenticated user (" + user
1673                 + ") doesn't match what the client claims to be ("
1674                 + protocolUser + ")");
1675           } else {
1676             // Effective user can be different from authenticated user
1677             // for simple auth or kerberos auth
1678             // The user is the real user. Now we create a proxy user
1679             UserGroupInformation realUser = user;
1680             user = UserGroupInformation.createProxyUser(protocolUser
1681                 .getUserName(), realUser);
1682             // Now the user is a proxy user, set Authentication method Proxy.
1683             user.setAuthenticationMethod(AuthenticationMethod.PROXY);
1684           }
1685         }
1686       }
1687       if (connectionHeader.hasVersionInfo()) {
1688         AUDITLOG.info("Connection from " + this.hostAddress + " port: " + this.remotePort
1689             + " with version info: "
1690             + TextFormat.shortDebugString(connectionHeader.getVersionInfo()));
1691       } else {
1692         AUDITLOG.info("Connection from " + this.hostAddress + " port: " + this.remotePort
1693             + " with unknown version info");
1694       }
1695     }
1696 
1697     /**
1698      * Set up cell block codecs
1699      * @throws FatalConnectionException
1700      */
1701     private void setupCellBlockCodecs(final ConnectionHeader header)
1702     throws FatalConnectionException {
1703       // TODO: Plug in other supported decoders.
1704       if (!header.hasCellBlockCodecClass()) return;
1705       String className = header.getCellBlockCodecClass();
1706       if (className == null || className.length() == 0) return;
1707       try {
1708         this.codec = (Codec)Class.forName(className).newInstance();
1709       } catch (Exception e) {
1710         throw new UnsupportedCellCodecException(className, e);
1711       }
1712       if (!header.hasCellBlockCompressorClass()) return;
1713       className = header.getCellBlockCompressorClass();
1714       try {
1715         this.compressionCodec = (CompressionCodec)Class.forName(className).newInstance();
1716       } catch (Exception e) {
1717         throw new UnsupportedCompressionCodecException(className, e);
1718       }
1719     }
1720 
1721     private void processUnwrappedData(byte[] inBuf) throws IOException,
1722     InterruptedException {
1723       ReadableByteChannel ch = Channels.newChannel(new ByteArrayInputStream(inBuf));
1724       // Read all RPCs contained in the inBuf, even partial ones
1725       while (true) {
1726         int count;
1727         if (unwrappedDataLengthBuffer.remaining() > 0) {
1728           count = channelRead(ch, unwrappedDataLengthBuffer);
1729           if (count <= 0 || unwrappedDataLengthBuffer.remaining() > 0)
1730             return;
1731         }
1732 
1733         if (unwrappedData == null) {
1734           unwrappedDataLengthBuffer.flip();
1735           int unwrappedDataLength = unwrappedDataLengthBuffer.getInt();
1736 
1737           if (unwrappedDataLength == RpcClient.PING_CALL_ID) {
1738             if (LOG.isDebugEnabled())
1739               LOG.debug("Received ping message");
1740             unwrappedDataLengthBuffer.clear();
1741             continue; // ping message
1742           }
1743           unwrappedData = ByteBuffer.allocate(unwrappedDataLength);
1744         }
1745 
1746         count = channelRead(ch, unwrappedData);
1747         if (count <= 0 || unwrappedData.remaining() > 0)
1748           return;
1749 
1750         if (unwrappedData.remaining() == 0) {
1751           unwrappedDataLengthBuffer.clear();
1752           unwrappedData.flip();
1753           processOneRpc(unwrappedData.array());
1754           unwrappedData = null;
1755         }
1756       }
1757     }
1758 
1759     private void processOneRpc(byte[] buf) throws IOException, InterruptedException {
1760       if (connectionHeaderRead) {
1761         processRequest(buf);
1762       } else {
1763         processConnectionHeader(buf);
1764         this.connectionHeaderRead = true;
1765         if (!authorizeConnection()) {
1766           // Throw FatalConnectionException wrapping ACE so client does right thing and closes
1767           // down the connection instead of trying to read non-existent retun.
1768           throw new AccessDeniedException("Connection from " + this + " for service " +
1769             connectionHeader.getServiceName() + " is unauthorized for user: " + user);
1770         }
1771       }
1772     }
1773 
1774     /**
1775      * @param buf Has the request header and the request param and optionally encoded data buffer
1776      * all in this one array.
1777      * @throws IOException
1778      * @throws InterruptedException
1779      */
1780     protected void processRequest(byte[] buf) throws IOException, InterruptedException {
1781       long totalRequestSize = buf.length;
1782       int offset = 0;
1783       // Here we read in the header.  We avoid having pb
1784       // do its default 4k allocation for CodedInputStream.  We force it to use backing array.
1785       CodedInputStream cis = CodedInputStream.newInstance(buf, offset, buf.length);
1786       int headerSize = cis.readRawVarint32();
1787       offset = cis.getTotalBytesRead();
1788       Message.Builder builder = RequestHeader.newBuilder();
1789       ProtobufUtil.mergeFrom(builder, buf, offset, headerSize);
1790       RequestHeader header = (RequestHeader) builder.build();
1791       offset += headerSize;
1792       int id = header.getCallId();
1793       if (LOG.isTraceEnabled()) {
1794         LOG.trace("RequestHeader " + TextFormat.shortDebugString(header) +
1795           " totalRequestSize: " + totalRequestSize + " bytes");
1796       }
1797       // Enforcing the call queue size, this triggers a retry in the client
1798       // This is a bit late to be doing this check - we have already read in the total request.
1799       if ((totalRequestSize + callQueueSize.get()) > maxQueueSize) {
1800         final Call callTooBig =
1801           new Call(id, this.service, null, null, null, null, this,
1802             responder, totalRequestSize, null, null);
1803         ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
1804         metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION);
1805         InetSocketAddress address = getListenerAddress();
1806         setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION,
1807             "Call queue is full on " + (address != null ? address : "(channel closed)") +
1808                 ", is hbase.ipc.server.max.callqueue.size too small?");
1809         responder.doRespond(callTooBig);
1810         return;
1811       }
1812       MethodDescriptor md = null;
1813       Message param = null;
1814       CellScanner cellScanner = null;
1815       try {
1816         if (header.hasRequestParam() && header.getRequestParam()) {
1817           md = this.service.getDescriptorForType().findMethodByName(header.getMethodName());
1818           if (md == null) throw new UnsupportedOperationException(header.getMethodName());
1819           builder = this.service.getRequestPrototype(md).newBuilderForType();
1820           // To read the varint, I need an inputstream; might as well be a CIS.
1821           cis = CodedInputStream.newInstance(buf, offset, buf.length);
1822           int paramSize = cis.readRawVarint32();
1823           offset += cis.getTotalBytesRead();
1824           if (builder != null) {
1825             ProtobufUtil.mergeFrom(builder, buf, offset, paramSize);
1826             param = builder.build();
1827           }
1828           offset += paramSize;
1829         }
1830         if (header.hasCellBlockMeta()) {
1831           cellScanner = ipcUtil.createCellScanner(this.codec, this.compressionCodec,
1832             buf, offset, buf.length);
1833         }
1834       } catch (Throwable t) {
1835         InetSocketAddress address = getListenerAddress();
1836         String msg = (address != null ? address : "(channel closed)") +
1837             " is unable to read call parameter from client " + getHostAddress();
1838         LOG.warn(msg, t);
1839 
1840         metrics.exception(t);
1841 
1842         // probably the hbase hadoop version does not match the running hadoop version
1843         if (t instanceof LinkageError) {
1844           t = new DoNotRetryIOException(t);
1845         }
1846         // If the method is not present on the server, do not retry.
1847         if (t instanceof UnsupportedOperationException) {
1848           t = new DoNotRetryIOException(t);
1849         }
1850 
1851         final Call readParamsFailedCall =
1852           new Call(id, this.service, null, null, null, null, this,
1853             responder, totalRequestSize, null, null);
1854         ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
1855         setupResponse(responseBuffer, readParamsFailedCall, t,
1856           msg + "; " + t.getMessage());
1857         responder.doRespond(readParamsFailedCall);
1858         return;
1859       }
1860 
1861       TraceInfo traceInfo = header.hasTraceInfo()
1862           ? new TraceInfo(header.getTraceInfo().getTraceId(), header.getTraceInfo().getParentId())
1863           : null;
1864       Call call = new Call(id, this.service, md, header, param, cellScanner, this, responder,
1865               totalRequestSize, traceInfo, RpcServer.getRemoteIp());
1866       scheduler.dispatch(new CallRunner(RpcServer.this, call));
1867     }
1868 
1869     private boolean authorizeConnection() throws IOException {
1870       try {
1871         // If auth method is DIGEST, the token was obtained by the
1872         // real user for the effective user, therefore not required to
1873         // authorize real user. doAs is allowed only for simple or kerberos
1874         // authentication
1875         if (user != null && user.getRealUser() != null
1876             && (authMethod != AuthMethod.DIGEST)) {
1877           ProxyUsers.authorize(user, this.getHostAddress(), conf);
1878         }
1879         authorize(user, connectionHeader, getHostInetAddress());
1880         metrics.authorizationSuccess();
1881       } catch (AuthorizationException ae) {
1882         if (LOG.isDebugEnabled()) {
1883           LOG.debug("Connection authorization failed: " + ae.getMessage(), ae);
1884         }
1885         metrics.authorizationFailure();
1886         setupResponse(authFailedResponse, authFailedCall,
1887           new AccessDeniedException(ae), ae.getMessage());
1888         responder.doRespond(authFailedCall);
1889         return false;
1890       }
1891       return true;
1892     }
1893 
1894     protected synchronized void close() {
1895       disposeSasl();
1896       data = null;
1897       if (!channel.isOpen())
1898         return;
1899       try {socket.shutdownOutput();} catch(Exception ignored) {} // FindBugs DE_MIGHT_IGNORE
1900       if (channel.isOpen()) {
1901         try {channel.close();} catch(Exception ignored) {}
1902       }
1903       try {socket.close();} catch(Exception ignored) {}
1904     }
1905 
1906     private UserGroupInformation createUser(ConnectionHeader head) {
1907       UserGroupInformation ugi = null;
1908 
1909       if (!head.hasUserInfo()) {
1910         return null;
1911       }
1912       UserInformation userInfoProto = head.getUserInfo();
1913       String effectiveUser = null;
1914       if (userInfoProto.hasEffectiveUser()) {
1915         effectiveUser = userInfoProto.getEffectiveUser();
1916       }
1917       String realUser = null;
1918       if (userInfoProto.hasRealUser()) {
1919         realUser = userInfoProto.getRealUser();
1920       }
1921       if (effectiveUser != null) {
1922         if (realUser != null) {
1923           UserGroupInformation realUserUgi =
1924               UserGroupInformation.createRemoteUser(realUser);
1925           ugi = UserGroupInformation.createProxyUser(effectiveUser, realUserUgi);
1926         } else {
1927           ugi = UserGroupInformation.createRemoteUser(effectiveUser);
1928         }
1929       }
1930       return ugi;
1931     }
1932   }
1933 
1934   /**
1935    * Datastructure for passing a {@link BlockingService} and its associated class of
1936    * protobuf service interface.  For example, a server that fielded what is defined
1937    * in the client protobuf service would pass in an implementation of the client blocking service
1938    * and then its ClientService.BlockingInterface.class.  Used checking connection setup.
1939    */
1940   public static class BlockingServiceAndInterface {
1941     private final BlockingService service;
1942     private final Class<?> serviceInterface;
1943     public BlockingServiceAndInterface(final BlockingService service,
1944         final Class<?> serviceInterface) {
1945       this.service = service;
1946       this.serviceInterface = serviceInterface;
1947     }
1948     public Class<?> getServiceInterface() {
1949       return this.serviceInterface;
1950     }
1951     public BlockingService getBlockingService() {
1952       return this.service;
1953     }
1954   }
1955 
1956   /**
1957    * Constructs a server listening on the named port and address.
1958    * @param server hosting instance of {@link Server}. We will do authentications if an
1959    * instance else pass null for no authentication check.
1960    * @param name Used keying this rpc servers' metrics and for naming the Listener thread.
1961    * @param services A list of services.
1962    * @param bindAddress Where to listen
1963    * @param conf
1964    * @param scheduler
1965    */
1966   public RpcServer(final Server server, final String name,
1967       final List<BlockingServiceAndInterface> services,
1968       final InetSocketAddress bindAddress, Configuration conf,
1969       RpcScheduler scheduler)
1970       throws IOException {
1971     this.reservoir = new BoundedByteBufferPool(
1972       conf.getInt("hbase.ipc.server.reservoir.max.buffer.size",  1024 * 1024),
1973       conf.getInt("hbase.ipc.server.reservoir.initial.buffer.size", 16 * 1024),
1974       // Make the max twice the number of handlers to be safe.
1975       conf.getInt("hbase.ipc.server.reservoir.initial.max",
1976         conf.getInt(HConstants.REGION_SERVER_HANDLER_COUNT,
1977           HConstants.DEFAULT_REGION_SERVER_HANDLER_COUNT) * 2));
1978     this.server = server;
1979     this.services = services;
1980     this.bindAddress = bindAddress;
1981     this.conf = conf;
1982     this.socketSendBufferSize = 0;
1983     this.maxQueueSize =
1984       this.conf.getInt("hbase.ipc.server.max.callqueue.size", DEFAULT_MAX_CALLQUEUE_SIZE);
1985     this.readThreads = conf.getInt("hbase.ipc.server.read.threadpool.size", 10);
1986     this.maxIdleTime = 2 * conf.getInt("hbase.ipc.client.connection.maxidletime", 1000);
1987     this.maxConnectionsToNuke = conf.getInt("hbase.ipc.client.kill.max", 10);
1988     this.thresholdIdleConnections = conf.getInt("hbase.ipc.client.idlethreshold", 4000);
1989     this.purgeTimeout = conf.getLong("hbase.ipc.client.call.purge.timeout",
1990       2 * HConstants.DEFAULT_HBASE_RPC_TIMEOUT);
1991     this.warnResponseTime = conf.getInt(WARN_RESPONSE_TIME, DEFAULT_WARN_RESPONSE_TIME);
1992     this.warnResponseSize = conf.getInt(WARN_RESPONSE_SIZE, DEFAULT_WARN_RESPONSE_SIZE);
1993 
1994     // Start the listener here and let it bind to the port
1995     listener = new Listener(name);
1996     this.port = listener.getAddress().getPort();
1997 
1998     this.metrics = new MetricsHBaseServer(name, new MetricsHBaseServerWrapperImpl(this));
1999     this.tcpNoDelay = conf.getBoolean("hbase.ipc.server.tcpnodelay", true);
2000     this.tcpKeepAlive = conf.getBoolean("hbase.ipc.server.tcpkeepalive", true);
2001 
2002     this.warnDelayedCalls = conf.getInt(WARN_DELAYED_CALLS, DEFAULT_WARN_DELAYED_CALLS);
2003     this.delayedCalls = new AtomicInteger(0);
2004     this.ipcUtil = new IPCUtil(conf);
2005 
2006 
2007     // Create the responder here
2008     responder = new Responder();
2009     this.authorize = conf.getBoolean(HADOOP_SECURITY_AUTHORIZATION, false);
2010     this.userProvider = UserProvider.instantiate(conf);
2011     this.isSecurityEnabled = userProvider.isHBaseSecurityEnabled();
2012     if (isSecurityEnabled) {
2013       HBaseSaslRpcServer.init(conf);
2014     }
2015     this.scheduler = scheduler;
2016     this.scheduler.init(new RpcSchedulerContext(this));
2017   }
2018 
2019   /**
2020    * Subclasses of HBaseServer can override this to provide their own
2021    * Connection implementations.
2022    */
2023   protected Connection getConnection(SocketChannel channel, long time) {
2024     return new Connection(channel, time);
2025   }
2026 
2027   /**
2028    * Setup response for the RPC Call.
2029    *
2030    * @param response buffer to serialize the response into
2031    * @param call {@link Call} to which we are setting up the response
2032    * @param error error message, if the call failed
2033    * @throws IOException
2034    */
2035   private void setupResponse(ByteArrayOutputStream response, Call call, Throwable t, String error)
2036   throws IOException {
2037     if (response != null) response.reset();
2038     call.setResponse(null, null, t, error);
2039   }
2040 
2041   protected void closeConnection(Connection connection) {
2042     synchronized (connectionList) {
2043       if (connectionList.remove(connection)) {
2044         numConnections--;
2045       }
2046     }
2047     connection.close();
2048   }
2049 
2050   Configuration getConf() {
2051     return conf;
2052   }
2053 
2054   /** Sets the socket buffer size used for responding to RPCs.
2055    * @param size send size
2056    */
2057   @Override
2058   public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; }
2059 
2060   @Override
2061   public boolean isStarted() {
2062     return this.started;
2063   }
2064 
2065   /** Starts the service.  Must be called before any calls will be handled. */
2066   @Override
2067   public synchronized void start() {
2068     if (started) return;
2069     authTokenSecretMgr = createSecretManager();
2070     if (authTokenSecretMgr != null) {
2071       setSecretManager(authTokenSecretMgr);
2072       authTokenSecretMgr.start();
2073     }
2074     this.authManager = new ServiceAuthorizationManager();
2075     HBasePolicyProvider.init(conf, authManager);
2076     responder.start();
2077     listener.start();
2078     scheduler.start();
2079     started = true;
2080   }
2081 
2082   @Override
2083   public void refreshAuthManager(PolicyProvider pp) {
2084     // Ignore warnings that this should be accessed in a static way instead of via an instance;
2085     // it'll break if you go via static route.
2086     this.authManager.refresh(this.conf, pp);
2087   }
2088 
2089   private AuthenticationTokenSecretManager createSecretManager() {
2090     if (!isSecurityEnabled) return null;
2091     if (server == null) return null;
2092     Configuration conf = server.getConfiguration();
2093     long keyUpdateInterval =
2094         conf.getLong("hbase.auth.key.update.interval", 24*60*60*1000);
2095     long maxAge =
2096         conf.getLong("hbase.auth.token.max.lifetime", 7*24*60*60*1000);
2097     return new AuthenticationTokenSecretManager(conf, server.getZooKeeper(),
2098         server.getServerName().toString(), keyUpdateInterval, maxAge);
2099   }
2100 
2101   public SecretManager<? extends TokenIdentifier> getSecretManager() {
2102     return this.secretManager;
2103   }
2104 
2105   @SuppressWarnings("unchecked")
2106   public void setSecretManager(SecretManager<? extends TokenIdentifier> secretManager) {
2107     this.secretManager = (SecretManager<TokenIdentifier>) secretManager;
2108   }
2109 
2110   /**
2111    * This is a server side method, which is invoked over RPC. On success
2112    * the return response has protobuf response payload. On failure, the
2113    * exception name and the stack trace are returned in the protobuf response.
2114    */
2115   @Override
2116   public Pair<Message, CellScanner> call(BlockingService service, MethodDescriptor md,
2117       Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status)
2118   throws IOException {
2119     try {
2120       status.setRPC(md.getName(), new Object[]{param}, receiveTime);
2121       // TODO: Review after we add in encoded data blocks.
2122       status.setRPCPacket(param);
2123       status.resume("Servicing call");
2124       //get an instance of the method arg type
2125       long startTime = System.currentTimeMillis();
2126       PayloadCarryingRpcController controller = new PayloadCarryingRpcController(cellScanner);
2127       Message result = service.callBlockingMethod(md, controller, param);
2128       long endTime = System.currentTimeMillis();
2129       int processingTime = (int) (endTime - startTime);
2130       int qTime = (int) (startTime - receiveTime);
2131       int totalTime = (int) (endTime - receiveTime);
2132       if (LOG.isTraceEnabled()) {
2133         LOG.trace(CurCall.get().toString() +
2134             ", response " + TextFormat.shortDebugString(result) +
2135             " queueTime: " + qTime +
2136             " processingTime: " + processingTime +
2137             " totalTime: " + totalTime);
2138       }
2139       long requestSize = param.getSerializedSize();
2140       long responseSize = result.getSerializedSize();
2141       metrics.dequeuedCall(qTime);
2142       metrics.processedCall(processingTime);
2143       metrics.totalCall(totalTime);
2144       metrics.receivedRequest(requestSize);
2145       metrics.sentResponse(responseSize);
2146       // log any RPC responses that are slower than the configured warn
2147       // response time or larger than configured warning size
2148       boolean tooSlow = (processingTime > warnResponseTime && warnResponseTime > -1);
2149       boolean tooLarge = (responseSize > warnResponseSize && warnResponseSize > -1);
2150       if (tooSlow || tooLarge) {
2151         // when tagging, we let TooLarge trump TooSmall to keep output simple
2152         // note that large responses will often also be slow.
2153         logResponse(new Object[]{param},
2154             md.getName(), md.getName() + "(" + param.getClass().getName() + ")",
2155             (tooLarge ? "TooLarge" : "TooSlow"),
2156             status.getClient(), startTime, processingTime, qTime,
2157             responseSize);
2158       }
2159       return new Pair<Message, CellScanner>(result, controller.cellScanner());
2160     } catch (Throwable e) {
2161       // The above callBlockingMethod will always return a SE.  Strip the SE wrapper before
2162       // putting it on the wire.  Its needed to adhere to the pb Service Interface but we don't
2163       // need to pass it over the wire.
2164       if (e instanceof ServiceException) e = e.getCause();
2165 
2166       // increment the number of requests that were exceptions.
2167       metrics.exception(e);
2168 
2169       if (e instanceof LinkageError) throw new DoNotRetryIOException(e);
2170       if (e instanceof IOException) throw (IOException)e;
2171       LOG.error("Unexpected throwable object ", e);
2172       throw new IOException(e.getMessage(), e);
2173     }
2174   }
2175 
2176   /**
2177    * Logs an RPC response to the LOG file, producing valid JSON objects for
2178    * client Operations.
2179    * @param params The parameters received in the call.
2180    * @param methodName The name of the method invoked
2181    * @param call The string representation of the call
2182    * @param tag  The tag that will be used to indicate this event in the log.
2183    * @param clientAddress   The address of the client who made this call.
2184    * @param startTime       The time that the call was initiated, in ms.
2185    * @param processingTime  The duration that the call took to run, in ms.
2186    * @param qTime           The duration that the call spent on the queue
2187    *                        prior to being initiated, in ms.
2188    * @param responseSize    The size in bytes of the response buffer.
2189    */
2190   void logResponse(Object[] params, String methodName, String call, String tag,
2191       String clientAddress, long startTime, int processingTime, int qTime,
2192       long responseSize)
2193           throws IOException {
2194     // base information that is reported regardless of type of call
2195     Map<String, Object> responseInfo = new HashMap<String, Object>();
2196     responseInfo.put("starttimems", startTime);
2197     responseInfo.put("processingtimems", processingTime);
2198     responseInfo.put("queuetimems", qTime);
2199     responseInfo.put("responsesize", responseSize);
2200     responseInfo.put("client", clientAddress);
2201     responseInfo.put("class", server == null? "": server.getClass().getSimpleName());
2202     responseInfo.put("method", methodName);
2203     if (params.length == 2 && server instanceof HRegionServer &&
2204         params[0] instanceof byte[] &&
2205         params[1] instanceof Operation) {
2206       // if the slow process is a query, we want to log its table as well
2207       // as its own fingerprint
2208       TableName tableName = TableName.valueOf(
2209           HRegionInfo.parseRegionName((byte[]) params[0])[0]);
2210       responseInfo.put("table", tableName.getNameAsString());
2211       // annotate the response map with operation details
2212       responseInfo.putAll(((Operation) params[1]).toMap());
2213       // report to the log file
2214       LOG.warn("(operation" + tag + "): " +
2215                MAPPER.writeValueAsString(responseInfo));
2216     } else if (params.length == 1 && server instanceof HRegionServer &&
2217         params[0] instanceof Operation) {
2218       // annotate the response map with operation details
2219       responseInfo.putAll(((Operation) params[0]).toMap());
2220       // report to the log file
2221       LOG.warn("(operation" + tag + "): " +
2222                MAPPER.writeValueAsString(responseInfo));
2223     } else {
2224       // can't get JSON details, so just report call.toString() along with
2225       // a more generic tag.
2226       responseInfo.put("call", call);
2227       LOG.warn("(response" + tag + "): " + MAPPER.writeValueAsString(responseInfo));
2228     }
2229   }
2230 
2231   /** Stops the service.  No new calls will be handled after this is called. */
2232   @Override
2233   public synchronized void stop() {
2234     LOG.info("Stopping server on " + port);
2235     running = false;
2236     if (authTokenSecretMgr != null) {
2237       authTokenSecretMgr.stop();
2238       authTokenSecretMgr = null;
2239     }
2240     listener.interrupt();
2241     listener.doStop();
2242     responder.interrupt();
2243     scheduler.stop();
2244     notifyAll();
2245   }
2246 
2247   /** Wait for the server to be stopped.
2248    * Does not wait for all subthreads to finish.
2249    *  See {@link #stop()}.
2250    * @throws InterruptedException e
2251    */
2252   @Override
2253   public synchronized void join() throws InterruptedException {
2254     while (running) {
2255       wait();
2256     }
2257   }
2258 
2259   /**
2260    * Return the socket (ip+port) on which the RPC server is listening to. May return null if
2261    * the listener channel is closed.
2262    * @return the socket (ip+port) on which the RPC server is listening to, or null if this
2263    * information cannot be determined
2264    */
2265   @Override
2266   public synchronized InetSocketAddress getListenerAddress() {
2267     if (listener == null) {
2268       return null;
2269     }
2270     return listener.getAddress();
2271   }
2272 
2273   /**
2274    * Set the handler for calling out of RPC for error conditions.
2275    * @param handler the handler implementation
2276    */
2277   @Override
2278   public void setErrorHandler(HBaseRPCErrorHandler handler) {
2279     this.errorHandler = handler;
2280   }
2281 
2282   @Override
2283   public HBaseRPCErrorHandler getErrorHandler() {
2284     return this.errorHandler;
2285   }
2286 
2287   /**
2288    * Returns the metrics instance for reporting RPC call statistics
2289    */
2290   @Override
2291   public MetricsHBaseServer getMetrics() {
2292     return metrics;
2293   }
2294 
2295   @Override
2296   public void addCallSize(final long diff) {
2297     this.callQueueSize.add(diff);
2298   }
2299 
2300   /**
2301    * Authorize the incoming client connection.
2302    *
2303    * @param user client user
2304    * @param connection incoming connection
2305    * @param addr InetAddress of incoming connection
2306    * @throws org.apache.hadoop.security.authorize.AuthorizationException
2307    *         when the client isn't authorized to talk the protocol
2308    */
2309   public void authorize(UserGroupInformation user, ConnectionHeader connection, InetAddress addr)
2310   throws AuthorizationException {
2311     if (authorize) {
2312       Class<?> c = getServiceInterface(services, connection.getServiceName());
2313       this.authManager.authorize(user != null ? user : null, c, getConf(), addr);
2314     }
2315   }
2316 
2317   /**
2318    * When the read or write buffer size is larger than this limit, i/o will be
2319    * done in chunks of this size. Most RPC requests and responses would
2320    * be smaller.
2321    */
2322   private static int NIO_BUFFER_LIMIT = 64 * 1024; //should not be more than 64KB.
2323 
2324   /**
2325    * This is a wrapper around {@link java.nio.channels.WritableByteChannel#write(java.nio.ByteBuffer)}.
2326    * If the amount of data is large, it writes to channel in smaller chunks.
2327    * This is to avoid jdk from creating many direct buffers as the size of
2328    * buffer increases. This also minimizes extra copies in NIO layer
2329    * as a result of multiple write operations required to write a large
2330    * buffer.
2331    *
2332    * @param channel writable byte channel to write to
2333    * @param bufferChain Chain of buffers to write
2334    * @return number of bytes written
2335    * @throws java.io.IOException e
2336    * @see java.nio.channels.WritableByteChannel#write(java.nio.ByteBuffer)
2337    */
2338   protected long channelWrite(GatheringByteChannel channel, BufferChain bufferChain)
2339   throws IOException {
2340     long count =  bufferChain.write(channel, NIO_BUFFER_LIMIT);
2341     if (count > 0) this.metrics.sentBytes(count);
2342     return count;
2343   }
2344 
2345   /**
2346    * This is a wrapper around {@link java.nio.channels.ReadableByteChannel#read(java.nio.ByteBuffer)}.
2347    * If the amount of data is large, it writes to channel in smaller chunks.
2348    * This is to avoid jdk from creating many direct buffers as the size of
2349    * ByteBuffer increases. There should not be any performance degredation.
2350    *
2351    * @param channel writable byte channel to write on
2352    * @param buffer buffer to write
2353    * @return number of bytes written
2354    * @throws java.io.IOException e
2355    * @see java.nio.channels.ReadableByteChannel#read(java.nio.ByteBuffer)
2356    */
2357   protected int channelRead(ReadableByteChannel channel,
2358                                    ByteBuffer buffer) throws IOException {
2359 
2360     int count = (buffer.remaining() <= NIO_BUFFER_LIMIT) ?
2361            channel.read(buffer) : channelIO(channel, null, buffer);
2362     if (count > 0) {
2363       metrics.receivedBytes(count);
2364     }
2365     return count;
2366   }
2367 
2368   /**
2369    * Helper for {@link #channelRead(java.nio.channels.ReadableByteChannel, java.nio.ByteBuffer)}
2370    * and {@link #channelWrite(GatheringByteChannel, BufferChain)}. Only
2371    * one of readCh or writeCh should be non-null.
2372    *
2373    * @param readCh read channel
2374    * @param writeCh write channel
2375    * @param buf buffer to read or write into/out of
2376    * @return bytes written
2377    * @throws java.io.IOException e
2378    * @see #channelRead(java.nio.channels.ReadableByteChannel, java.nio.ByteBuffer)
2379    * @see #channelWrite(GatheringByteChannel, BufferChain)
2380    */
2381   private static int channelIO(ReadableByteChannel readCh,
2382                                WritableByteChannel writeCh,
2383                                ByteBuffer buf) throws IOException {
2384 
2385     int originalLimit = buf.limit();
2386     int initialRemaining = buf.remaining();
2387     int ret = 0;
2388 
2389     while (buf.remaining() > 0) {
2390       try {
2391         int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT);
2392         buf.limit(buf.position() + ioSize);
2393 
2394         ret = (readCh == null) ? writeCh.write(buf) : readCh.read(buf);
2395 
2396         if (ret < ioSize) {
2397           break;
2398         }
2399 
2400       } finally {
2401         buf.limit(originalLimit);
2402       }
2403     }
2404 
2405     int nBytes = initialRemaining - buf.remaining();
2406     return (nBytes > 0) ? nBytes : ret;
2407   }
2408 
2409   /**
2410    * Needed for features such as delayed calls.  We need to be able to store the current call
2411    * so that we can complete it later or ask questions of what is supported by the current ongoing
2412    * call.
2413    * @return An RpcCallContext backed by the currently ongoing call (gotten from a thread local)
2414    */
2415   public static RpcCallContext getCurrentCall() {
2416     return CurCall.get();
2417   }
2418 
2419   public static boolean isInRpcCallContext() {
2420     return CurCall.get() != null;
2421   }
2422 
2423   /**
2424    * Returns the user credentials associated with the current RPC request or
2425    * <code>null</code> if no credentials were provided.
2426    * @return A User
2427    */
2428   public static User getRequestUser() {
2429     RpcCallContext ctx = getCurrentCall();
2430     return ctx == null? null: ctx.getRequestUser();
2431   }
2432 
2433   /**
2434    * Returns the username for any user associated with the current RPC
2435    * request or <code>null</code> if no user is set.
2436    */
2437   public static String getRequestUserName() {
2438     User user = getRequestUser();
2439     return user == null? null: user.getShortName();
2440   }
2441 
2442   /**
2443    * @return Address of remote client if a request is ongoing, else null
2444    */
2445   public static InetAddress getRemoteAddress() {
2446     RpcCallContext ctx = getCurrentCall();
2447     return ctx == null? null: ctx.getRemoteAddress();
2448   }
2449 
2450   /**
2451    * @param serviceName Some arbitrary string that represents a 'service'.
2452    * @param services Available service instances
2453    * @return Matching BlockingServiceAndInterface pair
2454    */
2455   static BlockingServiceAndInterface getServiceAndInterface(
2456       final List<BlockingServiceAndInterface> services, final String serviceName) {
2457     for (BlockingServiceAndInterface bs : services) {
2458       if (bs.getBlockingService().getDescriptorForType().getName().equals(serviceName)) {
2459         return bs;
2460       }
2461     }
2462     return null;
2463   }
2464 
2465   /**
2466    * @param serviceName Some arbitrary string that represents a 'service'.
2467    * @param services Available services and their service interfaces.
2468    * @return Service interface class for <code>serviceName</code>
2469    */
2470   static Class<?> getServiceInterface(
2471       final List<BlockingServiceAndInterface> services,
2472       final String serviceName) {
2473     BlockingServiceAndInterface bsasi =
2474         getServiceAndInterface(services, serviceName);
2475     return bsasi == null? null: bsasi.getServiceInterface();
2476   }
2477 
2478   /**
2479    * @param serviceName Some arbitrary string that represents a 'service'.
2480    * @param services Available services and their service interfaces.
2481    * @return BlockingService that goes with the passed <code>serviceName</code>
2482    */
2483   static BlockingService getService(
2484       final List<BlockingServiceAndInterface> services,
2485       final String serviceName) {
2486     BlockingServiceAndInterface bsasi =
2487         getServiceAndInterface(services, serviceName);
2488     return bsasi == null? null: bsasi.getBlockingService();
2489   }
2490 
2491   static MonitoredRPCHandler getStatus() {
2492     // It is ugly the way we park status up in RpcServer.  Let it be for now.  TODO.
2493     MonitoredRPCHandler status = RpcServer.MONITORED_RPC.get();
2494     if (status != null) {
2495       return status;
2496     }
2497     status = TaskMonitor.get().createRPCStatus(Thread.currentThread().getName());
2498     status.pause("Waiting for a call");
2499     RpcServer.MONITORED_RPC.set(status);
2500     return status;
2501   }
2502 
2503   /** Returns the remote side ip address when invoked inside an RPC
2504    *  Returns null incase of an error.
2505    *  @return InetAddress
2506    */
2507   public static InetAddress getRemoteIp() {
2508     Call call = CurCall.get();
2509     if (call != null && call.connection.socket != null) {
2510       return call.connection.socket.getInetAddress();
2511     }
2512     return null;
2513   }
2514 
2515 
2516   /**
2517    * A convenience method to bind to a given address and report
2518    * better exceptions if the address is not a valid host.
2519    * @param socket the socket to bind
2520    * @param address the address to bind to
2521    * @param backlog the number of connections allowed in the queue
2522    * @throws BindException if the address can't be bound
2523    * @throws UnknownHostException if the address isn't a valid host name
2524    * @throws IOException other random errors from bind
2525    */
2526   public static void bind(ServerSocket socket, InetSocketAddress address,
2527                           int backlog) throws IOException {
2528     try {
2529       socket.bind(address, backlog);
2530     } catch (BindException e) {
2531       BindException bindException =
2532         new BindException("Problem binding to " + address + " : " +
2533             e.getMessage());
2534       bindException.initCause(e);
2535       throw bindException;
2536     } catch (SocketException e) {
2537       // If they try to bind to a different host's address, give a better
2538       // error message.
2539       if ("Unresolved address".equals(e.getMessage())) {
2540         throw new UnknownHostException("Invalid hostname for server: " +
2541                                        address.getHostName());
2542       }
2543       throw e;
2544     }
2545   }
2546 
2547   @Override
2548   public RpcScheduler getScheduler() {
2549     return scheduler;
2550   }
2551 }