1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.Deque;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.NavigableMap;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeMap;
36
37 import org.apache.commons.lang.NotImplementedException;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.conf.Configuration;
41 import org.apache.hadoop.hbase.ClusterStatus;
42 import org.apache.hadoop.hbase.HBaseIOException;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.RegionLoad;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
47 import org.apache.hadoop.hbase.master.LoadBalancer;
48 import org.apache.hadoop.hbase.master.MasterServices;
49 import org.apache.hadoop.hbase.master.RackManager;
50 import org.apache.hadoop.hbase.master.RegionPlan;
51 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
52 import org.apache.hadoop.util.StringUtils;
53
54 import com.google.common.base.Joiner;
55 import com.google.common.collect.ArrayListMultimap;
56 import com.google.common.collect.Lists;
57 import com.google.common.collect.Sets;
58
59
60
61
62
63
64
65
66 public abstract class BaseLoadBalancer implements LoadBalancer {
67 private static final int MIN_SERVER_BALANCE = 2;
68 private volatile boolean stopped = false;
69
70 private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
71
72 protected final RegionLocationFinder regionFinder = new RegionLocationFinder();
73
74 private static class DefaultRackManager extends RackManager {
75 @Override
76 public String getRack(ServerName server) {
77 return UNKNOWN_RACK;
78 }
79 }
80
81
82
83
/**
 * Creates a balancer that reports through a newly constructed {@link MetricsBalancer}.
 */
protected BaseLoadBalancer() {
  this.metricsBalancer = new MetricsBalancer();
}
87
88
89
90
91
/**
 * Creates a balancer that reports through the supplied metrics object.
 *
 * @param metricsBalancer metrics sink to use; when {@code null} a new {@link MetricsBalancer}
 *          is created instead
 */
protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
  if (metricsBalancer == null) {
    this.metricsBalancer = new MetricsBalancer();
  } else {
    this.metricsBalancer = metricsBalancer;
  }
}
95
96
97
98
99
100
101
102
103
104
105
106 protected static class Cluster {
107 ServerName[] servers;
108 String[] hosts;
109 String[] racks;
110 boolean multiServersPerHost = false;
111
112 ArrayList<String> tables;
113 HRegionInfo[] regions;
114 Deque<RegionLoad>[] regionLoads;
115
116 int[][] regionLocations;
117
118 int[] serverIndexToHostIndex;
119 int[] serverIndexToRackIndex;
120
121 int[][] regionsPerServer;
122 int[][] regionsPerHost;
123 int[][] regionsPerRack;
124 int[][] primariesOfRegionsPerServer;
125 int[][] primariesOfRegionsPerHost;
126 int[][] primariesOfRegionsPerRack;
127
128 int[][] serversPerHost;
129 int[][] serversPerRack;
130 int[] regionIndexToServerIndex;
131 int[] initialRegionIndexToServerIndex;
132 int[] regionIndexToTableIndex;
133 int[][] numRegionsPerServerPerTable;
134 int[] numMaxRegionsPerTable;
135 int[] regionIndexToPrimaryIndex;
136 boolean hasRegionReplicas = false;
137
138 Integer[] serverIndicesSortedByRegionCount;
139
140 Map<String, Integer> serversToIndex;
141 Map<String, Integer> hostsToIndex;
142 Map<String, Integer> racksToIndex;
143 Map<String, Integer> tablesToIndex;
144 Map<HRegionInfo, Integer> regionsToIndex;
145
146 int numServers;
147 int numHosts;
148 int numRacks;
149 int numTables;
150 int numRegions;
151
152 int numMovedRegions = 0;
153 Map<ServerName, List<HRegionInfo>> clusterState;
154
155 protected final RackManager rackManager;
156
/**
 * Builds a cluster view that contains no unassigned regions; delegates to the five-argument
 * constructor with a {@code null} unassigned-region collection.
 *
 * @param clusterState server -> regions currently hosted by it
 * @param loads region name (or encoded name) -> load history
 * @param regionFinder finder used to look up block locations per region
 * @param rackManager source of rack names
 */
protected Cluster(
    Map<ServerName, List<HRegionInfo>> clusterState,
    Map<String, Deque<RegionLoad>> loads,
    RegionLocationFinder regionFinder,
    RackManager rackManager) {
  this((Collection<HRegionInfo>) null, clusterState, loads, regionFinder, rackManager);
}
165
166 @SuppressWarnings("unchecked")
167 protected Cluster(
168 Collection<HRegionInfo> unassignedRegions,
169 Map<ServerName, List<HRegionInfo>> clusterState,
170 Map<String, Deque<RegionLoad>> loads,
171 RegionLocationFinder regionFinder,
172 RackManager rackManager) {
173
174 if (unassignedRegions == null) {
175 unassignedRegions = EMPTY_REGION_LIST;
176 }
177
178 serversToIndex = new HashMap<String, Integer>();
179 hostsToIndex = new HashMap<String, Integer>();
180 racksToIndex = new HashMap<String, Integer>();
181 tablesToIndex = new HashMap<String, Integer>();
182
183
184 tables = new ArrayList<String>();
185 this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
186
187 numRegions = 0;
188
189 List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
190 List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
191 this.clusterState = clusterState;
192
193
194
195 for (ServerName sn : clusterState.keySet()) {
196 if (serversToIndex.get(sn.getHostAndPort()) == null) {
197 serversToIndex.put(sn.getHostAndPort(), numServers++);
198 }
199 if (!hostsToIndex.containsKey(sn.getHostname())) {
200 hostsToIndex.put(sn.getHostname(), numHosts++);
201 serversPerHostList.add(new ArrayList<Integer>(1));
202 }
203
204 int serverIndex = serversToIndex.get(sn.getHostAndPort());
205 int hostIndex = hostsToIndex.get(sn.getHostname());
206 serversPerHostList.get(hostIndex).add(serverIndex);
207
208 String rack = this.rackManager.getRack(sn);
209 if (!racksToIndex.containsKey(rack)) {
210 racksToIndex.put(rack, numRacks++);
211 serversPerRackList.add(new ArrayList<Integer>());
212 }
213 int rackIndex = racksToIndex.get(rack);
214 serversPerRackList.get(rackIndex).add(serverIndex);
215 }
216
217
218 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
219 numRegions += entry.getValue().size();
220 }
221 numRegions += unassignedRegions.size();
222
223 regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
224 servers = new ServerName[numServers];
225 serversPerHost = new int[numHosts][];
226 serversPerRack = new int[numRacks][];
227 regions = new HRegionInfo[numRegions];
228 regionIndexToServerIndex = new int[numRegions];
229 initialRegionIndexToServerIndex = new int[numRegions];
230 regionIndexToTableIndex = new int[numRegions];
231 regionIndexToPrimaryIndex = new int[numRegions];
232 regionLoads = new Deque[numRegions];
233 regionLocations = new int[numRegions][];
234 serverIndicesSortedByRegionCount = new Integer[numServers];
235
236 serverIndexToHostIndex = new int[numServers];
237 serverIndexToRackIndex = new int[numServers];
238 regionsPerServer = new int[numServers][];
239 regionsPerHost = new int[numHosts][];
240 regionsPerRack = new int[numRacks][];
241 primariesOfRegionsPerServer = new int[numServers][];
242 primariesOfRegionsPerHost = new int[numHosts][];
243 primariesOfRegionsPerRack = new int[numRacks][];
244
245 int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;
246
247 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
248 int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
249
250
251
252 if (servers[serverIndex] == null ||
253 servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
254 servers[serverIndex] = entry.getKey();
255 }
256
257 if (regionsPerServer[serverIndex] != null) {
258
259
260 regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
261 } else {
262 regionsPerServer[serverIndex] = new int[entry.getValue().size()];
263 }
264 primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
265 serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
266 }
267
268 hosts = new String[numHosts];
269 for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
270 hosts[entry.getValue()] = entry.getKey();
271 }
272 racks = new String[numRacks];
273 for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
274 racks[entry.getValue()] = entry.getKey();
275 }
276
277 for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
278 int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
279 regionPerServerIndex = 0;
280
281 int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
282 serverIndexToHostIndex[serverIndex] = hostIndex;
283
284 int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
285 serverIndexToRackIndex[serverIndex] = rackIndex;
286
287 for (HRegionInfo region : entry.getValue()) {
288 registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
289
290 regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
291 regionIndex++;
292 }
293 }
294 for (HRegionInfo region : unassignedRegions) {
295 registerRegion(region, regionIndex, -1, loads, regionFinder);
296 regionIndex++;
297 }
298
299 for (int i = 0; i < serversPerHostList.size(); i++) {
300 serversPerHost[i] = new int[serversPerHostList.get(i).size()];
301 for (int j = 0; j < serversPerHost[i].length; j++) {
302 serversPerHost[i][j] = serversPerHostList.get(i).get(j);
303 }
304 if (serversPerHost[i].length > 1) {
305 multiServersPerHost = true;
306 }
307 }
308
309 for (int i = 0; i < serversPerRackList.size(); i++) {
310 serversPerRack[i] = new int[serversPerRackList.get(i).size()];
311 for (int j = 0; j < serversPerRack[i].length; j++) {
312 serversPerRack[i][j] = serversPerRackList.get(i).get(j);
313 }
314 }
315
316 numTables = tables.size();
317 numRegionsPerServerPerTable = new int[numServers][numTables];
318
319 for (int i = 0; i < numServers; i++) {
320 for (int j = 0; j < numTables; j++) {
321 numRegionsPerServerPerTable[i][j] = 0;
322 }
323 }
324
325 for (int i=0; i < regionIndexToServerIndex.length; i++) {
326 if (regionIndexToServerIndex[i] >= 0) {
327 numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
328 }
329 }
330
331 numMaxRegionsPerTable = new int[numTables];
332 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
333 for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
334 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
335 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
336 }
337 }
338 }
339
340 for (int i = 0; i < regions.length; i ++) {
341 HRegionInfo info = regions[i];
342 if (RegionReplicaUtil.isDefaultReplica(info)) {
343 regionIndexToPrimaryIndex[i] = i;
344 } else {
345 hasRegionReplicas = true;
346 HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
347 regionIndexToPrimaryIndex[i] =
348 regionsToIndex.containsKey(primaryInfo) ?
349 regionsToIndex.get(primaryInfo):
350 -1;
351 }
352 }
353
354 for (int i = 0; i < regionsPerServer.length; i++) {
355 primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
356 for (int j = 0; j < regionsPerServer[i].length; j++) {
357 int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
358 primariesOfRegionsPerServer[i][j] = primaryIndex;
359 }
360
361 Arrays.sort(primariesOfRegionsPerServer[i]);
362 }
363
364
365 if (multiServersPerHost) {
366 for (int i = 0 ; i < serversPerHost.length; i++) {
367 int numRegionsPerHost = 0;
368 for (int j = 0; j < serversPerHost[i].length; j++) {
369 numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
370 }
371 regionsPerHost[i] = new int[numRegionsPerHost];
372 primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
373 }
374 for (int i = 0 ; i < serversPerHost.length; i++) {
375 int numRegionPerHostIndex = 0;
376 for (int j = 0; j < serversPerHost[i].length; j++) {
377 for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
378 int region = regionsPerServer[serversPerHost[i][j]][k];
379 regionsPerHost[i][numRegionPerHostIndex] = region;
380 int primaryIndex = regionIndexToPrimaryIndex[region];
381 primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
382 numRegionPerHostIndex++;
383 }
384 }
385
386 Arrays.sort(primariesOfRegionsPerHost[i]);
387 }
388 }
389
390
391 if (numRacks > 1) {
392 for (int i = 0 ; i < serversPerRack.length; i++) {
393 int numRegionsPerRack = 0;
394 for (int j = 0; j < serversPerRack[i].length; j++) {
395 numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
396 }
397 regionsPerRack[i] = new int[numRegionsPerRack];
398 primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
399 }
400
401 for (int i = 0 ; i < serversPerRack.length; i++) {
402 int numRegionPerRackIndex = 0;
403 for (int j = 0; j < serversPerRack[i].length; j++) {
404 for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
405 int region = regionsPerServer[serversPerRack[i][j]][k];
406 regionsPerRack[i][numRegionPerRackIndex] = region;
407 int primaryIndex = regionIndexToPrimaryIndex[region];
408 primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
409 numRegionPerRackIndex++;
410 }
411 }
412
413 Arrays.sort(primariesOfRegionsPerRack[i]);
414 }
415 }
416 }
417
418
419 private void registerRegion(HRegionInfo region, int regionIndex, int serverIndex,
420 Map<String, Deque<RegionLoad>> loads, RegionLocationFinder regionFinder) {
421 String tableName = region.getTable().getNameAsString();
422 if (!tablesToIndex.containsKey(tableName)) {
423 tables.add(tableName);
424 tablesToIndex.put(tableName, tablesToIndex.size());
425 }
426 int tableIndex = tablesToIndex.get(tableName);
427
428 regionsToIndex.put(region, regionIndex);
429 regions[regionIndex] = region;
430 regionIndexToServerIndex[regionIndex] = serverIndex;
431 initialRegionIndexToServerIndex[regionIndex] = serverIndex;
432 regionIndexToTableIndex[regionIndex] = tableIndex;
433
434
435 if (loads != null) {
436 Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
437
438 if (rl == null) {
439
440 rl = loads.get(region.getEncodedName());
441 }
442 regionLoads[regionIndex] = rl;
443 }
444
445 if (regionFinder != null) {
446
447 List<ServerName> loc = regionFinder.getTopBlockLocations(region);
448 regionLocations[regionIndex] = new int[loc.size()];
449 for (int i=0; i < loc.size(); i++) {
450 regionLocations[regionIndex][i] =
451 loc.get(i) == null ? -1 :
452 (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
453 : serversToIndex.get(loc.get(i).getHostAndPort()));
454 }
455 }
456 }
457
458
459 public static class Action {
460 public static enum Type {
461 ASSIGN_REGION,
462 MOVE_REGION,
463 SWAP_REGIONS,
464 NULL,
465 }
466
467 public Type type;
468 public Action (Type type) {this.type = type;}
469
470 public Action undoAction() { return this; }
471 @Override
472 public String toString() { return type + ":";}
473 }
474
475 public static class AssignRegionAction extends Action {
476 public int region;
477 public int server;
478 public AssignRegionAction(int region, int server) {
479 super(Type.ASSIGN_REGION);
480 this.region = region;
481 this.server = server;
482 }
483 @Override
484 public Action undoAction() {
485
486
487 throw new NotImplementedException();
488 }
489 @Override
490 public String toString() {
491 return type + ": " + region + ":" + server;
492 }
493 }
494
495 public static class MoveRegionAction extends Action {
496 public int region;
497 public int fromServer;
498 public int toServer;
499
500 public MoveRegionAction(int region, int fromServer, int toServer) {
501 super(Type.MOVE_REGION);
502 this.fromServer = fromServer;
503 this.region = region;
504 this.toServer = toServer;
505 }
506 @Override
507 public Action undoAction() {
508 return new MoveRegionAction (region, toServer, fromServer);
509 }
510 @Override
511 public String toString() {
512 return type + ": " + region + ":" + fromServer + " -> " + toServer;
513 }
514 }
515
516 public static class SwapRegionsAction extends Action {
517 public int fromServer;
518 public int fromRegion;
519 public int toServer;
520 public int toRegion;
521 public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
522 super(Type.SWAP_REGIONS);
523 this.fromServer = fromServer;
524 this.fromRegion = fromRegion;
525 this.toServer = toServer;
526 this.toRegion = toRegion;
527 }
528 @Override
529 public Action undoAction() {
530 return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
531 }
532 @Override
533 public String toString() {
534 return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
535 }
536 }
537
538 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
539 justification="Mistake. Too disruptive to change now")
540 public static final Action NullAction = new Action(Type.NULL);
541
542 public void doAction(Action action) {
543 switch (action.type) {
544 case NULL: break;
545 case ASSIGN_REGION:
546 AssignRegionAction ar = (AssignRegionAction) action;
547 regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
548 regionMoved(ar.region, -1, ar.server);
549 break;
550 case MOVE_REGION:
551 MoveRegionAction mra = (MoveRegionAction) action;
552 regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
553 regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
554 regionMoved(mra.region, mra.fromServer, mra.toServer);
555 break;
556 case SWAP_REGIONS:
557 SwapRegionsAction a = (SwapRegionsAction) action;
558 regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
559 regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
560 regionMoved(a.fromRegion, a.fromServer, a.toServer);
561 regionMoved(a.toRegion, a.toServer, a.fromServer);
562 break;
563 default:
564 throw new RuntimeException("Uknown action:" + action.type);
565 }
566 }
567
568
569
570
571
572
573
574
575 boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
576 if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
577 return false;
578 }
579 int server = serversToIndex.get(serverName.getHostAndPort());
580 int region = regionsToIndex.get(regionInfo);
581
582 int primary = regionIndexToPrimaryIndex[region];
583
584
585
586
587 if (contains(primariesOfRegionsPerServer[server], primary)) {
588
589 for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
590 if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
591 return true;
592 }
593 }
594 return false;
595 }
596
597
598 if (multiServersPerHost) {
599 int host = serverIndexToHostIndex[server];
600 if (contains(primariesOfRegionsPerHost[host], primary)) {
601
602 for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
603 if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
604 return true;
605 }
606 }
607 return false;
608 }
609 }
610
611
612 if (numRacks > 1) {
613 int rack = serverIndexToRackIndex[server];
614 if (contains(primariesOfRegionsPerRack[rack], primary)) {
615
616 for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
617 if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
618 return true;
619 }
620 }
621 return false;
622 }
623 }
624 return false;
625 }
626
627 void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
628 if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
629 return;
630 }
631 int server = serversToIndex.get(serverName.getHostAndPort());
632 int region = regionsToIndex.get(regionInfo);
633 doAction(new AssignRegionAction(region, server));
634 }
635
636 void regionMoved(int region, int oldServer, int newServer) {
637 regionIndexToServerIndex[region] = newServer;
638 if (initialRegionIndexToServerIndex[region] == newServer) {
639 numMovedRegions--;
640 } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
641 numMovedRegions++;
642 }
643 int tableIndex = regionIndexToTableIndex[region];
644 if (oldServer >= 0) {
645 numRegionsPerServerPerTable[oldServer][tableIndex]--;
646 }
647 numRegionsPerServerPerTable[newServer][tableIndex]++;
648
649
650 if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
651 numRegionsPerServerPerTable[newServer][tableIndex] = numMaxRegionsPerTable[tableIndex];
652 } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
653 == numMaxRegionsPerTable[tableIndex]) {
654
655 for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
656 if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
657 numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
658 }
659 }
660 }
661
662
663 int primary = regionIndexToPrimaryIndex[region];
664 if (oldServer >= 0) {
665 primariesOfRegionsPerServer[oldServer] = removeRegion(
666 primariesOfRegionsPerServer[oldServer], primary);
667 }
668 primariesOfRegionsPerServer[newServer] = addRegionSorted(
669 primariesOfRegionsPerServer[newServer], primary);
670
671
672 if (multiServersPerHost) {
673 int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
674 int newHost = serverIndexToHostIndex[newServer];
675 if (newHost != oldHost) {
676 regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
677 primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
678 if (oldHost >= 0) {
679 regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
680 primariesOfRegionsPerHost[oldHost] = removeRegion(
681 primariesOfRegionsPerHost[oldHost], primary);
682 }
683 }
684 }
685
686
687 if (numRacks > 1) {
688 int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
689 int newRack = serverIndexToRackIndex[newServer];
690 if (newRack != oldRack) {
691 regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
692 primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
693 if (oldRack >= 0) {
694 regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
695 primariesOfRegionsPerRack[oldRack] = removeRegion(
696 primariesOfRegionsPerRack[oldRack], primary);
697 }
698 }
699 }
700 }
701
702 int[] removeRegion(int[] regions, int regionIndex) {
703
704 int[] newRegions = new int[regions.length - 1];
705 int i = 0;
706 for (i = 0; i < regions.length; i++) {
707 if (regions[i] == regionIndex) {
708 break;
709 }
710 newRegions[i] = regions[i];
711 }
712 System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
713 return newRegions;
714 }
715
716 int[] addRegion(int[] regions, int regionIndex) {
717 int[] newRegions = new int[regions.length + 1];
718 System.arraycopy(regions, 0, newRegions, 0, regions.length);
719 newRegions[newRegions.length - 1] = regionIndex;
720 return newRegions;
721 }
722
723 int[] addRegionSorted(int[] regions, int regionIndex) {
724 int[] newRegions = new int[regions.length + 1];
725 int i = 0;
726 for (i = 0; i < regions.length; i++) {
727 if (regions[i] > regionIndex) {
728 break;
729 }
730 }
731 System.arraycopy(regions, 0, newRegions, 0, i);
732 System.arraycopy(regions, i, newRegions, i+1, regions.length - i);
733 newRegions[i] = regionIndex;
734
735 return newRegions;
736 }
737
738 int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
739 int i = 0;
740 for (i = 0; i < regions.length; i++) {
741 if (regions[i] == regionIndex) {
742 regions[i] = newRegionIndex;
743 break;
744 }
745 }
746 return regions;
747 }
748
749 void sortServersByRegionCount() {
750 Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
751 }
752
753 int getNumRegions(int server) {
754 return regionsPerServer[server].length;
755 }
756
757 boolean contains(int[] arr, int val) {
758 return Arrays.binarySearch(arr, val) >= 0;
759 }
760
761 private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
762 @Override
763 public int compare(Integer integer, Integer integer2) {
764 return Integer.valueOf(getNumRegions(integer)).compareTo(getNumRegions(integer2));
765 }
766 };
767
768 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
769 justification="Not important but should be fixed")
770 @Override
771 public String toString() {
772 String desc = "Cluster{" +
773 "servers=[";
774 for(ServerName sn:servers) {
775 desc += sn.getHostAndPort() + ", ";
776 }
777 desc +=
778 ", serverIndicesSortedByRegionCount="+
779 Arrays.toString(serverIndicesSortedByRegionCount) +
780 ", regionsPerServer=[";
781
782 for (int[]r:regionsPerServer) {
783 desc += Arrays.toString(r);
784 }
785 desc += "]" +
786 ", numMaxRegionsPerTable=" +
787 Arrays.toString(numMaxRegionsPerTable) +
788 ", numRegions=" +
789 numRegions +
790 ", numServers=" +
791 numServers +
792 ", numTables=" +
793 numTables +
794 ", numMovedRegions=" +
795 numMovedRegions +
796 '}';
797 return desc;
798 }
799 }
800
801
802 protected float slop;
803 protected Configuration config;
804 protected RackManager rackManager;
805 private static final Random RANDOM = new Random(System.currentTimeMillis());
806 private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
807
808 public static final String TABLES_ON_MASTER =
809 "hbase.balancer.tablesOnMaster";
810
811 protected final Set<String> tablesOnMaster = new HashSet<String>();
812 protected MetricsBalancer metricsBalancer = null;
813 protected ClusterStatus clusterStatus = null;
814 protected ServerName masterServerName;
815 protected MasterServices services;
816
817 protected static String[] getTablesOnMaster(Configuration conf) {
818 String valueString = conf.get(TABLES_ON_MASTER);
819 if (valueString != null) {
820 valueString = valueString.trim();
821 }
822 if (valueString == null || valueString.equalsIgnoreCase("none")) {
823 return null;
824 }
825 return StringUtils.getStrings(valueString);
826 }
827
828
829
830
831 public static boolean tablesOnMaster(Configuration conf) {
832 String[] tables = getTablesOnMaster(conf);
833 return tables != null && tables.length > 0;
834 }
835
836 @Override
837 public void setConf(Configuration conf) {
838 setSlop(conf);
839 if (slop < 0) slop = 0;
840 else if (slop > 1) slop = 1;
841
842 this.config = conf;
843 String[] tables = getTablesOnMaster(conf);
844 if (tables != null && tables.length > 0) {
845 Collections.addAll(tablesOnMaster, tables);
846 }
847 this.rackManager = new RackManager(getConf());
848 regionFinder.setConf(conf);
849 }
850
851 protected void setSlop(Configuration conf) {
852 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
853 }
854
855
856
857
858
859 public boolean shouldBeOnMaster(HRegionInfo region) {
860 return tablesOnMaster.contains(region.getTable().getNameAsString())
861 && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
862 }
863
864
865
866
867 protected List<RegionPlan> balanceMasterRegions(
868 Map<ServerName, List<HRegionInfo>> clusterMap) {
869 if (masterServerName == null
870 || clusterMap == null || clusterMap.size() <= 1) return null;
871 List<RegionPlan> plans = null;
872 List<HRegionInfo> regions = clusterMap.get(masterServerName);
873 if (regions != null) {
874 Iterator<ServerName> keyIt = null;
875 for (HRegionInfo region: regions) {
876 if (shouldBeOnMaster(region)) continue;
877
878
879 if (keyIt == null || !keyIt.hasNext()) {
880 keyIt = clusterMap.keySet().iterator();
881 }
882 ServerName dest = keyIt.next();
883 if (masterServerName.equals(dest)) {
884 if (!keyIt.hasNext()) {
885 keyIt = clusterMap.keySet().iterator();
886 }
887 dest = keyIt.next();
888 }
889
890
891 RegionPlan plan = new RegionPlan(region, masterServerName, dest);
892 if (plans == null) {
893 plans = new ArrayList<RegionPlan>();
894 }
895 plans.add(plan);
896 }
897 }
898 for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
899 if (masterServerName.equals(server.getKey())) continue;
900 for (HRegionInfo region: server.getValue()) {
901 if (!shouldBeOnMaster(region)) continue;
902
903
904 RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
905 if (plans == null) {
906 plans = new ArrayList<RegionPlan>();
907 }
908 plans.add(plan);
909 }
910 }
911 return plans;
912 }
913
914
915
916
917 protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
918 Collection<HRegionInfo> regions, List<ServerName> servers) {
919 if (servers == null || regions == null || regions.isEmpty()) {
920 return null;
921 }
922 Map<ServerName, List<HRegionInfo>> assignments
923 = new TreeMap<ServerName, List<HRegionInfo>>();
924 if (masterServerName != null && servers.contains(masterServerName)) {
925 assignments.put(masterServerName, new ArrayList<HRegionInfo>());
926 for (HRegionInfo region: regions) {
927 if (shouldBeOnMaster(region)) {
928 assignments.get(masterServerName).add(region);
929 }
930 }
931 }
932 return assignments;
933 }
934
935 @Override
936 public Configuration getConf() {
937 return this.config;
938 }
939
940 @Override
941 public void setClusterStatus(ClusterStatus st) {
942 this.clusterStatus = st;
943 regionFinder.setClusterStatus(st);
944 }
945
946 @Override
947 public void setMasterServices(MasterServices masterServices) {
948 masterServerName = masterServices.getServerName();
949 this.services = masterServices;
950 this.regionFinder.setServices(masterServices);
951 }
952
953 public void setRackManager(RackManager rackManager) {
954 this.rackManager = rackManager;
955 }
956
957 protected boolean needsBalance(Cluster c) {
958 ClusterLoadState cs = new ClusterLoadState(c.clusterState);
959 if (cs.getNumServers() < MIN_SERVER_BALANCE) {
960 if (LOG.isDebugEnabled()) {
961 LOG.debug("Not running balancer because only " + cs.getNumServers()
962 + " active regionserver(s)");
963 }
964 return false;
965 }
966 if(areSomeRegionReplicasColocated(c)) return true;
967
968
969 float average = cs.getLoadAverage();
970 int floor = (int) Math.floor(average * (1 - slop));
971 int ceiling = (int) Math.ceil(average * (1 + slop));
972 if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
973 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
974 if (LOG.isTraceEnabled()) {
975
976 LOG.trace("Skipping load balancing because balanced cluster; " +
977 "servers=" + cs.getNumServers() +
978 " regions=" + cs.getNumRegions() + " average=" + average +
979 " mostloaded=" + serversByLoad.lastKey().getLoad() +
980 " leastloaded=" + serversByLoad.firstKey().getLoad());
981 }
982 return false;
983 }
984 return true;
985 }
986
987
988
989
990
991
992
993
994 protected boolean areSomeRegionReplicasColocated(Cluster c) {
995 return false;
996 }
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015 @Override
1016 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1017 List<ServerName> servers) {
1018 metricsBalancer.incrMiscInvocations();
1019 Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1020 if (assignments != null && !assignments.isEmpty()) {
1021 servers = new ArrayList<ServerName>(servers);
1022
1023 servers.remove(masterServerName);
1024 List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1025 if (!masterRegions.isEmpty()) {
1026 regions = new ArrayList<HRegionInfo>(regions);
1027 for (HRegionInfo region: masterRegions) {
1028 regions.remove(region);
1029 }
1030 }
1031 }
1032 if (regions == null || regions.isEmpty()) {
1033 return assignments;
1034 }
1035
1036 int numServers = servers == null ? 0 : servers.size();
1037 if (numServers == 0) {
1038 LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1039 return null;
1040 }
1041
1042
1043
1044
1045
1046
1047 if (numServers == 1) {
1048 ServerName server = servers.get(0);
1049 assignments.put(server, new ArrayList<HRegionInfo>(regions));
1050 return assignments;
1051 }
1052
1053 Cluster cluster = createCluster(servers, regions);
1054 List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1055
1056 roundRobinAssignment(cluster, regions, unassignedRegions,
1057 servers, assignments);
1058
1059 List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1060
1061 int serverIdx = RANDOM.nextInt(numServers);
1062 for (HRegionInfo region : unassignedRegions) {
1063 boolean assigned = false;
1064 for (int j = 0; j < numServers; j++) {
1065 ServerName serverName = servers.get((j + serverIdx) % numServers);
1066 if (!cluster.wouldLowerAvailability(region, serverName)) {
1067 List<HRegionInfo> serverRegions = assignments.get(serverName);
1068 if (serverRegions == null) {
1069 serverRegions = new ArrayList<HRegionInfo>();
1070 assignments.put(serverName, serverRegions);
1071 }
1072 serverRegions.add(region);
1073 cluster.doAssignRegion(region, serverName);
1074 serverIdx = (j + serverIdx + 1) % numServers;
1075 assigned = true;
1076 break;
1077 }
1078 }
1079 if (!assigned) {
1080 lastFewRegions.add(region);
1081 }
1082 }
1083
1084
1085 for (HRegionInfo region : lastFewRegions) {
1086 int i = RANDOM.nextInt(numServers);
1087 ServerName server = servers.get(i);
1088 List<HRegionInfo> serverRegions = assignments.get(server);
1089 if (serverRegions == null) {
1090 serverRegions = new ArrayList<HRegionInfo>();
1091 assignments.put(server, serverRegions);
1092 }
1093 serverRegions.add(region);
1094 cluster.doAssignRegion(region, server);
1095 }
1096 return assignments;
1097 }
1098
1099 protected Cluster createCluster(List<ServerName> servers,
1100 Collection<HRegionInfo> regions) {
1101
1102
1103
1104
1105 Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);
1106
1107 for (ServerName server : servers) {
1108 if (!clusterState.containsKey(server)) {
1109 clusterState.put(server, EMPTY_REGION_LIST);
1110 }
1111 }
1112 return new Cluster(regions, clusterState, null, this.regionFinder,
1113 rackManager);
1114 }
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133 @Override
1134 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1135 List<ServerName> servers) {
1136 metricsBalancer.incrMiscInvocations();
1137 if (servers == null || servers.isEmpty()) {
1138 LOG.warn("Wanted to do random assignment but no servers to assign to");
1139 return null;
1140 }
1141
1142 Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1143 for (HRegionInfo region : regions) {
1144 assignments.put(region, randomAssignment(region, servers));
1145 }
1146 return assignments;
1147 }
1148
1149
1150
1151
1152 @Override
1153 public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1154 metricsBalancer.incrMiscInvocations();
1155 if (servers != null && servers.contains(masterServerName)) {
1156 if (shouldBeOnMaster(regionInfo)) {
1157 return masterServerName;
1158 }
1159 servers = new ArrayList<ServerName>(servers);
1160
1161 servers.remove(masterServerName);
1162 }
1163
1164 int numServers = servers == null ? 0 : servers.size();
1165 if (numServers == 0) {
1166 LOG.warn("Wanted to do retain assignment but no servers to assign to");
1167 return null;
1168 }
1169 if (numServers == 1) {
1170 return servers.get(0);
1171 }
1172
1173 List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1174 Cluster cluster = createCluster(servers, regions);
1175 return randomAssignment(cluster, regionInfo, servers);
1176 }
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195 @Override
1196 public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1197 List<ServerName> servers) {
1198
1199 metricsBalancer.incrMiscInvocations();
1200 Map<ServerName, List<HRegionInfo>> assignments
1201 = assignMasterRegions(regions.keySet(), servers);
1202 if (assignments != null && !assignments.isEmpty()) {
1203 servers = new ArrayList<ServerName>(servers);
1204
1205 servers.remove(masterServerName);
1206 List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1207 if (!masterRegions.isEmpty()) {
1208 regions = new HashMap<HRegionInfo, ServerName>(regions);
1209 for (HRegionInfo region: masterRegions) {
1210 regions.remove(region);
1211 }
1212 }
1213 }
1214 if (regions == null || regions.isEmpty()) {
1215 return assignments;
1216 }
1217
1218 int numServers = servers == null ? 0 : servers.size();
1219 if (numServers == 0) {
1220 LOG.warn("Wanted to do retain assignment but no servers to assign to");
1221 return null;
1222 }
1223 if (numServers == 1) {
1224 ServerName server = servers.get(0);
1225 assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1226 return assignments;
1227 }
1228
1229
1230
1231
1232
1233
1234
1235 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1236 for (ServerName server : servers) {
1237 assignments.put(server, new ArrayList<HRegionInfo>());
1238 serversByHostname.put(server.getHostname(), server);
1239 }
1240
1241
1242
1243
1244 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1245
1246 int numRandomAssignments = 0;
1247 int numRetainedAssigments = 0;
1248
1249 Cluster cluster = createCluster(servers, regions.keySet());
1250
1251 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1252 HRegionInfo region = entry.getKey();
1253 ServerName oldServerName = entry.getValue();
1254 List<ServerName> localServers = new ArrayList<ServerName>();
1255 if (oldServerName != null) {
1256 localServers = serversByHostname.get(oldServerName.getHostname());
1257 }
1258 if (localServers.isEmpty()) {
1259
1260
1261 ServerName randomServer = randomAssignment(cluster, region, servers);
1262 assignments.get(randomServer).add(region);
1263 numRandomAssignments++;
1264 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
1265 } else if (localServers.size() == 1) {
1266
1267 ServerName target = localServers.get(0);
1268 assignments.get(target).add(region);
1269 cluster.doAssignRegion(region, target);
1270 numRetainedAssigments++;
1271 } else {
1272
1273 if (localServers.contains(oldServerName)) {
1274 assignments.get(oldServerName).add(region);
1275 cluster.doAssignRegion(region, oldServerName);
1276 } else {
1277 ServerName target = null;
1278 for (ServerName tmp: localServers) {
1279 if (tmp.getPort() == oldServerName.getPort()) {
1280 target = tmp;
1281 break;
1282 }
1283 }
1284 if (target == null) {
1285 target = randomAssignment(cluster, region, localServers);
1286 }
1287 assignments.get(target).add(region);
1288 }
1289 numRetainedAssigments++;
1290 }
1291 }
1292
1293 String randomAssignMsg = "";
1294 if (numRandomAssignments > 0) {
1295 randomAssignMsg =
1296 numRandomAssignments + " regions were assigned "
1297 + "to random hosts, since the old hosts for these regions are no "
1298 + "longer present in the cluster. These hosts were:\n "
1299 + Joiner.on("\n ").join(oldHostsNoLongerPresent);
1300 }
1301
1302 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1303 + " retained the pre-restart assignment. " + randomAssignMsg);
1304 return assignments;
1305 }
1306
1307 @Override
1308 public void initialize() throws HBaseIOException{
1309 }
1310
1311 @Override
1312 public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
1313 }
1314
1315 @Override
1316 public void regionOffline(HRegionInfo regionInfo) {
1317 }
1318
1319 @Override
1320 public boolean isStopped() {
1321 return stopped;
1322 }
1323
1324 @Override
1325 public void stop(String why) {
1326 LOG.info("Load Balancer stop requested: "+why);
1327 stopped = true;
1328 }
1329
1330
1331
1332
1333 private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1334 List<ServerName> servers) {
1335 int numServers = servers.size();
1336 ServerName sn = null;
1337 final int maxIterations = numServers * 4;
1338 int iterations = 0;
1339
1340 do {
1341 int i = RANDOM.nextInt(numServers);
1342 sn = servers.get(i);
1343 } while (cluster.wouldLowerAvailability(regionInfo, sn)
1344 && iterations++ < maxIterations);
1345 cluster.doAssignRegion(regionInfo, sn);
1346 return sn;
1347 }
1348
1349
1350
1351
1352 private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1353 List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1354 Map<ServerName, List<HRegionInfo>> assignments) {
1355
1356 int numServers = servers.size();
1357 int numRegions = regions.size();
1358 int max = (int) Math.ceil((float) numRegions / numServers);
1359 int serverIdx = 0;
1360 if (numServers > 1) {
1361 serverIdx = RANDOM.nextInt(numServers);
1362 }
1363 int regionIdx = 0;
1364
1365 for (int j = 0; j < numServers; j++) {
1366 ServerName server = servers.get((j + serverIdx) % numServers);
1367 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1368 for (int i = regionIdx; i < numRegions; i += numServers) {
1369 HRegionInfo region = regions.get(i % numRegions);
1370 if (cluster.wouldLowerAvailability(region, server)) {
1371 unassignedRegions.add(region);
1372 } else {
1373 serverRegions.add(region);
1374 cluster.doAssignRegion(region, server);
1375 }
1376 }
1377 assignments.put(server, serverRegions);
1378 regionIdx++;
1379 }
1380 }
1381
1382 protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1383 Collection<HRegionInfo> regions) {
1384 if (this.services != null && this.services.getAssignmentManager() != null) {
1385 return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1386 } else {
1387 return new HashMap<ServerName, List<HRegionInfo>>();
1388 }
1389 }
1390
1391 @Override
1392 public void onConfigurationChange(Configuration conf) {
1393 }
1394 }