1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayDeque;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Deque;
24 import java.util.HashMap;
25 import java.util.LinkedList;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Random;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.hbase.classification.InterfaceAudience;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.hbase.ClusterStatus;
36 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
37 import org.apache.hadoop.hbase.HConstants;
38 import org.apache.hadoop.hbase.HRegionInfo;
39 import org.apache.hadoop.hbase.HTableDescriptor;
40 import org.apache.hadoop.hbase.RegionLoad;
41 import org.apache.hadoop.hbase.ServerLoad;
42 import org.apache.hadoop.hbase.ServerName;
43 import org.apache.hadoop.hbase.TableName;
44 import org.apache.hadoop.hbase.master.MasterServices;
45 import org.apache.hadoop.hbase.master.RegionPlan;
46 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
47 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
48 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
49 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
50 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
51 import org.apache.hadoop.hbase.util.Bytes;
52 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
98 public class StochasticLoadBalancer extends BaseLoadBalancer {
99
100 protected static final String STEPS_PER_REGION_KEY =
101 "hbase.master.balancer.stochastic.stepsPerRegion";
102 protected static final String MAX_STEPS_KEY =
103 "hbase.master.balancer.stochastic.maxSteps";
104 protected static final String MAX_RUNNING_TIME_KEY =
105 "hbase.master.balancer.stochastic.maxRunningTime";
106 protected static final String KEEP_REGION_LOADS =
107 "hbase.master.balancer.stochastic.numRegionLoadsToRemember";
108 private static final String TABLE_FUNCTION_SEP = "_";
109
110 private static final Random RANDOM = new Random(System.currentTimeMillis());
111 private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);
112
113 Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();
114
115
116 private int maxSteps = 1000000;
117 private int stepsPerRegion = 800;
118 private long maxRunningTime = 30 * 1000 * 1;
119 private int numRegionLoadsToRemember = 15;
120
121 private CandidateGenerator[] candidateGenerators;
122 private CostFromRegionLoadFunction[] regionLoadFunctions;
123 private CostFunction[] costFunctions;
124
125
126 private Double curOverallCost = 0d;
127 private Double[] tempFunctionCosts;
128 private Double[] curFunctionCosts;
129
130
131
132 private LocalityBasedCandidateGenerator localityCandidateGenerator;
133 private LocalityCostFunction localityCost;
134 private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
135 private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
136 private boolean isByTable = false;
137 private TableName tableName = null;
138
139
140
141
142
/**
 * Creates a balancer whose metrics sink is a new {@link MetricsStochasticBalancer}.
 */
public StochasticLoadBalancer() {
  super(new MetricsStochasticBalancer());
}
146
147 @Override
148 public void onConfigurationChange(Configuration conf) {
149 setConf(conf);
150 }
151
152 @Override
153 public synchronized void setConf(Configuration conf) {
154 super.setConf(conf);
155 LOG.info("loading config");
156
157 maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
158
159 stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
160 maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
161
162 numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
163 isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
164
165 if (localityCandidateGenerator == null) {
166 localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
167 }
168 localityCost = new LocalityCostFunction(conf, services);
169
170 if (candidateGenerators == null) {
171 candidateGenerators = new CandidateGenerator[] {
172 new RandomCandidateGenerator(),
173 new LoadCandidateGenerator(),
174 localityCandidateGenerator,
175 new RegionReplicaRackCandidateGenerator(),
176 };
177 }
178
179 regionLoadFunctions = new CostFromRegionLoadFunction[] {
180 new ReadRequestCostFunction(conf),
181 new WriteRequestCostFunction(conf),
182 new MemstoreSizeCostFunction(conf),
183 new StoreFileCostFunction(conf)
184 };
185
186 regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
187 regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
188
189 costFunctions = new CostFunction[]{
190 new RegionCountSkewCostFunction(conf),
191 new PrimaryRegionCountSkewCostFunction(conf),
192 new MoveCostFunction(conf),
193 localityCost,
194 new TableSkewCostFunction(conf),
195 regionReplicaHostCostFunction,
196 regionReplicaRackCostFunction,
197 regionLoadFunctions[0],
198 regionLoadFunctions[1],
199 regionLoadFunctions[2],
200 regionLoadFunctions[3],
201 };
202
203 curFunctionCosts= new Double[costFunctions.length];
204 tempFunctionCosts= new Double[costFunctions.length];
205
206 }
207
208 @Override
209 protected void setSlop(Configuration conf) {
210 this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
211 }
212
213 @Override
214 public synchronized void setClusterStatus(ClusterStatus st) {
215 super.setClusterStatus(st);
216 updateRegionLoad();
217 for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
218 cost.setClusterStatus(st);
219 }
220
221
222 try {
223
224 int tablesCount = isByTable ? services.getTableDescriptors().getAll().size() : 1;
225 int functionsCount = getCostFunctionNames().length;
226
227 updateMetricsSize(tablesCount * (functionsCount + 1));
228 } catch (Exception e) {
229 LOG.error("failed to get the size of all tables, exception = " + e.getMessage());
230 }
231 }
232
233
234
235
236 public void updateMetricsSize(int size) {
237 if (metricsBalancer instanceof MetricsStochasticBalancer) {
238 ((MetricsStochasticBalancer) metricsBalancer).updateMetricsSize(size);
239 }
240 }
241
242 @Override
243 public synchronized void setMasterServices(MasterServices masterServices) {
244 super.setMasterServices(masterServices);
245 this.localityCost.setServices(masterServices);
246 this.localityCandidateGenerator.setServices(masterServices);
247
248 }
249
250 @Override
251 protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) {
252 regionReplicaHostCostFunction.init(c);
253 if (regionReplicaHostCostFunction.cost() > 0) return true;
254 regionReplicaRackCostFunction.init(c);
255 if (regionReplicaRackCostFunction.cost() > 0) return true;
256 return false;
257 }
258
259 @Override
260 public synchronized List<RegionPlan> balanceCluster(TableName tableName, Map<ServerName,
261 List<HRegionInfo>> clusterState) {
262 this.tableName = tableName;
263 return balanceCluster(clusterState);
264 }
265
266
267
268
269
270 @Override
271 public synchronized List<RegionPlan> balanceCluster(Map<ServerName,
272 List<HRegionInfo>> clusterState) {
273 List<RegionPlan> plans = balanceMasterRegions(clusterState);
274 if (plans != null || clusterState == null || clusterState.size() <= 1) {
275 return plans;
276 }
277
278 if (masterServerName != null && clusterState.containsKey(masterServerName)) {
279 if (clusterState.size() <= 2) {
280 return null;
281 }
282 clusterState = new HashMap<ServerName, List<HRegionInfo>>(clusterState);
283 clusterState.remove(masterServerName);
284 }
285
286
287
288
289
290 RegionLocationFinder finder = null;
291 if (this.localityCost != null && this.localityCost.getMultiplier() > 0) {
292 finder = this.regionFinder;
293 }
294
295
296
297
298 Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
299
300 if (!needsBalance(cluster)) {
301 return null;
302 }
303
304 long startTime = EnvironmentEdgeManager.currentTime();
305
306 initCosts(cluster);
307
308 double currentCost = computeCost(cluster, Double.MAX_VALUE);
309 curOverallCost = currentCost;
310 for (int i = 0; i < this.curFunctionCosts.length; i++) {
311 curFunctionCosts[i] = tempFunctionCosts[i];
312 }
313
314 double initCost = currentCost;
315 double newCost = currentCost;
316
317 long computedMaxSteps = Math.min(this.maxSteps,
318 ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
319
320 long step;
321
322 for (step = 0; step < computedMaxSteps; step++) {
323 int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
324 CandidateGenerator p = candidateGenerators[generatorIdx];
325 Cluster.Action action = p.generate(cluster);
326
327 if (action.type == Type.NULL) {
328 continue;
329 }
330
331 cluster.doAction(action);
332 updateCostsWithAction(cluster, action);
333
334 newCost = computeCost(cluster, currentCost);
335
336
337 if (newCost < currentCost) {
338 currentCost = newCost;
339
340
341 curOverallCost = currentCost;
342 for (int i = 0; i < this.curFunctionCosts.length; i++) {
343 curFunctionCosts[i] = tempFunctionCosts[i];
344 }
345 } else {
346
347
348 Action undoAction = action.undoAction();
349 cluster.doAction(undoAction);
350 updateCostsWithAction(cluster, undoAction);
351 }
352
353 if (EnvironmentEdgeManager.currentTime() - startTime >
354 maxRunningTime) {
355 break;
356 }
357 }
358
359 long endTime = EnvironmentEdgeManager.currentTime();
360
361 metricsBalancer.balanceCluster(endTime - startTime);
362
363
364 updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
365 if (initCost > currentCost) {
366 plans = createRegionPlans(cluster);
367 if (LOG.isDebugEnabled()) {
368 LOG.debug("Finished computing new load balance plan. Computation took "
369 + (endTime - startTime) + "ms to try " + step
370 + " different iterations. Found a solution that moves "
371 + plans.size() + " regions; Going from a computed cost of "
372 + initCost + " to a new cost of " + currentCost);
373 }
374
375 return plans;
376 }
377 if (LOG.isDebugEnabled()) {
378 LOG.debug("Could not find a better load balance plan. Tried "
379 + step + " different configurations in " + (endTime - startTime)
380 + "ms, and did not find anything with a computed cost less than " + initCost);
381 }
382 return null;
383 }
384
385
386
387
388 private void updateStochasticCosts(TableName tableName, Double overall, Double[] subCosts) {
389 if (tableName == null) return;
390
391
392 if (metricsBalancer instanceof MetricsStochasticBalancer) {
393 MetricsStochasticBalancer balancer = (MetricsStochasticBalancer) metricsBalancer;
394
395 balancer.updateStochasticCost(tableName.getNameAsString(),
396 "Overall", "Overall cost", overall);
397
398
399 for (int i = 0; i < costFunctions.length; i++) {
400 CostFunction costFunction = costFunctions[i];
401 String costFunctionName = costFunction.getClass().getSimpleName();
402 Double costPercent = (overall == 0) ? 0 : (subCosts[i] / overall);
403
404 balancer.updateStochasticCost(tableName.getNameAsString(), costFunctionName,
405 "The percent of " + costFunctionName, costPercent);
406 }
407 }
408 }
409
410
411
412
413
414
415
416
417
418 private List<RegionPlan> createRegionPlans(Cluster cluster) {
419 List<RegionPlan> plans = new LinkedList<RegionPlan>();
420 for (int regionIndex = 0;
421 regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
422 int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
423 int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
424
425 if (initialServerIndex != newServerIndex) {
426 HRegionInfo region = cluster.regions[regionIndex];
427 ServerName initialServer = cluster.servers[initialServerIndex];
428 ServerName newServer = cluster.servers[newServerIndex];
429
430 if (LOG.isTraceEnabled()) {
431 LOG.trace("Moving Region " + region.getEncodedName() + " from server "
432 + initialServer.getHostname() + " to " + newServer.getHostname());
433 }
434 RegionPlan rp = new RegionPlan(region, initialServer, newServer);
435 plans.add(rp);
436 }
437 }
438 return plans;
439 }
440
441
442
443
444 private synchronized void updateRegionLoad() {
445
446
447 Map<String, Deque<RegionLoad>> oldLoads = loads;
448 loads = new HashMap<String, Deque<RegionLoad>>();
449
450 for (ServerName sn : clusterStatus.getServers()) {
451 ServerLoad sl = clusterStatus.getLoad(sn);
452 if (sl == null) {
453 continue;
454 }
455 for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
456 Deque<RegionLoad> rLoads = oldLoads.get(Bytes.toString(entry.getKey()));
457 if (rLoads == null) {
458
459 rLoads = new ArrayDeque<RegionLoad>();
460 } else if (rLoads.size() >= numRegionLoadsToRemember) {
461 rLoads.remove();
462 }
463 rLoads.add(entry.getValue());
464 loads.put(Bytes.toString(entry.getKey()), rLoads);
465
466 }
467 }
468
469 for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
470 cost.setLoads(loads);
471 }
472 }
473
474 protected void initCosts(Cluster cluster) {
475 for (CostFunction c:costFunctions) {
476 c.init(cluster);
477 }
478 }
479
480 protected void updateCostsWithAction(Cluster cluster, Action action) {
481 for (CostFunction c : costFunctions) {
482 c.postAction(action);
483 }
484 }
485
486
487
488
489 public String[] getCostFunctionNames() {
490 if (costFunctions == null) return null;
491 String[] ret = new String[costFunctions.length];
492 for (int i = 0; i < costFunctions.length; i++) {
493 CostFunction c = costFunctions[i];
494 ret[i] = c.getClass().getSimpleName();
495 }
496
497 return ret;
498 }
499
500
501
502
503
504
505
506
507
508
509 protected double computeCost(Cluster cluster, double previousCost) {
510 double total = 0;
511
512 for (int i = 0; i < costFunctions.length; i++) {
513 CostFunction c = costFunctions[i];
514 this.tempFunctionCosts[i] = 0.0;
515
516 if (c.getMultiplier() <= 0) {
517 continue;
518 }
519
520 Float multiplier = c.getMultiplier();
521 Double cost = c.cost();
522
523 this.tempFunctionCosts[i] = multiplier*cost;
524 total += this.tempFunctionCosts[i];
525
526 if (total > previousCost) {
527 break;
528 }
529 }
530
531 return total;
532 }
533
534
535 abstract static class CandidateGenerator {
536 abstract Cluster.Action generate(Cluster cluster);
537
538
539
540
541
542
543
544
545
546
547
548
549
550 protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
551
552 if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
553
554 return -1;
555 }
556 int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
557 return cluster.regionsPerServer[server][rand];
558
559 }
560 protected int pickRandomServer(Cluster cluster) {
561 if (cluster.numServers < 1) {
562 return -1;
563 }
564
565 return RANDOM.nextInt(cluster.numServers);
566 }
567
568 protected int pickRandomRack(Cluster cluster) {
569 if (cluster.numRacks < 1) {
570 return -1;
571 }
572
573 return RANDOM.nextInt(cluster.numRacks);
574 }
575
576 protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
577 if (cluster.numServers < 2) {
578 return -1;
579 }
580 while (true) {
581 int otherServerIndex = pickRandomServer(cluster);
582 if (otherServerIndex != serverIndex) {
583 return otherServerIndex;
584 }
585 }
586 }
587
588 protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
589 if (cluster.numRacks < 2) {
590 return -1;
591 }
592 while (true) {
593 int otherRackIndex = pickRandomRack(cluster);
594 if (otherRackIndex != rackIndex) {
595 return otherRackIndex;
596 }
597 }
598 }
599
600 protected Cluster.Action pickRandomRegions(Cluster cluster,
601 int thisServer,
602 int otherServer) {
603 if (thisServer < 0 || otherServer < 0) {
604 return Cluster.NullAction;
605 }
606
607
608 int thisRegionCount = cluster.getNumRegions(thisServer);
609 int otherRegionCount = cluster.getNumRegions(otherServer);
610
611
612 double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
613 double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
614
615 int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
616 int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
617
618 return getAction(thisServer, thisRegion, otherServer, otherRegion);
619 }
620
621 protected Cluster.Action getAction(int fromServer, int fromRegion,
622 int toServer, int toRegion) {
623 if (fromServer < 0 || toServer < 0) {
624 return Cluster.NullAction;
625 }
626 if (fromRegion > 0 && toRegion > 0) {
627 return new Cluster.SwapRegionsAction(fromServer, fromRegion,
628 toServer, toRegion);
629 } else if (fromRegion > 0) {
630 return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
631 } else if (toRegion > 0) {
632 return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
633 } else {
634 return Cluster.NullAction;
635 }
636 }
637 }
638
639 static class RandomCandidateGenerator extends CandidateGenerator {
640
641 @Override
642 Cluster.Action generate(Cluster cluster) {
643
644 int thisServer = pickRandomServer(cluster);
645
646
647 int otherServer = pickOtherRandomServer(cluster, thisServer);
648
649 return pickRandomRegions(cluster, thisServer, otherServer);
650 }
651 }
652
653 static class LoadCandidateGenerator extends CandidateGenerator {
654
655 @Override
656 Cluster.Action generate(Cluster cluster) {
657 cluster.sortServersByRegionCount();
658 int thisServer = pickMostLoadedServer(cluster, -1);
659 int otherServer = pickLeastLoadedServer(cluster, thisServer);
660
661 return pickRandomRegions(cluster, thisServer, otherServer);
662 }
663
664 private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
665 Integer[] servers = cluster.serverIndicesSortedByRegionCount;
666
667 int index = 0;
668 while (servers[index] == null || servers[index] == thisServer) {
669 index++;
670 if (index == servers.length) {
671 return -1;
672 }
673 }
674 return servers[index];
675 }
676
677 private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
678 Integer[] servers = cluster.serverIndicesSortedByRegionCount;
679
680 int index = servers.length - 1;
681 while (servers[index] == null || servers[index] == thisServer) {
682 index--;
683 if (index < 0) {
684 return -1;
685 }
686 }
687 return servers[index];
688 }
689 }
690
691 static class LocalityBasedCandidateGenerator extends CandidateGenerator {
692
693 private MasterServices masterServices;
694
695 LocalityBasedCandidateGenerator(MasterServices masterServices) {
696 this.masterServices = masterServices;
697 }
698
699 @Override
700 Cluster.Action generate(Cluster cluster) {
701 if (this.masterServices == null) {
702 return Cluster.NullAction;
703 }
704
705 int thisServer = pickRandomServer(cluster);
706
707
708 int thisRegion = pickRandomRegion(cluster, thisServer, 0.0f);
709
710 if (thisRegion == -1) {
711 return Cluster.NullAction;
712 }
713
714
715 int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
716
717 if (otherServer == -1) {
718 return Cluster.NullAction;
719 }
720
721
722 int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
723
724 return getAction(thisServer, thisRegion, otherServer, otherRegion);
725 }
726
727 private int pickHighestLocalityServer(Cluster cluster, int thisServer, int thisRegion) {
728 int[] regionLocations = cluster.regionLocations[thisRegion];
729
730 if (regionLocations == null || regionLocations.length <= 1) {
731 return pickOtherRandomServer(cluster, thisServer);
732 }
733
734 for (int loc : regionLocations) {
735 if (loc >= 0 && loc != thisServer) {
736 return loc;
737 }
738 }
739
740
741 return pickOtherRandomServer(cluster, thisServer);
742 }
743
744 void setServices(MasterServices services) {
745 this.masterServices = services;
746 }
747 }
748
749
750
751
752
753 static class RegionReplicaCandidateGenerator extends CandidateGenerator {
754
755 RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();
756
757
758
759
760
761
762
763
764
765
766 int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
767 , int[] regionIndexToPrimaryIndex) {
768 int currentPrimary = -1;
769 int currentPrimaryIndex = -1;
770 int selectedPrimaryIndex = -1;
771 double currentLargestRandom = -1;
772
773
774
775 for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
776 int primary = j < primariesOfRegionsPerGroup.length
777 ? primariesOfRegionsPerGroup[j] : -1;
778 if (primary != currentPrimary) {
779 int numReplicas = j - currentPrimaryIndex;
780 if (numReplicas > 1) {
781
782 double currentRandom = RANDOM.nextDouble();
783
784
785 if (currentRandom > currentLargestRandom) {
786 selectedPrimaryIndex = currentPrimary;
787 currentLargestRandom = currentRandom;
788 }
789 }
790 currentPrimary = primary;
791 currentPrimaryIndex = j;
792 }
793 }
794
795
796
797 for (int j = 0; j < regionsPerGroup.length; j++) {
798 int regionIndex = regionsPerGroup[j];
799 if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
800
801 if (selectedPrimaryIndex != regionIndex) {
802 return regionIndex;
803 }
804 }
805 }
806 return -1;
807 }
808
809 @Override
810 Cluster.Action generate(Cluster cluster) {
811 int serverIndex = pickRandomServer(cluster);
812 if (cluster.numServers <= 1 || serverIndex == -1) {
813 return Cluster.NullAction;
814 }
815
816 int regionIndex = selectCoHostedRegionPerGroup(
817 cluster.primariesOfRegionsPerServer[serverIndex],
818 cluster.regionsPerServer[serverIndex],
819 cluster.regionIndexToPrimaryIndex);
820
821
822 if (regionIndex == -1) {
823
824 return randomGenerator.generate(cluster);
825 }
826
827 int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
828 int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
829 return getAction(serverIndex, regionIndex, toServerIndex, toRegionIndex);
830 }
831 }
832
833
834
835
836
837 static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
838 @Override
839 Cluster.Action generate(Cluster cluster) {
840 int rackIndex = pickRandomRack(cluster);
841 if (cluster.numRacks <= 1 || rackIndex == -1) {
842 return super.generate(cluster);
843 }
844
845 int regionIndex = selectCoHostedRegionPerGroup(
846 cluster.primariesOfRegionsPerRack[rackIndex],
847 cluster.regionsPerRack[rackIndex],
848 cluster.regionIndexToPrimaryIndex);
849
850
851 if (regionIndex == -1) {
852
853 return randomGenerator.generate(cluster);
854 }
855
856 int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
857 int toRackIndex = pickOtherRandomRack(cluster, rackIndex);
858
859 int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
860 int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
861 int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
862 return getAction(serverIndex, regionIndex, toServerIndex, toRegionIndex);
863 }
864 }
865
866
867
868
869 abstract static class CostFunction {
870
871 private float multiplier = 0;
872
873 protected Cluster cluster;
874
875 CostFunction(Configuration c) {
876
877 }
878
879 float getMultiplier() {
880 return multiplier;
881 }
882
883 void setMultiplier(float m) {
884 this.multiplier = m;
885 }
886
887
888
889
890 void init(Cluster cluster) {
891 this.cluster = cluster;
892 }
893
894
895
896
897
898 void postAction(Action action) {
899 switch (action.type) {
900 case NULL: break;
901 case ASSIGN_REGION:
902 AssignRegionAction ar = (AssignRegionAction) action;
903 regionMoved(ar.region, -1, ar.server);
904 break;
905 case MOVE_REGION:
906 MoveRegionAction mra = (MoveRegionAction) action;
907 regionMoved(mra.region, mra.fromServer, mra.toServer);
908 break;
909 case SWAP_REGIONS:
910 SwapRegionsAction a = (SwapRegionsAction) action;
911 regionMoved(a.fromRegion, a.fromServer, a.toServer);
912 regionMoved(a.toRegion, a.toServer, a.fromServer);
913 break;
914 default:
915 throw new RuntimeException("Uknown action:" + action.type);
916 }
917 }
918
919 protected void regionMoved(int region, int oldServer, int newServer) {
920 }
921
922 abstract double cost();
923
924
925
926
927
928
929
930
931
932 protected double costFromArray(double[] stats) {
933 double totalCost = 0;
934 double total = getSum(stats);
935
936 double count = stats.length;
937 double mean = total/count;
938
939
940
941 double max = ((count - 1) * mean) + (total - mean);
942
943
944 double min;
945 if (count > total) {
946 min = ((count - total) * mean) + ((1 - mean) * total);
947 } else {
948
949 int numHigh = (int) (total - (Math.floor(mean) * count));
950 int numLow = (int) (count - numHigh);
951
952 min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
953
954 }
955 min = Math.max(0, min);
956 for (int i=0; i<stats.length; i++) {
957 double n = stats[i];
958 double diff = Math.abs(mean - n);
959 totalCost += diff;
960 }
961
962 double scaled = scale(min, max, totalCost);
963 return scaled;
964 }
965
966 private double getSum(double[] stats) {
967 double total = 0;
968 for(double s:stats) {
969 total += s;
970 }
971 return total;
972 }
973
974
975
976
977
978
979
980
981
982 protected double scale(double min, double max, double value) {
983 if (max <= min || value <= min) {
984 return 0;
985 }
986 if ((max - min) == 0) return 0;
987
988 return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
989 }
990 }
991
992
993
994
995
996 static class MoveCostFunction extends CostFunction {
997 private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
998 private static final String MAX_MOVES_PERCENT_KEY =
999 "hbase.master.balancer.stochastic.maxMovePercent";
1000 private static final float DEFAULT_MOVE_COST = 100;
1001 private static final int DEFAULT_MAX_MOVES = 600;
1002 private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
1003
1004 private final float maxMovesPercent;
1005
1006 MoveCostFunction(Configuration conf) {
1007 super(conf);
1008
1009
1010
1011 this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
1012
1013 maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
1014 }
1015
1016 @Override
1017 double cost() {
1018
1019 int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
1020 DEFAULT_MAX_MOVES);
1021
1022 double moveCost = cluster.numMovedRegions;
1023
1024
1025
1026 if (moveCost > maxMoves) {
1027 return 1000000;
1028 }
1029
1030 return scale(0, cluster.numRegions, moveCost);
1031 }
1032 }
1033
1034
1035
1036
1037
1038 static class RegionCountSkewCostFunction extends CostFunction {
1039 private static final String REGION_COUNT_SKEW_COST_KEY =
1040 "hbase.master.balancer.stochastic.regionCountCost";
1041 private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
1042
1043 private double[] stats = null;
1044
1045 RegionCountSkewCostFunction(Configuration conf) {
1046 super(conf);
1047
1048 this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
1049 }
1050
1051 @Override
1052 double cost() {
1053 if (stats == null || stats.length != cluster.numServers) {
1054 stats = new double[cluster.numServers];
1055 }
1056
1057 for (int i =0; i < cluster.numServers; i++) {
1058 stats[i] = cluster.regionsPerServer[i].length;
1059 }
1060
1061 return costFromArray(stats);
1062 }
1063 }
1064
1065
1066
1067
1068
1069 static class PrimaryRegionCountSkewCostFunction extends CostFunction {
1070 private static final String PRIMARY_REGION_COUNT_SKEW_COST_KEY =
1071 "hbase.master.balancer.stochastic.primaryRegionCountCost";
1072 private static final float DEFAULT_PRIMARY_REGION_COUNT_SKEW_COST = 500;
1073
1074 private double[] stats = null;
1075
1076 PrimaryRegionCountSkewCostFunction(Configuration conf) {
1077 super(conf);
1078
1079 this.setMultiplier(conf.getFloat(PRIMARY_REGION_COUNT_SKEW_COST_KEY,
1080 DEFAULT_PRIMARY_REGION_COUNT_SKEW_COST));
1081 }
1082
1083 @Override
1084 double cost() {
1085 if (!cluster.hasRegionReplicas) {
1086 return 0;
1087 }
1088 if (stats == null || stats.length != cluster.numServers) {
1089 stats = new double[cluster.numServers];
1090 }
1091
1092 for (int i =0; i < cluster.numServers; i++) {
1093 stats[i] = 0;
1094 for (int regionIdx : cluster.regionsPerServer[i]) {
1095 if (regionIdx == cluster.regionIndexToPrimaryIndex[regionIdx]) {
1096 stats[i] ++;
1097 }
1098 }
1099 }
1100
1101 return costFromArray(stats);
1102 }
1103 }
1104
1105
1106
1107
1108
1109 static class TableSkewCostFunction extends CostFunction {
1110
1111 private static final String TABLE_SKEW_COST_KEY =
1112 "hbase.master.balancer.stochastic.tableSkewCost";
1113 private static final float DEFAULT_TABLE_SKEW_COST = 35;
1114
1115 TableSkewCostFunction(Configuration conf) {
1116 super(conf);
1117 this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
1118 }
1119
1120 @Override
1121 double cost() {
1122 double max = cluster.numRegions;
1123 double min = ((double) cluster.numRegions) / cluster.numServers;
1124 double value = 0;
1125
1126 for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
1127 value += cluster.numMaxRegionsPerTable[i];
1128 }
1129
1130 return scale(min, max, value);
1131 }
1132 }
1133
1134
1135
1136
1137
1138 static class LocalityCostFunction extends CostFunction {
1139
1140 private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
1141 private static final float DEFAULT_LOCALITY_COST = 25;
1142
1143 private MasterServices services;
1144
1145 LocalityCostFunction(Configuration conf, MasterServices srv) {
1146 super(conf);
1147 this.setMultiplier(conf.getFloat(LOCALITY_COST_KEY, DEFAULT_LOCALITY_COST));
1148 this.services = srv;
1149 }
1150
1151 void setServices(MasterServices srvc) {
1152 this.services = srvc;
1153 }
1154
1155 @Override
1156 double cost() {
1157 double max = 0;
1158 double cost = 0;
1159
1160
1161 if (this.services == null) {
1162 return cost;
1163 }
1164
1165 for (int i = 0; i < cluster.regionLocations.length; i++) {
1166 max += 1;
1167 int serverIndex = cluster.regionIndexToServerIndex[i];
1168 int[] regionLocations = cluster.regionLocations[i];
1169
1170
1171
1172 if (regionLocations == null) {
1173 continue;
1174 }
1175
1176 int index = -1;
1177 for (int j = 0; j < regionLocations.length; j++) {
1178 if (regionLocations[j] >= 0 && regionLocations[j] == serverIndex) {
1179 index = j;
1180 break;
1181 }
1182 }
1183
1184 if (index < 0) {
1185 if (regionLocations.length > 0) {
1186 cost += 1;
1187 }
1188 } else {
1189 cost += (double) index / (double) regionLocations.length;
1190 }
1191 }
1192 return scale(0, max, cost);
1193 }
1194 }
1195
1196
1197
1198
1199
1200 abstract static class CostFromRegionLoadFunction extends CostFunction {
1201
1202 private ClusterStatus clusterStatus = null;
1203 private Map<String, Deque<RegionLoad>> loads = null;
1204 private double[] stats = null;
1205 CostFromRegionLoadFunction(Configuration conf) {
1206 super(conf);
1207 }
1208
1209 void setClusterStatus(ClusterStatus status) {
1210 this.clusterStatus = status;
1211 }
1212
1213 void setLoads(Map<String, Deque<RegionLoad>> l) {
1214 this.loads = l;
1215 }
1216
1217 @Override
1218 double cost() {
1219 if (clusterStatus == null || loads == null) {
1220 return 0;
1221 }
1222
1223 if (stats == null || stats.length != cluster.numServers) {
1224 stats = new double[cluster.numServers];
1225 }
1226
1227 for (int i =0; i < stats.length; i++) {
1228
1229 long cost = 0;
1230
1231
1232 for(int regionIndex:cluster.regionsPerServer[i]) {
1233 Collection<RegionLoad> regionLoadList = cluster.regionLoads[regionIndex];
1234
1235
1236 if (regionLoadList != null) {
1237 cost += getRegionLoadCost(regionLoadList);
1238 }
1239 }
1240
1241
1242 stats[i] = cost;
1243 }
1244
1245
1246 return costFromArray(stats);
1247 }
1248
1249 protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1250 double cost = 0;
1251
1252 for (RegionLoad rl : regionLoadList) {
1253 double toAdd = getCostFromRl(rl);
1254
1255 if (cost == 0) {
1256 cost = toAdd;
1257 } else {
1258 cost = (.5 * cost) + (.5 * toAdd);
1259 }
1260 }
1261
1262 return cost;
1263 }
1264
1265 protected abstract double getCostFromRl(RegionLoad rl);
1266 }
1267
1268
1269
1270
1271
1272
1273 static class ReadRequestCostFunction extends CostFromRegionLoadFunction {
1274
1275 private static final String READ_REQUEST_COST_KEY =
1276 "hbase.master.balancer.stochastic.readRequestCost";
1277 private static final float DEFAULT_READ_REQUEST_COST = 5;
1278
1279 ReadRequestCostFunction(Configuration conf) {
1280 super(conf);
1281 this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1282 }
1283
1284
1285 @Override
1286 protected double getCostFromRl(RegionLoad rl) {
1287 return rl.getReadRequestsCount();
1288 }
1289 }
1290
1291
1292
1293
1294
1295 static class WriteRequestCostFunction extends CostFromRegionLoadFunction {
1296
1297 private static final String WRITE_REQUEST_COST_KEY =
1298 "hbase.master.balancer.stochastic.writeRequestCost";
1299 private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1300
1301 WriteRequestCostFunction(Configuration conf) {
1302 super(conf);
1303 this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1304 }
1305
1306 @Override
1307 protected double getCostFromRl(RegionLoad rl) {
1308 return rl.getWriteRequestsCount();
1309 }
1310 }
1311
1312
1313
1314
1315
1316
1317
1318 static class RegionReplicaHostCostFunction extends CostFunction {
1319 private static final String REGION_REPLICA_HOST_COST_KEY =
1320 "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
1321 private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
1322
1323 long maxCost = 0;
1324 long[] costsPerGroup;
1325 int[][] primariesOfRegionsPerGroup;
1326
1327 public RegionReplicaHostCostFunction(Configuration conf) {
1328 super(conf);
1329 this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
1330 DEFAULT_REGION_REPLICA_HOST_COST_KEY));
1331 }
1332
1333 @Override
1334 void init(Cluster cluster) {
1335 super.init(cluster);
1336
1337 maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
1338 costsPerGroup = new long[cluster.numHosts];
1339 primariesOfRegionsPerGroup = cluster.multiServersPerHost
1340 ? cluster.primariesOfRegionsPerHost
1341 : cluster.primariesOfRegionsPerServer;
1342 for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
1343 costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
1344 }
1345 }
1346
1347 long getMaxCost(Cluster cluster) {
1348 if (!cluster.hasRegionReplicas) {
1349 return 0;
1350 }
1351
1352 int[] primariesOfRegions = new int[cluster.numRegions];
1353 System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions, 0,
1354 cluster.regions.length);
1355
1356 Arrays.sort(primariesOfRegions);
1357
1358
1359 return costPerGroup(primariesOfRegions);
1360 }
1361
1362 @Override
1363 double cost() {
1364 if (maxCost <= 0) {
1365 return 0;
1366 }
1367
1368 long totalCost = 0;
1369 for (int i = 0 ; i < costsPerGroup.length; i++) {
1370 totalCost += costsPerGroup[i];
1371 }
1372 return scale(0, maxCost, totalCost);
1373 }
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383 protected long costPerGroup(int[] primariesOfRegions) {
1384 long cost = 0;
1385 int currentPrimary = -1;
1386 int currentPrimaryIndex = -1;
1387
1388
1389 for (int j = 0 ; j <= primariesOfRegions.length; j++) {
1390 int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
1391 if (primary != currentPrimary) {
1392 int numReplicas = j - currentPrimaryIndex;
1393
1394 if (numReplicas > 1) {
1395 cost += (numReplicas - 1) * (numReplicas - 1);
1396 }
1397 currentPrimary = primary;
1398 currentPrimaryIndex = j;
1399 }
1400 }
1401
1402 return cost;
1403 }
1404
1405 @Override
1406 protected void regionMoved(int region, int oldServer, int newServer) {
1407 if (maxCost <= 0) {
1408 return;
1409 }
1410 if (cluster.multiServersPerHost) {
1411 int oldHost = cluster.serverIndexToHostIndex[oldServer];
1412 int newHost = cluster.serverIndexToHostIndex[newServer];
1413 if (newHost != oldHost) {
1414 costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
1415 costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
1416 }
1417 } else {
1418 costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
1419 costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
1420 }
1421 }
1422 }
1423
1424
1425
1426
1427
1428
1429 static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
1430 private static final String REGION_REPLICA_RACK_COST_KEY =
1431 "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
1432 private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;
1433
1434 public RegionReplicaRackCostFunction(Configuration conf) {
1435 super(conf);
1436 this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY,
1437 DEFAULT_REGION_REPLICA_RACK_COST_KEY));
1438 }
1439
1440 @Override
1441 void init(Cluster cluster) {
1442 this.cluster = cluster;
1443 if (cluster.numRacks <= 1) {
1444 maxCost = 0;
1445 return;
1446 }
1447
1448 maxCost = getMaxCost(cluster);
1449 costsPerGroup = new long[cluster.numRacks];
1450 for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
1451 costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
1452 }
1453 }
1454
1455 @Override
1456 protected void regionMoved(int region, int oldServer, int newServer) {
1457 if (maxCost <= 0) {
1458 return;
1459 }
1460 int oldRack = cluster.serverIndexToRackIndex[oldServer];
1461 int newRack = cluster.serverIndexToRackIndex[newServer];
1462 if (newRack != oldRack) {
1463 costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
1464 costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
1465 }
1466 }
1467 }
1468
1469
1470
1471
1472
1473 static class MemstoreSizeCostFunction extends CostFromRegionLoadFunction {
1474
1475 private static final String MEMSTORE_SIZE_COST_KEY =
1476 "hbase.master.balancer.stochastic.memstoreSizeCost";
1477 private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1478
1479 MemstoreSizeCostFunction(Configuration conf) {
1480 super(conf);
1481 this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1482 }
1483
1484 @Override
1485 protected double getCostFromRl(RegionLoad rl) {
1486 return rl.getMemStoreSizeMB();
1487 }
1488 }
1489
1490
1491
1492
1493 static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1494
1495 private static final String STOREFILE_SIZE_COST_KEY =
1496 "hbase.master.balancer.stochastic.storefileSizeCost";
1497 private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1498
1499 StoreFileCostFunction(Configuration conf) {
1500 super(conf);
1501 this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1502 }
1503
1504 @Override
1505 protected double getCostFromRl(RegionLoad rl) {
1506 return rl.getStorefileSizeMB();
1507 }
1508 }
1509
1510
1511
1512
1513 public static String composeAttributeName(String tableName, String costFunctionName) {
1514 return tableName + TABLE_FUNCTION_SEP + costFunctionName;
1515 }
1516 }