/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.applications.distributedshell;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.api.AMRMProtocol;
import org.apache.hadoop.yarn.api.ApplicationConstants;
048 import org.apache.hadoop.yarn.api.ContainerManager; 049 050 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; 051 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; 052 import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; 053 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; 054 import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; 055 056 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; 057 import org.apache.hadoop.yarn.api.records.Container; 058 import org.apache.hadoop.yarn.api.records.ContainerId; 059 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; 060 import org.apache.hadoop.yarn.api.records.ContainerState; 061 import org.apache.hadoop.yarn.api.records.ContainerStatus; 062 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; 063 import org.apache.hadoop.yarn.api.records.LocalResource; 064 import org.apache.hadoop.yarn.api.records.LocalResourceType; 065 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; 066 import org.apache.hadoop.yarn.api.records.NodeReport; 067 import org.apache.hadoop.yarn.api.records.Priority; 068 import org.apache.hadoop.yarn.api.records.Resource; 069 import org.apache.hadoop.yarn.api.records.ResourceRequest; 070 import org.apache.hadoop.yarn.client.AMRMClient.ContainerRequest; 071 import org.apache.hadoop.yarn.client.AMRMClientAsync; 072 import org.apache.hadoop.yarn.conf.YarnConfiguration; 073 import org.apache.hadoop.yarn.exceptions.YarnRemoteException; 074 import org.apache.hadoop.yarn.ipc.YarnRPC; 075 import org.apache.hadoop.yarn.util.ConverterUtils; 076 import org.apache.hadoop.yarn.util.Records; 077 078 /** 079 * An ApplicationMaster for executing shell commands on a set of launched 080 * containers using the YARN framework. 081 * 082 * <p> 083 * This class is meant to act as an example on how to write yarn-based 084 * application masters. 
085 * </p> 086 * 087 * <p> 088 * The ApplicationMaster is started on a container by the 089 * <code>ResourceManager</code>'s launcher. The first thing that the 090 * <code>ApplicationMaster</code> needs to do is to connect and register itself 091 * with the <code>ResourceManager</code>. The registration sets up information 092 * within the <code>ResourceManager</code> regarding what host:port the 093 * ApplicationMaster is listening on to provide any form of functionality to a 094 * client as well as a tracking url that a client can use to keep track of 095 * status/job history if needed. 096 * </p> 097 * 098 * <p> 099 * The <code>ApplicationMaster</code> needs to send a heartbeat to the 100 * <code>ResourceManager</code> at regular intervals to inform the 101 * <code>ResourceManager</code> that it is up and alive. The 102 * {@link AMRMProtocol#allocate} to the <code>ResourceManager</code> from the 103 * <code>ApplicationMaster</code> acts as a heartbeat. 104 * 105 * <p> 106 * For the actual handling of the job, the <code>ApplicationMaster</code> has to 107 * request the <code>ResourceManager</code> via {@link AllocateRequest} for the 108 * required no. of containers using {@link ResourceRequest} with the necessary 109 * resource specifications such as node location, computational 110 * (memory/disk/cpu) resource requirements. The <code>ResourceManager</code> 111 * responds with an {@link AllocateResponse} that informs the 112 * <code>ApplicationMaster</code> of the set of newly allocated containers, 113 * completed containers as well as current state of available resources. 114 * </p> 115 * 116 * <p> 117 * For each allocated container, the <code>ApplicationMaster</code> can then set 118 * up the necessary launch context via {@link ContainerLaunchContext} to specify 119 * the allocated container id, local resources required by the executable, the 120 * environment to be setup for the executable, commands to execute, etc. 
and 121 * submit a {@link StartContainerRequest} to the {@link ContainerManager} to 122 * launch and execute the defined commands on the given allocated container. 123 * </p> 124 * 125 * <p> 126 * The <code>ApplicationMaster</code> can monitor the launched container by 127 * either querying the <code>ResourceManager</code> using 128 * {@link AMRMProtocol#allocate} to get updates on completed containers or via 129 * the {@link ContainerManager} by querying for the status of the allocated 130 * container's {@link ContainerId}. 131 * 132 * <p> 133 * After the job has been completed, the <code>ApplicationMaster</code> has to 134 * send a {@link FinishApplicationMasterRequest} to the 135 * <code>ResourceManager</code> to inform it that the 136 * <code>ApplicationMaster</code> has been completed. 137 */ 138 @InterfaceAudience.Public 139 @InterfaceStability.Unstable 140 public class ApplicationMaster { 141 142 private static final Log LOG = LogFactory.getLog(ApplicationMaster.class); 143 144 // Configuration 145 private Configuration conf; 146 // YARN RPC to communicate with the Resource Manager or Node Manager 147 private YarnRPC rpc; 148 149 // Handle to communicate with the Resource Manager 150 private AMRMClientAsync resourceManager; 151 152 // Application Attempt Id ( combination of attemptId and fail count ) 153 private ApplicationAttemptId appAttemptID; 154 155 // TODO 156 // For status update for clients - yet to be implemented 157 // Hostname of the container 158 private String appMasterHostname = ""; 159 // Port on which the app master listens for status updates from clients 160 private int appMasterRpcPort = 0; 161 // Tracking url to which app master publishes info for clients to monitor 162 private String appMasterTrackingUrl = ""; 163 164 // App Master configuration 165 // No. 
of containers to run shell command on 166 private int numTotalContainers = 1; 167 // Memory to request for the container on which the shell command will run 168 private int containerMemory = 10; 169 // Priority of the request 170 private int requestPriority; 171 172 // Counter for completed containers ( complete denotes successful or failed ) 173 private AtomicInteger numCompletedContainers = new AtomicInteger(); 174 // Allocated container count so that we know how many containers has the RM 175 // allocated to us 176 private AtomicInteger numAllocatedContainers = new AtomicInteger(); 177 // Count of failed containers 178 private AtomicInteger numFailedContainers = new AtomicInteger(); 179 // Count of containers already requested from the RM 180 // Needed as once requested, we should not request for containers again. 181 // Only request for more if the original requirement changes. 182 private AtomicInteger numRequestedContainers = new AtomicInteger(); 183 184 // Shell command to be executed 185 private String shellCommand = ""; 186 // Args to be passed to the shell command 187 private String shellArgs = ""; 188 // Env variables to be setup for the shell command 189 private Map<String, String> shellEnv = new HashMap<String, String>(); 190 191 // Location of shell script ( obtained from info set in env ) 192 // Shell script path in fs 193 private String shellScriptPath = ""; 194 // Timestamp needed for creating a local resource 195 private long shellScriptPathTimestamp = 0; 196 // File length needed for local resource 197 private long shellScriptPathLen = 0; 198 199 // Hardcoded path to shell script in launch container's local env 200 private final String ExecShellStringPath = "ExecShellScript.sh"; 201 202 private volatile boolean done; 203 private volatile boolean success; 204 205 // Launch threads 206 private List<Thread> launchThreads = new ArrayList<Thread>(); 207 208 /** 209 * @param args Command line args 210 */ 211 public static void main(String[] args) { 212 
boolean result = false; 213 try { 214 ApplicationMaster appMaster = new ApplicationMaster(); 215 LOG.info("Initializing ApplicationMaster"); 216 boolean doRun = appMaster.init(args); 217 if (!doRun) { 218 System.exit(0); 219 } 220 result = appMaster.run(); 221 } catch (Throwable t) { 222 LOG.fatal("Error running ApplicationMaster", t); 223 System.exit(1); 224 } 225 if (result) { 226 LOG.info("Application Master completed successfully. exiting"); 227 System.exit(0); 228 } else { 229 LOG.info("Application Master failed. exiting"); 230 System.exit(2); 231 } 232 } 233 234 /** 235 * Dump out contents of $CWD and the environment to stdout for debugging 236 */ 237 private void dumpOutDebugInfo() { 238 239 LOG.info("Dump debug output"); 240 Map<String, String> envs = System.getenv(); 241 for (Map.Entry<String, String> env : envs.entrySet()) { 242 LOG.info("System env: key=" + env.getKey() + ", val=" + env.getValue()); 243 System.out.println("System env: key=" + env.getKey() + ", val=" 244 + env.getValue()); 245 } 246 247 String cmd = "ls -al"; 248 Runtime run = Runtime.getRuntime(); 249 Process pr = null; 250 try { 251 pr = run.exec(cmd); 252 pr.waitFor(); 253 254 BufferedReader buf = new BufferedReader(new InputStreamReader( 255 pr.getInputStream())); 256 String line = ""; 257 while ((line = buf.readLine()) != null) { 258 LOG.info("System CWD content: " + line); 259 System.out.println("System CWD content: " + line); 260 } 261 buf.close(); 262 } catch (IOException e) { 263 e.printStackTrace(); 264 } catch (InterruptedException e) { 265 e.printStackTrace(); 266 } 267 } 268 269 public ApplicationMaster() throws Exception { 270 // Set up the configuration and RPC 271 conf = new YarnConfiguration(); 272 rpc = YarnRPC.create(conf); 273 } 274 275 /** 276 * Parse command line options 277 * 278 * @param args Command line args 279 * @return Whether init successful and run should be invoked 280 * @throws ParseException 281 * @throws IOException 282 */ 283 public boolean 
init(String[] args) throws ParseException, IOException { 284 285 Options opts = new Options(); 286 opts.addOption("app_attempt_id", true, 287 "App Attempt ID. Not to be used unless for testing purposes"); 288 opts.addOption("shell_command", true, 289 "Shell command to be executed by the Application Master"); 290 opts.addOption("shell_script", true, 291 "Location of the shell script to be executed"); 292 opts.addOption("shell_args", true, "Command line args for the shell script"); 293 opts.addOption("shell_env", true, 294 "Environment for shell script. Specified as env_key=env_val pairs"); 295 opts.addOption("container_memory", true, 296 "Amount of memory in MB to be requested to run the shell command"); 297 opts.addOption("num_containers", true, 298 "No. of containers on which the shell command needs to be executed"); 299 opts.addOption("priority", true, "Application Priority. Default 0"); 300 opts.addOption("debug", false, "Dump out debug information"); 301 302 opts.addOption("help", false, "Print usage"); 303 CommandLine cliParser = new GnuParser().parse(opts, args); 304 305 if (args.length == 0) { 306 printUsage(opts); 307 throw new IllegalArgumentException( 308 "No args specified for application master to initialize"); 309 } 310 311 if (cliParser.hasOption("help")) { 312 printUsage(opts); 313 return false; 314 } 315 316 if (cliParser.hasOption("debug")) { 317 dumpOutDebugInfo(); 318 } 319 320 Map<String, String> envs = System.getenv(); 321 322 if (envs.containsKey(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)) { 323 appAttemptID = ConverterUtils.toApplicationAttemptId(envs 324 .get(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)); 325 } else if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) { 326 if (cliParser.hasOption("app_attempt_id")) { 327 String appIdStr = cliParser.getOptionValue("app_attempt_id", ""); 328 appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr); 329 } else { 330 throw new IllegalArgumentException( 331 "Application Attempt 
Id not set in the environment"); 332 } 333 } else { 334 ContainerId containerId = ConverterUtils.toContainerId(envs 335 .get(ApplicationConstants.AM_CONTAINER_ID_ENV)); 336 appAttemptID = containerId.getApplicationAttemptId(); 337 } 338 339 LOG.info("Application master for app" + ", appId=" 340 + appAttemptID.getApplicationId().getId() + ", clustertimestamp=" 341 + appAttemptID.getApplicationId().getClusterTimestamp() 342 + ", attemptId=" + appAttemptID.getAttemptId()); 343 344 if (!cliParser.hasOption("shell_command")) { 345 throw new IllegalArgumentException( 346 "No shell command specified to be executed by application master"); 347 } 348 shellCommand = cliParser.getOptionValue("shell_command"); 349 350 if (cliParser.hasOption("shell_args")) { 351 shellArgs = cliParser.getOptionValue("shell_args"); 352 } 353 if (cliParser.hasOption("shell_env")) { 354 String shellEnvs[] = cliParser.getOptionValues("shell_env"); 355 for (String env : shellEnvs) { 356 env = env.trim(); 357 int index = env.indexOf('='); 358 if (index == -1) { 359 shellEnv.put(env, ""); 360 continue; 361 } 362 String key = env.substring(0, index); 363 String val = ""; 364 if (index < (env.length() - 1)) { 365 val = env.substring(index + 1); 366 } 367 shellEnv.put(key, val); 368 } 369 } 370 371 if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION)) { 372 shellScriptPath = envs.get(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION); 373 374 if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP)) { 375 shellScriptPathTimestamp = Long.valueOf(envs 376 .get(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP)); 377 } 378 if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN)) { 379 shellScriptPathLen = Long.valueOf(envs 380 .get(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN)); 381 } 382 383 if (!shellScriptPath.isEmpty() 384 && (shellScriptPathTimestamp <= 0 || shellScriptPathLen <= 0)) { 385 LOG.error("Illegal values in env for shell script path" + ", path=" 386 + shellScriptPath + ", len=" + 
shellScriptPathLen + ", timestamp=" 387 + shellScriptPathTimestamp); 388 throw new IllegalArgumentException( 389 "Illegal values in env for shell script path"); 390 } 391 } 392 393 containerMemory = Integer.parseInt(cliParser.getOptionValue( 394 "container_memory", "10")); 395 numTotalContainers = Integer.parseInt(cliParser.getOptionValue( 396 "num_containers", "1")); 397 requestPriority = Integer.parseInt(cliParser 398 .getOptionValue("priority", "0")); 399 400 return true; 401 } 402 403 /** 404 * Helper function to print usage 405 * 406 * @param opts Parsed command line options 407 */ 408 private void printUsage(Options opts) { 409 new HelpFormatter().printHelp("ApplicationMaster", opts); 410 } 411 412 /** 413 * Main run function for the application master 414 * 415 * @throws YarnRemoteException 416 */ 417 public boolean run() throws YarnRemoteException { 418 LOG.info("Starting ApplicationMaster"); 419 420 AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler(); 421 422 resourceManager = new AMRMClientAsync(appAttemptID, 1000, allocListener); 423 resourceManager.init(conf); 424 resourceManager.start(); 425 426 // Setup local RPC Server to accept status requests directly from clients 427 // TODO need to setup a protocol for client to be able to communicate to 428 // the RPC server 429 // TODO use the rpc port info to register with the RM for the client to 430 // send requests to this app master 431 432 // Register self with ResourceManager 433 // This will start heartbeating to the RM 434 RegisterApplicationMasterResponse response = resourceManager 435 .registerApplicationMaster(appMasterHostname, appMasterRpcPort, 436 appMasterTrackingUrl); 437 // Dump out information about cluster capability as seen by the 438 // resource manager 439 int minMem = response.getMinimumResourceCapability().getMemory(); 440 int maxMem = response.getMaximumResourceCapability().getMemory(); 441 LOG.info("Min mem capabililty of resources in this cluster " + minMem); 442 
LOG.info("Max mem capabililty of resources in this cluster " + maxMem); 443 444 // A resource ask has to be atleast the minimum of the capability of the 445 // cluster, the value has to be a multiple of the min value and cannot 446 // exceed the max. 447 // If it is not an exact multiple of min, the RM will allocate to the 448 // nearest multiple of min 449 if (containerMemory < minMem) { 450 LOG.info("Container memory specified below min threshold of cluster." 451 + " Using min value." + ", specified=" + containerMemory + ", min=" 452 + minMem); 453 containerMemory = minMem; 454 } else if (containerMemory > maxMem) { 455 LOG.info("Container memory specified above max threshold of cluster." 456 + " Using max value." + ", specified=" + containerMemory + ", max=" 457 + maxMem); 458 containerMemory = maxMem; 459 } 460 461 462 // Setup ask for containers from RM 463 // Send request for containers to RM 464 // Until we get our fully allocated quota, we keep on polling RM for 465 // containers 466 // Keep looping until all the containers are launched and shell script 467 // executed on them ( regardless of success/failure). 468 ContainerRequest containerAsk = setupContainerAskForRM(numTotalContainers); 469 resourceManager.addContainerRequest(containerAsk); 470 numRequestedContainers.set(numTotalContainers); 471 472 while (!done) { 473 try { 474 Thread.sleep(200); 475 } catch (InterruptedException ex) {} 476 } 477 finish(); 478 479 return success; 480 } 481 482 private void finish() { 483 // Join all launched threads 484 // needed for when we time out 485 // and we need to release containers 486 for (Thread launchThread : launchThreads) { 487 try { 488 launchThread.join(10000); 489 } catch (InterruptedException e) { 490 LOG.info("Exception thrown in thread join: " + e.getMessage()); 491 e.printStackTrace(); 492 } 493 } 494 495 // When the application completes, it should send a finish application 496 // signal to the RM 497 LOG.info("Application completed. 
Signalling finish to RM"); 498 499 FinalApplicationStatus appStatus; 500 String appMessage = null; 501 success = true; 502 if (numFailedContainers.get() == 0) { 503 appStatus = FinalApplicationStatus.SUCCEEDED; 504 } else { 505 appStatus = FinalApplicationStatus.FAILED; 506 appMessage = "Diagnostics." + ", total=" + numTotalContainers 507 + ", completed=" + numCompletedContainers.get() + ", allocated=" 508 + numAllocatedContainers.get() + ", failed=" 509 + numFailedContainers.get(); 510 success = false; 511 } 512 try { 513 resourceManager.unregisterApplicationMaster(appStatus, appMessage, null); 514 } catch (YarnRemoteException ex) { 515 LOG.error("Failed to unregister application", ex); 516 } 517 518 done = true; 519 resourceManager.stop(); 520 } 521 522 private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler { 523 @Override 524 public void onContainersCompleted(List<ContainerStatus> completedContainers) { 525 LOG.info("Got response from RM for container ask, completedCnt=" 526 + completedContainers.size()); 527 for (ContainerStatus containerStatus : completedContainers) { 528 LOG.info("Got container status for containerID=" 529 + containerStatus.getContainerId() + ", state=" 530 + containerStatus.getState() + ", exitStatus=" 531 + containerStatus.getExitStatus() + ", diagnostics=" 532 + containerStatus.getDiagnostics()); 533 534 // non complete containers should not be here 535 assert (containerStatus.getState() == ContainerState.COMPLETE); 536 537 // increment counters for completed/failed containers 538 int exitStatus = containerStatus.getExitStatus(); 539 if (0 != exitStatus) { 540 // container failed 541 if (YarnConfiguration.ABORTED_CONTAINER_EXIT_STATUS != exitStatus) { 542 // shell script failed 543 // counts as completed 544 numCompletedContainers.incrementAndGet(); 545 numFailedContainers.incrementAndGet(); 546 } else { 547 // container was killed by framework, possibly preempted 548 // we should re-try as the container was lost for 
some reason 549 numAllocatedContainers.decrementAndGet(); 550 numRequestedContainers.decrementAndGet(); 551 // we do not need to release the container as it would be done 552 // by the RM 553 } 554 } else { 555 // nothing to do 556 // container completed successfully 557 numCompletedContainers.incrementAndGet(); 558 LOG.info("Container completed successfully." + ", containerId=" 559 + containerStatus.getContainerId()); 560 } 561 } 562 563 // ask for more containers if any failed 564 int askCount = numTotalContainers - numRequestedContainers.get(); 565 numRequestedContainers.addAndGet(askCount); 566 567 if (askCount > 0) { 568 ContainerRequest containerAsk = setupContainerAskForRM(askCount); 569 resourceManager.addContainerRequest(containerAsk); 570 } 571 572 // set progress to deliver to RM on next heartbeat 573 float progress = (float) numCompletedContainers.get() 574 / numTotalContainers; 575 resourceManager.setProgress(progress); 576 577 if (numCompletedContainers.get() == numTotalContainers) { 578 done = true; 579 } 580 } 581 582 @Override 583 public void onContainersAllocated(List<Container> allocatedContainers) { 584 LOG.info("Got response from RM for container ask, allocatedCnt=" 585 + allocatedContainers.size()); 586 numAllocatedContainers.addAndGet(allocatedContainers.size()); 587 for (Container allocatedContainer : allocatedContainers) { 588 LOG.info("Launching shell command on a new container." 
589 + ", containerId=" + allocatedContainer.getId() 590 + ", containerNode=" + allocatedContainer.getNodeId().getHost() 591 + ":" + allocatedContainer.getNodeId().getPort() 592 + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() 593 + ", containerState" + allocatedContainer.getState() 594 + ", containerResourceMemory" 595 + allocatedContainer.getResource().getMemory()); 596 // + ", containerToken" 597 // +allocatedContainer.getContainerToken().getIdentifier().toString()); 598 599 LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable( 600 allocatedContainer); 601 Thread launchThread = new Thread(runnableLaunchContainer); 602 603 // launch and start the container on a separate thread to keep 604 // the main thread unblocked 605 // as all containers may not be allocated at one go. 606 launchThreads.add(launchThread); 607 launchThread.start(); 608 } 609 } 610 611 @Override 612 public void onRebootRequest() {} 613 614 @Override 615 public void onNodesUpdated(List<NodeReport> updatedNodes) {} 616 } 617 618 /** 619 * Thread to connect to the {@link ContainerManager} and launch the container 620 * that will execute the shell command. 
621 */ 622 private class LaunchContainerRunnable implements Runnable { 623 624 // Allocated container 625 Container container; 626 // Handle to communicate with ContainerManager 627 ContainerManager cm; 628 629 /** 630 * @param lcontainer Allocated container 631 */ 632 public LaunchContainerRunnable(Container lcontainer) { 633 this.container = lcontainer; 634 } 635 636 /** 637 * Helper function to connect to CM 638 */ 639 private void connectToCM() { 640 LOG.debug("Connecting to ContainerManager for containerid=" 641 + container.getId()); 642 String cmIpPortStr = container.getNodeId().getHost() + ":" 643 + container.getNodeId().getPort(); 644 InetSocketAddress cmAddress = NetUtils.createSocketAddr(cmIpPortStr); 645 LOG.info("Connecting to ContainerManager at " + cmIpPortStr); 646 this.cm = ((ContainerManager) rpc.getProxy(ContainerManager.class, 647 cmAddress, conf)); 648 } 649 650 @Override 651 /** 652 * Connects to CM, sets up container launch context 653 * for shell command and eventually dispatches the container 654 * start request to the CM. 655 */ 656 public void run() { 657 // Connect to ContainerManager 658 connectToCM(); 659 660 LOG.info("Setting up container launch container for containerid=" 661 + container.getId()); 662 ContainerLaunchContext ctx = Records 663 .newRecord(ContainerLaunchContext.class); 664 665 ctx.setContainerId(container.getId()); 666 ctx.setResource(container.getResource()); 667 668 String jobUserName = System.getenv(ApplicationConstants.Environment.USER 669 .name()); 670 ctx.setUser(jobUserName); 671 LOG.info("Setting user in ContainerLaunchContext to: " + jobUserName); 672 673 // Set the environment 674 ctx.setEnvironment(shellEnv); 675 676 // Set the local resources 677 Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); 678 679 // The container for the eventual shell commands needs its own local 680 // resources too. 
681 // In this scenario, if a shell script is specified, we need to have it 682 // copied and made available to the container. 683 if (!shellScriptPath.isEmpty()) { 684 LocalResource shellRsrc = Records.newRecord(LocalResource.class); 685 shellRsrc.setType(LocalResourceType.FILE); 686 shellRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 687 try { 688 shellRsrc.setResource(ConverterUtils.getYarnUrlFromURI(new URI( 689 shellScriptPath))); 690 } catch (URISyntaxException e) { 691 LOG.error("Error when trying to use shell script path specified" 692 + " in env, path=" + shellScriptPath); 693 e.printStackTrace(); 694 695 // A failure scenario on bad input such as invalid shell script path 696 // We know we cannot continue launching the container 697 // so we should release it. 698 // TODO 699 numCompletedContainers.incrementAndGet(); 700 numFailedContainers.incrementAndGet(); 701 return; 702 } 703 shellRsrc.setTimestamp(shellScriptPathTimestamp); 704 shellRsrc.setSize(shellScriptPathLen); 705 localResources.put(ExecShellStringPath, shellRsrc); 706 } 707 ctx.setLocalResources(localResources); 708 709 // Set the necessary command to execute on the allocated container 710 Vector<CharSequence> vargs = new Vector<CharSequence>(5); 711 712 // Set executable command 713 vargs.add(shellCommand); 714 // Set shell script path 715 if (!shellScriptPath.isEmpty()) { 716 vargs.add(ExecShellStringPath); 717 } 718 719 // Set args for the shell command if any 720 vargs.add(shellArgs); 721 // Add log redirect params 722 vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"); 723 vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"); 724 725 // Get final commmand 726 StringBuilder command = new StringBuilder(); 727 for (CharSequence str : vargs) { 728 command.append(str).append(" "); 729 } 730 731 List<String> commands = new ArrayList<String>(); 732 commands.add(command.toString()); 733 ctx.setCommands(commands); 734 735 StartContainerRequest 
startReq = Records 736 .newRecord(StartContainerRequest.class); 737 startReq.setContainerLaunchContext(ctx); 738 try { 739 cm.startContainer(startReq); 740 } catch (YarnRemoteException e) { 741 LOG.info("Start container failed for :" + ", containerId=" 742 + container.getId()); 743 e.printStackTrace(); 744 // TODO do we need to release this container? 745 } 746 747 // Get container status? 748 // Left commented out as the shell scripts are short lived 749 // and we are relying on the status for completed containers 750 // from RM to detect status 751 752 // GetContainerStatusRequest statusReq = 753 // Records.newRecord(GetContainerStatusRequest.class); 754 // statusReq.setContainerId(container.getId()); 755 // GetContainerStatusResponse statusResp; 756 // try { 757 // statusResp = cm.getContainerStatus(statusReq); 758 // LOG.info("Container Status" 759 // + ", id=" + container.getId() 760 // + ", status=" +statusResp.getStatus()); 761 // } catch (YarnRemoteException e) { 762 // e.printStackTrace(); 763 // } 764 } 765 } 766 767 /** 768 * Setup the request that will be sent to the RM for the container ask. 769 * 770 * @param numContainers Containers to ask for from RM 771 * @return the setup ResourceRequest to be sent to RM 772 */ 773 private ContainerRequest setupContainerAskForRM(int numContainers) { 774 // setup requirements for hosts 775 // using * as any host will do for the distributed shell app 776 // set the priority for the request 777 Priority pri = Records.newRecord(Priority.class); 778 // TODO - what is the range for priority? how to decide? 
779 pri.setPriority(requestPriority); 780 781 // Set up resource type requirements 782 // For now, only memory is supported so we set memory requirements 783 Resource capability = Records.newRecord(Resource.class); 784 capability.setMemory(containerMemory); 785 786 ContainerRequest request = new ContainerRequest(capability, null, null, 787 pri, numContainers); 788 LOG.info("Requested container ask: " + request.toString()); 789 return request; 790 } 791 }