001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.yarn.applications.distributedshell;
020    
021    import java.io.BufferedReader;
022    import java.io.IOException;
023    import java.io.InputStreamReader;
024    import java.net.InetSocketAddress;
025    import java.net.URI;
026    import java.net.URISyntaxException;
027    import java.util.ArrayList;
028    import java.util.HashMap;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Vector;
032    import java.util.concurrent.atomic.AtomicInteger;
033    
034    import org.apache.commons.cli.CommandLine;
035    import org.apache.commons.cli.GnuParser;
036    import org.apache.commons.cli.HelpFormatter;
037    import org.apache.commons.cli.Options;
038    import org.apache.commons.cli.ParseException;
039    import org.apache.commons.logging.Log;
040    import org.apache.commons.logging.LogFactory;
041    
042    import org.apache.hadoop.classification.InterfaceAudience;
043    import org.apache.hadoop.classification.InterfaceStability;
044    import org.apache.hadoop.conf.Configuration;
045    import org.apache.hadoop.net.NetUtils;
046    import org.apache.hadoop.yarn.api.AMRMProtocol;
047    import org.apache.hadoop.yarn.api.ApplicationConstants;
048    import org.apache.hadoop.yarn.api.ContainerManager;
049    
050    import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
051    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
052    import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
053    import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
054    import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
055    
056    import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
057    import org.apache.hadoop.yarn.api.records.Container;
058    import org.apache.hadoop.yarn.api.records.ContainerId;
059    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
060    import org.apache.hadoop.yarn.api.records.ContainerState;
061    import org.apache.hadoop.yarn.api.records.ContainerStatus;
062    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
063    import org.apache.hadoop.yarn.api.records.LocalResource;
064    import org.apache.hadoop.yarn.api.records.LocalResourceType;
065    import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
066    import org.apache.hadoop.yarn.api.records.NodeReport;
067    import org.apache.hadoop.yarn.api.records.Priority;
068    import org.apache.hadoop.yarn.api.records.Resource;
069    import org.apache.hadoop.yarn.api.records.ResourceRequest;
070    import org.apache.hadoop.yarn.client.AMRMClient.ContainerRequest;
071    import org.apache.hadoop.yarn.client.AMRMClientAsync;
072    import org.apache.hadoop.yarn.conf.YarnConfiguration;
073    import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
074    import org.apache.hadoop.yarn.ipc.YarnRPC;
075    import org.apache.hadoop.yarn.util.ConverterUtils;
076    import org.apache.hadoop.yarn.util.Records;
077    
078    /**
079     * An ApplicationMaster for executing shell commands on a set of launched
080     * containers using the YARN framework.
081     * 
082     * <p>
083     * This class is meant to act as an example on how to write yarn-based
084     * application masters.
085     * </p>
086     * 
087     * <p>
088     * The ApplicationMaster is started on a container by the
089     * <code>ResourceManager</code>'s launcher. The first thing that the
090     * <code>ApplicationMaster</code> needs to do is to connect and register itself
091     * with the <code>ResourceManager</code>. The registration sets up information
092     * within the <code>ResourceManager</code> regarding what host:port the
093     * ApplicationMaster is listening on to provide any form of functionality to a
094     * client as well as a tracking url that a client can use to keep track of
095     * status/job history if needed.
096     * </p>
097     * 
098     * <p>
099     * The <code>ApplicationMaster</code> needs to send a heartbeat to the
100     * <code>ResourceManager</code> at regular intervals to inform the
101     * <code>ResourceManager</code> that it is up and alive. The
102     * {@link AMRMProtocol#allocate} to the <code>ResourceManager</code> from the
103     * <code>ApplicationMaster</code> acts as a heartbeat.
104     * 
105     * <p>
106     * For the actual handling of the job, the <code>ApplicationMaster</code> has to
107     * request the <code>ResourceManager</code> via {@link AllocateRequest} for the
108     * required no. of containers using {@link ResourceRequest} with the necessary
109     * resource specifications such as node location, computational
110     * (memory/disk/cpu) resource requirements. The <code>ResourceManager</code>
111     * responds with an {@link AllocateResponse} that informs the
112     * <code>ApplicationMaster</code> of the set of newly allocated containers,
113     * completed containers as well as current state of available resources.
114     * </p>
115     * 
116     * <p>
117     * For each allocated container, the <code>ApplicationMaster</code> can then set
118     * up the necessary launch context via {@link ContainerLaunchContext} to specify
119     * the allocated container id, local resources required by the executable, the
120     * environment to be setup for the executable, commands to execute, etc. and
121     * submit a {@link StartContainerRequest} to the {@link ContainerManager} to
122     * launch and execute the defined commands on the given allocated container.
123     * </p>
124     * 
125     * <p>
126     * The <code>ApplicationMaster</code> can monitor the launched container by
127     * either querying the <code>ResourceManager</code> using
128     * {@link AMRMProtocol#allocate} to get updates on completed containers or via
129     * the {@link ContainerManager} by querying for the status of the allocated
130     * container's {@link ContainerId}.
131     *
132     * <p>
133     * After the job has been completed, the <code>ApplicationMaster</code> has to
134     * send a {@link FinishApplicationMasterRequest} to the
135     * <code>ResourceManager</code> to inform it that the
136     * <code>ApplicationMaster</code> has been completed.
137     */
138    @InterfaceAudience.Public
139    @InterfaceStability.Unstable
140    public class ApplicationMaster {
141    
142      private static final Log LOG = LogFactory.getLog(ApplicationMaster.class);
143    
144      // Configuration
145      private Configuration conf;
146      // YARN RPC to communicate with the Resource Manager or Node Manager
147      private YarnRPC rpc;
148    
149      // Handle to communicate with the Resource Manager
150      private AMRMClientAsync resourceManager;
151      
152      // Application Attempt Id ( combination of attemptId and fail count )
153      private ApplicationAttemptId appAttemptID;
154    
155      // TODO
156      // For status update for clients - yet to be implemented
157      // Hostname of the container
158      private String appMasterHostname = "";
159      // Port on which the app master listens for status updates from clients
160      private int appMasterRpcPort = 0;
161      // Tracking url to which app master publishes info for clients to monitor
162      private String appMasterTrackingUrl = "";
163    
164      // App Master configuration
165      // No. of containers to run shell command on
166      private int numTotalContainers = 1;
167      // Memory to request for the container on which the shell command will run
168      private int containerMemory = 10;
169      // Priority of the request
170      private int requestPriority;
171    
172      // Counter for completed containers ( complete denotes successful or failed )
173      private AtomicInteger numCompletedContainers = new AtomicInteger();
174      // Allocated container count so that we know how many containers has the RM
175      // allocated to us
176      private AtomicInteger numAllocatedContainers = new AtomicInteger();
177      // Count of failed containers
178      private AtomicInteger numFailedContainers = new AtomicInteger();
179      // Count of containers already requested from the RM
180      // Needed as once requested, we should not request for containers again.
181      // Only request for more if the original requirement changes.
182      private AtomicInteger numRequestedContainers = new AtomicInteger();
183    
184      // Shell command to be executed
185      private String shellCommand = "";
186      // Args to be passed to the shell command
187      private String shellArgs = "";
188      // Env variables to be setup for the shell command
189      private Map<String, String> shellEnv = new HashMap<String, String>();
190    
191      // Location of shell script ( obtained from info set in env )
192      // Shell script path in fs
193      private String shellScriptPath = "";
194      // Timestamp needed for creating a local resource
195      private long shellScriptPathTimestamp = 0;
196      // File length needed for local resource
197      private long shellScriptPathLen = 0;
198    
199      // Hardcoded path to shell script in launch container's local env
200      private final String ExecShellStringPath = "ExecShellScript.sh";
201    
202      private volatile boolean done;
203      private volatile boolean success;
204      
205      // Launch threads
206      private List<Thread> launchThreads = new ArrayList<Thread>();
207    
208      /**
209       * @param args Command line args
210       */
211      public static void main(String[] args) {
212        boolean result = false;
213        try {
214          ApplicationMaster appMaster = new ApplicationMaster();
215          LOG.info("Initializing ApplicationMaster");
216          boolean doRun = appMaster.init(args);
217          if (!doRun) {
218            System.exit(0);
219          }
220          result = appMaster.run();
221        } catch (Throwable t) {
222          LOG.fatal("Error running ApplicationMaster", t);
223          System.exit(1);
224        }
225        if (result) {
226          LOG.info("Application Master completed successfully. exiting");
227          System.exit(0);
228        } else {
229          LOG.info("Application Master failed. exiting");
230          System.exit(2);
231        }
232      }
233    
234      /**
235       * Dump out contents of $CWD and the environment to stdout for debugging
236       */
237      private void dumpOutDebugInfo() {
238    
239        LOG.info("Dump debug output");
240        Map<String, String> envs = System.getenv();
241        for (Map.Entry<String, String> env : envs.entrySet()) {
242          LOG.info("System env: key=" + env.getKey() + ", val=" + env.getValue());
243          System.out.println("System env: key=" + env.getKey() + ", val="
244              + env.getValue());
245        }
246    
247        String cmd = "ls -al";
248        Runtime run = Runtime.getRuntime();
249        Process pr = null;
250        try {
251          pr = run.exec(cmd);
252          pr.waitFor();
253    
254          BufferedReader buf = new BufferedReader(new InputStreamReader(
255              pr.getInputStream()));
256          String line = "";
257          while ((line = buf.readLine()) != null) {
258            LOG.info("System CWD content: " + line);
259            System.out.println("System CWD content: " + line);
260          }
261          buf.close();
262        } catch (IOException e) {
263          e.printStackTrace();
264        } catch (InterruptedException e) {
265          e.printStackTrace();
266        }
267      }
268    
269      public ApplicationMaster() throws Exception {
270        // Set up the configuration and RPC
271        conf = new YarnConfiguration();
272        rpc = YarnRPC.create(conf);
273      }
274    
275      /**
276       * Parse command line options
277       *
278       * @param args Command line args
279       * @return Whether init successful and run should be invoked
280       * @throws ParseException
281       * @throws IOException
282       */
283      public boolean init(String[] args) throws ParseException, IOException {
284    
285        Options opts = new Options();
286        opts.addOption("app_attempt_id", true,
287            "App Attempt ID. Not to be used unless for testing purposes");
288        opts.addOption("shell_command", true,
289            "Shell command to be executed by the Application Master");
290        opts.addOption("shell_script", true,
291            "Location of the shell script to be executed");
292        opts.addOption("shell_args", true, "Command line args for the shell script");
293        opts.addOption("shell_env", true,
294            "Environment for shell script. Specified as env_key=env_val pairs");
295        opts.addOption("container_memory", true,
296            "Amount of memory in MB to be requested to run the shell command");
297        opts.addOption("num_containers", true,
298            "No. of containers on which the shell command needs to be executed");
299        opts.addOption("priority", true, "Application Priority. Default 0");
300        opts.addOption("debug", false, "Dump out debug information");
301    
302        opts.addOption("help", false, "Print usage");
303        CommandLine cliParser = new GnuParser().parse(opts, args);
304    
305        if (args.length == 0) {
306          printUsage(opts);
307          throw new IllegalArgumentException(
308              "No args specified for application master to initialize");
309        }
310    
311        if (cliParser.hasOption("help")) {
312          printUsage(opts);
313          return false;
314        }
315    
316        if (cliParser.hasOption("debug")) {
317          dumpOutDebugInfo();
318        }
319    
320        Map<String, String> envs = System.getenv();
321    
322        if (envs.containsKey(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)) {
323          appAttemptID = ConverterUtils.toApplicationAttemptId(envs
324              .get(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV));
325        } else if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) {
326          if (cliParser.hasOption("app_attempt_id")) {
327            String appIdStr = cliParser.getOptionValue("app_attempt_id", "");
328            appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr);
329          } else {
330            throw new IllegalArgumentException(
331                "Application Attempt Id not set in the environment");
332          }
333        } else {
334          ContainerId containerId = ConverterUtils.toContainerId(envs
335              .get(ApplicationConstants.AM_CONTAINER_ID_ENV));
336          appAttemptID = containerId.getApplicationAttemptId();
337        }
338    
339        LOG.info("Application master for app" + ", appId="
340            + appAttemptID.getApplicationId().getId() + ", clustertimestamp="
341            + appAttemptID.getApplicationId().getClusterTimestamp()
342            + ", attemptId=" + appAttemptID.getAttemptId());
343    
344        if (!cliParser.hasOption("shell_command")) {
345          throw new IllegalArgumentException(
346              "No shell command specified to be executed by application master");
347        }
348        shellCommand = cliParser.getOptionValue("shell_command");
349    
350        if (cliParser.hasOption("shell_args")) {
351          shellArgs = cliParser.getOptionValue("shell_args");
352        }
353        if (cliParser.hasOption("shell_env")) {
354          String shellEnvs[] = cliParser.getOptionValues("shell_env");
355          for (String env : shellEnvs) {
356            env = env.trim();
357            int index = env.indexOf('=');
358            if (index == -1) {
359              shellEnv.put(env, "");
360              continue;
361            }
362            String key = env.substring(0, index);
363            String val = "";
364            if (index < (env.length() - 1)) {
365              val = env.substring(index + 1);
366            }
367            shellEnv.put(key, val);
368          }
369        }
370    
371        if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION)) {
372          shellScriptPath = envs.get(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION);
373    
374          if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP)) {
375            shellScriptPathTimestamp = Long.valueOf(envs
376                .get(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP));
377          }
378          if (envs.containsKey(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN)) {
379            shellScriptPathLen = Long.valueOf(envs
380                .get(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN));
381          }
382    
383          if (!shellScriptPath.isEmpty()
384              && (shellScriptPathTimestamp <= 0 || shellScriptPathLen <= 0)) {
385            LOG.error("Illegal values in env for shell script path" + ", path="
386                + shellScriptPath + ", len=" + shellScriptPathLen + ", timestamp="
387                + shellScriptPathTimestamp);
388            throw new IllegalArgumentException(
389                "Illegal values in env for shell script path");
390          }
391        }
392    
393        containerMemory = Integer.parseInt(cliParser.getOptionValue(
394            "container_memory", "10"));
395        numTotalContainers = Integer.parseInt(cliParser.getOptionValue(
396            "num_containers", "1"));
397        requestPriority = Integer.parseInt(cliParser
398            .getOptionValue("priority", "0"));
399    
400        return true;
401      }
402    
403      /**
404       * Helper function to print usage
405       *
406       * @param opts Parsed command line options
407       */
408      private void printUsage(Options opts) {
409        new HelpFormatter().printHelp("ApplicationMaster", opts);
410      }
411    
412      /**
413       * Main run function for the application master
414       *
415       * @throws YarnRemoteException
416       */
417      public boolean run() throws YarnRemoteException {
418        LOG.info("Starting ApplicationMaster");
419    
420        AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
421        
422        resourceManager = new AMRMClientAsync(appAttemptID, 1000, allocListener);
423        resourceManager.init(conf);
424        resourceManager.start();
425    
426        // Setup local RPC Server to accept status requests directly from clients
427        // TODO need to setup a protocol for client to be able to communicate to
428        // the RPC server
429        // TODO use the rpc port info to register with the RM for the client to
430        // send requests to this app master
431    
432        // Register self with ResourceManager
433        // This will start heartbeating to the RM
434        RegisterApplicationMasterResponse response = resourceManager
435            .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
436                appMasterTrackingUrl);
437        // Dump out information about cluster capability as seen by the
438        // resource manager
439        int minMem = response.getMinimumResourceCapability().getMemory();
440        int maxMem = response.getMaximumResourceCapability().getMemory();
441        LOG.info("Min mem capabililty of resources in this cluster " + minMem);
442        LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
443    
444        // A resource ask has to be atleast the minimum of the capability of the
445        // cluster, the value has to be a multiple of the min value and cannot
446        // exceed the max.
447        // If it is not an exact multiple of min, the RM will allocate to the
448        // nearest multiple of min
449        if (containerMemory < minMem) {
450          LOG.info("Container memory specified below min threshold of cluster."
451              + " Using min value." + ", specified=" + containerMemory + ", min="
452              + minMem);
453          containerMemory = minMem;
454        } else if (containerMemory > maxMem) {
455          LOG.info("Container memory specified above max threshold of cluster."
456              + " Using max value." + ", specified=" + containerMemory + ", max="
457              + maxMem);
458          containerMemory = maxMem;
459        }
460    
461    
462        // Setup ask for containers from RM
463        // Send request for containers to RM
464        // Until we get our fully allocated quota, we keep on polling RM for
465        // containers
466        // Keep looping until all the containers are launched and shell script
467        // executed on them ( regardless of success/failure).
468        ContainerRequest containerAsk = setupContainerAskForRM(numTotalContainers);
469        resourceManager.addContainerRequest(containerAsk);
470        numRequestedContainers.set(numTotalContainers);
471    
472        while (!done) {
473          try {
474            Thread.sleep(200);
475          } catch (InterruptedException ex) {}
476        }
477        finish();
478        
479        return success;
480      }
481      
482      private void finish() {
483        // Join all launched threads
484        // needed for when we time out
485        // and we need to release containers
486        for (Thread launchThread : launchThreads) {
487          try {
488            launchThread.join(10000);
489          } catch (InterruptedException e) {
490            LOG.info("Exception thrown in thread join: " + e.getMessage());
491            e.printStackTrace();
492          }
493        }
494    
495        // When the application completes, it should send a finish application
496        // signal to the RM
497        LOG.info("Application completed. Signalling finish to RM");
498    
499        FinalApplicationStatus appStatus;
500        String appMessage = null;
501        success = true;
502        if (numFailedContainers.get() == 0) {
503          appStatus = FinalApplicationStatus.SUCCEEDED;
504        } else {
505          appStatus = FinalApplicationStatus.FAILED;
506          appMessage = "Diagnostics." + ", total=" + numTotalContainers
507              + ", completed=" + numCompletedContainers.get() + ", allocated="
508              + numAllocatedContainers.get() + ", failed="
509              + numFailedContainers.get();
510          success = false;
511        }
512        try {
513          resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
514        } catch (YarnRemoteException ex) {
515          LOG.error("Failed to unregister application", ex);
516        }
517        
518        done = true;
519        resourceManager.stop();
520      }
521      
522      private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
523        @Override
524        public void onContainersCompleted(List<ContainerStatus> completedContainers) {
525          LOG.info("Got response from RM for container ask, completedCnt="
526              + completedContainers.size());
527          for (ContainerStatus containerStatus : completedContainers) {
528            LOG.info("Got container status for containerID="
529                + containerStatus.getContainerId() + ", state="
530                + containerStatus.getState() + ", exitStatus="
531                + containerStatus.getExitStatus() + ", diagnostics="
532                + containerStatus.getDiagnostics());
533    
534            // non complete containers should not be here
535            assert (containerStatus.getState() == ContainerState.COMPLETE);
536    
537            // increment counters for completed/failed containers
538            int exitStatus = containerStatus.getExitStatus();
539            if (0 != exitStatus) {
540              // container failed
541              if (YarnConfiguration.ABORTED_CONTAINER_EXIT_STATUS != exitStatus) {
542                // shell script failed
543                // counts as completed
544                numCompletedContainers.incrementAndGet();
545                numFailedContainers.incrementAndGet();
546              } else {
547                // container was killed by framework, possibly preempted
548                // we should re-try as the container was lost for some reason
549                numAllocatedContainers.decrementAndGet();
550                numRequestedContainers.decrementAndGet();
551                // we do not need to release the container as it would be done
552                // by the RM
553              }
554            } else {
555              // nothing to do
556              // container completed successfully
557              numCompletedContainers.incrementAndGet();
558              LOG.info("Container completed successfully." + ", containerId="
559                  + containerStatus.getContainerId());
560            }
561          }
562          
563          // ask for more containers if any failed
564          int askCount = numTotalContainers - numRequestedContainers.get();
565          numRequestedContainers.addAndGet(askCount);
566    
567          if (askCount > 0) {
568            ContainerRequest containerAsk = setupContainerAskForRM(askCount);
569            resourceManager.addContainerRequest(containerAsk);
570          }
571          
572          // set progress to deliver to RM on next heartbeat
573          float progress = (float) numCompletedContainers.get()
574              / numTotalContainers;
575          resourceManager.setProgress(progress);
576          
577          if (numCompletedContainers.get() == numTotalContainers) {
578            done = true;
579          }
580        }
581    
582        @Override
583        public void onContainersAllocated(List<Container> allocatedContainers) {
584          LOG.info("Got response from RM for container ask, allocatedCnt="
585              + allocatedContainers.size());
586          numAllocatedContainers.addAndGet(allocatedContainers.size());
587          for (Container allocatedContainer : allocatedContainers) {
588            LOG.info("Launching shell command on a new container."
589                + ", containerId=" + allocatedContainer.getId()
590                + ", containerNode=" + allocatedContainer.getNodeId().getHost()
591                + ":" + allocatedContainer.getNodeId().getPort()
592                + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
593                + ", containerState" + allocatedContainer.getState()
594                + ", containerResourceMemory"
595                + allocatedContainer.getResource().getMemory());
596            // + ", containerToken"
597            // +allocatedContainer.getContainerToken().getIdentifier().toString());
598    
599            LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(
600                allocatedContainer);
601            Thread launchThread = new Thread(runnableLaunchContainer);
602    
603            // launch and start the container on a separate thread to keep
604            // the main thread unblocked
605            // as all containers may not be allocated at one go.
606            launchThreads.add(launchThread);
607            launchThread.start();
608          }
609        }
610    
611        @Override
612        public void onRebootRequest() {}
613    
614        @Override
615        public void onNodesUpdated(List<NodeReport> updatedNodes) {}
616      }
617    
618      /**
619       * Thread to connect to the {@link ContainerManager} and launch the container
620       * that will execute the shell command.
621       */
622      private class LaunchContainerRunnable implements Runnable {
623    
624        // Allocated container
625        Container container;
626        // Handle to communicate with ContainerManager
627        ContainerManager cm;
628    
629        /**
630         * @param lcontainer Allocated container
631         */
632        public LaunchContainerRunnable(Container lcontainer) {
633          this.container = lcontainer;
634        }
635    
636        /**
637         * Helper function to connect to CM
638         */
639        private void connectToCM() {
640          LOG.debug("Connecting to ContainerManager for containerid="
641              + container.getId());
642          String cmIpPortStr = container.getNodeId().getHost() + ":"
643              + container.getNodeId().getPort();
644          InetSocketAddress cmAddress = NetUtils.createSocketAddr(cmIpPortStr);
645          LOG.info("Connecting to ContainerManager at " + cmIpPortStr);
646          this.cm = ((ContainerManager) rpc.getProxy(ContainerManager.class,
647              cmAddress, conf));
648        }
649    
650        @Override
651        /**
652         * Connects to CM, sets up container launch context 
653         * for shell command and eventually dispatches the container 
654         * start request to the CM. 
655         */
656        public void run() {
657          // Connect to ContainerManager
658          connectToCM();
659    
660          LOG.info("Setting up container launch container for containerid="
661              + container.getId());
662          ContainerLaunchContext ctx = Records
663              .newRecord(ContainerLaunchContext.class);
664    
665          ctx.setContainerId(container.getId());
666          ctx.setResource(container.getResource());
667    
668          String jobUserName = System.getenv(ApplicationConstants.Environment.USER
669              .name());
670          ctx.setUser(jobUserName);
671          LOG.info("Setting user in ContainerLaunchContext to: " + jobUserName);
672    
673          // Set the environment
674          ctx.setEnvironment(shellEnv);
675    
676          // Set the local resources
677          Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
678    
679          // The container for the eventual shell commands needs its own local
680          // resources too.
681          // In this scenario, if a shell script is specified, we need to have it
682          // copied and made available to the container.
683          if (!shellScriptPath.isEmpty()) {
684            LocalResource shellRsrc = Records.newRecord(LocalResource.class);
685            shellRsrc.setType(LocalResourceType.FILE);
686            shellRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
687            try {
688              shellRsrc.setResource(ConverterUtils.getYarnUrlFromURI(new URI(
689                  shellScriptPath)));
690            } catch (URISyntaxException e) {
691              LOG.error("Error when trying to use shell script path specified"
692                  + " in env, path=" + shellScriptPath);
693              e.printStackTrace();
694    
695              // A failure scenario on bad input such as invalid shell script path
696              // We know we cannot continue launching the container
697              // so we should release it.
698              // TODO
699              numCompletedContainers.incrementAndGet();
700              numFailedContainers.incrementAndGet();
701              return;
702            }
703            shellRsrc.setTimestamp(shellScriptPathTimestamp);
704            shellRsrc.setSize(shellScriptPathLen);
705            localResources.put(ExecShellStringPath, shellRsrc);
706          }
707          ctx.setLocalResources(localResources);
708    
709          // Set the necessary command to execute on the allocated container
710          Vector<CharSequence> vargs = new Vector<CharSequence>(5);
711    
712          // Set executable command
713          vargs.add(shellCommand);
714          // Set shell script path
715          if (!shellScriptPath.isEmpty()) {
716            vargs.add(ExecShellStringPath);
717          }
718    
719          // Set args for the shell command if any
720          vargs.add(shellArgs);
721          // Add log redirect params
722          vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout");
723          vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr");
724    
725          // Get final commmand
726          StringBuilder command = new StringBuilder();
727          for (CharSequence str : vargs) {
728            command.append(str).append(" ");
729          }
730    
731          List<String> commands = new ArrayList<String>();
732          commands.add(command.toString());
733          ctx.setCommands(commands);
734    
735          StartContainerRequest startReq = Records
736              .newRecord(StartContainerRequest.class);
737          startReq.setContainerLaunchContext(ctx);
738          try {
739            cm.startContainer(startReq);
740          } catch (YarnRemoteException e) {
741            LOG.info("Start container failed for :" + ", containerId="
742                + container.getId());
743            e.printStackTrace();
744            // TODO do we need to release this container?
745          }
746    
747          // Get container status?
748          // Left commented out as the shell scripts are short lived
749          // and we are relying on the status for completed containers
750          // from RM to detect status
751    
752          // GetContainerStatusRequest statusReq =
753          // Records.newRecord(GetContainerStatusRequest.class);
754          // statusReq.setContainerId(container.getId());
755          // GetContainerStatusResponse statusResp;
756          // try {
757          // statusResp = cm.getContainerStatus(statusReq);
758          // LOG.info("Container Status"
759          // + ", id=" + container.getId()
760          // + ", status=" +statusResp.getStatus());
761          // } catch (YarnRemoteException e) {
762          // e.printStackTrace();
763          // }
764        }
765      }
766    
767      /**
768       * Setup the request that will be sent to the RM for the container ask.
769       *
770       * @param numContainers Containers to ask for from RM
771       * @return the setup ResourceRequest to be sent to RM
772       */
773      private ContainerRequest setupContainerAskForRM(int numContainers) {
774        // setup requirements for hosts
775        // using * as any host will do for the distributed shell app
776        // set the priority for the request
777        Priority pri = Records.newRecord(Priority.class);
778        // TODO - what is the range for priority? how to decide?
779        pri.setPriority(requestPriority);
780    
781        // Set up resource type requirements
782        // For now, only memory is supported so we set memory requirements
783        Resource capability = Records.newRecord(Resource.class);
784        capability.setMemory(containerMemory);
785    
786        ContainerRequest request = new ContainerRequest(capability, null, null,
787            pri, numContainers);
788        LOG.info("Requested container ask: " + request.toString());
789        return request;
790      }
791    }