/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.tools.CLI;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenRenewer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * <code>JobClient</code> is the primary interface for the user-job to interact
 * with the cluster.
 * 
 * <code>JobClient</code> provides facilities to submit jobs, track their
 * progress, access component-tasks' reports/logs, get the Map-Reduce cluster's
 * status information, etc.
 * 
 * <p>The job submission process involves:
 * <ol>
 *   <li>
 *   Checking the input and output specifications of the job.
 *   </li>
 *   <li>
 *   Computing the {@link InputSplit}s for the job.
 *   </li>
 *   <li>
 *   Setting up the requisite accounting information for the
 *   {@link DistributedCache} of the job, if necessary.
 *   </li>
 *   <li>
 *   Copying the job's jar and configuration to the map-reduce system directory
 *   on the distributed file-system.
 *   </li>
 *   <li>
 *   Submitting the job to the cluster and optionally monitoring
 *   its status.
 *   </li>
 * </ol>
 *  
 * Normally the user creates the application, describes various facets of the
 * job via {@link JobConf} and then uses the <code>JobClient</code> to submit 
 * the job and monitor its progress.
 * 
 * <p>Here is an example of how to use <code>JobClient</code>:</p>
 * <p><blockquote><pre>
 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *     
 *     // Specify various job-specific parameters
 *     job.setJobName("myjob");
 *     
 *     FileInputFormat.setInputPaths(job, new Path("in"));
 *     FileOutputFormat.setOutputPath(job, new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     JobClient.runJob(job);
 * </pre></blockquote></p>
 * 
 * <h4 id="JobControl">Job Control</h4>
 * 
 * <p>At times clients need to chain map-reduce jobs to accomplish complex
 * tasks which cannot be done via a single map-reduce job. This is fairly easy
 * since the output of a job typically goes to the distributed file-system,
 * where it can serve as the input for the next job.</p>
 * 
 * <p>However, this also means that the onus of ensuring jobs are complete
 * (success/failure) lies squarely on the clients. In such situations the 
 * various job-control options are:
 * <ol>
 *   <li>
 *   {@link #runJob(JobConf)} : submits the job and returns only after 
 *   the job has completed.
 *   </li>
 *   <li>
 *   {@link #submitJob(JobConf)} : only submits the job; the client can then
 *   poll the returned handle to the {@link RunningJob} to query status and
 *   make scheduling decisions, as sketched below.
 *   </li>
 *   <li>
 *   {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
 *   on job-completion, thus avoiding polling.
 *   </li>
 * </ol>
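 *
 * <p>For instance, a non-blocking submit-and-poll loop might look like the
 * following (a minimal sketch; assumes a configured <code>JobConf</code>
 * named <code>job</code> and elides exception handling):</p>
 * <p><blockquote><pre>
 *     JobClient client = new JobClient(job);
 *     RunningJob running = client.submitJob(job);
 *     while (!running.isComplete()) {
 *       Thread.sleep(5000);              // poll every five seconds
 *     }
 *     if (!running.isSuccessful()) {
 *       throw new IOException("Job failed!");
 *     }
 * </pre></blockquote></p>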
 * 
 * @see JobConf
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class JobClient extends CLI {

  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY =
      "mapreduce.jobclient.retry.policy.enabled";
  @InterfaceAudience.Private
  public static final boolean MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT =
      false;
  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY =
      "mapreduce.jobclient.retry.policy.spec";
  @InterfaceAudience.Private
  public static final String MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT =
      "10000,6,60000,10"; // t1,n1,t2,n2,... pairs of sleep-time (ms) and retries

  public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
  private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED;
  /* Notes that getDelegationToken was called. This is a hack for Oozie
   * to make sure we add history server delegation tokens to the credentials
   * for the job. Since the API only allows one delegation token to be
   * returned, we have to resort to this flag.
   */
  private boolean getDelegationTokenCalled = false;
  /* The renewer that will renew the delegation token. */
  private String dtRenewer = null;
  /* Config keys governing the history server delegation token for the job. */
  static final String HS_DELEGATION_TOKEN_REQUIRED 
      = "mapreduce.history.server.delegationtoken.required";
  static final String HS_DELEGATION_TOKEN_RENEWER 
      = "mapreduce.history.server.delegationtoken.renewer";
  
  static {
    ConfigUtil.loadResources();
  }

  /**
   * A NetworkedJob is an implementation of RunningJob.  It holds
   * a {@link Job} object to provide some info, and interacts with the
   * remote service to provide certain functionality.
   */
  static class NetworkedJob implements RunningJob {
    Job job;
    /**
     * We store the {@link Job} used to interact with the cluster.  If the
     * job is null, then we cannot perform any of the tasks.  The job might
     * be null if the cluster has completely forgotten about the job
     * (e.g., 24 hours after the job completes).
     */
    public NetworkedJob(JobStatus status, Cluster cluster) throws IOException {
      job = Job.getInstance(cluster, status, new JobConf(status.getJobFile()));
    }

    public NetworkedJob(Job job) throws IOException {
      this.job = job;
    }

    public Configuration getConfiguration() {
      return job.getConfiguration();
    }

    /**
     * An identifier for the job.
     */
    public JobID getID() {
      return JobID.downgrade(job.getJobID());
    }
    
    /** @deprecated This method is deprecated and will be removed. Applications
     * should use {@link #getID()} instead. */
    @Deprecated
    public String getJobID() {
      return getID().toString();
    }
    
    /**
     * The user-specified job name
     */
    public String getJobName() {
      return job.getJobName();
    }

    /**
     * The name of the job file
     */
    public String getJobFile() {
      return job.getJobFile();
    }

    /**
     * A URL where the job's status can be seen
     */
    public String getTrackingURL() {
      return job.getTrackingURL();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of map work
     * completed.
     */
    public float mapProgress() throws IOException {
      return job.mapProgress();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of reduce work
     * completed.
     */
    public float reduceProgress() throws IOException {
      return job.reduceProgress();
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of cleanup work
     * completed.
     */
    public float cleanupProgress() throws IOException {
      try {
        return job.cleanupProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A float between 0.0 and 1.0, indicating the % of setup work
     * completed.
     */
    public float setupProgress() throws IOException {
      return job.setupProgress();
    }

    /**
     * Returns immediately whether the whole job is done yet or not.
     */
    public synchronized boolean isComplete() throws IOException {
      return job.isComplete();
    }

    /**
     * True iff job completed successfully.
     */
    public synchronized boolean isSuccessful() throws IOException {
      return job.isSuccessful();
    }

    /**
     * Blocks until the job is finished
     */
    public void waitForCompletion() throws IOException {
      try {
        job.waitForCompletion(false);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      } catch (ClassNotFoundException ce) {
        throw new IOException(ce);
      }
    }

    /**
     * Tells the service to get the state of the current job.
     */
    public synchronized int getJobState() throws IOException {
      try {
        return job.getJobState().getValue();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
    
    /**
     * Tells the service to terminate the current job.
     */
    public synchronized void killJob() throws IOException {
      job.killJob();
    }

    /**
     * Set the priority of the job.
     * @param priority new priority of the job.
     */
    public synchronized void setJobPriority(String priority) 
                                                throws IOException {
      try {
        job.setPriority(
          org.apache.hadoop.mapreduce.JobPriority.valueOf(priority));
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
    
    /**
     * Kill indicated task attempt.
     * @param taskId the id of the task to kill.
     * @param shouldFail if true the task is failed and added to the failed
     * tasks list, otherwise it is just killed, without affecting the job's
     * failure status.
     */
    public synchronized void killTask(TaskAttemptID taskId,
        boolean shouldFail) throws IOException {
      if (shouldFail) {
        job.failTask(taskId);
      } else {
        job.killTask(taskId);
      }
    }

    /** @deprecated Applications should use
     * {@link #killTask(TaskAttemptID, boolean)} instead. */
    @Deprecated
    public synchronized void killTask(String taskId, boolean shouldFail) throws IOException {
      killTask(TaskAttemptID.forName(taskId), shouldFail);
    }
    
    /**
     * Fetch task completion events from the cluster for this job.
     */
    public synchronized TaskCompletionEvent[] getTaskCompletionEvents(
        int startFrom) throws IOException {
      try {
        // fetch events in batches of 10, starting at the given offset
        org.apache.hadoop.mapreduce.TaskCompletionEvent[] events = 
          job.getTaskCompletionEvents(startFrom, 10);
        TaskCompletionEvent[] ret = new TaskCompletionEvent[events.length];
        for (int i = 0; i < events.length; i++) {
          ret[i] = TaskCompletionEvent.downgrade(events[i]);
        }
        return ret;
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Returns a string representation of the job's status.
     */
    @Override
    public String toString() {
      return job.toString();
    }

    /**
     * Returns the counters for this job
     */
    public Counters getCounters() throws IOException {
      Counters result = null;
      org.apache.hadoop.mapreduce.Counters temp = job.getCounters();
      if (temp != null) {
        result = Counters.downgrade(temp);
      }
      return result;
    }
    
    @Override
    public String[] getTaskDiagnostics(TaskAttemptID id) throws IOException {
      try { 
        return job.getTaskDiagnostics(id);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public String getHistoryUrl() throws IOException {
      try {
        return job.getHistoryUrl();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public boolean isRetired() throws IOException {
      try {
        return job.isRetired();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
    
    boolean monitorAndPrintJob() throws IOException, InterruptedException {
      return job.monitorAndPrintJob();
    }
    
    @Override
    public String getFailureInfo() throws IOException {
      try {
        return job.getStatus().getFailureInfo();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    @Override
    public JobStatus getJobStatus() throws IOException {
      try {
        return JobStatus.downgrade(job.getStatus());
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
  }

  /**
   * UGI of the client. We store this UGI when the client is created and 
   * then make sure that the same UGI is used to run the various protocols.
   */
  UserGroupInformation clientUgi;
  
  /**
   * Create a job client.
   */
  public JobClient() {
  }
    
  /**
   * Build a job client with the given {@link JobConf}, and connect to the 
   * default cluster.
   * 
   * @param conf the job configuration.
   * @throws IOException
   */
  public JobClient(JobConf conf) throws IOException {
    init(conf);
  }

  /**
   * Build a job client with the given {@link Configuration}, 
   * and connect to the default cluster.
   * 
   * @param conf the configuration.
   * @throws IOException
   */
  public JobClient(Configuration conf) throws IOException {
    init(new JobConf(conf));
  }

  /**
   * Connect to the default cluster.
   * @param conf the job configuration.
   * @throws IOException
   */
  public void init(JobConf conf) throws IOException {
    setConf(conf);
    cluster = new Cluster(conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

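  /**
   * A {@link TokenRenewer} for MapReduce delegation tokens. Renewal and
   * cancellation are delegated to a {@link Cluster} built from the supplied
   * configuration.
   */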
  @InterfaceAudience.Private
  public static class Renewer extends TokenRenewer {

    @Override
    public boolean handleKind(Text kind) {
      return DelegationTokenIdentifier.MAPREDUCE_DELEGATION_KIND.equals(kind);
    }

    @SuppressWarnings("unchecked")
    @Override
    public long renew(Token<?> token, Configuration conf
                      ) throws IOException, InterruptedException {
      return new Cluster(conf).
        renewDelegationToken((Token<DelegationTokenIdentifier>) token);
    }

    @SuppressWarnings("unchecked")
    @Override
    public void cancel(Token<?> token, Configuration conf
                       ) throws IOException, InterruptedException {
      new Cluster(conf).
        cancelDelegationToken((Token<DelegationTokenIdentifier>) token);
    }

    @Override
    public boolean isManaged(Token<?> token) throws IOException {
      return true;
    }
  }

  /**
   * Build a job client, connect to the indicated job tracker.
   * 
   * @param jobTrackAddr the job tracker to connect to.
   * @param conf configuration.
   */
  public JobClient(InetSocketAddress jobTrackAddr, 
                   Configuration conf) throws IOException {
    cluster = new Cluster(jobTrackAddr, conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

  /**
   * Close the <code>JobClient</code>.
   */
  public synchronized void close() throws IOException {
    cluster.close();
  }

  /**
   * Get a filesystem handle.  We need this to prepare jobs
   * for submission to the MapReduce system.
   * 
   * @return the filesystem handle.
   */
  public synchronized FileSystem getFs() throws IOException {
    try { 
      return cluster.getFileSystem();
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Get a handle to the Cluster
   */
  public Cluster getClusterHandle() {
    return cluster;
  }
  
  /**
   * Submit a job to the MR system.
   * 
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
   * @param jobFile the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws InvalidJobConfException
   * @throws IOException
   */
  public RunningJob submitJob(String jobFile) throws FileNotFoundException, 
                                                     InvalidJobConfException, 
                                                     IOException {
    // Load in the submitted job details
    JobConf job = new JobConf(jobFile);
    return submitJob(job);
  }
    
  /**
   * Submit a job to the MR system.
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
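   * <p>A minimal usage sketch (illustrative; assumes a configured
   * <code>JobConf</code>):</p>
   * <p><blockquote><pre>
   *     JobClient jc = new JobClient(conf);
   *     RunningJob rj = jc.submitJob(conf);
   *     rj.waitForCompletion();          // block until the job finishes
   * </pre></blockquote></p>
   * 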
   * @param conf the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws IOException
   */
  public RunningJob submitJob(final JobConf conf) throws FileNotFoundException,
                                                  IOException {
    return submitJobInternal(conf);
  }

  @InterfaceAudience.Private
  public RunningJob submitJobInternal(final JobConf conf)
      throws FileNotFoundException, IOException {
    try {
      conf.setBooleanIfUnset("mapred.mapper.new-api", false);
      conf.setBooleanIfUnset("mapred.reducer.new-api", false);
      if (getDelegationTokenCalled) {
        conf.setBoolean(HS_DELEGATION_TOKEN_REQUIRED, getDelegationTokenCalled);
        getDelegationTokenCalled = false;
        conf.set(HS_DELEGATION_TOKEN_RENEWER, dtRenewer);
        dtRenewer = null;
      }
      Job job = clientUgi.doAs(new PrivilegedExceptionAction<Job>() {
        @Override
        public Job run() throws IOException, ClassNotFoundException, 
          InterruptedException {
          Job job = Job.getInstance(conf);
          job.submit();
          return job;
        }
      });
      // update our Cluster instance with the one created by Job for submission
      // (we can't pass our Cluster instance to Job, since Job wraps the config
      // instance, and the two configs would then diverge)
      cluster = job.getCluster();
      return new NetworkedJob(job);
    } catch (InterruptedException ie) {
      throw new IOException("interrupted", ie);
    }
  }

  private Job getJobUsingCluster(final JobID jobid) throws IOException,
      InterruptedException {
    return clientUgi.doAs(new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, InterruptedException {
        return cluster.getJob(jobid);
      }
    });
  }

  /**
   * Get a {@link RunningJob} object to track an ongoing job.  Returns
   * null if the id does not correspond to any known job.
   * 
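   * <p>Illustrative lookup sketch (the job id string here is hypothetical):</p>
   * <p><blockquote><pre>
   *     RunningJob rj = client.getJob(JobID.forName("job_200707121733_0003"));
   *     if (rj != null) {
   *       System.out.println(rj.getTrackingURL());
   *     }
   * </pre></blockquote></p>
   * 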
   * @param jobid the jobid of the job.
   * @return the {@link RunningJob} handle to track the job, null if the 
   *         <code>jobid</code> doesn't correspond to any known job.
   * @throws IOException
   */
  public RunningJob getJob(final JobID jobid) throws IOException {
    try {
      Job job = getJobUsingCluster(jobid);
      if (job != null) {
        JobStatus status = JobStatus.downgrade(job.getStatus());
        if (status != null) {
          return new NetworkedJob(status, cluster);
        }
      }
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
    return null;
  }

  /**@deprecated Applications should rather use {@link #getJob(JobID)}. 
   */
  @Deprecated
  public RunningJob getJob(String jobid) throws IOException {
    return getJob(JobID.forName(jobid));
  }
  
  private static final TaskReport[] EMPTY_TASK_REPORTS = new TaskReport[0];
  
  /**
   * Get the information of the current state of the map tasks of a job.
   * 
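   * <p>For example, to print each map task's progress (illustrative):</p>
   * <p><blockquote><pre>
   *     for (TaskReport r : client.getMapTaskReports(jobId)) {
   *       System.out.println(r.getTaskID() + " " + r.getProgress());
   *     }
   * </pre></blockquote></p>
   * 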
   * @param jobId the job to query.
   * @return the list of all of the map tips.
   * @throws IOException
   */
  public TaskReport[] getMapTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.MAP);
  }
  
  private TaskReport[] getTaskReports(final JobID jobId, TaskType type) throws 
    IOException {
    try {
      Job j = getJobUsingCluster(jobId);
      if (j == null) {
        return EMPTY_TASK_REPORTS;
      }
      return TaskReport.downgradeArray(j.getTaskReports(type));
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}*/
  @Deprecated
  public TaskReport[] getMapTaskReports(String jobId) throws IOException {
    return getMapTaskReports(JobID.forName(jobId));
  }
  
  /**
   * Get the information of the current state of the reduce tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the reduce tips.
   * @throws IOException
   */    
  public TaskReport[] getReduceTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.REDUCE);
  }

  /**
   * Get the information of the current state of the cleanup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the cleanup tips.
   * @throws IOException
   */    
  public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_CLEANUP);
  }

  /**
   * Get the information of the current state of the setup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the setup tips.
   * @throws IOException
   */    
  public TaskReport[] getSetupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_SETUP);
  }

  /**@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}*/
  @Deprecated
  public TaskReport[] getReduceTaskReports(String jobId) throws IOException {
    return getReduceTaskReports(JobID.forName(jobId));
  }
  
  /**
   * Display the information about a job's tasks, of a particular type and
   * in a particular state
   * 
   * @param jobId the ID of the job
   * @param type the type of the task (map/reduce/setup/cleanup)
   * @param state the state of the task 
   * (pending/running/completed/failed/killed)
   */
  public void displayTasks(final JobID jobId, String type, String state) 
      throws IOException {
    try {
      Job job = getJobUsingCluster(jobId);
      super.displayTasks(job, type, state);
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Get status information about the Map-Reduce cluster.
   *  
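   * <p>A small usage sketch (illustrative):</p>
   * <p><blockquote><pre>
   *     ClusterStatus status = client.getClusterStatus();
   *     System.out.println("task trackers: " + status.getTaskTrackers()
   *         + ", map slots: " + status.getMaxMapTasks()
   *         + ", reduce slots: " + status.getMaxReduceTasks());
   * </pre></blockquote></p>
   *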
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(metrics.getTaskTrackerCount(),
              metrics.getBlackListedTaskTrackerCount(),
              cluster.getTaskTrackerExpiryInterval(),
              metrics.getOccupiedMapSlots(),
              metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
              metrics.getReduceSlotCapacity(),
              cluster.getJobTrackerStatus(),
              metrics.getDecommissionedTaskTrackerCount());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  private Collection<String> arrayToStringList(TaskTrackerInfo[] objs) {
    Collection<String> list = new ArrayList<String>();
    for (TaskTrackerInfo info : objs) {
      list.add(info.getTaskTrackerName());
    }
    return list;
  }

  private Collection<BlackListInfo> arrayToBlackListInfo(TaskTrackerInfo[] objs) {
    Collection<BlackListInfo> list = new ArrayList<BlackListInfo>();
    for (TaskTrackerInfo info : objs) {
      BlackListInfo binfo = new BlackListInfo();
      binfo.setTrackerName(info.getTaskTrackerName());
      binfo.setReasonForBlackListing(info.getReasonForBlacklist());
      binfo.setBlackListReport(info.getBlacklistReport());
      list.add(binfo);
    }
    return list;
  }

  /**
   * Get status information about the Map-Reduce cluster.
   *  
   * @param  detailed if true then get a detailed status including the
   *         tracker names
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus(boolean detailed) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(
              arrayToStringList(cluster.getActiveTaskTrackers()),
              arrayToBlackListInfo(cluster.getBlackListedTaskTrackers()),
              cluster.getTaskTrackerExpiryInterval(),
              metrics.getOccupiedMapSlots(),
              metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
              metrics.getReduceSlotCapacity(),
              cluster.getJobTrackerStatus());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** 
   * Get the jobs that are not completed and not failed.
   * 
   * @return array of {@link JobStatus} for the running/to-be-run jobs.
   * @throws IOException
   */
  public JobStatus[] jobsToComplete() throws IOException {
    List<JobStatus> stats = new ArrayList<JobStatus>();
    for (JobStatus stat : getAllJobs()) {
      if (!stat.isJobComplete()) {
        stats.add(stat);
      }
    }
    return stats.toArray(new JobStatus[0]);
  }

  /** 
   * Get the jobs that are submitted.
   * 
   * @return array of {@link JobStatus} for the submitted jobs.
   * @throws IOException
   */
  public JobStatus[] getAllJobs() throws IOException {
    try {
      org.apache.hadoop.mapreduce.JobStatus[] jobs = 
          clientUgi.doAs(new PrivilegedExceptionAction<
              org.apache.hadoop.mapreduce.JobStatus[]>() {
            public org.apache.hadoop.mapreduce.JobStatus[] run() 
                throws IOException, InterruptedException {
              return cluster.getAllJobStatuses();
            }
          });
      JobStatus[] stats = new JobStatus[jobs.length];
      for (int i = 0; i < jobs.length; i++) {
        stats[i] = JobStatus.downgrade(jobs[i]);
      }
      return stats;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /** 
   * Utility that submits a job, then polls for progress until the job is
   * complete.
   * 
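   * <p>Typical blocking usage (a minimal sketch; <code>MyJob</code> is a
   * placeholder application class):</p>
   * <p><blockquote><pre>
   *     JobConf job = new JobConf(MyJob.class);
   *     // ... configure the job ...
   *     JobClient.runJob(job);
   * </pre></blockquote></p>
   * 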
   * @param job the job configuration.
   * @throws IOException if the job fails
   */
  public static RunningJob runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    RunningJob rj = jc.submitJob(job);
    try {
      if (!jc.monitorAndPrintJob(job, rj)) {
        throw new IOException("Job failed!");
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
    return rj;
  }
  
  /**
   * Monitor a job and print status in real-time as progress is made and tasks 
   * fail.
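   *
   * <p>Illustrative use after a non-blocking submit:</p>
   * <p><blockquote><pre>
   *     RunningJob rj = jc.submitJob(conf);
   *     boolean success = jc.monitorAndPrintJob(conf, rj);
   * </pre></blockquote></p>
   *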
   * @param conf the job's configuration
   * @param job the job to track
   * @return true if the job succeeded
   * @throws IOException if communication to the JobTracker fails
   */
  public boolean monitorAndPrintJob(JobConf conf, 
                                    RunningJob job
  ) throws IOException, InterruptedException {
    return ((NetworkedJob) job).monitorAndPrintJob();
  }

  static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
    return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId); 
  }
  
  static Configuration getConfiguration(String jobTrackerSpec) {
    Configuration conf = new Configuration();
    if (jobTrackerSpec != null) {
      if (jobTrackerSpec.indexOf(":") >= 0) {
        conf.set("mapred.job.tracker", jobTrackerSpec);
      } else {
        String classpathFile = "hadoop-" + jobTrackerSpec + ".xml";
        URL validate = conf.getResource(classpathFile);
        if (validate == null) {
          throw new RuntimeException(classpathFile + " not found on CLASSPATH");
        }
        conf.addResource(classpathFile);
      }
    }
    return conf;
  }

  /**
   * Sets the output filter for tasks. Only those tasks are printed whose
   * output matches the filter.
   * @param newValue task filter.
   */
  @Deprecated
  public void setTaskOutputFilter(TaskStatusFilter newValue) {
    this.taskOutputFilter = newValue;
  }
    
  /**
   * Get the task output filter out of the JobConf.
   * 
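   * <p>For example (illustrative):</p>
   * <p><blockquote><pre>
   *     JobClient.setTaskOutputFilter(job, JobClient.TaskStatusFilter.ALL);
   *     TaskStatusFilter filter = JobClient.getTaskOutputFilter(job);
   * </pre></blockquote></p>
   * 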
   * @param job the JobConf to examine.
   * @return the filter level.
   */
  public static TaskStatusFilter getTaskOutputFilter(JobConf job) {
    return TaskStatusFilter.valueOf(job.get("jobclient.output.filter", 
                                            "FAILED"));
  }
    
  /**
   * Modify the JobConf to set the task output filter.
   * 
   * @param job the JobConf to modify.
   * @param newValue the value to set.
   */
  public static void setTaskOutputFilter(JobConf job, 
                                         TaskStatusFilter newValue) {
    job.set("jobclient.output.filter", newValue.toString());
  }
    
  /**
   * Returns the task output filter.
   * @return task filter.
   */
  @Deprecated
  public TaskStatusFilter getTaskOutputFilter() {
    return this.taskOutputFilter; 
  }

  protected long getCounter(org.apache.hadoop.mapreduce.Counters cntrs,
      String counterGroupName, String counterName) throws IOException {
    Counters counters = Counters.downgrade(cntrs);
    return counters.findCounter(counterGroupName, counterName).getValue();
  }

  /**
   * Get status information about the max available Maps in the cluster.
   *  
   * @return the max available Maps in the cluster
   * @throws IOException
   */
  public int getDefaultMaps() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getMapSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get status information about the max available Reduces in the cluster.
   *  
   * @return the max available Reduces in the cluster
   * @throws IOException
   */
  public int getDefaultReduces() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getReduceSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Grab the JobTracker system directory path where job-specific files are to be placed.
   * 
   * @return the system directory where job-specific files are to be placed.
   */
  public Path getSystemDir() {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Path>() {
        @Override
        public Path run() throws IOException, InterruptedException {
          return cluster.getSystemDir();
        }
      });
    } catch (IOException ioe) {
      return null;
    } catch (InterruptedException ie) {
      return null;
    }
  }

  /**
   * Checks if the job directory is clean and has all the required components
   * for (re)starting the job.
   *
   * @param jobDirPath the job directory to check.
   * @param fs the filesystem the directory lives on.
   * @return true if the directory contains both "job.xml" and "job.split".
   */
  public static boolean isJobDirValid(Path jobDirPath, FileSystem fs)
      throws IOException {
    FileStatus[] contents = fs.listStatus(jobDirPath);
    int matchCount = 0;
    if (contents != null && contents.length >= 2) {
      for (FileStatus status : contents) {
        if ("job.xml".equals(status.getPath().getName())) {
          ++matchCount;
        }
        if ("job.split".equals(status.getPath().getName())) {
          ++matchCount;
        }
      }
      if (matchCount == 2) {
        return true;
      }
    }
    return false;
  }

  /**
   * Fetch the staging area directory for the application.
   * 
   * @return path to staging area directory
   * @throws IOException
   */
  public Path getStagingAreaDir() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Path>() {
        @Override
        public Path run() throws IOException, InterruptedException {
          return cluster.getStagingAreaDir();
        }
      });
    } catch (InterruptedException ie) {
      // throw RuntimeException instead for compatibility reasons
      throw new RuntimeException(ie);
    }
  }

  private JobQueueInfo getJobQueueInfo(QueueInfo queue) {
    JobQueueInfo ret = new JobQueueInfo(queue);
    // make sure to convert any children
    if (queue.getQueueChildren().size() > 0) {
      List<JobQueueInfo> childQueues = new ArrayList<JobQueueInfo>(queue
          .getQueueChildren().size());
      for (QueueInfo child : queue.getQueueChildren()) {
        childQueues.add(getJobQueueInfo(child));
      }
      ret.setChildren(childQueues);
    }
    return ret;
  }

  private JobQueueInfo[] getJobQueueInfoArray(QueueInfo[] queues)
      throws IOException {
    JobQueueInfo[] ret = new JobQueueInfo[queues.length];
    for (int i = 0; i < queues.length; i++) {
      ret[i] = getJobQueueInfo(queues[i]);
    }
    return ret;
  }

  /**
   * Returns an array of queue information objects about the root level queues
   * configured.
   *
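   * <p>For example, to list the configured root queues (illustrative):</p>
   * <p><blockquote><pre>
   *     for (JobQueueInfo q : client.getRootQueues()) {
   *       System.out.println(q.getQueueName());
   *     }
   * </pre></blockquote></p>
   *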
   * @return the array of root level JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getRootQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getRootQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Returns an array of queue information objects about the immediate children
   * of queue queueName.
   * 
   * @param queueName the name of the parent queue.
   * @return the array of immediate children JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getChildQueues(final String queueName) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getChildQueues(queueName));
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Return an array of queue information objects about all the Job Queues
   * configured.
   * 
   * @return Array of JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Gets all the jobs which were added to a particular job queue.
   * 
   * @param queueName name of the job queue.
   * @return Array of jobs present in the job queue
   * @throws IOException
   */
  public JobStatus[] getJobsFromQueue(final String queueName) throws IOException {
    try {
      QueueInfo queue = clientUgi.doAs(new PrivilegedExceptionAction<QueueInfo>() {
        @Override
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queue == null) {
        return null;
      }
      org.apache.hadoop.mapreduce.JobStatus[] stats = 
        queue.getJobStatuses();
      JobStatus[] ret = new JobStatus[stats.length];
      for (int i = 0; i < stats.length; i++) {
        ret[i] = JobStatus.downgrade(stats[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Gets the queue information associated with a particular job queue.
   * 
   * @param queueName name of the job queue.
   * @return Queue information associated with the particular queue.
   * @throws IOException
   */
  public JobQueueInfo getQueueInfo(final String queueName) throws IOException {
    try {
      QueueInfo queueInfo = clientUgi.doAs(new 
          PrivilegedExceptionAction<QueueInfo>() {
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queueInfo != null) {
        return new JobQueueInfo(queueInfo);
      }
      return null;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }
  
  /**
   * Gets the Queue ACLs for the current user.
   * @return array of QueueAclsInfo objects for the current user.
   * @throws IOException
   */
  public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException {
    try {
      org.apache.hadoop.mapreduce.QueueAclsInfo[] acls = 
        clientUgi.doAs(new 
            PrivilegedExceptionAction
            <org.apache.hadoop.mapreduce.QueueAclsInfo[]>() {
              public org.apache.hadoop.mapreduce.QueueAclsInfo[] run() 
              throws IOException, InterruptedException {
                return cluster.getQueueAclsForCurrentUser();
              }
        });
      QueueAclsInfo[] ret = new QueueAclsInfo[acls.length];
      for (int i = 0; i < acls.length; i++) {
        ret[i] = QueueAclsInfo.downgrade(acls[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get a delegation token for the user from the JobTracker.
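   *
   * <p>Illustrative sketch (the renewer principal is hypothetical):</p>
   * <p><blockquote><pre>
   *     Token&lt;DelegationTokenIdentifier&gt; token =
   *         client.getDelegationToken(new Text("renewer-user"));
   * </pre></blockquote></p>
   *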
   * @param renewer the user who can renew the token
   * @return the new token
   * @throws IOException
   */
  public Token<DelegationTokenIdentifier> 
    getDelegationToken(final Text renewer) throws IOException, InterruptedException {
    getDelegationTokenCalled = true;
    dtRenewer = renewer.toString();
    return clientUgi.doAs(new 
        PrivilegedExceptionAction<Token<DelegationTokenIdentifier>>() {
      public Token<DelegationTokenIdentifier> run() throws IOException, 
      InterruptedException {
        return cluster.getDelegationToken(renewer);
      }
    });
  }

  /**
   * Renew a delegation token.
   * @param token the token to renew
   * @return the new expiration time of the token
   * @throws InvalidToken
   * @throws IOException
   * @deprecated Use {@link Token#renew} instead
   */
  public long renewDelegationToken(Token<DelegationTokenIdentifier> token
                                   ) throws InvalidToken, IOException, 
                                            InterruptedException {
    return token.renew(getConf());
  }

  /**
   * Cancel a delegation token from the JobTracker.
   * @param token the token to cancel
   * @throws IOException
   * @deprecated Use {@link Token#cancel} instead
   */
  public void cancelDelegationToken(Token<DelegationTokenIdentifier> token
                                    ) throws InvalidToken, IOException, 
                                             InterruptedException {
    token.cancel(getConf());
  }

  /**
   * Run the <code>JobClient</code> as a command-line tool.
   */
  public static void main(String[] argv) throws Exception {
    int res = ToolRunner.run(new JobClient(), argv);
    System.exit(res);
  }
}