001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.mapreduce;
020    
021    import java.io.IOException;
022    import java.net.URI;
023    
024    import org.apache.hadoop.classification.InterfaceAudience;
025    import org.apache.hadoop.classification.InterfaceStability;
026    import org.apache.hadoop.conf.Configuration;
027    import org.apache.hadoop.conf.Configuration.IntegerRanges;
028    import org.apache.hadoop.fs.Path;
029    import org.apache.hadoop.io.RawComparator;
030    import org.apache.hadoop.mapreduce.Mapper;
031    import org.apache.hadoop.security.Credentials;
032    
033    /**
034     * A read-only view of the job that is provided to the tasks while they
035     * are running.
036     */
037    @InterfaceAudience.Public
038    @InterfaceStability.Evolving
039    public interface JobContext extends MRJobConfig {
040      /**
041       * Return the configuration for the job.
042       * @return the shared configuration object
043       */
044      public Configuration getConfiguration();
045    
046      /**
047       * Get credentials for the job.
048       * @return credentials for the job
049       */
050      public Credentials getCredentials();
051    
052      /**
053       * Get the unique ID for the job.
054       * @return the object with the job id
055       */
056      public JobID getJobID();
057      
058      /**
059       * Get configured the number of reduce tasks for this job. Defaults to 
060       * <code>1</code>.
061       * @return the number of reduce tasks for this job.
062       */
063      public int getNumReduceTasks();
064      
065      /**
066       * Get the current working directory for the default file system.
067       * 
068       * @return the directory name.
069       */
070      public Path getWorkingDirectory() throws IOException;
071    
072      /**
073       * Get the key class for the job output data.
074       * @return the key class for the job output data.
075       */
076      public Class<?> getOutputKeyClass();
077      
078      /**
079       * Get the value class for job outputs.
080       * @return the value class for job outputs.
081       */
082      public Class<?> getOutputValueClass();
083    
084      /**
085       * Get the key class for the map output data. If it is not set, use the
086       * (final) output key class. This allows the map output key class to be
087       * different than the final output key class.
088       * @return the map output key class.
089       */
090      public Class<?> getMapOutputKeyClass();
091    
092      /**
093       * Get the value class for the map output data. If it is not set, use the
094       * (final) output value class This allows the map output value class to be
095       * different than the final output value class.
096       *  
097       * @return the map output value class.
098       */
099      public Class<?> getMapOutputValueClass();
100    
101      /**
102       * Get the user-specified job name. This is only used to identify the 
103       * job to the user.
104       * 
105       * @return the job's name, defaulting to "".
106       */
107      public String getJobName();
108    
109      /**
110       * Get the boolean value for the property that specifies which classpath
111       * takes precedence when tasks are launched. True - user's classes takes
112       * precedence. False - system's classes takes precedence.
113       * @return true if user's classes should take precedence
114       */
115      public boolean userClassesTakesPrecedence();
116    
117      /**
118       * Get the {@link InputFormat} class for the job.
119       * 
120       * @return the {@link InputFormat} class for the job.
121       */
122      public Class<? extends InputFormat<?,?>> getInputFormatClass() 
123         throws ClassNotFoundException;
124    
125      /**
126       * Get the {@link Mapper} class for the job.
127       * 
128       * @return the {@link Mapper} class for the job.
129       */
130      public Class<? extends Mapper<?,?,?,?>> getMapperClass() 
131         throws ClassNotFoundException;
132    
133      /**
134       * Get the combiner class for the job.
135       * 
136       * @return the combiner class for the job.
137       */
138      public Class<? extends Reducer<?,?,?,?>> getCombinerClass() 
139         throws ClassNotFoundException;
140    
141      /**
142       * Get the {@link Reducer} class for the job.
143       * 
144       * @return the {@link Reducer} class for the job.
145       */
146      public Class<? extends Reducer<?,?,?,?>> getReducerClass() 
147         throws ClassNotFoundException;
148    
149      /**
150       * Get the {@link OutputFormat} class for the job.
151       * 
152       * @return the {@link OutputFormat} class for the job.
153       */
154      public Class<? extends OutputFormat<?,?>> getOutputFormatClass() 
155         throws ClassNotFoundException;
156    
157      /**
158       * Get the {@link Partitioner} class for the job.
159       * 
160       * @return the {@link Partitioner} class for the job.
161       */
162      public Class<? extends Partitioner<?,?>> getPartitionerClass() 
163         throws ClassNotFoundException;
164    
165      /**
166       * Get the {@link RawComparator} comparator used to compare keys.
167       * 
168       * @return the {@link RawComparator} comparator used to compare keys.
169       */
170      public RawComparator<?> getSortComparator();
171    
172      /**
173       * Get the pathname of the job's jar.
174       * @return the pathname
175       */
176      public String getJar();
177    
178      /**
179       * Get the user defined {@link RawComparator} comparator for
180       * grouping keys of inputs to the combiner.
181       *
182       * @return comparator set by the user for grouping values.
183       * @see Job#setCombinerKeyGroupingComparatorClass(Class)
184       */
185      public RawComparator<?> getCombinerKeyGroupingComparator();
186    
187        /**
188         * Get the user defined {@link RawComparator} comparator for
189         * grouping keys of inputs to the reduce.
190         *
191         * @return comparator set by the user for grouping values.
192         * @see Job#setGroupingComparatorClass(Class)
193         * @see #getCombinerKeyGroupingComparator()
194         */
195      public RawComparator<?> getGroupingComparator();
196      
197      /**
198       * Get whether job-setup and job-cleanup is needed for the job 
199       * 
200       * @return boolean 
201       */
202      public boolean getJobSetupCleanupNeeded();
203      
204      /**
205       * Get whether task-cleanup is needed for the job 
206       * 
207       * @return boolean 
208       */
209      public boolean getTaskCleanupNeeded();
210    
211      /**
212       * Get whether the task profiling is enabled.
213       * @return true if some tasks will be profiled
214       */
215      public boolean getProfileEnabled();
216    
217      /**
218       * Get the profiler configuration arguments.
219       *
220       * The default value for this property is
221       * "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
222       * 
223       * @return the parameters to pass to the task child to configure profiling
224       */
225      public String getProfileParams();
226    
227      /**
228       * Get the range of maps or reduces to profile.
229       * @param isMap is the task a map?
230       * @return the task ranges
231       */
232      public IntegerRanges getProfileTaskRange(boolean isMap);
233    
234      /**
235       * Get the reported username for this job.
236       * 
237       * @return the username
238       */
239      public String getUser();
240      
241      /**
242       * Originally intended to check if symlinks should be used, but currently
243       * symlinks cannot be disabled.
244       * @return true
245       */
246      @Deprecated
247      public boolean getSymlink();
248      
249      /**
250       * Get the archive entries in classpath as an array of Path
251       */
252      public Path[] getArchiveClassPaths();
253    
254      /**
255       * Get cache archives set in the Configuration
256       * @return A URI array of the caches set in the Configuration
257       * @throws IOException
258       */
259      public URI[] getCacheArchives() throws IOException;
260    
261      /**
262       * Get cache files set in the Configuration
263       * @return A URI array of the files set in the Configuration
264       * @throws IOException
265       */
266    
267      public URI[] getCacheFiles() throws IOException;
268    
269      /**
270       * Return the path array of the localized caches
271       * @return A path array of localized caches
272       * @throws IOException
273       * @deprecated the array returned only includes the items the were 
274       * downloaded. There is no way to map this to what is returned by
275       * {@link #getCacheArchives()}.
276       */
277      @Deprecated
278      public Path[] getLocalCacheArchives() throws IOException;
279    
280      /**
281       * Return the path array of the localized files
282       * @return A path array of localized files
283       * @throws IOException
284       * @deprecated the array returned only includes the items the were 
285       * downloaded. There is no way to map this to what is returned by
286       * {@link #getCacheFiles()}.
287       */
288      @Deprecated
289      public Path[] getLocalCacheFiles() throws IOException;
290    
291      /**
292       * Get the file entries in classpath as an array of Path
293       */
294      public Path[] getFileClassPaths();
295      
296      /**
297       * Get the timestamps of the archives.  Used by internal
298       * DistributedCache and MapReduce code.
299       * @return a string array of timestamps 
300       * @throws IOException
301       */
302      public String[] getArchiveTimestamps();
303    
304      /**
305       * Get the timestamps of the files.  Used by internal
306       * DistributedCache and MapReduce code.
307       * @return a string array of timestamps 
308       * @throws IOException
309       */
310      public String[] getFileTimestamps();
311    
312      /** 
313       * Get the configured number of maximum attempts that will be made to run a
314       * map task, as specified by the <code>mapred.map.max.attempts</code>
315       * property. If this property is not already set, the default is 4 attempts.
316       *  
317       * @return the max number of attempts per map task.
318       */
319      public int getMaxMapAttempts();
320    
321      /** 
322       * Get the configured number of maximum attempts  that will be made to run a
323       * reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
324       * property. If this property is not already set, the default is 4 attempts.
325       * 
326       * @return the max number of attempts per reduce task.
327       */
328      public int getMaxReduceAttempts();
329    
330    }