View Javadoc

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.File;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.NavigableMap;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileUtil;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.HBaseTestingUtility;
36  import org.apache.hadoop.hbase.testclassification.LargeTests;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.HTable;
39  import org.apache.hadoop.hbase.client.Result;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.io.NullWritable;
44  import org.apache.hadoop.mapreduce.Job;
45  import org.apache.hadoop.mapreduce.Reducer;
46  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
47  import org.junit.After;
48  import org.junit.AfterClass;
49  import org.junit.BeforeClass;
50  import org.junit.Test;
51  import org.junit.experimental.categories.Category;
52  
53  /**
54   * Tests various scan start and stop row scenarios. This is set in a scan and
55   * tested in a MapReduce job to see if that is handed over and done properly
56   * too.
57   */
58  @Category(LargeTests.class)
59  public class TestMultiTableInputFormat {
60  
61    static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
62    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
63  
64    static final String TABLE_NAME = "scantest";
65    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
66    static final String KEY_STARTROW = "startRow";
67    static final String KEY_LASTROW = "stpRow";
68  
69    @BeforeClass
70    public static void setUpBeforeClass() throws Exception {
71      // switch TIF to log at DEBUG level
72      TEST_UTIL.enableDebug(MultiTableInputFormat.class);
73      TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
74      // start mini hbase cluster
75      TEST_UTIL.startMiniCluster(3);
76      // create and fill table
77      for (int i = 0; i < 3; i++) {
78        try (HTable table =
79            TEST_UTIL.createMultiRegionTable(TableName.valueOf(TABLE_NAME + String.valueOf(i)),
80              INPUT_FAMILY, 4)) {
81          TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
82        }
83      }
84      // start MR cluster
85      TEST_UTIL.startMiniMapReduceCluster();
86    }
87  
88    @AfterClass
89    public static void tearDownAfterClass() throws Exception {
90      TEST_UTIL.shutdownMiniMapReduceCluster();
91      TEST_UTIL.shutdownMiniCluster();
92    }
93    
94    @After
95    public void tearDown() throws Exception {
96      Configuration c = TEST_UTIL.getConfiguration();
97      FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
98    }
99  
100   /**
101    * Pass the key and value to reducer.
102    */
103   public static class ScanMapper extends
104       TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
105     /**
106      * Pass the key and value to reduce.
107      *
108      * @param key The key, here "aaa", "aab" etc.
109      * @param value The value is the same as the key.
110      * @param context The task context.
111      * @throws IOException When reading the rows fails.
112      */
113     @Override
114     public void map(ImmutableBytesWritable key, Result value, Context context)
115         throws IOException, InterruptedException {
116       if (value.size() != 1) {
117         throw new IOException("There should only be one input column");
118       }
119       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
120           value.getMap();
121       if (!cf.containsKey(INPUT_FAMILY)) {
122         throw new IOException("Wrong input columns. Missing: '" +
123             Bytes.toString(INPUT_FAMILY) + "'.");
124       }
125       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
126       LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
127           ", value -> " + val);
128       context.write(key, key);
129     }
130   }
131 
132   /**
133    * Checks the last and first keys seen against the scanner boundaries.
134    */
135   public static class ScanReducer
136       extends
137       Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
138       NullWritable, NullWritable> {
139     private String first = null;
140     private String last = null;
141 
142     @Override
143     protected void reduce(ImmutableBytesWritable key,
144         Iterable<ImmutableBytesWritable> values, Context context)
145         throws IOException, InterruptedException {
146       int count = 0;
147       for (ImmutableBytesWritable value : values) {
148         String val = Bytes.toStringBinary(value.get());
149         LOG.debug("reduce: key[" + count + "] -> " +
150             Bytes.toStringBinary(key.get()) + ", value -> " + val);
151         if (first == null) first = val;
152         last = val;
153         count++;
154       }
155       assertEquals(3, count);
156     }
157 
158     @Override
159     protected void cleanup(Context context) throws IOException,
160         InterruptedException {
161       Configuration c = context.getConfiguration();
162       String startRow = c.get(KEY_STARTROW);
163       String lastRow = c.get(KEY_LASTROW);
164       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
165           startRow + "\"");
166       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
167           "\"");
168       if (startRow != null && startRow.length() > 0) {
169         assertEquals(startRow, first);
170       }
171       if (lastRow != null && lastRow.length() > 0) {
172         assertEquals(lastRow, last);
173       }
174     }
175   }
176 
177   @Test
178   public void testScanEmptyToEmpty() throws IOException, InterruptedException,
179       ClassNotFoundException {
180     testScan(null, null, null);
181   }
182   
183   @Test
184   public void testScanEmptyToAPP() throws IOException, InterruptedException,
185       ClassNotFoundException {
186     testScan(null, "app", "apo");
187   }
188 
189   @Test
190   public void testScanOBBToOPP() throws IOException, InterruptedException,
191       ClassNotFoundException {
192     testScan("obb", "opp", "opo");
193   }
194 
195   @Test
196   public void testScanYZYToEmpty() throws IOException, InterruptedException,
197       ClassNotFoundException {
198     testScan("yzy", null, "zzz");
199   }
200 
201   /**
202    * Tests a MR scan using specific start and stop rows.
203    *
204    * @throws IOException
205    * @throws ClassNotFoundException
206    * @throws InterruptedException
207    */
208   private void testScan(String start, String stop, String last)
209       throws IOException, InterruptedException, ClassNotFoundException {
210     String jobName =
211         "Scan" + (start != null ? start.toUpperCase() : "Empty") + "To" +
212             (stop != null ? stop.toUpperCase() : "Empty");
213     LOG.info("Before map/reduce startup - job " + jobName);
214     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
215     
216     c.set(KEY_STARTROW, start != null ? start : "");
217     c.set(KEY_LASTROW, last != null ? last : "");
218     
219     List<Scan> scans = new ArrayList<Scan>();
220     
221     for(int i=0; i<3; i++){
222       Scan scan = new Scan();
223       
224       scan.addFamily(INPUT_FAMILY);
225       scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(TABLE_NAME + i));
226       
227       if (start != null) {
228         scan.setStartRow(Bytes.toBytes(start));
229       }
230       if (stop != null) {
231         scan.setStopRow(Bytes.toBytes(stop));
232       }
233       
234       scans.add(scan);
235       
236       LOG.info("scan before: " + scan);
237     }
238     
239     Job job = new Job(c, jobName);
240 
241     TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
242         ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
243     job.setReducerClass(ScanReducer.class);
244     job.setNumReduceTasks(1); // one to get final "first" and "last" key
245     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
246     LOG.info("Started " + job.getJobName());
247     job.waitForCompletion(true);
248     assertTrue(job.isSuccessful());
249     LOG.info("After map/reduce completion - job " + jobName);
250   }
251 }