
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
 * is simple: take every row in the table, reverse the value of a particular
 * cell, and write it back to the table.
 */
@Category(LargeTests.class)
public class TestMultithreadedTableMapper {
  private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
  static final int NUMBER_OF_THREADS = 10;

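  /**
   * Sets up the shared fixture: a mini HBase cluster, a table pre-split across
   * several regions with both column families, sample data loaded into the
   * input family, and a mini MapReduce cluster to run the job on.
   */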
  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
    HTable table =
        UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME, new byte[][] { INPUT_FAMILY,
            OUTPUT_FAMILY });
    UTIL.loadTable(table, INPUT_FAMILY, false);
    UTIL.startMiniMapReduceCluster();
    UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
  }

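  /**
   * Tears down the MapReduce and HBase mini clusters started in
   * {@link #beforeClass()}.
   */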
  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniMapReduceCluster();
    UTIL.shutdownMiniCluster();
  }

  /**
   * Passes the given key and the processed (reversed) record on to the reducer.
   */
  public static class ProcessContentsMapper
      extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     * Passes the key, and the reversed value, to reduce.
     *
     * @param key the row key
     * @param value the row's cells, as returned by the scan
     * @param context the map context, used to emit the output {@link Put}
     * @throws IOException if the row does not contain exactly the expected column
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        Context context)
            throws IOException, InterruptedException {
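      // The job's scan selects only INPUT_FAMILY, so each row should arrive
      // with exactly one cell.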
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
          value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
            Bytes.toString(INPUT_FAMILY) + "'.");
      }
      // Get the original value and reverse it
      String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, null));
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();
      // Now set the value to be collected
      Put outval = new Put(key.get());
      outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      context.write(key, outval);
    }
  }

  /**
   * Test MultithreadedTableMapper map/reduce against a multi-region table.
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  @Test
  public void testMultithreadedTableMapper()
      throws IOException, InterruptedException, ClassNotFoundException {
    runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
        MULTI_REGION_TABLE_NAME));
  }

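  /**
   * Runs the reversal job against the given table: MultithreadedTableMapper is
   * installed as the job's mapper and configured to run
   * {@link ProcessContentsMapper} on a pool of NUMBER_OF_THREADS threads, with
   * an IdentityTableReducer writing the results back to the table.
   */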
  private void runTestOnTable(HTable table)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
      LOG.info("Before map/reduce startup");
      job = new Job(table.getConfiguration(), "process column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(INPUT_FAMILY);
      // Install MultithreadedTableMapper as the job's mapper...
      TableMapReduceUtil.initTableMapperJob(
          table.getTableName(), scan,
          MultithreadedTableMapper.class, ImmutableBytesWritable.class,
          Put.class, job);
      // ...then tell it which mapper to run inside its thread pool, and how
      // many threads to use.
      MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
      MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
      TableMapReduceUtil.initTableReducerJob(
          Bytes.toString(table.getTableName()),
          IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + table.getName());
      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");
      // verify map-reduce results
      verify(table.getName());
    } finally {
      table.close();
      if (job != null) {
        FileUtil.fullyDelete(
            new File(job.getConfiguration().get("hadoop.tmp.dir")));
      }
    }
  }

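  /**
   * Verifies the job output, retrying a few times so writes that were still
   * in flight when a scanner opened have a chance to land.
   */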
  private void verify(TableName tableName) throws IOException {
    Table table = new HTable(new Configuration(UTIL.getConfiguration()), tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
        // If here, a cell was empty. Presume it's because updates came in
        // after the scanner had been opened. Wait a while and retry.
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // continue
      }
    }
    assertTrue(verified);
    table.close();
  }

  /**
   * Looks at every value of the mapreduce output and verifies that the values
   * have indeed been reversed.
   *
   * @param table Table to scan.
   * @throws IOException
   * @throws NullPointerException if we failed to find a cell value
   */
  private void verifyAttempt(final Table table)
      throws IOException, NullPointerException {
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        if (LOG.isDebugEnabled()) {
          if (r.size() > 2) {
            throw new IOException("Too many results, expected 2 got " +
                r.size());
          }
        }
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        // Pick out the input cell and the output cell; any further cells are
        // ignored.
        for (Cell kv : r.listCells()) {
          if (count == 0) {
            firstValue = CellUtil.cloneValue(kv);
          } else if (count == 1) {
            secondValue = CellUtil.cloneValue(kv);
          } else if (count == 2) {
            break;
          }
          count++;
        }
        String first = "";
        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
              ": first value is null");
        }
        first = Bytes.toString(firstValue);
        String second = "";
        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) +
              ": second value is null");
        }
        // Reverse the bytes of the second value and check that they match the
        // first; the job should have written each value back reversed.
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        second = Bytes.toString(secondReversed);
        if (first.compareTo(second) != 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("second key is not the reverse of first. row=" +
                Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
                ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }

}