1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.text.ParseException;
22 import java.text.SimpleDateFormat;
23 import java.util.Map;
24 import java.util.TreeMap;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.conf.Configured;
30 import org.apache.hadoop.fs.Path;
31 import org.apache.hadoop.hbase.Cell;
32 import org.apache.hadoop.hbase.CellUtil;
33 import org.apache.hadoop.hbase.HBaseConfiguration;
34 import org.apache.hadoop.hbase.KeyValue;
35 import org.apache.hadoop.hbase.KeyValueUtil;
36 import org.apache.hadoop.hbase.TableName;
37 import org.apache.hadoop.hbase.classification.InterfaceAudience;
38 import org.apache.hadoop.hbase.classification.InterfaceStability;
39 import org.apache.hadoop.hbase.client.Connection;
40 import org.apache.hadoop.hbase.client.ConnectionFactory;
41 import org.apache.hadoop.hbase.client.Delete;
42 import org.apache.hadoop.hbase.client.Mutation;
43 import org.apache.hadoop.hbase.client.Put;
44 import org.apache.hadoop.hbase.client.RegionLocator;
45 import org.apache.hadoop.hbase.client.Table;
46 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
47 import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
48 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.wal.WALKey;
51 import org.apache.hadoop.mapreduce.Job;
52 import org.apache.hadoop.mapreduce.Mapper;
53 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
54 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
55 import org.apache.hadoop.util.GenericOptionsParser;
56 import org.apache.hadoop.util.Tool;
57 import org.apache.hadoop.util.ToolRunner;
58
59
60
61
62
63
64
65
66
67
68
69 @InterfaceAudience.Public
70 @InterfaceStability.Stable
71 public class WALPlayer extends Configured implements Tool {
  /** Log for this tool. */
  final static Log LOG = LogFactory.getLog(WALPlayer.class);
  /** Job name prefix. */
  final static String NAME = "WALPlayer";
  /** Conf key: directory for bulk-load HFile output; when unset, edits are replayed live. */
  public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
  /** Conf key: comma-separated list of source tables whose WAL edits are replayed. */
  public final static String TABLES_KEY = "wal.input.tables";
  /** Conf key: comma-separated list of target tables, parallel to {@link #TABLES_KEY}. */
  public final static String TABLE_MAP_KEY = "wal.input.tablesmap";

  // Map the legacy "hlog.*" configuration names onto the current "wal.*" keys so
  // jobs configured with the old names keep working.
  static {
    Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY);
    Configuration.addDeprecation("hlog.input.tables", TABLES_KEY);
    Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY);
    Configuration.addDeprecation(HLogInputFormat.START_TIME_KEY, WALInputFormat.START_TIME_KEY);
    Configuration.addDeprecation(HLogInputFormat.END_TIME_KEY, WALInputFormat.END_TIME_KEY);
  }

  /** No-arg constructor, required for reflective instantiation. */
  public WALPlayer(){
  }
90
91
92
93
94
95 static class WALKeyValueMapper
96 extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
97 private byte[] table;
98
99 @Override
100 public void map(WALKey key, WALEdit value,
101 Context context)
102 throws IOException {
103 try {
104
105 if (Bytes.equals(table, key.getTablename().getName())) {
106 for (Cell cell : value.getCells()) {
107 KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
108 if (WALEdit.isMetaEditFamily(kv.getFamily())) continue;
109 context.write(new ImmutableBytesWritable(kv.getRow()), kv);
110 }
111 }
112 } catch (InterruptedException e) {
113 e.printStackTrace();
114 }
115 }
116
117 @Override
118 public void setup(Context context) throws IOException {
119
120 String tables[] = context.getConfiguration().getStrings(TABLES_KEY);
121 if (tables == null || tables.length != 1) {
122
123 throw new IOException("Exactly one table must be specified for bulk HFile case.");
124 }
125 table = Bytes.toBytes(tables[0]);
126 }
127 }
128
129
130
131
132
133 protected static class WALMapper
134 extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
135 private Map<TableName, TableName> tables = new TreeMap<TableName, TableName>();
136
137
138 @Override
139 public void map(WALKey key, WALEdit value, Context context)
140 throws IOException {
141 try {
142 if (tables.isEmpty() || tables.containsKey(key.getTablename())) {
143 TableName targetTable = tables.isEmpty() ?
144 key.getTablename() :
145 tables.get(key.getTablename());
146 ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
147 Put put = null;
148 Delete del = null;
149 Cell lastCell = null;
150
151 for (Cell cell : value.getCells()) {
152
153 if (WALEdit.isMetaEditFamily(cell.getFamily())) continue;
154
155
156 if (filter(context, cell)) {
157
158
159
160
161 if (lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
162 || !CellUtil.matchingRow(lastCell, cell)) {
163
164 if (put != null) {
165 context.write(tableOut, put);
166 }
167 if (del != null) {
168 context.write(tableOut, del);
169 }
170 if (CellUtil.isDelete(cell)) {
171 del = new Delete(cell.getRow());
172 } else {
173 put = new Put(cell.getRow());
174 }
175 }
176 if (CellUtil.isDelete(cell)) {
177 del.addDeleteMarker(cell);
178 } else {
179 put.add(cell);
180 }
181 }
182 lastCell = cell;
183 }
184
185 if (put != null) {
186 context.write(tableOut, put);
187 }
188 if (del != null) {
189 context.write(tableOut, del);
190 }
191 }
192 } catch (InterruptedException e) {
193 e.printStackTrace();
194 }
195 }
196
197
198
199
200
201
202 protected boolean filter(Context context, final Cell cell) {
203 return true;
204 }
205
206 @Override
207 protected void
208 cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
209 throws IOException, InterruptedException {
210 super.cleanup(context);
211 }
212
213 @Override
214 public void setup(Context context) throws IOException {
215 String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
216 String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
217 if (tablesToUse == null || tableMap == null || tablesToUse.length != tableMap.length) {
218
219 throw new IOException("No tables or incorrect table mapping specified.");
220 }
221 int i = 0;
222 if (tablesToUse != null) {
223 for (String table : tablesToUse) {
224 tables.put(TableName.valueOf(table),
225 TableName.valueOf(tableMap[i++]));
226 }
227 }
228 }
229 }
230
231
232
233
  /**
   * @param conf the {@link Configuration} this tool runs with
   */
  public WALPlayer(Configuration conf) {
    super(conf);
  }
237
238 void setupTime(Configuration conf, String option) throws IOException {
239 String val = conf.get(option);
240 if (null == val) {
241 return;
242 }
243 long ms;
244 try {
245
246 ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
247 } catch (ParseException pe) {
248 try {
249
250 ms = Long.parseLong(val);
251 } catch (NumberFormatException nfe) {
252 throw new IOException(option
253 + " must be specified either in the form 2001-02-20T16:35:06.99 "
254 + "or as number of milliseconds");
255 }
256 }
257 conf.setLong(option, ms);
258 }
259
260
261
262
263
264
265
266
267 public Job createSubmittableJob(String[] args)
268 throws IOException {
269 Configuration conf = getConf();
270 setupTime(conf, HLogInputFormat.START_TIME_KEY);
271 setupTime(conf, HLogInputFormat.END_TIME_KEY);
272 String inputDirs = args[0];
273 String[] tables = args[1].split(",");
274 String[] tableMap;
275 if (args.length > 2) {
276 tableMap = args[2].split(",");
277 if (tableMap.length != tables.length) {
278 throw new IOException("The same number of tables and mapping must be provided.");
279 }
280 } else {
281
282 tableMap = tables;
283 }
284 conf.setStrings(TABLES_KEY, tables);
285 conf.setStrings(TABLE_MAP_KEY, tableMap);
286 Job job = new Job(conf, NAME + "_" + System.currentTimeMillis());
287 job.setJarByClass(WALPlayer.class);
288 FileInputFormat.setInputPaths(job, inputDirs);
289 job.setInputFormatClass(WALInputFormat.class);
290 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
291 String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
292 if (hfileOutPath != null) {
293
294 if (tables.length != 1) {
295 throw new IOException("Exactly one table must be specified for the bulk export option");
296 }
297 TableName tableName = TableName.valueOf(tables[0]);
298 job.setMapperClass(WALKeyValueMapper.class);
299 job.setReducerClass(KeyValueSortReducer.class);
300 Path outputDir = new Path(hfileOutPath);
301 FileOutputFormat.setOutputPath(job, outputDir);
302 job.setMapOutputValueClass(KeyValue.class);
303 try (Connection conn = ConnectionFactory.createConnection(conf);
304 Table table = conn.getTable(tableName);
305 RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
306 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
307 }
308 LOG.debug("success configuring load incremental job");
309
310 TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
311 com.google.common.base.Preconditions.class);
312 } else {
313
314 job.setMapperClass(WALMapper.class);
315 job.setOutputFormatClass(MultiTableOutputFormat.class);
316 TableMapReduceUtil.addDependencyJars(job);
317 TableMapReduceUtil.initCredentials(job);
318
319 job.setNumReduceTasks(0);
320 }
321 String codecCls = WALCellCodec.getWALCellCodecClass(conf);
322 try {
323 TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Class.forName(codecCls));
324 LOG.debug("tmpjars: " + job.getConfiguration().get("tmpjars"));
325 } catch (Exception e) {
326 throw new IOException("Cannot determine wal codec class " + codecCls, e);
327 }
328 return job;
329 }
330
331
332
333
334
335
336 private void usage(final String errorMsg) {
337 if (errorMsg != null && errorMsg.length() > 0) {
338 System.err.println("ERROR: " + errorMsg);
339 }
340 System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
341 System.err.println("Read all WAL entries for <tables>.");
342 System.err.println("If no tables (\"\") are specific, all tables are imported.");
343 System.err.println("(Careful, even -ROOT- and hbase:meta entries will be imported"+
344 " in that case.)");
345 System.err.println("Otherwise <tables> is a comma separated list of tables.\n");
346 System.err.println("The WAL entries can be mapped to new set of tables via <tableMapping>.");
347 System.err.println("<tableMapping> is a command separated list of targettables.");
348 System.err.println("If specified, each table in <tables> must have a mapping.\n");
349 System.err.println("By default " + NAME + " will load data directly into HBase.");
350 System.err.println("To generate HFiles for a bulk data load instead, pass the option:");
351 System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
352 System.err.println(" (Only one table can be specified, and no mapping is allowed!)");
353 System.err.println("Other options: (specify time range to WAL edit to consider)");
354 System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
355 System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
356 System.err.println("For performance also consider the following options:\n"
357 + " -Dmapreduce.map.speculative=false\n"
358 + " -Dmapreduce.reduce.speculative=false");
359 }
360
361
362
363
364
365
366
  /**
   * Main entry point.
   *
   * @param args the command line arguments, passed through to {@link #run(String[])}
   * @throws Exception when running the job fails
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
    System.exit(ret);
  }
371
372 @Override
373 public int run(String[] args) throws Exception {
374 String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
375 if (otherArgs.length < 2) {
376 usage("Wrong number of arguments: " + otherArgs.length);
377 System.exit(-1);
378 }
379 Job job = createSubmittableJob(otherArgs);
380 return job.waitForCompletion(true) ? 0 : 1;
381 }
382 }