20,808
社区成员
发帖
与我相关
我的任务
分享
/**
 * Reducer that concatenates every value received for a key into one string
 * and emits the result as a single {@link KeyValue}.
 *
 * <p>Each value is followed by {@code Constants.FIELD_SEPERATOR}, so the
 * emitted string ends with a trailing separator. Values are joined in the
 * order in which the framework hands them to {@code reduce}.
 */
public static class BuildIndexAfterReduce
        extends Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

    /**
     * Joins all values of {@code key} and writes them as one cell.
     *
     * @param key    reduce key; its bytes are also used as the first
     *               argument (row) of the emitted {@link KeyValue}
     * @param values all values grouped under {@code key}
     * @param output context used to emit the (key, KeyValue) pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(ImmutableBytesWritable key, Iterable<Text> values, Context output)
            throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        for (Text value : values) {
            joined.append(value.toString()).append(Constants.FIELD_SEPERATOR);
        }

        byte[] row = key.get();
        byte[] family = Bytes.toBytes("searchIndex");
        byte[] qualifier = Bytes.toBytes("userId");
        byte[] cellValue = Bytes.toBytes(joined.toString());

        output.write(key, new KeyValue(row, family, qualifier, cellValue));
    }
}
/**
 * Mapper that turns one tab-separated input line into a (hashed key, value)
 * pair.
 *
 * <p>The first column is the index key and the second column is the value;
 * both are trimmed. The emitted key is {@code Bytes.toBytes(int)} applied to
 * the key string's {@link String#hashCode()}. Any columns after the second
 * are ignored.
 *
 * <p>NOTE(review): distinct keys whose {@code hashCode()} collide are emitted
 * under the same map output key and will therefore be grouped together by
 * the reducer - confirm this is intended.
 */
public static class BuildIndexAfterMap extends
        Mapper<LongWritable, Text, ImmutableBytesWritable, Text> {

    /**
     * Emits the hashed first column and the trimmed second column of a line.
     *
     * <p>Lines with fewer than two columns (empty line, no tab, or nothing
     * after the tab) are counted under the {@code MALFORMED_LINES} counter
     * and skipped, instead of failing the task with an
     * {@link ArrayIndexOutOfBoundsException}. Lines whose key column is blank
     * are skipped silently.
     *
     * @param key    input key supplied by the input format (unused)
     * @param value  one line of input text
     * @param output context used to emit the pair and to update counters
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void map(LongWritable key, Text value, Context output)
            throws IOException, InterruptedException {
        // Split once; String.split drops trailing empty columns, so "key\t"
        // yields a single element and must be treated as malformed too.
        String[] fields = value.toString().split("\t");
        if (fields.length < 2) {
            output.getCounter("BuildIndexAfterMap", "MALFORMED_LINES").increment(1);
            return;
        }

        String outKey = fields[0].trim();
        if (outKey.isEmpty()) {
            return;
        }
        String outValue = fields[1].trim();

        output.write(new ImmutableBytesWritable(Bytes.toBytes(outKey.hashCode())),
                new Text(outValue));
    }
}
/**
 * Reducer that concatenates every value received for a key into one string
 * and emits the result as a single {@link KeyValue}.
 *
 * <p>Values appear in the output in the reverse of the order in which they
 * are received (the last value received comes first), and each value is
 * followed by {@code Constants.FIELD_SEPERATOR}.
 */
public static class BuildIndexAfterReduce
        extends Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

    /**
     * Joins all values of {@code key} in reverse arrival order and writes
     * them as one cell.
     *
     * @param key    reduce key; its bytes are also used as the first
     *               argument (row) of the emitted {@link KeyValue}
     * @param values all values grouped under {@code key}
     * @param output context used to emit the (key, KeyValue) pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    public void reduce(ImmutableBytesWritable key, Iterable<Text> values, Context output)
            throws IOException, InterruptedException {
        // Prepending to a String inside the loop copies the whole accumulated
        // value on every iteration (quadratic for keys with many values).
        // Collect newest-first in a deque and build the string once instead.
        // Types are fully qualified so the block needs no additional imports.
        java.util.Deque<String> newestFirst = new java.util.ArrayDeque<String>();
        for (Text value : values) {
            // Copy to String immediately; do not hold on to the Text instance.
            newestFirst.addFirst(value.toString());
        }

        StringBuilder outValue = new StringBuilder();
        for (String v : newestFirst) {
            outValue.append(v).append(Constants.FIELD_SEPERATOR);
        }

        KeyValue column = new KeyValue(key.get(),
                Bytes.toBytes("searchIndex"), Bytes.toBytes("userId"),
                Bytes.toBytes(outValue.toString()));
        output.write(key, column);
    }
}
// Driver fragment: configures and runs the index-build job that reads text
// lines from args[2] and writes HFiles under args[3].
Configuration conf2 = new Configuration();
conf2.set("mapred.input.dir", args[2]);
conf2.set("mapred.output.dir", args[3]);
Job jobAfter = new Job(conf2);
jobAfter.setJarByClass(BuildTest.class);
jobAfter.setMapperClass(BuildIndexAfterMap.class);
jobAfter.setReducerClass(BuildIndexAfterReduce.class);
// NOTE(review): the reducer count and partitioner set here may be replaced
// by HFileOutputFormat.configureIncrementalLoad below - confirm against the
// HBase version in use.
jobAfter.setNumReduceTasks(10);
jobAfter.setPartitionerClass(SimpleTotalOrderPartitioner.class);
// Map output types must match what BuildIndexAfterMap emits.
jobAfter.setMapOutputKeyClass(ImmutableBytesWritable.class);
jobAfter.setMapOutputValueClass(Text.class);
// Custom sort/grouping comparators are currently disabled.
// jobAfter.setSortComparatorClass(ByteArrayComparator.class);
// jobAfter.setGroupingComparatorClass(ByteArrayComparator.class);
// Output path is set both here and via "mapred.output.dir" above.
FileOutputFormat.setOutputPath(jobAfter, new Path(args[3]));
jobAfter.setOutputFormatClass(HFileOutputFormat.class);
jobAfter.setInputFormatClass(TextInputFormat.class);
// NOTE(review): `conf` is not declared in this fragment (only `conf2` is);
// presumably it comes from earlier in the enclosing method - verify it
// carries "tableName". The HTable is not closed within this fragment.
HTable table = new HTable(conf, conf.get("tableName"));
HFileOutputFormat.configureIncrementalLoad(jobAfter, table);
// The result of waitForCompletion is discarded, so a failed job is not
// detected at this point.
jobAfter.waitForCompletion(true);
// Driver fragment: configures and runs the index-build job that reads text
// lines from args[2] and writes HFiles under args[3], requesting a single
// reduce task.
Configuration conf2 = new Configuration();
conf2.set("mapred.input.dir", args[2]);
conf2.set("mapred.output.dir", args[3]);
Job jobAfter = new Job(conf2);
jobAfter.setJarByClass(BuildTest.class);
jobAfter.setMapperClass(BuildIndexAfterMap.class);
jobAfter.setReducerClass(BuildIndexAfterReduce.class);
// NOTE(review): the reducer count and partitioner set here may be replaced
// by HFileOutputFormat.configureIncrementalLoad below - confirm against the
// HBase version in use.
jobAfter.setNumReduceTasks(1);
jobAfter.setPartitionerClass(SimpleTotalOrderPartitioner.class);
// Map output types must match what BuildIndexAfterMap emits.
jobAfter.setMapOutputKeyClass(ImmutableBytesWritable.class);
jobAfter.setMapOutputValueClass(Text.class);
// Custom sort/grouping comparators are currently disabled.
//jobAfter.setSortComparatorClass(ByteArrayComparator.class);
//jobAfter.setGroupingComparatorClass(ByteArrayComparator.class);
// Output path is set both here and via "mapred.output.dir" above.
FileOutputFormat.setOutputPath(jobAfter, new Path(args[3]));
jobAfter.setOutputFormatClass(HFileOutputFormat.class);
jobAfter.setInputFormatClass(TextInputFormat.class);
// NOTE(review): `conf` is not declared in this fragment (only `conf2` is);
// presumably it comes from earlier in the enclosing method - verify it
// carries "tableName". The HTable is not closed within this fragment.
HTable table=new HTable(conf,conf.get("tableName"));
HFileOutputFormat.configureIncrementalLoad(jobAfter, table);
// The result of waitForCompletion is discarded, so a failed job is not
// detected at this point.
jobAfter.waitForCompletion(true);