A problem with sorting in MapReduce
Below is code I adapted from the WordCount example source so that it sorts numbers instead of counting words.
The input files are:
file1:
2
32
654
32
15
756
65223
file2:
5956
22
650
file3:
26
54
6
The desired output file (every number from the three input files in ascending order, with the second column giving its 1-based rank):
2 1
6 2
15 3
22 4
26 5
32 6
32 7
54 8
650 9
654 10
756 11
5956 12
65223 13
If I replace
word.set(Integer.parseInt(value.toString()));
context.write(word, one);
with
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
  word.set(Integer.parseInt(itr.nextToken()));
  context.write(word, one);
}
then the desired result is no longer produced.
What I get instead:
The output folder is created, but it contains no files.
There is no console output.
The reducer stage does not execute:
job.setReducerClass(IntSumReducer.class);
Removing the comment from // job.setCombinerClass(IntSumReducer.class); makes the Combiner execute; the console then produces the following output:
1 FFFF 2HHH2
2 FFFF 15HHH15
3 FFFF 324 FFFF 32HHH32
5 FFFF 654HHH654
6 FFFF 756HHH756
7 FFFF 65223HHH65223
8 FFFF 22HHH22
9 FFFF 650HHH650
10 FFFF 5956HHH5956
11 FFFF 6HHH6
12 FFFF 26HHH26
13 FFFF 54HHH54
So the Combiner executed, but the reducer did not; the output folder is created, yet it contains no files.
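To make it unambiguous which stage a given console line came from, one debugging idea (a hypothetical TaggedReducer variant, not part of my code below) is to prefix every print with the task attempt ID: combiner invocations run inside map task attempts, whose IDs contain _m_, while true reduce invocations run inside reduce attempts, whose IDs contain _r_.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical debugging variant of IntSumReducer: prefixing each print
// with the task attempt ID shows whether it ran as a combiner
// (attempt_..._m_...) or as the reducer (attempt_..._r_...).
public class TaggedReducer
    extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

  private final IntWritable result = new IntWritable();
  private static int sum = 1;

  @Override
  public void reduce(IntWritable key, Iterable<IntWritable> values,
      Context context) throws IOException, InterruptedException {
    for (IntWritable val : values) {
      System.out.println(context.getTaskAttemptID() + " FFFF " + key);
      result.set(sum);
      context.write(key, result);
      sum++;
    }
  }
}

Registering this class with both setCombinerClass and setReducerClass would then show directly whether any _r_ attempt ever prints anything.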
The full code:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper
      extends Mapper<Object, Text, IntWritable, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private IntWritable word = new IntWritable();

    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      // With the 2 commented-out lines below replaced by the 4 lines that
      // follow (the state shown here), no output file is produced.
      /*
      word.set(Integer.parseInt(value.toString()));
      context.write(word, one);
      */
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(Integer.parseInt(itr.nextToken()));
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
      extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    private IntWritable result = new IntWritable();
    // Running rank; static, so it keeps counting across reduce() calls.
    private static int sum = 1;

    public void reduce(IntWritable key, Iterable<IntWritable> values,
        Context context) throws IOException, InterruptedException {
      // Emit the key once per occurrence, paired with its current rank.
      for (IntWritable val : values) {
        // System.out.print(sum);
        System.out.print(sum + " FFFF " + key);
        result.set(sum);
        context.write(key, result);
        sum++;
      }
      System.out.println("HHH" + key);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job,
        new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
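Another check I could add (a hypothetical helper, not in the code above) is to dump Hadoop's built-in per-phase record counters once job.waitForCompletion(true) returns (pulling that call out of the System.exit expression); REDUCE_INPUT_RECORDS staying at 0 would confirm that the reducer never received any data.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class JobDebug {

  // Hypothetical helper: prints how many records each phase handled,
  // taken from the job's built-in task counters.
  static void printPhaseCounters(Job job) throws Exception {
    System.out.println("map output records:     "
        + job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue());
    System.out.println("combine output records: "
        + job.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue());
    System.out.println("reduce input records:   "
        + job.getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS).getValue());
    System.out.println("reduce output records:  "
        + job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue());
  }
}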
What causes the inconsistent results? Concretely, between
word.set(Integer.parseInt(value.toString()));
context.write(word, one);
and
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
  word.set(Integer.parseInt(itr.nextToken()));
  context.write(word, one);
}
what is the difference in the results they produce?
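For concreteness, here is a standalone sketch (the sample lines are invented, not taken from my input files) of where the two bodies observably differ: Integer.parseInt on the raw line throws NumberFormatException for anything other than a single clean number, whereas the StringTokenizer loop silently skips blank lines and emits once per token on multi-number lines.

import java.util.StringTokenizer;

public class ParseComparison {

  public static void main(String[] args) {
    // Invented edge cases: a clean number, two numbers on one line,
    // an empty line, and a whitespace-only line.
    String[] lines = { "32", "32 15", "", "   " };
    for (String line : lines) {
      // Variant 1: parse the whole raw line in one call.
      try {
        System.out.println("parseInt  emits: " + Integer.parseInt(line));
      } catch (NumberFormatException e) {
        System.out.println("parseInt  throws on \"" + line + "\"");
      }
      // Variant 2: tokenize first; blank lines yield no tokens at all,
      // and multi-number lines yield one emit per token.
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        System.out.println("tokenizer emits: " + Integer.parseInt(itr.nextToken()));
      }
    }
  }
}

In the actual job, an uncaught NumberFormatException thrown in map() fails the map task, so the one-call variant is the stricter of the two; my input files, as listed above, contain one number per line.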