Mapreduce topK问题总是只能得到一个结果,求解
import java.io.IOException;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class TopK
{
private int topk;
public static class TopKMapper extends Mapper<LongWritable, Text, IntWritable, Text>
{
private TreeMap<IntWritable, Text> fatcats = new TreeMap<IntWritable, Text>();
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
Configuration conf = context.getConfiguration();
Text topktext = DefaultStringifier.load(conf, "topk", Text.class);
int topk = Integer.parseInt(topktext.toString());
String[] str = value.toString().split("\t");
fatcats.put(new IntWritable(Integer.parseInt(str[0])), new Text(str[1]));
if(fatcats.size() > topk)
{
fatcats.remove(fatcats.firstKey());
}
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException
{
for(IntWritable key :fatcats.keySet())
{
context.write(key, fatcats.get(key));
}
}
}
public static class TopKReducer extends Reducer<IntWritable, Text, IntWritable, Text>
{
private TreeMap<IntWritable, Text> fatcats = new TreeMap<IntWritable, Text>();
public void reduce(IntWritable key, Iterable<Text>values, Context context)throws IOException, InterruptedException
{
Configuration conf = context.getConfiguration();
Text topktext = DefaultStringifier.load(conf, "topk", Text.class);
int topk = Integer.parseInt(topktext.toString());
for (Text str : values)
{
fatcats.put(key, str);
if(fatcats.size() > topk)
{
fatcats.remove(fatcats.firstKey());
}
}
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException
{
for(IntWritable key :fatcats.keySet())
{
context.write(key, fatcats.get(key));
}
}
}
public void run(String src, String dst) throws IOException
{
Configuration conf = new Configuration();
conf.set("fs.default.name", "hdfs://localhost:9000");
DefaultStringifier.store(conf, new Text(String.valueOf(topk)) ,"topk");
Job job = new Job(conf, "TopK");
job.setJarByClass(TopK.class);
//设置输入输出格式
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
//设置Map、Combine和Reduce处理类
job.setMapperClass(TopKMapper.class);
job.setReducerClass(TopKReducer.class);
job.setCombinerClass(TopKReducer.class);
//设置输出类型
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
//注意这里的输入输出都应该是在HDFS下的文件或目录
FileInputFormat.addInputPath(job, new Path(src));
FileOutputFormat.setOutputPath(job, new Path(dst));
//开始运行
try {
job.waitForCompletion(true);
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public TopK(int topk)
{
this.topk = topk;
}
public static void main(String[] args)
{
System.out.println(String.valueOf(10));
System.out.println(Integer.parseInt(String.valueOf(10)));
TopK topk = new TopK(10);
try {
topk.run("/count.txt", "/topk");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}