Hadoop 中 reducer 非常慢,但算法很简单
我的算法非常简单。数据文件有 24 GB,每行都由两个数字组成,例如:
12 14
12 15
12 29
13 90
。。。。
算法就是把第一个数字设为 key,然后找出这个 key 对应的所有第二个数字,有点像在社交网络里找“粉丝”的意思。
最后输出为
12 [14,15,29..]
13 [.....]
但是不知道为什么,放到 Hadoop 里面跑时,map 阶段很快,reduce 阶段却非常非常慢:reduce 在 67% 之前很快,从 67% 到 67.42% 却用了一个多小时。
而我跑 WordCount(Hadoop 官网的例子)统计这 24 GB 数据里第一个数字出现的次数,却挺快的。
是不是算法的问题?我的代码如下,求各路大神指点:
package org.myorg;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
public class followerTwitter {
public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
//private final static IntWritable one = new IntWritable(1);
private Text twitterID = new Text();
private Text followerID = new Text();
public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
if (tokenizer.hasMoreTokens()) {
twitterID.set(tokenizer.nextToken());
}
if (tokenizer.hasMoreTokens()) {
followerID.set(tokenizer.nextToken());
}
output.collect(twitterID, followerID);
}
}
public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
Text followers = new Text();
String orginal = "Followers:[";
//Text sum2 = new Text();
while (values.hasNext()) {
Text temp =values.next();
String temps=temp.toString();
orginal = orginal+temps+',';
}
orginal =orginal+']';
Text followerList=new Text(orginal);
output.collect(key, followerList);
}
}
public static void main(String[] args) throws Exception {
JobConf conf = new JobConf(followerTwitter.class);
conf.setJobName("followerTwitter");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(Map.class);
//conf.setCombinerClass(Reduce.class);
conf.setReducerClass(Reduce.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
}