20,808
社区成员
发帖
与我相关
我的任务
分享
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that counts how many
 * words of each length occur in the input.
 *
 * <p>The mapper emits {@code (wordLength, word)} for every token of every input
 * line; the reducer emits {@code (wordLength, numberOfWords)} with the count
 * rendered as text.
 */
public class word_length_count {

    /**
     * Splits each input line into tokens and emits one
     * {@code (token length, token)} pair per token.
     */
    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, IntWritable, Text> {
        /** Characters treated as word separators by the tokenizer. */
        private static final String DELIMITERS = " \t\n\r--,.:;?![]{}()'\"\\.\\|_";

        // Reused across calls to avoid allocating a new writable per token.
        private final Text word = new Text();
        private final IntWritable lengthKey = new IntWritable();

        /**
         * @param key      key supplied by the input format for this line (unused)
         * @param value    one line of input text
         * @param output   collector receiving {@code (length, word)} pairs
         * @param reporter progress reporter (unused)
         * @throws IOException if the collector fails to write a pair
         */
        public void map(LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), DELIMITERS);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                word.set(token);
                lengthKey.set(token.length());
                output.collect(lengthKey, word);
            }
        }
    }

    /**
     * Counts the words received for each length and emits
     * {@code (length, count)} with the count formatted as decimal text.
     */
    public static class Reduce extends MapReduceBase implements Reducer<IntWritable, Text, IntWritable, Text> {
        private final Text countText = new Text();

        /**
         * @param key      word length
         * @param values   all words having that length
         * @param output   collector receiving the {@code (length, count)} pair
         * @param reporter progress reporter (unused)
         * @throws IOException if the collector fails to write the pair
         */
        public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
            // long rather than int so very large groups cannot overflow the counter.
            long count = 0;
            while (values.hasNext()) {
                // The iterator must be advanced; testing hasNext() alone never
                // consumes an element and would loop forever.
                values.next();
                count++;
            }
            countText.set(Long.toString(count));
            output.collect(key, countText);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job submission or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: word_length_count <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(word_length_count.class);
        conf.setJobName("word_length_count");

        conf.setMapOutputKeyClass(IntWritable.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(IntWritable.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
while (values.hasNext()) {
sum ++;
}
问题应该出在这个循环:循环体里一直没有调用 values.next() 去取迭代器中的值,所以 hasNext() 永远为 true,变成了死循环。
改动之后就行了。
但是为什么reduce显示100%呢?