hadoop Mapreduce 问题,求解释

ZhouSanduo18 2015-09-02 09:56:58
我的程序如下(databean.java和Main.java)
databean.java:

package delMax;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class dataBean implements Writable{
private String uid;
private String mid;
private String time;
private String content;
private int forward_count;
private int comment_count;
private int like_count;
/**
* set()、get()方法 省略
*/

/**
* 有参构造函数
*/
public dataBean(String uid, String mid, String time, int forward_count, int comment_count,
int like_count, String content) {
super();
this.uid = uid;
this.mid = mid;
this.time = time;
this.content = content;
this.forward_count = forward_count;
this.comment_count = comment_count;
this.like_count = like_count;
}
/**
* 无参构造函数
*/
public dataBean(){

}

/**
* 反序列化
*/
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.uid=in.readUTF();
this.mid=in.readUTF();
this.time=in.readUTF();
this.forward_count=in.readInt();
this.comment_count=in.readInt();
this.like_count=in.readInt();
this.content=in.readUTF();
}
/**
* 序列化
*/
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(uid);
out.writeUTF(mid);
out.writeUTF(time);
out.writeInt(forward_count);
out.writeInt(comment_count);
out.writeInt(like_count);
out.writeUTF(content);
}
/**
* 重写toString()方法
*/

@Override
public String toString() {
// TODO Auto-generated method stub
return this.uid+"\t"+this.mid+"\t"+this.time+"\t"+this.forward_count+"\t"
+this.comment_count+"\t"+this.like_count+"\t"+this.content;
}

}


Main.java:

package delMax;

import java.io.IOException;
//import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MR {
public static class MyMapper extends Mapper<LongWritable, Text, Text, dataBean> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// 接收数据
String line = value.toString();
String[] fields = line.split("\t");
String uid = fields[0];
String mid = fields[1];
String date = fields[2];
String content;
int forward_count = Integer.parseInt(fields[3]);
int comment_count = Integer.parseInt(fields[4]);
int like_count = Integer.parseInt(fields[5]);
if (fields.length < 7) {
content = null;
} else
content = fields[6];
dataBean bean = new dataBean(uid, mid, date, forward_count, comment_count, like_count, content);
context.write(new Text(uid), bean);
}
}

public static class MyReducer extends Reducer<Text, dataBean, NullWritable, dataBean> {
@Override
protected void reduce(Text key, Iterable<dataBean> value2, Context context)
throws IOException, InterruptedException {
int max = 0;
int submax = 0;
List<dataBean> list = new ArrayList<dataBean>();
for (dataBean bean : value2) {
list.add(bean);
}
for(dataBean bean : list){
context.write(null, bean);
}
}
}

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(MR.class);
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(dataBean.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(dataBean.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}


输入数据为:

07fc721342df1a4c1992560b582992f8 5fefc2488f99aba5782baa43096b76dc 2014-11-04 1 7 2 [吐]@京东 的用户体验太差了....第一次在京东购物,买了几百的书....真是经历了网购的各种问题...
07fc721342df1a4c1992560b582992f8 397d9abe2ebf9ba986fe5ab5a6139e11 2014-11-04 2 3 0 #我爱小米手机#因为小米手机4的屏幕太给力了,5英寸的高色彩饱和度夏普 / JDI屏幕,1920x1080全高清分辨率,玩游戏,看高清视频爽爆了。
07fc721342df1a4c1992560b582992f8 a2ae3959e9aa21a47993506e2609a4ec 2014-11-08 0 0 0 [doge][喵喵]天猫真是牛....卖实验耗材的都跑天猫去开店了...
07fc721342df1a4c1992560b582992f8 fcc63a086522040a271ba6d1fb604734 2014-11-09 3 6 0 [doge]找工作
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~

本程序只是把迭代器中的内容添加到ArrayList中,然后将ArrayList中的数据输出,本程序的输出结果应该和输入数据一样,可是,输出的结果却是:

07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~
07fc721342df1a4c1992560b582992f8 6ab0f93ab59cbdaf25f4a023642971a6 2014-11-11 0 5 1 [doge]成功抢了台米4电信~

求高手解释,万分感激!
...全文
277 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
ZhouSanduo18 2015-09-30
  • 打赏
  • 举报
回复
引用 3 楼 wlp001007 的回复:
问题在于list.add(bean)这句,因为bean只是个类型引用,它的值会随着for的进行不断改变,因此你list.add(bean)时它也会随着变化,for结束时它指向追后一行的数据,因此你输出时都是最后一行的值。 你可以改下代码,如下: for (dataBean bean : value2) { dataBean nbean=new dataBean(); 设置nbean的参数; list.add(nbean); }
虽然没有按照你说的取尝试,但是感觉就是个的道理。感谢!!!
Lupe_wlp 2015-09-30
  • 打赏
  • 举报
回复
问题在于list.add(bean)这句,因为bean只是个类型引用,它的值会随着for的进行不断改变,因此你list.add(bean)时它也会随着变化,for结束时它指向追后一行的数据,因此你输出时都是最后一行的值。 你可以改下代码,如下: for (dataBean bean : value2) { dataBean nbean=new dataBean(); 设置nbean的参数; list.add(nbean); }
LiuMing92 2015-09-28
  • 打赏
  • 举报
回复
你是以uid做key的,可以看到所有的数据都是同一个组的,出现这个问题的代码是添加bean到list集合中的时候出现问题了。解决代码:list.add(bean)换成list.add(WritableUtils.clone(bean, context.getConfiguration()))即可
qq_31350549 2015-09-16
  • 打赏
  • 举报
回复
应该重写hashcode和equals

20,808

社区成员

发帖
与我相关
我的任务
社区描述
Hadoop生态大数据交流社区,致力于有Hadoop,hive,Spark,Hbase,Flink,ClickHouse,Kafka,数据仓库,大数据集群运维技术分享和交流等。致力于收集优质的博客
社区管理员
  • 分布式计算/Hadoop社区
  • 涤生大数据
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧