package cn.trimData;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * @author z
 * @version created 2014, 11:08:02 PM
 * Description: removes duplicate ID-card records
 * version 1.0
 */
public class ReDuplicate {

    static class DupMapper extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String outKey = "";
            // skip the header line
            if (line.indexOf("Name") >= 0) {
                return;
            }
            String[] str1 = line.split(",");
            if (str1.length == 8) {
                // field 4 is the dedup key (the ID-card number)
                outKey = str1[4];
                if (outKey.length() < 0) { // note: length() is never negative, so this never triggers
                    return;
                }
            }
            context.write(new Text(outKey), new Text(line));
        }
    }
    static class DupReduce extends Reducer<Text, Text, NullWritable, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Text value = new Text("");
            Text next = values.iterator().next();
            for (Text outvalue : values) {
                next = outvalue;
            }
            context.write(NullWritable.get(), (Text) values); // <-- exception 1 is thrown here
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.out.println(" ");
            System.exit(1);
        }
        Job job = new Job(conf, "new ");
        job.setJarByClass(ReDuplicate.class);
        job.setMapperClass(DupMapper.class);
        job.setReducerClass(DupReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputKeyClass(Text.class); // overwrites the previous call
        job.setOutputKeyClass(Text.class); // and overwrites it again
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.out.println(job.waitForCompletion(true) ? 0 : 1);
    }
}
Exception 1:
org.apache.hadoop.mapreduce.task.ReduceContextImpl$ValueIterable cannot be cast to org.apache.hadoop.io.Text
    at cn.trimData.ReDuplicate$DupReduce.reduce(ReDuplicate.java:54)
That is, the context.write(NullWritable.get(), (Text) values); statement in the reduce method throws a ClassCastException.
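
The cast fails because values is the whole Iterable<Text> passed to reduce(), not a single Text; its concrete type is Hadoop's ReduceContextImpl$ValueIterable, exactly as the message says. To drop duplicates, emit one element of the group instead of the group itself. A minimal sketch of a corrected reducer, assuming that keeping the first record of each group is acceptable:

static class DupReduce extends Reducer<Text, Text, NullWritable, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // All lines sharing this key (the ID-card number) arrive here grouped;
        // writing only the first one removes the duplicates.
        context.write(NullWritable.get(), values.iterator().next());
    }
}

As a side note, Hadoop's value iterable is single-pass (and reuses the underlying Text object), so the original pattern of calling values.iterator().next() and then looping over values again does not restart from the beginning. Likewise, outKey.length() < 0 in the mapper can never be true; outKey.isEmpty() was probably intended.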
Exception 2:
ERROR hdfs.DFSClient (DFSClient.java:closeAllFilesBeingWritten(712)) - Failed to close file /testout/010/_temporary/_attempt_local1576430195_0001_r_000000_0/part-r-00000
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException): No lease on /testout/010/_temporary/_attempt_local1576430195_0001_r_000000_0/part-r-00000: File does not exist. Holder DFSClient_NONMAPREDUCE_1155748347_1 does not have any open files.
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2543)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2535)
The job then fails while writing its output.
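
Exception 2 is most likely a follow-on failure rather than an independent bug: once the reduce attempt dies on the ClassCastException, the framework aborts the attempt and cleans up its _temporary output directory, so when the DFSClient later tries to close part-r-00000 the file is already gone, which HDFS reports as a LeaseExpiredException ("No lease"). Fixing exception 1 should make it disappear.

The driver is also worth correcting: setOutputKeyClass is called three times with conflicting types (only the last call, Text, takes effect) and the output value classes are never set. A sketch of a corrected main, where the usage string and job name are illustrative rather than taken from the original post:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ReDuplicate <in> <out>"); // illustrative usage text
        System.exit(1);
    }
    Job job = new Job(conf, "deduplicate ID-card records"); // illustrative job name
    job.setJarByClass(ReDuplicate.class);
    job.setMapperClass(DupMapper.class);
    job.setReducerClass(DupReduce.class);
    // map output types: Text key (dedup key) / Text value (whole line)
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // final output types must match DupReduce: NullWritable key / Text value
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}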