58,455
社区成员
发帖
与我相关
我的任务
分享
// Driver: configures and submits the "GenerateDocVectors" job, then exits with 0 on success
// and 1 on failure.
Configuration conf = new Configuration();

// Register the vocabulary file with the DistributedCache. This must be done before the Job is
// constructed, otherwise the cache file cannot be read by the tasks.
Path vocabularyFile = new Path(
    "hdfs://10.104.5.136:9000/user/hadoop/tmp/4-vocabulary/part-r-00000");
DistributedCache.addCacheFile(vocabularyFile.toUri(), conf);

Job job = new Job(conf, "GenerateDocVectors");
job.setJarByClass(GenerateDocVectors.class);

// Map and reduce implementations.
job.setMapperClass(DocVectorMapper.class);
job.setReducerClass(DocVectorReducer.class);

// Intermediate (map output) key/value types.
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);

// Final (job output) key/value types.
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DocVector.class);

// Input and output locations on HDFS.
Path inputDir = new Path("hdfs://10.104.5.136:9000/user/hadoop/tmp/3-tf-idf");
Path outputDir = new Path("hdfs://10.104.5.136:9000/user/hadoop/tmp/5-doc-vectors");
FileInputFormat.addInputPath(job, inputDir);
FileOutputFormat.setOutputPath(job, outputDir);

// Block until the job finishes (with progress reporting) and map the outcome to an exit code.
boolean succeeded = job.waitForCompletion(true);
System.exit(succeeded ? 0 : 1);
/**
 * Loads the keyword list from the first DistributedCache file into {@code keywords}.
 *
 * <p>Every non-blank line of the cached file is trimmed and added as one entry.
 *
 * @param context task context; its configuration is used to look up the localized cache files
 * @throws IOException if no cache file is available, or if the file cannot be opened or read
 */
@Override
public void setup(Context context) throws IOException {
    Path[] caches = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    if (caches == null || caches.length <= 0) {
        // Report the problem through the declared exception rather than System.exit(1):
        // exiting here would terminate the whole JVM that hosts this task.
        throw new IOException("No DistributedCache keywords File!");
    }

    // The string form of a Hadoop Path can include a "file:" scheme prefix, which java.io
    // treats as part of the file name and fails to open. Use only the path component of
    // the URI when handing the location to java.io.
    String localPath = caches[0].toUri().getPath();
    System.out.println(localPath);

    // NOTE(review): FileReader decodes with the platform default charset; confirm that this
    // matches the encoding of the vocabulary file.
    BufferedReader br = new BufferedReader(new FileReader(localPath));
    try {
        String line;
        while ((line = br.readLine()) != null) {
            String keyword = line.trim();
            if (!keyword.equals("")) {
                keywords.add(keyword);
            }
        }
    } finally {
        // Always release the file handle, even when reading fails part-way through.
        br.close();
    }
}
14/04/01 16:28:12 INFO filecache.TrackerDistributedCacheManager: Creating part-r-00000 in /tmp/hadoop-lihaibo/mapred/local/archive/-9064445594102647030_1090122960_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary-work--4639061430523519916 with rwxr-xr-x
14/04/01 16:28:12 INFO filecache.TrackerDistributedCacheManager: Cached hdfs://10.104.5.136:9000/user/hadoop/tmp/4-vocabulary/part-r-00000 as /tmp/hadoop-lihaibo/mapred/local/archive/-9064445594102647030_1090122960_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary/part-r-00000
14/04/01 16:28:12 INFO filecache.TrackerDistributedCacheManager: Cached hdfs://10.104.5.136:9000/user/hadoop/tmp/4-vocabulary/part-r-00000 as /tmp/hadoop-lihaibo/mapred/local/archive/-9064445594102647030_1090122960_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary/part-r-00000
14/04/01 16:28:15 WARN mapred.LocalJobRunner: job_local_0001
java.io.FileNotFoundException: file:\tmp\hadoop-lihaibo\mapred\local\archive\-9064445594102647030_1090122960_121196988\10.104.5.136\user\hadoop\tmp\4-vocabulary\part-r-00000 (文件名、目录名或卷标语法不正确。)
at java.io.FileInputStream.open(Native Method)
at java.io.FileInputStream.<init>(Unknown Source)
at java.io.FileInputStream.<init>(Unknown Source)
at java.io.FileReader.<init>(Unknown Source)
at cn.edu.sysu.arui.mapred.GenerateDocVectors$DocVectorReducer.setup(GenerateDocVectors.java:80)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:174)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:417)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:260)
14/04/01 17:28:06 INFO filecache.TrackerDistributedCacheManager: Creating part-r-00000 in /tmp/hadoop-lihaibo/mapred/local/archive/6967473064854589526_1260178816_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary-work-4890346733254620063 with rwxr-xr-x
14/04/01 17:28:06 INFO filecache.TrackerDistributedCacheManager: Cached hdfs://10.104.5.136:9000/user/hadoop/tmp/4-vocabulary/part-r-00000#abc as /tmp/hadoop-lihaibo/mapred/local/archive/6967473064854589526_1260178816_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary/part-r-00000
14/04/01 17:28:06 INFO filecache.TrackerDistributedCacheManager: Cached hdfs://10.104.5.136:9000/user/hadoop/tmp/4-vocabulary/part-r-00000#abc as /tmp/hadoop-lihaibo/mapred/local/archive/6967473064854589526_1260178816_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary/part-r-00000
14/04/01 17:28:06 WARN mapred.LocalJobRunner: LocalJobRunner does not support symlinking into current working dir.
14/04/01 17:28:06 INFO mapred.TaskRunner: Creating symlink: /tmp/hadoop-lihaibo/mapred/local/archive/6967473064854589526_1260178816_121196988/10.104.5.136/user/hadoop/tmp/4-vocabulary/part-r-00000 <- D:\tmp\hadoop-lihaibo\mapred\local\localRunner/abc
14/04/01 17:28:06 INFO mapred.JobClient: Cleaning up the staging area file:/tmp/hadoop-lihaibo/mapred/staging/lihaibo1582277947/.staging/job_local_0001