Setting Up an Eclipse Development Environment on Windows for a Hadoop Cluster
Preface: I had not touched code in years, but cloud computing is hot right now and my hands got itchy. It took a lot of effort to finally get Eclipse talking to the cluster, so here is a write-up to save others some of the detours.
1. Starting Environment
Windows 7 operating system
2. Software to Download
JDK 1.8
Hadoop 2.7.2
Eclipse Mars
Hadoop Eclipse plugin (hadoop-eclipse-plugin)
3. Deployment
Install JDK 1.8.
Install Eclipse.
Copy the Hadoop Eclipse plugin jar into Eclipse's plugins directory.
Unpack hadoop-2.7.2 into a directory on the local disk.
Open Eclipse, go to Window > Preferences > Hadoop Map/Reduce, and fill in the hadoop-2.7.2 directory you just unpacked.
Then open Window > Show View > Other..., select Map/Reduce Locations, and create a new location with the Hadoop cluster's IP address and port.
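As a concrete example (using the cluster from the sample code at the end of this post), the new location would use 192.168.120.57 as the host, with the DFS Master port set to 9000 so that it matches fs.default.name (hdfs://192.168.120.57:9000); the Map/Reduce Master port depends on your cluster setup, so treat these values as a sketch rather than fixed numbers.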
4. Configuration Notes
Because the Hadoop cluster is accessed as the user hadoop, the user name on the development machine also needs to be hadoop (an alternative that avoids renaming the Windows account is sketched at the end of this section).
Create a HADOOP_HOME environment variable pointing at the hadoop-2.7.2 directory, then restart the machine.
Set dfs.permissions.enabled to false on the Hadoop cluster (in hdfs-site.xml, followed by an HDFS restart); this turns off HDFS permission checks, so it is only suitable for a development cluster.
Create a project in Eclipse and choose the Map/Reduce project type.
The program is ultimately submitted to the Hadoop cluster as a jar, so once the code is written, export it as a jar into the project's root directory, then use Run on Hadoop to run and debug it.
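On the user-name requirement above: instead of renaming the Windows account, the Hadoop 2.x client (when the cluster uses simple, non-Kerberos authentication) can also be told which user name to present via HADOOP_USER_NAME. A minimal sketch, assuming the cluster user is hadoop; it has to run before the first FileSystem or Job object is created, e.g. as the first line of main():

// Hypothetical alternative to renaming the Windows account: present "hadoop"
// as the client user. Simple (non-Kerberos) authentication only.
System.setProperty("HADOOP_USER_NAME", "hadoop");

Setting a HADOOP_USER_NAME environment variable on the development machine has the same effect.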
5. Sample Code
package wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class wordcount {

    // Mapper: splits each input line into tokens and emits (word, 1).
    public static class wordcountMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: sums the counts for each word and emits (word, total).
    public static class wordcountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Required when submitting from a Windows client to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // NameNode address (fs.default.name is the deprecated alias of fs.defaultFS).
        conf.set("fs.default.name", "hdfs://192.168.120.57:9000");
        // Job history server, used by the client to query job status.
        conf.set("mapreduce.jobhistory.address", "192.168.120.57:10020");
        // Access user expected by the cluster (see section 4).
        conf.set("hadoop.job.user", "hadoop");
        // Run on YARN; the two JobTracker keys below are legacy MRv1 settings
        // that YARN ignores, kept here as in the original setup.
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("mapred.job.tracker", "192.168.120.57:9000");
        conf.set("mapreduce.jobtracker.address", "192.168.120.57:9000");
        // ResourceManager addresses.
        conf.set("yarn.resourcemanager.hostname", "192.168.120.57");
        conf.set("yarn.resourcemanager.admin.address", "192.168.120.57:8033");
        conf.set("yarn.resourcemanager.address", "192.168.120.57:8032");
        conf.set("yarn.resourcemanager.resource-tracker.address", "192.168.120.57:8036");
        conf.set("yarn.resourcemanager.scheduler.address", "192.168.120.57:8030");
        // The jar exported into the project root (see section 4); this is what
        // actually gets shipped to the cluster.
        conf.set("mapred.jar", "wordcount.jar");

        // Job.getInstance replaces the deprecated new Job(conf) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(wordcount.class);
        job.setJobName("wordcount");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(wordcountMap.class);
        job.setReducerClass(wordcountReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Input and output paths come from the program arguments.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
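When running the job, the HDFS input and output paths are passed as program arguments (Run Configurations > Arguments in Eclipse, or after the class name with hadoop jar on the command line). A hypothetical example for the cluster above:

hdfs://192.168.120.57:9000/user/hadoop/input hdfs://192.168.120.57:9000/user/hadoop/output

The output directory must not already exist, otherwise submission fails with a FileAlreadyExistsException.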