20,810
社区成员
发帖
与我相关
我的任务
分享
package flight2.maximum.delay;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.Iterator;
/**
Data:
------------ 2007.csv ------------
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
2007,1,6,7,1050,1050,1211,1210,WN,680,N283WN,81,80,65,1,0,LAX,SFO,337,6,10,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,1244,1245,1405,1405,WN,776,N720WN,81,80,68,0,-1,LAX,SFO,337,3,10,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,1547,1455,1655,1600,WN,173,N350SW,68,65,58,55,52,LAX,SJC,308,4,6,0,,0,21,0,3,0,31
---------- 2008.csv ------------
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
2008,1,9,3,1552,1550,1856,1900,WN,438,N307SW,124,130,113,-4,2,LIT,BWI,912,3,8,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,706,705,809,810,WN,7,N902WN,63,65,49,-1,1,LIT,DAL,296,3,11,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,1454,1500,1558,1605,WN,41,N465WN,64,65,53,-7,-6,LIT,DAL,296,4,7,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,732,735,826,835,WN,1194,N660SW,54,60,44,-9,-3,MAF,AUS,294,4,6,0,,0,NA,NA,NA,NA,NA
Understand:
1. Map side: Airport codes (column 17) as the key, [arrival delay (column 15) + Departure airport code, Year, Month, DayOfMonth] => flightDelay as the value;
2. Reduce side find the Maximum arrival delay for each key;
3. Reduce side write the result: (key, value);
4. Set Partition:
public static HashMap<Integer, Integer> years = new HashMap<>();
static{
years.put(2007, 0);
years.put(2008, 1);
}
getPartition(){
return years.get(flightDelay.year);
}
*/
/**
 * MapReduce driver that emits, for each key (CSV column index 16), the record with the
 * largest arrival delay (CSV column index 14) seen by a reducer.
 *
 * <p>The job runs with two reducers and {@link YearPartitioner}, so records are routed to a
 * reducer by year; each reducer therefore computes the maximum over the records it receives.
 */
public class Task1Maximum extends Configured implements Tool {

    /** Highest column index the mapper reads; rows with fewer columns are skipped. */
    private static final int LAST_COLUMN_USED = 17;

    /**
     * Parses one CSV line and emits (column 16, FlightDelay built from columns 14, 17, 0, 1, 2).
     * The header row, rows containing any "NA" field, and rows that are too short are dropped.
     */
    public static class MaximumMapper extends Mapper<LongWritable, Text, Text, FlightDelay> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] delays = StringUtils.split(value.toString(), '\\', ',');
            // Blank or truncated lines would otherwise fail with ArrayIndexOutOfBoundsException.
            if (delays == null || delays.length <= LAST_COLUMN_USED) {
                return;
            }
            if (delays[0].equalsIgnoreCase("year")) {
                return;
            }
            // Despite its name, the helper only reports whether any field equals "NA".
            if (Utils.replaceNAwithZero(delays)) {
                return;
            }
            String airCode = delays[16];
            // NOTE(review): index 17 is the "Dest" column in the sample header although the
            // field it feeds is called dptAirCode — confirm which airport is intended.
            FlightDelay flightDelay = new FlightDelay(
                    Integer.parseInt(delays[14]),
                    delays[17],
                    Integer.parseInt(delays[0]),
                    Integer.parseInt(delays[1]),
                    Integer.parseInt(delays[2]));
            context.write(new Text(airCode), flightDelay);
        }
    }

    /**
     * Keeps the value with the greatest arrival delay for each key and writes it out.
     */
    public static class MaximumReducer extends Reducer<Text, FlightDelay, Text, FlightDelay> {
        @Override
        protected void reduce(Text airCode, Iterable<FlightDelay> values, Context context) throws IOException, InterruptedException {
            // "found" lets the first value seed the maximum, so keys whose delays are all
            // zero or negative still report a real flight instead of a fabricated one.
            boolean found = false;
            int maximum = 0;
            String dptAirCode = "";
            int year = 0;
            int month = 0;
            int dayOfMonth = 0;
            for (FlightDelay flightDelay : values) {
                if (!found || flightDelay.getArrDelay() > maximum) {
                    found = true;
                    // Copy the fields out rather than keeping a reference to the iterated object.
                    maximum = flightDelay.getArrDelay();
                    dptAirCode = flightDelay.getDptAirCode();
                    year = flightDelay.getYear();
                    month = flightDelay.getMonth();
                    dayOfMonth = flightDelay.getDayOfMonth();
                }
            }
            if (found) {
                context.write(airCode, new FlightDelay(maximum, dptAirCode, year, month, dayOfMonth));
            }
        }
    }

    /**
     * Command-line entry point; the process exit status is the value returned by {@link #run},
     * or 1 when the run throws.
     */
    public static void main(String[] args) {
        int result;
        try {
            result = ToolRunner.run(new Configuration(), new Task1Maximum(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // A failed run must not report success to the calling shell.
            result = 1;
        }
        System.exit(result);
    }

    /**
     * Configures and submits the job, then waits for it to finish.
     *
     * @param strings command-line arguments (unused; input and output paths are fixed)
     * @return 0 if the job completed successfully, 1 otherwise
     */
    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(getConf(), "Task1Maximum");
        Configuration conf = job.getConfiguration();
        conf.set(TextOutputFormat.SEPERATOR, ",");
        job.setJarByClass(Task1Maximum.class);
        job.setMapperClass(MaximumMapper.class);
        job.setReducerClass(MaximumReducer.class);
        job.setPartitionerClass(YearPartitioner.class);
        job.setNumReduceTasks(2);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlightDelay.class);
        job.setOutputKeyClass(Text.class);
        // Previously this repeated setMapOutputValueClass, leaving the final value class unset.
        job.setOutputValueClass(FlightDelay.class);
        FileInputFormat.setInputPaths(job, new Path("/user/horton/flights/real/"));
        FileOutputFormat.setOutputPath(job, new Path("/user/horton/Task1Maximum"));
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
package flight2.maximum.delay;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;
public class FlightDelay implements Writable {
private int arrDelay;
private String dptAirCode;
private int year;
private int month;
private int dayOfMonth;
public FlightDelay() {
}
public FlightDelay(int arrDelay, String dptAirCode, int year, int month, int dayOfMonth) {
this.arrDelay = arrDelay;
this.dptAirCode = dptAirCode;
this.year = year;
this.month = month;
this.dayOfMonth = dayOfMonth;
}
/**
 * Serializes this record to {@code dataOutput}.
 *
 * <p>Field order is arrDelay, dptAirCode, year, month, dayOfMonth; {@code readFields}
 * must read the fields back in exactly this order.
 *
 * @param dataOutput sink the fields are written to
 * @throws IOException if the underlying stream fails
 */
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(arrDelay);
// NOTE(review): writeUTF rejects a null string, so dptAirCode must be set before writing.
dataOutput.writeUTF(dptAirCode);
dataOutput.writeInt(year);
dataOutput.writeInt(month);
dataOutput.writeInt(dayOfMonth);
}
/**
 * Restores this record from {@code dataInput}, overwriting all five fields.
 *
 * <p>Reads arrDelay, dptAirCode, year, month, dayOfMonth, in that order, mirroring
 * the order used by {@code write}.
 *
 * @param dataInput source the fields are read from
 * @throws IOException if the underlying stream fails
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
this.arrDelay = dataInput.readInt();
this.dptAirCode = dataInput.readUTF();
this.year = dataInput.readInt();
this.month = dataInput.readInt();
this.dayOfMonth = dataInput.readInt();
}
public int getArrDelay() {
return arrDelay;
}
public void setArrDelay(int arrDelay) {
this.arrDelay = arrDelay;
}
public String getDptAirCode() {
return dptAirCode;
}
public void setDptAirCode(String dptAirCode) {
package flight2.maximum.delay;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import java.util.HashMap;
import java.util.Map;
/**
 * Routes each record to a reducer according to the year stored in its {@link FlightDelay} value.
 */
public class YearPartitioner extends Partitioner<Text, FlightDelay> {

    /** Year to preferred partition id: 2007 maps to 0 and 2008 maps to 1. */
    public static Map<Integer, Integer> yearMap = new HashMap<>();

    static {
        yearMap.put(2007, 0);
        yearMap.put(2008, 1);
    }

    /**
     * Chooses the partition for one record.
     *
     * @param key         map output key (not used for routing)
     * @param flightDelay map output value whose year selects the partition
     * @param i           total number of partitions
     * @return the mapped partition id for known years, otherwise one derived from the year
     *         itself; always reduced modulo {@code i}
     */
    @Override
    public int getPartition(Text key, FlightDelay flightDelay, int i) {
        int year = flightDelay.getYear();
        Integer partitionID = yearMap.get(year);
        if (partitionID == null) {
            // Unboxing a missing entry used to throw NullPointerException; fall back to a
            // non-negative value derived from the year instead.
            partitionID = year & Integer.MAX_VALUE;
        }
        // Keep the result inside [0, i) even when the job runs with fewer partitions than mapped ids.
        return partitionID % i;
    }
}
package flight2.maximum.delay;
/**
 * Helper for screening parsed CSV rows.
 */
public class Utils {

    /**
     * Reports whether any field of a row is the literal "NA" (case-insensitive, surrounding
     * whitespace ignored). Despite the method name, nothing is replaced.
     *
     * @param strs the fields of one row; may be {@code null} or empty
     * @return {@code true} if at least one field equals "NA", {@code false} otherwise
     */
    public static boolean replaceNAwithZero(String[] strs) {
        boolean containsNa = false;
        if (strs != null) {
            // Stop scanning as soon as the first "NA" field is seen.
            for (int idx = 0; idx < strs.length && !containsNa; idx++) {
                containsNa = strs[idx].trim().equalsIgnoreCase("NA");
            }
        }
        return containsNa;
    }
}
package flight1.average.delay;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
/**
Write and execute a Java MapReduce application that satisfies all of the following criteria:
1. The input of the application is the two text files in /user/horton/flights/.
2. Your application computes the average departure delay (column 16) for each distinct airport code (column 17).
3. Store the output in a new folder in HDFS named /user/horton/task1.
4. The output is partitioned into exactly two files. Airport codes that start with 'A' through 'M' should be in one file,
and airport codes that start with 'N' through 'Z' should be in another file.
5. Each row in the output should consist of two values separated by a comma: the airport code and the value you computed for the average departure delay.
6. Do NOT compute two averages (one for each year). Compute the average departure delay over the two year span of 2007 and 2008.
Data:
------------ 2007.csv ------------
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
2007,1,6,7,1050,1050,1211,1210,WN,680,N283WN,81,80,65,1,0,LAX,SFO,337,6,10,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,1244,1245,1405,1405,WN,776,N720WN,81,80,68,0,-1,LAX,SFO,337,3,10,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,1547,1455,1655,1600,WN,173,N350SW,68,65,58,55,52,LAX,SJC,308,4,6,0,,0,21,0,3,0,31
2007,1,6,7,1909,1910,1918,1915,WN,160,N489WN,69,65,56,3,-1,LBB,ABQ,289,5,8,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,1759,1745,1859,1850,WN,555,N512SW,60,65,49,9,14,LBB,AUS,341,3,8,0,,0,NA,NA,NA,NA,NA
2007,1,6,7,847,850,954,955,WN,836,N775SW,67,65,52,-1,-3,LBB,AUS,341,5,10,0,,0,NA,NA,NA,NA,NA
---------- 2008.csv ------------
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
2008,1,9,3,1552,1550,1856,1900,WN,438,N307SW,124,130,113,-4,2,LIT,BWI,912,3,8,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,706,705,809,810,WN,7,N902WN,63,65,49,-1,1,LIT,DAL,296,3,11,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,1454,1500,1558,1605,WN,41,N465WN,64,65,53,-7,-6,LIT,DAL,296,4,7,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,732,735,826,835,WN,1194,N660SW,54,60,44,-9,-3,MAF,AUS,294,4,6,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,1835,1830,1928,1925,WN,2374,N347SW,53,55,41,3,5,MAF,AUS,294,4,8,0,,0,NA,NA,NA,NA,NA
2008,1,9,3,1537,1535,1636,1635,WN,43,N305SW,59,60,46,1,2,MAF,DAL,319,3,10,0,,0,NA,NA,NA,NA,NA
Understand:
1. Map side: Airport codes (column 17) as the key, departure delay (column 16) as the value;
2. Reduce side: get the sum and the count of the departure delay;
3. Reduce side write the result: (key, sum / count)
4. Set Partition:
public static HashMap<String, Integer> airCode = new HashMap<>();
static{
airCode.put("A", 0);
airCode.put("B", 0);
...
airCode.put("N", 1);
...
}
* */
/**
 * MapReduce driver that computes, for each airport code (CSV column index 16), the average
 * of the departure delay (CSV column index 15) over all input rows.
 *
 * <p>Output is split across two reducers by {@link AirCodePartitioner} and written as
 * comma-separated text. The average is an integer: the fractional part is discarded.
 */
public class Task1Average extends Configured implements Tool {

    /** Highest column index the mapper reads; rows with fewer columns are skipped. */
    private static final int LAST_COLUMN_USED = 16;

    /**
     * Emits (airport code, departure delay) for every usable row. The header row, rows
     * containing any "NA" field, and rows that are too short are dropped.
     */
    public static class AverageMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] airCodes = StringUtils.split(value.toString(), '\\', ',');
            // Blank or truncated lines would otherwise fail with ArrayIndexOutOfBoundsException.
            if (airCodes == null || airCodes.length <= LAST_COLUMN_USED) {
                return;
            }
            if (airCodes[16].equalsIgnoreCase("Origin")) {
                return;
            }
            // Despite its name, the helper only reports whether any field equals "NA".
            if (Utils.replaceNAWithZero(airCodes)) {
                return;
            }
            String airCode = airCodes[16];
            int delay = Integer.parseInt(airCodes[15]);
            context.write(new Text(airCode), new IntWritable(delay));
        }
    }

    /**
     * Sums and counts the delays of one airport and writes their integer quotient.
     */
    public static class AverageReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Accumulate in long so a large number of records cannot overflow the sum.
            long sum = 0;
            long count = 0;
            for (IntWritable value : values) {
                sum += value.get();
                count++;
            }
            if (count == 0) {
                return;
            }
            // The quotient of int-valued samples always fits in an int.
            int avg = (int) (sum / count);
            context.write(key, new IntWritable(avg));
        }
    }

    /**
     * Configures and submits the job, then waits for it to finish.
     *
     * @param args command-line arguments (unused; input and output paths are fixed)
     * @return 0 if the job completed successfully, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "Task1Average");
        Configuration conf = job.getConfiguration();
        conf.set(TextOutputFormat.SEPERATOR, ",");
        job.setJarByClass(Task1Average.class);
        job.setMapperClass(AverageMapper.class);
        job.setReducerClass(AverageReducer.class);
        job.setPartitionerClass(AirCodePartitioner.class);
        job.setNumReduceTasks(2);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        // The reducer emits IntWritable values; this was previously declared as Text.
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/user/horton/flights/real/"));
        FileOutputFormat.setOutputPath(job, new Path("/user/horton/Task1Average"));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; the process exit status is the value returned by {@link #run},
     * or 1 when the run throws.
     */
    public static void main(String[] args) {
        int result;
        try {
            result = ToolRunner.run(new Configuration(), new Task1Average(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // A failed run must not report success to the calling shell.
            result = 1;
        }
        System.exit(result);
    }
}
package flight1.average.delay;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import java.util.HashMap;
import java.util.Map;
/**
 * Splits airport codes into two partitions by first letter: 'A'-'M' go to partition 0 and
 * 'N'-'Z' go to partition 1.
 */
public class AirCodePartitioner extends Partitioner<Text, IntWritable> {

    /** Single upper-case letter to partition id. */
    public static Map<String, Integer> airCodeMap = new HashMap<>();

    static {
        for (char letter = 'A'; letter <= 'Z'; letter++) {
            airCodeMap.put(String.valueOf(letter), letter <= 'M' ? 0 : 1);
        }
    }

    /**
     * Chooses the partition for one airport code.
     *
     * @param text        the airport code
     * @param intWritable the value travelling with the key (not used for routing)
     * @param i           total number of partitions
     * @return the partition mapped to the code's first letter (case-insensitive), or 0 for an
     *         empty code or a first character outside A-Z; always reduced modulo {@code i}
     */
    @Override
    public int getPartition(Text text, IntWritable intWritable, int i) {
        String code = text.toString();
        Integer partitionID = null;
        if (!code.isEmpty()) {
            String prefix = String.valueOf(Character.toUpperCase(code.charAt(0)));
            partitionID = airCodeMap.get(prefix);
        }
        if (partitionID == null) {
            // Unmapped prefixes used to fail while unboxing null; send them to the first partition.
            partitionID = 0;
        }
        // Keep the result inside [0, i) even when the job runs with a single partition.
        return partitionID % i;
    }
}
package flight1.average.delay;
/**
 * Helper for screening parsed CSV rows.
 */
public class Utils {

    /**
     * Reports whether any field of a row, once trimmed and upper-cased, is "NA".
     * Despite the method name, nothing is replaced.
     *
     * @param strs the fields of one row; may be {@code null} or empty
     * @return {@code true} if at least one field is "NA", {@code false} otherwise
     */
    public static boolean replaceNAWithZero(String[] strs) {
        if (strs != null) {
            for (String field : strs) {
                String normalized = field.trim().toUpperCase();
                if ("NA".equals(normalized)) {
                    return true;
                }
            }
        }
        return false;
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;
/**
 * A calendar date (year, month, day) usable as a Hadoop key and as a {@code HashMap} key.
 *
 * <p>Ordering is by year, then month, then day. Serialized form is three ints in that order.
 */
public class Datee implements WritableComparable<Datee> {
    public int year;
    public int month;
    public int day;

    /** Creates an empty instance to be populated by {@link #readFields}. */
    public Datee() {
    }

    public Datee(int year, int month, int day) {
        this.year = year;
        this.month = month;
        this.day = day;
    }

    /** Writes year, month and day as three ints. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeInt(day);
    }

    /** Reads year, month and day in the order {@link #write} produced them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year = dataInput.readInt();
        month = dataInput.readInt();
        day = dataInput.readInt();
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    /**
     * Orders dates chronologically by field. Uses {@code Integer.compare} rather than
     * subtraction so extreme field values cannot overflow and flip the sign.
     */
    @Override
    public int compareTo(Datee o) {
        int response = Integer.compare(this.year, o.year);
        if (response == 0) {
            response = Integer.compare(this.month, o.month);
        }
        if (response == 0) {
            response = Integer.compare(this.day, o.day);
        }
        return response;
    }

    /** Two dates are equal when year, month and day all match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Datee)) {
            return false;
        }
        Datee datee = (Datee) o;
        return year == datee.year && month == datee.month && day == datee.day;
    }

    /**
     * Position-sensitive hash. The previous {@code year + month + day} gave the same hash to
     * every date whose fields had the same sum (e.g. 2007-1-6 and 2007-6-1).
     */
    @Override
    public int hashCode() {
        return 31 * (31 * year + month) + day;
    }

    /** Formats the date as {@code year,month,day}. */
    @Override
    public String toString() {
        return this.year + "," +
                this.month + "," +
                this.day;
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Output format that writes each task's records to a file named
 * {@code <jobName>_<taskId>} inside the job's configured output directory, using
 * {@link DelayRecordWriter} for the record layout.
 */
public class DelayFileOutputFormat extends FileOutputFormat<DateDelay, DelayWeather> {

    /**
     * Creates the output file for this task and wraps it in a {@link DelayRecordWriter}.
     *
     * @param taskAttemptContext context supplying the task id, job name, output path and configuration
     * @return a writer bound to the newly created file
     * @throws IOException if the file cannot be created
     */
    @Override
    public RecordWriter<DateDelay, DelayWeather> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        int partition = taskAttemptContext.getTaskAttemptID().getTaskID().getId();
        Path outDir = FileOutputFormat.getOutputPath(taskAttemptContext);
        // Resolve against the full output directory. Using outDir.getName() kept only the last
        // path component and produced a relative path that ignored the parent directories.
        Path fileName = new Path(outDir, taskAttemptContext.getJobName() + "_" + partition);
        FileSystem fileSystem = fileName.getFileSystem(taskAttemptContext.getConfiguration());
        // NOTE(review): the file is created directly in the final directory rather than in the
        // task's work path — confirm this is acceptable if task attempts can be retried.
        FSDataOutputStream out = fileSystem.create(fileName);
        return new DelayRecordWriter(out);
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.DataOutputStream;
import java.io.IOException;
/**
 * Writes one text line per record: the key's date, a comma, then the value's string form.
 */
public class DelayRecordWriter extends RecordWriter<DateDelay, DelayWeather> {
    private DataOutputStream out;
    private final static String SEPERATOR = ",";

    /** Creates a writer with no stream attached; {@link #close} is then a no-op. */
    public DelayRecordWriter() {
    }

    /**
     * @param out stream that receives the formatted records; closed by {@link #close}
     */
    public DelayRecordWriter(DataOutputStream out) {
        this.out = out;
    }

    /**
     * Appends {@code <datee>,<delayWeather>\n} to the stream.
     *
     * @param dateDelay    key; only its {@code datee} field is written
     * @param delayWeather value, written via its {@code toString()}
     * @throws IOException if the stream write fails
     */
    @Override
    public void write(DateDelay dateDelay, DelayWeather delayWeather) throws IOException, InterruptedException {
        StringBuilder builder = new StringBuilder();
        builder.append(dateDelay.datee);
        builder.append(SEPERATOR);
        builder.append(delayWeather);
        builder.append("\n");
        // Encode explicitly so the bytes do not depend on the JVM's default charset.
        out.write(builder.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /** Closes the underlying stream, if one was supplied. */
    @Override
    public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        if (out != null) {
            out.close();
        }
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Join result pairing one flight's delay record with the weather observed on its date.
 * Serialized as the flight record followed by the weather record.
 */
public class DelayWeather implements Writable {
    public FlightDelay flightDelay;
    public Weather weather;

    /** Writes the flight part first, then the weather part. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        this.flightDelay.write(dataOutput);
        this.weather.write(dataOutput);
    }

    /** Replaces both parts with fresh instances read in the order {@link #write} produced them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.flightDelay = new FlightDelay();
        this.flightDelay.readFields(dataInput);

        this.weather = new Weather();
        this.weather.readFields(dataInput);
    }

    /** Returns the flight part and the weather part joined by a comma. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(flightDelay);
        text.append(",");
        text.append(weather);
        return text.toString();
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * The per-flight fields carried through the join job: departure and arrival time, carrier,
 * flight number, elapsed time, arrival and departure delay, origin and destination.
 */
public class FlightDelay implements Writable {
    public int depTime;
    public int arrTime;
    public String uniqueCarrier;
    public int flightNum;
    public int actualElapsedTime;
    public int arrDelay;
    public int depDelay;
    public String origin;
    public String destination;

    /** Creates an empty instance to be populated by {@link #readFields}. */
    public FlightDelay() {
    }

    public FlightDelay(int depTime, int arrTime, String uniqueCarrier, int flightNum, int actualElapsedTime, int arrDelay, int depDelay, String origin, String destination) {
        this.depTime = depTime;
        this.arrTime = arrTime;
        this.uniqueCarrier = uniqueCarrier;
        this.flightNum = flightNum;
        this.actualElapsedTime = actualElapsedTime;
        this.arrDelay = arrDelay;
        this.depDelay = depDelay;
        this.origin = origin;
        this.destination = destination;
    }

    /**
     * Writes all nine fields in declaration order; {@link #readFields} reads them back in the
     * same order.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(depTime);
        dataOutput.writeInt(arrTime);
        dataOutput.writeUTF(uniqueCarrier);
        dataOutput.writeInt(flightNum);
        dataOutput.writeInt(actualElapsedTime);
        dataOutput.writeInt(arrDelay);
        dataOutput.writeInt(depDelay);
        dataOutput.writeUTF(origin);
        dataOutput.writeUTF(destination);
    }

    /** Restores all nine fields in the order {@link #write} produced them. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.depTime = dataInput.readInt();
        this.arrTime = dataInput.readInt();
        this.uniqueCarrier = dataInput.readUTF();
        this.flightNum = dataInput.readInt();
        this.actualElapsedTime = dataInput.readInt();
        this.arrDelay = dataInput.readInt();
        this.depDelay = dataInput.readInt();
        this.origin = dataInput.readUTF();
        this.destination = dataInput.readUTF();
    }

    /**
     * Returns the nine fields as comma-separated text with no trailing separator, so a caller
     * that appends its own comma does not produce an empty column.
     */
    @Override
    public String toString() {
        return
                this.depTime + "," +
                this.arrTime + "," +
                this.uniqueCarrier + "," +
                this.flightNum + "," +
                this.actualElapsedTime + "," +
                this.arrDelay + "," +
                this.depDelay + "," +
                this.origin + "," +
                this.destination;
    }
}
package flightdelay1.join.practice;
/**
 * Helper for screening parsed CSV rows.
 */
public class Utils {

    /**
     * Reports whether any field of a row, once trimmed and upper-cased, is "NA".
     * Despite the method name, nothing is replaced.
     *
     * @param strs the fields of one row; may be {@code null} or empty
     * @return {@code true} if at least one field is "NA", {@code false} otherwise
     */
    public static boolean replaceNAWithZero(String[] strs) {
        int remaining = (strs == null) ? 0 : strs.length;
        int position = 0;
        while (remaining > 0) {
            String candidate = strs[position].trim().toUpperCase();
            if (candidate.equals("NA")) {
                return true;
            }
            position++;
            remaining--;
        }
        return false;
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Daily weather observation made of three int measurements: prcp, tMax and tMin.
 * Serialized as those three ints in that order.
 */
public class Weather implements Writable {
    private int prcp;
    private int tMax;
    private int tMin;

    /** Creates an empty instance to be populated by {@link #readFields}. */
    public Weather() {
    }

    public Weather(int prcp, int tMax, int tMin) {
        this.prcp = prcp;
        this.tMax = tMax;
        this.tMin = tMin;
    }

    /** Writes prcp, tMax and tMin as three ints. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(prcp);
        dataOutput.writeInt(tMax);
        dataOutput.writeInt(tMin);
    }

    /**
     * Reads prcp, tMax and tMin in the order {@link #write} produced them.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        prcp = dataInput.readInt();
        tMax = dataInput.readInt();
        // The third int is tMin; it was previously stored into tMax, losing both values.
        tMin = dataInput.readInt();
    }

    /** Formats the observation as {@code prcp,tMax,tMin}. */
    @Override
    public String toString() {
        return this.prcp + "," + this.tMax + "," + this.tMin;
    }
}
package flightdelay1.join.practice;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite key of a date and an arrival delay.
 *
 * <p>Sorts by date ascending and, within the same date, by arrival delay descending.
 */
public class DateDelay implements WritableComparable<DateDelay> {
    public Datee datee;
    public int arrDelay;

    /** Creates an empty instance to be populated by {@link #readFields}. */
    public DateDelay() {
    }

    public DateDelay(Datee datee, int arrDelay) {
        this.datee = datee;
        this.arrDelay = arrDelay;
    }

    /** Writes the date followed by the arrival delay. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        datee.write(dataOutput);
        dataOutput.writeInt(arrDelay);
    }

    /** Replaces the date with a fresh instance and reads both parts back in write order. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        datee = new Datee();
        datee.readFields(dataInput);
        arrDelay = dataInput.readInt();
    }

    /**
     * Compares by date first; ties are broken so that the larger arrival delay sorts first.
     * Uses {@code Integer.compare} because subtracting two ints can overflow and flip the sign.
     */
    @Override
    public int compareTo(DateDelay o) {
        int response = this.datee.compareTo(o.datee);
        if (response == 0) {
            response = Integer.compare(o.arrDelay, this.arrDelay);
        }
        return response;
    }

    /** Two keys are equal when both the date and the arrival delay match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof DateDelay)) {
            return false;
        }
        DateDelay other = (DateDelay) o;
        boolean sameDate = (datee == null) ? other.datee == null : datee.equals(other.datee);
        return sameDate && arrDelay == other.arrDelay;
    }

    /**
     * Value-based hash consistent with {@link #equals}, so that equal keys hash alike
     * regardless of which object instance carries them.
     */
    @Override
    public int hashCode() {
        int dateHash = (datee == null) ? 0 : datee.hashCode();
        return 31 * dateHash + arrDelay;
    }

    /** Formats the key as the date's text, a comma, then the arrival delay. */
    @Override
    public String toString() {
        return this.datee + "," + this.arrDelay;
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class Task1 extends Configured implements Tool {
private static final String DESTINATION = "Dest";
/**
 * Map-side join of flight rows with a weather lookup table.
 *
 * <p>{@code setup} loads {@code sfo_weather.csv} from the task's working directory into a
 * date-to-weather map. {@code map} then emits one (DateDelay, DelayWeather) pair for every
 * flight row whose column 17 equals the configured destination and whose date has weather data.
 */
public static class DelayJoinMapper extends Mapper<LongWritable, Text, DateDelay, DelayWeather> {
    private Map<Datee, Weather> map = new HashMap<>();
    private String destination;

    /**
     * Reads the configured destination and loads the weather file.
     * Columns 1-3 of each weather row form the date and columns 4-6 the measurements.
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, DateDelay, DelayWeather>.Context context) throws IOException {
        destination = context.getConfiguration().get(DESTINATION);
        // try-with-resources closes the reader even when a row fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader("sfo_weather.csv"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] wStr = StringUtils.split(line, '\\', ',');
                // Skip blank or short rows instead of failing on a missing column.
                if (wStr == null || wStr.length < 7) {
                    continue;
                }
                if (wStr[1].equals("YEAR")) {
                    continue;
                }
                Datee datee = new Datee(Integer.parseInt(wStr[1]),
                        Integer.parseInt(wStr[2]),
                        Integer.parseInt(wStr[3]));
                Weather weather = new Weather(Integer.parseInt(wStr[4]),
                        Integer.parseInt(wStr[5]),
                        Integer.parseInt(wStr[6]));
                map.put(datee, weather);
            }
        }
    }

    /**
     * Joins one flight row with the weather of its date. The header row, rows for other
     * destinations, rows containing any "NA" field, rows that are too short, and rows whose
     * date has no weather entry produce no output.
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, DateDelay, DelayWeather>.Context context) throws IOException, InterruptedException {
        String[] delays = StringUtils.split(value.toString(), '\\', ',');
        // Column 17 is the highest index read below; shorter rows cannot be joined.
        if (delays == null || delays.length <= 17) {
            return;
        }
        if (delays[0].equals("Year")) {
            return;
        }
        if (!delays[17].trim().equals(destination)) {
            return;
        }
        // Despite its name, the helper only reports whether any field equals "NA".
        if (Utils.replaceNAWithZero(delays)) {
            return;
        }
        Datee datee = new Datee(Integer.parseInt(delays[0]),
                Integer.parseInt(delays[1]),
                Integer.parseInt(delays[2]));
        // A single lookup replaces the containsKey/get pair; setup never stores null values.
        Weather weather = map.get(datee);
        if (weather == null) {
            return;
        }
        DateDelay dateDelay = new DateDelay(datee, Integer.parseInt(delays[14]));
        FlightDelay flightDelay = new FlightDelay(
                Integer.parseInt(delays[4]),
                Integer.parseInt(delays[6]),
                delays[8],
                Integer.parseInt(delays[9]),
                Integer.parseInt(delays[11]),
                Integer.parseInt(delays[14]),
                Integer.parseInt(delays[15]),
                delays[16],
                delays[17]
        );
        DelayWeather delayWeather = new DelayWeather();
        delayWeather.flightDelay = flightDelay;
        delayWeather.weather = weather;
        context.write(dateDelay, delayWeather);
    }
}
/**
 * Pass-through reducer: every value received for a key is written out unchanged under that key.
 */
public static final class DelayJoinReducer extends Reducer<DateDelay, DelayWeather, DateDelay, DelayWeather> {
    @Override
    protected void reduce(DateDelay key, Iterable<DelayWeather> values, Reducer<DateDelay, DelayWeather, DateDelay, DelayWeather>.Context context) throws IOException, InterruptedException {
        for (DelayWeather joined : values) {
            context.write(key, joined);
        }
    }
}
// sudo date -s "2018-06-17 22:04:30"
/**
 * Command-line entry point. Runs the tool through {@code ToolRunner} and exits with the
 * status it returns, or with 1 when the run throws.
 *
 * @param args command-line arguments forwarded to {@code ToolRunner}
 */
public static void main(String[] args) {
    int result;
    try {
        result = ToolRunner.run(new Configuration(), new Task1(), args);
    } catch (Exception e) {
        e.printStackTrace();
        // A failed run must not report success (exit status 0) to the calling shell.
        result = 1;
    }
    System.exit(result);
}
@Override
public int run(String[]