数据
数据一
字段解释:年,月,日,小时,温度,湿度,气压,风向,风速,天气情况,1h降雨量,6h降雨量
数据二
0,cloudless
1,cumulus
2,cumulonimbus
3,stratocumulus
4,stratus
5,nimbostratus
6,altostratus
7,altocumulus
8,Cirrus
9,stratocirrus
10,cirrocumulus
字段解释:id,天气状况
需求及实现
需求
- 将分割符由一个或多个空格转换成逗号
- 清除不合法数据:字段长度不足,风向不在[0,360]的,风速为负的,气压为负的,天气情况不在[0,10],湿度不在[0,100],温度不在[-40,50]的数据
- 将数据一与数据二的数据以天气情况进行join操作,把天气情况变为其对应的云属;
- 对进入同一个分区的数据排序。排序规则:(1) 以同年同月同日作为分组 key;(2) 同一天内按温度升序;(3) 温度相同则按风速升序;(4) 风速也相同则按气压降序
解析
- 需求一的解决方法:在 Mapper 中用正则表达式 `\s+`(即 `split("\\s+")`)按一个或多个空白字符切分每一行,输出时由 `Data.toString()` 用逗号重新拼接各字段
- 需求二就是判断语句
- 需求三:通过分布式缓存(`job.addCacheFile`)把数据二分发给 Mapper,在 `setup` 阶段读入内存中的 Map,再在 `map` 阶段按天气情况 id 查表完成 join(map 端 join)
- 需求四在自定义类的时候,定义排序规则,然后自定义分组
自定义的类
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * One hourly weather observation, used as the MapReduce key.
 *
 * <p>Ordering ({@link #compareTo(Data)}): records are ordered by calendar day
 * (year, month, day) first so that all observations of one day are adjacent,
 * then by temperature ascending, then by wind speed ascending, then by
 * pressure descending. Numeric fields that are stored as text are compared by
 * their numeric value, not lexicographically.
 *
 * <p>{@link #hashCode()} depends only on the calendar day so that a hash-based
 * partitioner sends every record of the same day to the same partition.
 * Note that {@link #equals(Object)} compares every field and is therefore
 * stricter than {@code compareTo(...) == 0}.
 */
public class Data implements WritableComparable<Data> {
    // Year
    private String year;
    // Month
    private String month;
    // Day of month
    private String day;
    // Hour of day
    private String hour;
    // Temperature
    private String temperature;
    // Humidity
    private String dew;
    // Air pressure
    private int pressure;
    // Wind direction
    private String wind_direction;
    // Wind speed
    private String wind_speed;
    // Sky condition
    private String sky_condition;
    // Rainfall over 1 hour
    private String rain_1h;
    // Rainfall over 6 hours
    private String rain_6h;

    /**
     * Orders by day (year, month, day), then temperature ascending, then wind
     * speed ascending, then pressure descending.
     *
     * @param o the record to compare against
     * @return negative, zero or positive per the {@link Comparable} contract
     */
    @Override
    public int compareTo(Data o) {
        int c = compareNumeric(this.year, o.year);
        if (c != 0) {
            return c;
        }
        c = compareNumeric(this.month, o.month);
        if (c != 0) {
            return c;
        }
        c = compareNumeric(this.day, o.day);
        if (c != 0) {
            return c;
        }
        c = compareNumeric(this.temperature, o.temperature);
        if (c != 0) {
            return c;
        }
        c = compareNumeric(this.wind_speed, o.wind_speed);
        if (c != 0) {
            return c;
        }
        // Descending pressure; Integer.compare avoids the overflow that
        // plain subtraction can produce for values of opposite sign.
        return Integer.compare(o.pressure, this.pressure);
    }

    /**
     * Compares two text fields by numeric value. Values that cannot be parsed
     * as numbers sort after all numeric values and are ordered among
     * themselves as plain strings ({@code null} last), which keeps the
     * ordering total even for dirty input.
     */
    private static int compareNumeric(String a, String b) {
        Double x = parseOrNull(a);
        Double y = parseOrNull(b);
        if (x != null && y != null) {
            return Double.compare(x.doubleValue(), y.doubleValue());
        }
        if (x != null) {
            return -1;
        }
        if (y != null) {
            return 1;
        }
        if (a == null || b == null) {
            if (a == null && b == null) {
                return 0;
            }
            return a == null ? 1 : -1;
        }
        return a.compareTo(b);
    }

    /** Parses {@code s} as a number, or returns {@code null} if it is absent or malformed. */
    private static Double parseOrNull(String s) {
        if (s == null) {
            return null;
        }
        try {
            return Double.valueOf(s.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Hash of the calendar day only, stable across JVMs. */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(year, month, day);
    }

    /** Two records are equal when every field is equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Data)) {
            return false;
        }
        Data other = (Data) obj;
        return pressure == other.pressure
                && java.util.Objects.equals(year, other.year)
                && java.util.Objects.equals(month, other.month)
                && java.util.Objects.equals(day, other.day)
                && java.util.Objects.equals(hour, other.hour)
                && java.util.Objects.equals(temperature, other.temperature)
                && java.util.Objects.equals(dew, other.dew)
                && java.util.Objects.equals(wind_direction, other.wind_direction)
                && java.util.Objects.equals(wind_speed, other.wind_speed)
                && java.util.Objects.equals(sky_condition, other.sky_condition)
                && java.util.Objects.equals(rain_1h, other.rain_1h)
                && java.util.Objects.equals(rain_6h, other.rain_6h);
    }

    /** Serializes all fields; the order must match {@link #readFields(DataInput)}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(year);
        dataOutput.writeUTF(month);
        dataOutput.writeUTF(day);
        dataOutput.writeUTF(hour);
        dataOutput.writeUTF(temperature);
        dataOutput.writeUTF(dew);
        dataOutput.writeInt(pressure);
        dataOutput.writeUTF(wind_direction);
        dataOutput.writeUTF(wind_speed);
        dataOutput.writeUTF(sky_condition);
        dataOutput.writeUTF(rain_1h);
        dataOutput.writeUTF(rain_6h);
    }

    /** Deserializes all fields in the order written by {@link #write(DataOutput)}. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year = dataInput.readUTF();
        month = dataInput.readUTF();
        day = dataInput.readUTF();
        hour = dataInput.readUTF();
        temperature = dataInput.readUTF();
        dew = dataInput.readUTF();
        pressure = dataInput.readInt();
        wind_direction = dataInput.readUTF();
        wind_speed = dataInput.readUTF();
        sky_condition = dataInput.readUTF();
        rain_1h = dataInput.readUTF();
        rain_6h = dataInput.readUTF();
    }

    /** Renders the record as one comma-separated line, fields in declaration order. */
    @Override
    public String toString() {
        return year + "," + month + "," + day + "," + hour + "," + temperature + "," + dew + "," + pressure + ","
                + wind_direction + "," + wind_speed + "," + sky_condition + "," + rain_1h + "," + rain_6h;
    }

    /** Sets every field at once, allowing one instance to be reused across records. */
    public void set(String year, String month, String day, String hour, String temperature, String dew, int pressure, String wind_direction, String wind_speed, String sky_condition, String rain_1h, String rain_6h) {
        this.year = year;
        this.month = month;
        this.day = day;
        this.hour = hour;
        this.temperature = temperature;
        this.dew = dew;
        this.pressure = pressure;
        this.wind_direction = wind_direction;
        this.wind_speed = wind_speed;
        this.sky_condition = sky_condition;
        this.rain_1h = rain_1h;
        this.rain_6h = rain_6h;
    }

    public String getYear() {
        return year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public String getMonth() {
        return month;
    }

    public void setMonth(String month) {
        this.month = month;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getHour() {
        return hour;
    }

    public void setHour(String hour) {
        this.hour = hour;
    }

    public String getTemperature() {
        return temperature;
    }

    public void setTemperature(String temperature) {
        this.temperature = temperature;
    }

    public String getDew() {
        return dew;
    }

    public void setDew(String dew) {
        this.dew = dew;
    }

    public int getPressure() {
        return pressure;
    }

    public void setPressure(int pressure) {
        this.pressure = pressure;
    }

    public String getWind_direction() {
        return wind_direction;
    }

    public void setWind_direction(String wind_direction) {
        this.wind_direction = wind_direction;
    }

    public String getWind_speed() {
        return wind_speed;
    }

    public void setWind_speed(String wind_speed) {
        this.wind_speed = wind_speed;
    }

    public String getSky_condition() {
        return sky_condition;
    }

    public void setSky_condition(String sky_condition) {
        this.sky_condition = sky_condition;
    }

    public String getRain_1h() {
        return rain_1h;
    }

    public void setRain_1h(String rain_1h) {
        this.rain_1h = rain_1h;
    }

    public String getRain_6h() {
        return rain_6h;
    }

    public void setRain_6h(String rain_6h) {
        this.rain_6h = rain_6h;
    }
}
Mapper阶段
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
/**
 * Mapper that cleans the raw weather records and joins them with the
 * sky-condition lookup table (map-side join).
 *
 * <p>{@link #setup} loads the cached "id,name" lookup file into memory.
 * {@link #map} splits each input line on whitespace, drops invalid records,
 * replaces the sky-condition id with its name and emits the record as key.
 */
public class MapTest extends Mapper<LongWritable, Text, Data, NullWritable> {
    // Reused output key; its fields are overwritten for every record.
    Data k = new Data();
    // Sky-condition id -> name, filled once in setup().
    Map<String, String> sky_status = new HashMap<String, String>();
    // Name looked up for the record currently being mapped.
    String status;

    /**
     * Reads the first cache file into {@link #sky_status}.
     *
     * @throws IOException if no cache file was registered or it cannot be read
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] uris = context.getCacheFiles();
        if (uris == null || uris.length == 0) {
            throw new IOException("No cache file registered for the sky-condition lookup table");
        }
        File file = new File(uris[0]);
        // try-with-resources so the reader is closed even if reading fails.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(",");
                if (parts.length < 2) {
                    // Blank or malformed lookup line: nothing to join on.
                    continue;
                }
                sky_status.put(parts[0].trim(), parts[1].trim());
            }
        }
    }

    /**
     * Validates one raw record and emits it with the sky-condition name filled in.
     *
     * <p>A record is silently dropped when it does not have exactly 12 fields,
     * when a checked field is not an integer, when wind direction is outside
     * [0,360], wind speed or pressure is negative, sky condition is outside
     * [0,10], humidity is outside [0,100], temperature is outside [-40,50],
     * or when the sky-condition id has no entry in the lookup table.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim first: a leading space would otherwise yield an empty first field.
        String[] datas = value.toString().trim().split("\\s+");
        if (datas.length != 12) {
            return;
        }

        int temperature;
        int humidity;
        int pressure;
        int windDirection;
        int windSpeed;
        int skyId;
        try {
            temperature = Integer.parseInt(datas[4]);
            humidity = Integer.parseInt(datas[5]);
            pressure = Integer.parseInt(datas[6]);
            windDirection = Integer.parseInt(datas[7]);
            windSpeed = Integer.parseInt(datas[8]);
            skyId = Integer.parseInt(datas[9]);
        } catch (NumberFormatException e) {
            // A non-numeric field makes the record invalid; skip it instead of failing the task.
            return;
        }

        if (windDirection < 0 || windDirection > 360
                || windSpeed < 0
                || pressure < 0
                || skyId < 0 || skyId > 10
                || humidity < 0 || humidity > 100
                || temperature < -40 || temperature > 50) {
            return;
        }

        // Look up by the parsed id so that forms such as "05" still match key "5".
        status = sky_status.get(String.valueOf(skyId));
        if (status == null) {
            // No matching lookup row: the join has no result for this record.
            return;
        }

        k.set(datas[0], datas[1], datas[2], datas[3], datas[4], datas[5], pressure, datas[7], datas[8], status, datas[10], datas[11]);
        context.write(k, NullWritable.get());
    }
}
自定义分组
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator that treats two {@code Data} keys as the same group
 * when they fall on the same calendar day (year, month and day all equal).
 *
 * <p>The three fields are compared one by one rather than as a concatenated
 * string, because concatenation is ambiguous: year "2020", month "1", day "11"
 * and year "2020", month "11", day "1" both concatenate to "2020111".
 */
public class Group extends WritableComparator {
    /**
     * @return 0 when both keys are on the same day, otherwise the sign of the
     *         first differing field among year, month, day
     */
    @Override
    @SuppressWarnings("rawtypes") // signature is dictated by WritableComparator
    public int compare(WritableComparable a, WritableComparable b) {
        Data d1 = (Data) a;
        Data d2 = (Data) b;
        int c = compareNumeric(d1.getYear(), d2.getYear());
        if (c != 0) {
            return c;
        }
        c = compareNumeric(d1.getMonth(), d2.getMonth());
        if (c != 0) {
            return c;
        }
        return compareNumeric(d1.getDay(), d2.getDay());
    }

    /**
     * Compares two text fields by numeric value. Values that cannot be parsed
     * as numbers sort after numeric ones and are ordered among themselves as
     * plain strings ({@code null} last).
     */
    private static int compareNumeric(String a, String b) {
        Double x = parseOrNull(a);
        Double y = parseOrNull(b);
        if (x != null && y != null) {
            return Double.compare(x.doubleValue(), y.doubleValue());
        }
        if (x != null) {
            return -1;
        }
        if (y != null) {
            return 1;
        }
        if (a == null || b == null) {
            if (a == null && b == null) {
                return 0;
            }
            return a == null ? 1 : -1;
        }
        return a.compareTo(b);
    }

    /** Parses {@code s} as a number, or returns {@code null} if it is absent or malformed. */
    private static Double parseOrNull(String s) {
        if (s == null) {
            return null;
        }
        try {
            return Double.valueOf(s.trim());
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Registers {@code Data} as the key class and asks the base class to create key instances. */
    protected Group() {
        super(Data.class, true);
    }
}
Reduce阶段
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that writes the key once for every value in the group, so that no
 * record is lost when several keys are grouped together.
 */
public class RedTest extends Reducer<Data, NullWritable, Data, NullWritable> {
    /**
     * Emits {@code key} paired with the {@code NullWritable} singleton once
     * per element of {@code values}.
     */
    @Override
    protected void reduce(Data key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        java.util.Iterator<NullWritable> remaining = values.iterator();
        while (remaining.hasNext()) {
            // The value itself carries no data; advancing the iterator is what matters.
            remaining.next();
            context.write(key, NullWritable.get());
        }
    }
}
Driver阶段
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.File;
import java.net.URI;
/**
 * Job driver: clears any previous output directory, then configures and runs
 * the weather-data MapReduce job against fixed local paths.
 */
public class DriTest {
    // Local paths used by the job.
    private static final String OUTPUT_DIR = "D:\\MP\\气象数据\\output";
    private static final String INPUT_FILE = "D:\\MP\\气象数据\\input\\data.txt";
    private static final String SKY_CACHE_URI = "file:///D:/MP/气象数据/input/sky.txt";

    /** Removes the existing output directory, if any, and launches the job. */
    public static void main(String[] args) throws Exception {
        File previousOutput = new File(OUTPUT_DIR);
        if (previousOutput.exists()) {
            delFile(previousOutput);
        }
        driver();
    }

    /**
     * Deletes {@code file}; when it is a directory its contents are deleted
     * first, depth-first.
     *
     * @param file the file or directory to remove
     */
    public static void delFile(File file) {
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                delFile(child);
            }
        }
        file.delete();
    }

    /**
     * Builds the job (mapper, reducer, key/value classes, cache file, grouping
     * comparator, input and output paths), waits for it to finish and exits
     * the JVM with status 0 on success or 1 on failure.
     */
    public static void driver() throws Exception {
        Job job = Job.getInstance(new Configuration());

        job.setMapperClass(MapTest.class);
        job.setJarByClass(DriTest.class);
        job.setReducerClass(RedTest.class);

        job.setMapOutputKeyClass(Data.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Data.class);
        job.setOutputValueClass(NullWritable.class);

        job.addCacheFile(new URI(SKY_CACHE_URI));
        job.setGroupingComparatorClass(Group.class);

        FileInputFormat.setInputPaths(job, INPUT_FILE);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_DIR));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}