1. ImageInputFormat
package org.example;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
/**
* @program: hadoop2
* @description: Image input format: extends FileInputFormat and overrides how records are read.
* The output key is the file name; the value is the bytes of the whole image.
* @author: Brooke
* @create: 2023-12-29 13:58
**/
public class ImageInputFormat extends FileInputFormat<Text, BytesWritable> {
/*
Override the method that creates the record reader and return our own RecordReader.
Because we need a custom reader, we define a new ImageRecordReader class below.
*/
@Override
public RecordReader<Text, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
return new ImageRecordReader();// the framework calls initialize() on this reader before it is used
}
/*
An image must be read as a whole file, so we never split it into multiple input splits.
*/
@Override
protected boolean isSplitable(JobContext context, Path filename) {
return false;
}
}
2. ImageRecordReader
package org.example;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
import java.io.InputStream;
/**
* @program: hadoop2
* @description: The output key is the file name; the value is the bytes of the whole image.
* @author: Brooke
* @create: 2023-12-29 14:04
**/
public class ImageRecordReader extends RecordReader<Text, BytesWritable> {
private FSDataInputStream in;// input stream for the image file
private String imageName;// image file name
private long splitLength;// length of the (whole-file) split, i.e. the image size in bytes
private boolean isHandle;// whether the single record has already been read
/*
initialize() does the setup work:
1. open an input stream for the split's file
2. remember the file name and the split length
*/
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
FileSplit fileSplit = (FileSplit) inputSplit;// cast the generic split to a FileSplit
// get the file system the split lives on
FileSystem fs = FileSystem.get(context.getConfiguration());
in = fs.open(fileSplit.getPath());// open the split's file as an input stream
imageName = fileSplit.getPath().getName();// remember the image file name
splitLength = fileSplit.getLength();// isSplitable() is false, so this is the whole file length
isHandle = false;
}
/*
Is there another key/value pair to read? Returning true means there is.
*/
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (isHandle){ // getCurrentValue() sets isHandle to true once the image has been read
return false; // no further key/value pairs
}
return true;
}
@Override
public Text getCurrentKey() throws IOException, InterruptedException {
// the key is simply the image file name
return new Text(imageName);
}
/*
The value is the raw bytes of the image.
*/
@Override
public BytesWritable getCurrentValue() throws IOException, InterruptedException {
// the whole file is a single split, so the split length is exactly the number of bytes to read
int length = (int) splitLength;
byte[] bytes = new byte[length];// buffer that will hold the whole image
// read `length` bytes from the input stream into the buffer
IOUtils.read(in, bytes, 0, length);
isHandle = true;// mark the single record as consumed
return new BytesWritable(bytes);
}
/*
Report the read progress: 1.0f once the single record has been read, 0 otherwise.
*/
@Override
public float getProgress() throws IOException, InterruptedException {
if (isHandle){ // reading is finished
return 1.0f;
}
return 0;
}
@Override
public void close() throws IOException {
in.close();
}
}
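For reference, here is a minimal local sanity check for the reader, assuming a Hadoop 2.x client on the classpath and a hypothetical test image at /tmp/sample.jpg. It drives ImageRecordReader the same way the framework does: initialize(), then the nextKeyValue()/getCurrentKey()/getCurrentValue() loop, then close().
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
public class ImageRecordReaderLocalCheck {
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Path image = new Path("/tmp/sample.jpg");// hypothetical test file
long length = image.getFileSystem(conf).getFileStatus(image).getLen();
FileSplit split = new FileSplit(image, 0, length, null);// one split covering the whole file
ImageRecordReader reader = new ImageRecordReader();
reader.initialize(split, new TaskAttemptContextImpl(conf, new TaskAttemptID()));
while (reader.nextKeyValue()) {// exactly one iteration per file
System.out.println(reader.getCurrentKey() + " -> " + reader.getCurrentValue().getLength() + " bytes");
}
reader.close();
}
}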
3. ImageJobMapper
package org.example;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @program: hadoop2
* @description: Identity mapper: forwards each <file name, image bytes> pair unchanged.
* @author: Brooke
* @create: 2023-12-29 15:14
**/
public class ImageJobMapper extends Mapper<Text, BytesWritable,Text,BytesWritable> {
/*
The map step performs no transformation; it simply forwards the key/value pair.
*/
@Override
protected void map(Text key, BytesWritable value, Mapper<Text, BytesWritable, Text, BytesWritable>.Context context) throws IOException, InterruptedException {
context.write(key,value);
}
}
4. ImageJobReducer
package org.example;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
/**
* @program: hadoop2
* @description: The input key/value pairs are the <image name, image bytes> records produced by ImageInputFormat.
* The output is <image name, image attribute bean>.
* @author: Brooke
* @create: 2023-12-29 15:24
**/
public class ImageJobReducer extends Reducer<Text, BytesWritable, Text, ImageBean> {
ByteArrayInputStream byteArrayInputStream;// stream that wraps the image bytes of the current record
@Override
protected void reduce(Text key, Iterable<BytesWritable> values, Reducer<Text, BytesWritable, Text, ImageBean>.Context context) throws IOException, InterruptedException {
// the key is still the image name; the value bytes are decoded back into an image to get its width and height
for (BytesWritable item : values) {
byte[] bytes = item.copyBytes();// copyBytes() trims the padding that getBytes() may carry beyond getLength()
// ByteArrayInputStream wraps a byte array so it can be read through the InputStream API;
// since every file name is unique, there is only one value per key
byteArrayInputStream = new ByteArrayInputStream(bytes);
}
// ImageIO decodes the stream into a BufferedImage
// BufferedImage is Java's in-memory image representation; it exposes the pixel data, width and height
BufferedImage bufferedImage = ImageIO.read(byteArrayInputStream);
ImageBean imageBean = new ImageBean(bufferedImage.getWidth(), bufferedImage.getHeight());// this bean is the output value
context.write(key,imageBean);
byteArrayInputStream.close();// close the stream
}
}
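The decode step used in the reducer can also be checked in isolation with plain Java, assuming a hypothetical local test image sample.png in the working directory:
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
public class DecodeCheck {
public static void main(String[] args) throws Exception {
byte[] bytes = Files.readAllBytes(Paths.get("sample.png"));// hypothetical test image
// same decode path as the reducer: byte[] -> ByteArrayInputStream -> BufferedImage
BufferedImage img = ImageIO.read(new ByteArrayInputStream(bytes));
System.out.println(img.getWidth() + " x " + img.getHeight());
}
}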
5. ImageBean
package org.example;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @program: hadoop2
* @description: Writable bean that carries an image's width and height.
* @author: Brooke
* @create: 2023-12-29 15:16
**/
public class ImageBean implements Writable {
private int width;
private int height;
public ImageBean() {
}
public ImageBean(int width, int height) {
this.width = width;
this.height = height;
}
@Override
public void write(DataOutput output) throws IOException {
output.writeInt(this.width);
output.writeInt(this.height);
}
@Override
public void readFields(DataInput input) throws IOException {
this.width = input.readInt();
this.height = input.readInt();
}
@Override
public String toString() {
return "ImageBean{" +
"width=" + width +
", height=" + height +
'}';
}
public int getWidth() {
return width;
}
public void setWidth(int width) {
this.width = width;
}
public int getHeight() {
return height;
}
public void setHeight(int height) {
this.height = height;
}
}
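A quick round-trip sketch (JDK only; the class name ImageBeanRoundTrip is made up for illustration) that confirms the Writable contract: write() and readFields() must serialize the two fields in the same order.
package org.example;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class ImageBeanRoundTrip {
public static void main(String[] args) throws Exception {
ImageBean original = new ImageBean(1920, 1080);
// serialize with write()
ByteArrayOutputStream bos = new ByteArrayOutputStream();
original.write(new DataOutputStream(bos));
// deserialize with readFields()
ImageBean copy = new ImageBean();
copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
System.out.println(copy);// ImageBean{width=1920, height=1080}
}
}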
6. ImageOutputFormat
package org.example;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* @program: hadoop2
* @description: Custom output format class.
* @author: Brooke
* @create: 2023-12-29 15:54
**/
public class ImageOutputFormat extends FileOutputFormat<Text,ImageBean> {
@Override
public RecordWriter<Text, ImageBean> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
// get the file system
FileSystem fs = FileSystem.get(taskAttemptContext.getConfiguration());
// open an output stream on the default work file (a temporary file under the output directory)
FSDataOutputStream outputStream = fs.create(super.getDefaultWorkFile(taskAttemptContext, ""));
// hand the stream to the RecordWriter, which writes the incoming records to it
ImageRecordWriter image = new ImageRecordWriter(outputStream);
return image;
}
}
7. ImageRecordWriter
package org.example;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
/**
* @program: hadoop2
* @description: Writes each <image name, ImageBean> record as one line of text.
* @author: Brooke
* @create: 2023-12-29 16:11
**/
public class ImageRecordWriter extends RecordWriter<Text,ImageBean> {
// writing to HDFS goes through an FSDataOutputStream
private FSDataOutputStream outputStream;
// the constructor receives the stream that ImageOutputFormat opened on the work file
public ImageRecordWriter(FSDataOutputStream outputStream) {
this.outputStream = outputStream;
}
/*
Write the file name and the ImageBean as one line to the work file.
*/
@Override
public void write(Text text, ImageBean imageBean) throws IOException, InterruptedException {
outputStream.write((text.toString() + "," + imageBean.toString() + "\n").getBytes(StandardCharsets.UTF_8));// plain UTF-8 text plus a newline; writeUTF() would prepend a binary length field and omit the line break
}
@Override
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
outputStream.close();
}
}
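To tie the pieces together, here is a driver sketch. The class name ImageJobDriver and the command-line input/output paths are assumptions; everything else uses the classes above. The mapper's output types must be declared explicitly because they differ from the job's final output types.
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ImageJobDriver {
public static void main(String[] args) throws Exception {
Job job = Job.getInstance(new Configuration(), "image-info");
job.setJarByClass(ImageJobDriver.class);
// custom input: one <file name, image bytes> record per image
job.setInputFormatClass(ImageInputFormat.class);
job.setMapperClass(ImageJobMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(BytesWritable.class);
// the reducer turns the bytes into an ImageBean; the custom output writes one line per image
job.setReducerClass(ImageJobReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(ImageBean.class);
job.setOutputFormatClass(ImageOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));// e.g. the directory holding the images
FileOutputFormat.setOutputPath(job, new Path(args[1]));// must not exist yet
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
With ImageRecordWriter as defined above, each line of the result file then looks like, for example, cat.jpg,ImageBean{width=1920, height=1080}.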