启动HDFS集群,集群示意图如下:hadoop-01作为NameNode同时也作为DataNode hadoop-02作为DataNode
使用Java客户端来对HDFS文件系统进行操作管理,首先建立工程,导入所需Jar包,主要包括hadoop/share/hadoop目录下common包以及hdfs包下的jar包。
使用Java客户端主要使用FileSystem对象来对HDFS文件系统进行操作。
1.获取Configuration对象来进行客户端的必要配置
// Shared HDFS client handle; populated before each test by init().
FileSystem fs = null;

/**
 * Builds the client-side configuration and connects to the NameNode's
 * RPC endpoint as HDFS user "root".
 */
@Before
public void init() throws Exception {
    Configuration clientConf = new Configuration();
    // replica count for files this client writes
    clientConf.set("dfs.replication", "2");
    // block size for files this client writes
    clientConf.set("dfs.blocksize", "64m");
    fs = FileSystem.get(new URI("hdfs://hadoop-01:9000/"), clientConf, "root");
}
Configuration对象按如下顺序加载配置:先加载core-default.xml,再加载core-site.xml(若classpath中存在已过时的hadoop-site.xml,也会被加载并打印警告),最后叠加set方法显式设置的选项,从而得到最终的配置对象。
有关HDFS的默认参数配置地址如下:
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
Configuration类初始化静态块源码
// Static initializer of org.apache.hadoop.conf.Configuration: registers the
// default resource files that every Configuration instance loads, in order.
static{
//print deprecation warning if hadoop-site.xml is found in classpath
// Prefer the thread's context class loader so resources on the
// application classpath are found; fall back to this class's loader.
ClassLoader cL = Thread.currentThread().getContextClassLoader();
if (cL == null) {
cL = Configuration.class.getClassLoader();
}
if(cL.getResource("hadoop-site.xml")!=null) {
LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " +
"Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, "
+ "mapred-site.xml and hdfs-site.xml to override properties of " +
"core-default.xml, mapred-default.xml and hdfs-default.xml " +
"respectively");
}
// Defaults are loaded first, then site overrides — later resources win.
addDefaultResource("core-default.xml");
addDefaultResource("core-site.xml");
}
2.通过get方法获取FileSystem对象
URI传入NameNode的RPC通信地址,Conf配置对象,User为HDFS用户
FileSystem fs = FileSystem.get(new URI("hdfs://hadoop-01:9000/"), conf, "root");
3.使用FileSystem API来进行文件系统的管理操作
将本地文件上传至HDFS中
/**
 * Uploads a file from the local disk into an HDFS directory.
 */
@Test
public void testPut() throws Exception {
    Path localSrc = new Path("F:/hadoop-2.8.1/file.txt");
    Path hdfsDst = new Path("/chen/wen");
    fs.copyFromLocalFile(localSrc, hdfsDst);
    System.out.println("put success");
    fs.close();
}
将HDFS中文件下载到本地
/**
 * Downloads a file from HDFS to the client's local disk.
 *
 * @throws IOException if the transfer fails
 * @throws IllegalArgumentException if a path is invalid
 */
@Test
public void testGet() throws IllegalArgumentException, IOException {
    Path hdfsSrc = new Path("/chen/wen/spring-data-jpa-reference-documentation.pdf");
    Path localDst = new Path("F:/");
    fs.copyToLocalFile(hdfsSrc, localDst);
    fs.close();
}
在HDFS中移动文件
/**
 * Moves/renames a file inside HDFS.
 *
 * <p>{@code FileSystem.rename} reports failure through its boolean return
 * value rather than an exception; the original ignored it, so a failed
 * move passed silently. The result is now checked.
 */
@Test
public void testRename() throws Exception {
    try {
        boolean moved = fs.rename(
                new Path("/chen/wen/spring-data-jpa-reference-documentation.pdf"),
                new Path("/chen/spring-data-jpa-reference-documentation.pdf"));
        if (!moved) {
            throw new IOException("rename failed");
        }
    } finally {
        // close the client even when the assertion above throws
        fs.close();
    }
}
在HDFS创建文件/夹 删除文件/夹
/**
 * Creates a directory (with any missing parents) in HDFS.
 *
 * <p>{@code mkdirs} signals failure via its boolean return value; the
 * original discarded it, hiding failures. The result is now checked.
 */
@Test
public void testMkdir() throws Exception {
    try {
        if (!fs.mkdirs(new Path("/chen/wen/kkk"))) {
            throw new IOException("mkdir failed: /chen/wen/kkk");
        }
    } finally {
        // close the client even when the check above throws
        fs.close();
    }
}
/**
 * Recursively deletes a file or directory tree in HDFS.
 *
 * <p>{@code delete} returns false on failure instead of throwing; the
 * original ignored that, so a failed delete passed silently. The result
 * is now checked.
 */
@Test
public void testRm() throws Exception {
    try {
        // true = recursive: required when the path is a non-empty directory
        if (!fs.delete(new Path("/chen"), true)) {
            throw new IOException("delete failed: /chen");
        }
    } finally {
        // close the client even when the check above throws
        fs.close();
    }
}
查询指定目录下的文件信息
/**
 * Recursively lists the file entries (directories are skipped) under the
 * HDFS root and prints each file's metadata, including block locations.
 */
@Test
public void testLs() throws Exception {
    // listFiles yields files only, never directories; true = recurse
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        System.out.println("文件全路径:" + file.getPath());
        System.out.println("块大小:" + file.getBlockSize());
        System.out.println("文件长度:" + file.getLen());
        System.out.println("副本数量:" + file.getReplication());
        System.out.println("块信息:" + Arrays.toString(file.getBlockLocations()));
        System.out.println("--------------------------------");
    }
    fs.close();
}
查看指定目录下的文件以及文件夹信息
/**
 * Lists both files and directories directly under the HDFS root and
 * prints each entry's metadata.
 */
@Test
public void testLs2() throws Exception {
    FileStatus[] entries = fs.listStatus(new Path("/"));
    for (FileStatus entry : entries) {
        System.out.println("文件全路径:" + entry.getPath());
        System.out.println(entry.isDirectory() ? "这是文件夹" : "这是文件");
        System.out.println("块大小:" + entry.getBlockSize());
        System.out.println("文件长度:" + entry.getLen());
        System.out.println("副本数量:" + entry.getReplication());
        System.out.println("--------------------------------");
    }
    fs.close();
}
利用输入流读取HDFS中文件内容
/**
 * Streams a text file out of HDFS and prints it line by line.
 *
 * <p>Uses try-with-resources so the reader and the underlying HDFS input
 * stream are closed even if reading throws — the original leaked both on
 * failure.
 *
 * @throws IOException if opening or reading the file fails
 * @throws IllegalArgumentException if the path is invalid
 */
@Test
public void testReadData() throws IllegalArgumentException, IOException {
    try (FSDataInputStream in = fs.open(new Path("/chen/wen/file.txt"));
         BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
        String line;
        while ((line = br.readLine()) != null) {
            System.out.println(line);
        }
    }
    fs.close();
}
利用输入流读取指定起始位置指定偏移量的HDFS内容
/**
 * Reads up to 100 bytes starting at offset 10 of an HDFS file.
 *
 * <p>Fixes two defects in the original: the return value of {@code read}
 * was ignored (a short read or EOF printed stale zero bytes from the
 * buffer), and the stream leaked if reading threw.
 *
 * @throws IOException if opening, seeking or reading fails
 * @throws IllegalArgumentException if the path is invalid
 */
@Test
public void testRandomReadData() throws IllegalArgumentException, IOException {
    try (FSDataInputStream in = fs.open(new Path("/chen/wen/file.txt"))) {
        // position the stream at the desired start offset
        in.seek(10);
        byte[] buf = new byte[100];
        // read may return fewer than 100 bytes, or -1 at EOF
        int n = in.read(buf);
        if (n > 0) {
            // convert only the bytes actually read
            System.out.println(new String(buf, 0, n));
        }
    }
    fs.close();
}
利用输出流往HDFS中文件写入内容
/**
 * Copies a local file into a new HDFS file through an output stream.
 *
 * <p>try-with-resources guarantees both streams are closed even when the
 * copy fails part-way — the original leaked them on exception. Resources
 * close in reverse declaration order (input first, then output), matching
 * the original close order.
 *
 * @throws IOException if reading the local file or writing to HDFS fails
 * @throws IllegalArgumentException if the path is invalid
 */
@Test
public void testWriteData() throws IllegalArgumentException, IOException {
    // create(path, false): throw instead of overwriting an existing file
    try (FSDataOutputStream out = fs.create(new Path("/chen/wen/test.xml"), false);
         FileInputStream in = new FileInputStream("F:/settings.xml")) {
        byte[] buf = new byte[1024];
        int read;
        while ((read = in.read(buf)) != -1) {
            out.write(buf, 0, read);
        }
    }
    fs.close();
}
4.HDFS上传文件源码解析
fs.copyFromLocalFile(new Path("F:/hadoop-2.8.1/file.txt"), new Path("/chen/wen"));
依次进入解析:
/**
 * The src file is on the local disk. Add it to FS at
 * the given dst name and the source is kept intact afterwards
 * @param src path
 * @param dst path
 */
public void copyFromLocalFile(Path src, Path dst)
throws IOException {
// delSrc=false: keep the local source file after the upload
copyFromLocalFile(false, src, dst);
}
// Convenience overload: delegates with overwrite=true, so an existing
// destination file is replaced.
public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
throws IOException {
copyFromLocalFile(delSrc, true, src, dst);
}
/**
 * The src file is on the local disk. Add it to FS at
 * the given dst name.
 * delSrc indicates if the source should be removed
 * @param delSrc whether to delete the src
 * @param overwrite whether to overwrite an existing file
 * @param src path
 * @param dst path
 */
public void copyFromLocalFile(boolean delSrc, boolean overwrite,
Path src, Path dst)
throws IOException {
Configuration conf = getConf();
// getLocal(conf) is the local FileSystem; copy local src -> this FS (dst)
FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf);
}
使用FileUtil copy方法来实现文件上传
/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, Path src,
FileSystem dstFS, Path dst,
boolean deleteSource,
boolean overwrite,
Configuration conf) throws IOException {
// resolve the source's FileStatus once, then hand off to the core copy
FileStatus fileStatus = srcFS.getFileStatus(src);
return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf);
}
核心copy方法
/** Copy files between FileSystems. */
public static boolean copy(FileSystem srcFS, FileStatus srcStatus,
FileSystem dstFS, Path dst,
boolean deleteSource,
boolean overwrite,
Configuration conf) throws IOException {
Path src = srcStatus.getPath();
// validate/normalize the destination path (applies overwrite semantics)
dst = checkDest(src.getName(), dstFS, dst, overwrite);
if (srcStatus.isDirectory()) {
// directory: verify dst is not nested under src, then copy children recursively
checkDependencies(srcFS, src, dstFS, dst);
if (!dstFS.mkdirs(dst)) {
return false;
}
FileStatus contents[] = srcFS.listStatus(src);
for (int i = 0; i < contents.length; i++) {
copy(srcFS, contents[i], dstFS,
new Path(dst, contents[i].getPath().getName()),
deleteSource, overwrite, conf);
}
} else {
// plain file: stream the bytes from the source FS into the destination FS
InputStream in=null;
OutputStream out = null;
try {
in = srcFS.open(src);
out = dstFS.create(dst, overwrite);
// final argument true: close both streams when the transfer finishes
IOUtils.copyBytes(in, out, conf, true);
} catch (IOException e) {
// close quietly on failure, then rethrow the original error
IOUtils.closeStream(out);
IOUtils.closeStream(in);
throw e;
}
}
if (deleteSource) {
// "move" semantics: remove the source tree after a successful copy
return srcFS.delete(src, true);
} else {
return true;
}
}
如果待上传的文件是目录,则递归调用copy实现上传;如果不是目录,则直接使用IOUtils的copyBytes方法完成上传。
输入流来读取待上传文件
/**
 * Opens an FSDataInputStream at the indicated Path.
 * @param f the file to open
 */
public FSDataInputStream open(Path f) throws IOException {
// buffer size comes from io.file.buffer.size, defaulting to 4096 bytes
return open(f, getConf().getInt("io.file.buffer.size", 4096));
}
/**
 * Opens an FSDataInputStream at the indicated Path.
 * @param f the file name to open
 * @param bufferSize the size of the buffer to be used.
 */
// abstract: each concrete FileSystem supplies its own implementation
public abstract FSDataInputStream open(Path f, int bufferSize)
throws IOException;
利用输出流来传输文件至HDFS,输出流的构造实现了文件的切分分块
/**
 * Create an FSDataOutputStream at the indicated Path.
 * @param f the file to create
 * @param overwrite if a file with this name already exists, then if true,
 * the file will be overwritten, and if false an exception will be thrown.
 */
public FSDataOutputStream create(Path f, boolean overwrite)
throws IOException {
// fill in buffer size, replication and block size from the configuration
return create(f, overwrite,
getConf().getInt("io.file.buffer.size", 4096),
getDefaultReplication(f),
getDefaultBlockSize(f));
}
/**
 * Create an FSDataOutputStream at the indicated Path.
 * @param f the file name to open
 * @param overwrite if a file with this name already exists, then if true,
 * the file will be overwritten, and if false an error will be thrown.
 * @param bufferSize the size of the buffer to be used.
 * @param replication required block replication for the file.
 */
public FSDataOutputStream create(Path f,
boolean overwrite,
int bufferSize,
short replication,
long blockSize
) throws IOException {
// delegate with a null Progressable (no write-progress reporting)
return create(f, overwrite, bufferSize, replication, blockSize, null);
}
/**
 * Create an FSDataOutputStream at the indicated Path with write-progress
 * reporting.
 * @param f the file name to open
 * @param overwrite if a file with this name already exists, then if true,
 * the file will be overwritten, and if false an error will be thrown.
 * @param bufferSize the size of the buffer to be used.
 * @param replication required block replication for the file.
 */
public FSDataOutputStream create(Path f,
boolean overwrite,
int bufferSize,
short replication,
long blockSize,
Progressable progress
) throws IOException {
// derive the file's permission from the default mask and configured umask
return this.create(f, FsPermission.getFileDefault().applyUMask(
FsPermission.getUMask(getConf())), overwrite, bufferSize,
replication, blockSize, progress);
}