1. 抽取 Word doc 文件中的图片
1 package parse;
2
3 import java.io.*;
4 import java.util.*;
5
6
7 import org.apache.poi.hwpf.HWPFDocument;
8 import org.apache.poi.hwpf.model.PicturesTable;
9 import org.apache.poi.hwpf.usermodel.CharacterRun;
10 import org.apache.poi.hwpf.usermodel.Picture;
11 import org.apache.poi.hwpf.usermodel.Range;
12
13 public class ReadImgDoc {
14
15 public static void main(String[] args) throws Exception {
16 new ReadImgDoc().readPicture("E:\\上海项目测试\\文档\\模板.doc");
17 }
18
19 private void readPicture(String path)throws Exception{
20 FileInputStream in=new FileInputStream(new File(path));
21 HWPFDocument doc=new HWPFDocument(in);
22 int length=doc.characterLength();
23 PicturesTable pTable=doc.getPicturesTable();
24 // int TitleLength=doc.getSummaryInformation().getTitle().length();
25
26 // System.out.println(TitleLength);
27 // System.out.println(length);
28 for (int i=0;i<length;i++){
29 Range range=new Range(i, i+1,doc);
30
31 CharacterRun cr=range.getCharacterRun(0);
32 if(pTable.hasPicture(cr)){
33 Picture pic=pTable.extractPicture(cr, false);
34 String afileName=pic.suggestFullFileName();
35 OutputStream out=new FileOutputStream(new File("E:\\上海项目测试\\docImage\\"+UUID.randomUUID()+afileName));
36 pic.writeImageContent(out);
37
38 }
39 }
40
41 }
42
43 }
2. 抽取 Word docx 文件中的图片
1 package parse;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.util.List;
8
9 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
10 import org.apache.poi.xwpf.usermodel.XWPFDocument;
11 import org.apache.poi.xwpf.usermodel.XWPFPictureData;
12
13 public class GetPicsDocx {
14 public static void main(String[] args) {
15 String path ="E:\\上海项目测试\\文档\\35.docx";
16 File file = new File(path);
17 try {
18 FileInputStream fis = new FileInputStream(file);
19 XWPFDocument document = new XWPFDocument(fis);
20 XWPFWordExtractor xwpfWordExtractor = new XWPFWordExtractor(document);
21 String text = xwpfWordExtractor.getText();
22 System.out.println(text);
23 List<XWPFPictureData> picList = document.getAllPictures();
24 for (XWPFPictureData pic : picList) {
25 System.out.println(pic.getPictureType() + file.separator + pic.suggestFileExtension()
26 +file.separator+pic.getFileName());
27 byte[] bytev = pic.getData();
28 FileOutputStream fos = new FileOutputStream("E:\\上海项目测试\\docxImage\\"+pic.getFileName());
29 fos.write(bytev);
30 }
31 fis.close();
32 } catch (IOException e) {
33 e.printStackTrace();
34 }
35 }
36 }