Poi读取word

 

1、需要的jar文件

a) 首先到apache官方网站下载poi资源 http://poi.apache.org/download.html

b) 下载完后解压

c) 读取写入word文件需要:poi-3.9-20121203.jar、poi-ooxml-3.9-20121203.jar、poi-scratchpad-3.9-20121203.jar、xmlbeans-2.3.0.jar

2、代码

a) 读取方式一:

package com.tp.word;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
 
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 
public class TestRdWord {
 
     public static void main(String[] args) {
 
          testWord("D:/test.doc");
     }
 
   public static void testWord(String wordPath) {
       try {
载入文档
                 POIFSFileSystem pfs = new POIFSFileSystem(in);
                HWPFDocument hwpf = new HWPFDocument(pfs);
得到文档的读取范围
                TableIterator it = new TableIterator(range);
迭代文档中的表格
              while (it.hasNext()) {
                     Table tb = (Table) it.next();
迭代行,默认从0开始
                    for (int i = 0; i < tb.numRows(); i++) {
                             TableRow tr = tb.getRow(i);
迭代列,默认从0开始
                             for (int j = 0; j < tr.numCells(); j++) {
取得单元格
取得单元格的内容
                                 for (int k = 0; k < td.numParagraphs(); k++) {
                                        Paragraph para = td.getParagraph(k);
                                        String s = para.text().trim();
                                        System.out.println(s);
                                   } // end for
                             } // end for
                      } // end for
                } // end while
          } catch (Exception e) {
              e.printStackTrace();
          }
       }// end method
 
}
b) 读取方式二:
package com.tp.word;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
 
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 
/**
 * Demo: extracts the whole text of a Word document with a text extractor.
 */
public class TestRdWord {

    public static void main(String[] args) {
        testWord1("D:/test.doc");
    }

    /**
     * Reads the Word 2003 (.doc) file at {@code filePath} with {@link WordExtractor}
     * and prints the extracted text to standard output. Any failure is reported
     * with {@code printStackTrace()} and otherwise ignored.
     *
     * @param filePath path of the .doc file to read
     */
    public static void testWord1(String filePath) {
        InputStream is = null;
        try {
            // Word 2003: images are not read
            is = new FileInputStream(new File(filePath));
            WordExtractor ex = new WordExtractor(is);
            String text2003 = ex.getText();
            System.out.println(text2003);
            // Word 2007: images are not read, and table data is placed at the end of the string
            /*OPCPackage opcPackage = POIXMLDocument.openPackage(filePath);
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            String text2007 = extractor.getText();
            System.out.println(text2007);*/

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the file handle whether or not extraction succeeded
            if (is != null) {
                try {
                    is.close();
                } catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

 

注:此方法读取 Word 2003(.doc)和 Word 2007(.docx)所用的类不一样:2003 使用 WordExtractor,2007 使用 XWPFWordExtractor(见上面代码中被注释掉的部分)。

 

3、注意:这里读取的是表格文件,即:

 

| 员工姓名 | 性别 | 员工号 | 座位号 | 电话号 | 端口号 | 门禁卡 | 性质 |

 

 

 

 

 

 

 

 

 

4、输出为:

a) 方法一输出:

员工姓名

性别

员工号

座位号

电话号

端口号

门禁卡

性质

 

b) 方法二输出:

 

 员工姓名性别  员工号座位号电话号端口号门禁卡性质

Poi写入word

1、jar文件和读取的相同

2、代码:

package com.tp.word;
 
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
 
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
 
public class TestWtWord {
 
public static void main(String [] args){
 
Map<String, String> map = new HashMap<String,String>();
 
map.put("姓名", "name");
map.put("性别", "sex");
map.put("员工号", "emp_no");
map.put("座位号", "site_no");
map.put("电话号", "tel_no");
map.put("端口号", "port_no");
map.put("门禁卡", "card_no");
 
HWPFDocument document = replaceDoc("D:/test.doc",map);
ByteArrayOutputStream ostream = new ByteArrayOutputStream();
        try {
            document.write(ostream);
输出word文件
            OutputStream outs=new FileOutputStream("D:/test.doc");
            outs.write(ostream.toByteArray());
            outs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
 
}
 
 
/**
读取word模板并替换变量
     * @param srcPath
     * @param map
     * @return
     */
    public static HWPFDocument replaceDoc(String srcPath, Map<String, String> map) {
        try {
读取word模板
            FileInputStream fis = new FileInputStream(new File(srcPath));
            HWPFDocument doc = new HWPFDocument(fis);
读取word文本内容
            Range bodyRange = doc.getRange();
替换文本内容
            for (Map.Entry<String, String> entry : map.entrySet()) {
            
                bodyRange.replaceText(entry.getKey(), entry.getValue());
            }
            return doc;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
 
}

这里的原理实际上是:先读取源文件,替换其中的内容,再把新的内容输出到原来的路径,覆盖原文件,这样就相当于修改了文件的内容。如果不想覆盖源文件,只需把输出路径改成另一个文件名即可。