1.说明
基于在线 WPS 的字数统计功能,可以实现对复杂 Word 文档的字数统计。
2.代码
/**
 * Prints the word count of a hard-coded Word document to standard output.
 *
 * <p>The printed number is the count of Chinese characters and Chinese punctuation marks
 * plus the count of space-separated runs of ASCII letters, digits and punctuation.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the document cannot be opened or parsed; failures propagate so the
 *     process ends with a non-zero status instead of silently printing nothing
 */
public static void main(String[] args) throws Exception {
    String fileName = "d:\\Users\\ckj\\Desktop\\BPM\\demo.docx";
    // NOTE(review): a negative ratio appears to switch off POI's zip-bomb inflate-ratio
    // check for the whole JVM - confirm this is acceptable if the input can be untrusted.
    ZipSecureFile.setMinInflateRatio(-1.0d);
    System.out.println(countWords(extractText(fileName)));
}

/**
 * Reads the plain text of a {@code .doc} or {@code .docx} file.
 *
 * @param fileName path of the document; the text after the last {@code '.'} selects the parser
 * @return the extracted text, or an empty string when the suffix is neither doc nor docx
 * @throws Exception if the file cannot be opened or parsed
 */
private static String extractText(String fileName) throws Exception {
    // Get the file suffix; compared case-insensitively so "DEMO.DOCX" is handled as well.
    String suffix = fileName.substring(fileName.lastIndexOf(".") + 1);
    // try-with-resources closes the stream, the document and the extractor on every path.
    try (InputStream fileInputStream = new FileInputStream(fileName)) {
        if ("doc".equalsIgnoreCase(suffix)) {
            try (WordExtractor wordExtractor = new WordExtractor(fileInputStream)) {
                return wordExtractor.getText();
            }
        }
        if ("docx".equalsIgnoreCase(suffix)) {
            try (XWPFDocument document = new XWPFDocument(fileInputStream);
                 XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
                return extractor.getText();
            }
        }
        // Unsupported suffix: keep the previous behaviour of counting an empty text.
        return "";
    }
}

/**
 * Counts words in the given text: one per Chinese character or Chinese punctuation mark,
 * plus one per space-separated run of ASCII letters, digits and punctuation.
 *
 * @param content text to count
 * @return the combined count
 */
private static int countWords(String content) {
    // Chinese words: drop everything except CJK ideographs and Chinese punctuation.
    // The full-width comma, question mark, semicolon, colon, parentheses and exclamation
    // mark are written as escapes (U+FF0C, U+FF1F, U+FF1B, U+FF1A, U+FF09, U+FF08, U+FF01).
    // Their ASCII look-alikes must not appear here because the ASCII class below already
    // keeps them, and listing them in both classes counted each of them twice.
    String cnWords = content.replaceAll(
            "[^\\u4e00-\\u9fa5\\uFF0C。《》\\uFF1F\\uFF1B’‘\\uFF1A“”【】、\\uFF09\\uFF08…¥\\uFF01·]", "");
    int cnWordsCount = cnWords.length();

    // Non-Chinese words: every character outside the ASCII class (including all
    // whitespace) becomes a single space, so the remaining runs are the tokens.
    String noCnWords = content.replaceAll("[^(a-zA-Z0-9`\\-=\';.,/~!@#$%^&*()_+|}{\":><?\\[\\])]", " ");
    int noCnWordsCount = 0;
    for (String token : noCnWords.split(" ")) {
        // Consecutive separators produce empty tokens; skip them.
        if (!token.isEmpty()) {
            noCnWordsCount++;
        }
    }
    return cnWordsCount + noCnWordsCount;
}