Java实现Word转PDF方案选择
很多应用场景中都会涉及到Word转PDF,但Word转PDF的方案在网上一搜一大把,让人眼花缭乱,笔者踩过无数的坑后,最终总结出以下三种方案
- OpenOffice实现Word转PDF
- docx2pdf实现Word转PDF
- itext+POI实现Word转PDF
方案一:OpenOffice实现Word转PDF
这种方案在Windows中可行,且非常简便,但它完全依赖于OpenOffice,想在Linux中实现,显然不是一个好的方案,笔者尝试过Linux中装OpenOffice,但令人发指的是居然还需要装GUI!
Java代码
import java.io.File;
import java.io.IOException;
import java.net.ConnectException;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
/**
 * Converts office documents to PDF by driving a locally installed OpenOffice
 * ({@code soffice.exe}) through JODConverter over a socket connection.
 *
 * <p>Maven dependencies (listed without versions, as in the original notes):
 * <pre>{@code
 * <dependency>
 *   <groupId>com.artofsolving</groupId>
 *   <artifactId>jodconverter-maven-plugin</artifactId>
 * </dependency>
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi</artifactId>
 * </dependency>
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi-ooxml</artifactId>
 * </dependency>
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi-scratchpad</artifactId>
 * </dependency>
 * }</pre>
 */
public class OfficetoPdfUtil {

    /**
     * OpenOffice installation directory, with trailing separator.
     * Other locations seen in practice: "D:/Program Files/OpenOffice.org 3",
     * "C:\Program Files (x86)\OpenOffice 4".
     */
    private static final String OPEN_OFFICE_HOME = "D:\\openoffice\\newgay\\";

    /** Host the headless OpenOffice instance listens on. */
    private static final String OPEN_OFFICE_HOST = "127.0.0.1";

    /** Port the headless OpenOffice instance listens on. */
    private static final int OPEN_OFFICE_PORT = 8300;

    /**
     * Starts a headless OpenOffice process, converts {@code sourceFile} to
     * {@code destFile} through it, then disconnects and destroys the process.
     *
     * <p>I/O and connection failures are printed to stderr and not rethrown
     * (best-effort, as in the original). The connection and the process are
     * released in a {@code finally} block so they are not leaked when the
     * conversion fails.
     *
     * @param sourceFile path of the document to convert
     * @param destFile   path of the PDF to write; a missing parent directory is created
     */
    public static void createPDF(String sourceFile, String destFile) {
        Process pro = null;
        OpenOfficeConnection connection = null;
        // Tracks whether connect() returned, so disconnect() is only called on a live connection.
        boolean connected = false;
        try {
            String command = OPEN_OFFICE_HOME
                    + "program\\soffice.exe -headless -accept=\"socket,host=" + OPEN_OFFICE_HOST
                    + ",port=" + OPEN_OFFICE_PORT
                    + ";urp;StarOffice.ServiceManager\" -nofirststartwizard";
            pro = Runtime.getRuntime().exec(command);

            File inputFile = new File(sourceFile);
            File outputFile = new File(destFile);
            // getParentFile() is null for a bare file name; only create a directory that is named.
            File parentDir = outputFile.getParentFile();
            if (parentDir != null && !parentDir.exists()) {
                parentDir.mkdirs();
            }

            // NOTE(review): the connection is attempted right after launching soffice;
            // if the process is not listening yet this fails — consider a retry.
            connection = new SocketOpenOfficeConnection(OPEN_OFFICE_HOST, OPEN_OFFICE_PORT);
            connection.connect();
            connected = true;

            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(inputFile, outputFile);
        } catch (IOException e) {
            // Also covers java.net.ConnectException, which is an IOException subclass.
            e.printStackTrace();
        } finally {
            if (connected) {
                connection.disconnect();
            }
            if (pro != null) {
                pro.destroy();
            }
        }
    }

    /** Demo entry point: converts a sample .doc on the desktop to PDF. */
    public static void main(String[] args) {
        createPDF("C:\\Users\\Administrator\\Desktop\\test.doc", "C:\\Users\\Administrator\\Desktop\\test.pdf");
    }
}
方案二(docx2pdf):这种方案在Windows和Linux中都可用,但有一点需要注意,它只支持Word 2007及以上版本的.docx文档。很不幸,笔者的Word模板是Word 2003的.doc格式,只能用第三种方案了。事实上,这种方案也依赖了iText的包。
Java代码
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Converts Word 2007+ ({@code .docx}) documents to PDF using Apache POI and
 * the xdocreport {@code PdfConverter}.
 *
 * <p>Maven dependencies:
 * <pre>{@code
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi</artifactId>
 *   <version>3.15</version>
 * </dependency>
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi-ooxml</artifactId>
 *   <version>3.15</version>
 * </dependency>
 * <dependency>
 *   <groupId>org.apache.poi</groupId>
 *   <artifactId>poi-scratchpad</artifactId>
 *   <version>3.15</version>
 * </dependency>
 * <dependency>
 *   <groupId>fr.opensagres.xdocreport</groupId>
 *   <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
 *   <version>1.0.5</version>
 * </dependency>
 * <dependency>
 *   <groupId>fr.opensagres.xdocreport</groupId>
 *   <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
 *   <version>1.0.6</version>
 * </dependency>
 * <dependency>
 *   <groupId>fr.opensagres.xdocreport</groupId>
 *   <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
 *   <version>1.0.6</version>
 * </dependency>
 * <dependency>
 *   <groupId>org.xhtmlrenderer</groupId>
 *   <artifactId>flying-saucer-pdf</artifactId>
 *   <version>9.1.16</version>
 * </dependency>
 * <dependency>
 *   <groupId>org.jsoup</groupId>
 *   <artifactId>jsoup</artifactId>
 *   <version>1.11.3</version>
 * </dependency>
 * <dependency>
 *   <groupId>com.itextpdf.tool</groupId>
 *   <artifactId>xmlworker</artifactId>
 *   <version>5.5.13</version>
 * </dependency>
 * <dependency>
 *   <groupId>fr.opensagres.xdocreport</groupId>
 *   <artifactId>fr.opensagres.xdocreport.document</artifactId>
 *   <version>1.0.5</version>
 * </dependency>
 * }</pre>
 */
public class WordToPDF {

    /**
     * Converts a .docx document to PDF, passing {@code null} as the converter options.
     *
     * @param source stream of the Word document; must be in .docx format
     * @param target stream the PDF is written to
     * @param params intended as placeholder replacements; currently not applied
     *               (it is only forwarded to the four-argument overload, which ignores it)
     * @throws Exception if the document cannot be read or converted
     */
    public static void wordConverterToPdf(InputStream source,
            OutputStream target, Map<String, String> params) throws Exception {
        wordConverterToPdf(source, target, null, params);
    }

    /**
     * Converts a .docx document to PDF with the given converter options.
     *
     * <p>Neither stream is closed by this method; the caller owns them.
     *
     * @param source  stream of the Word document; must be in .docx format
     * @param target  stream the PDF is written to
     * @param options converter options, e.g.
     *                {@code PdfOptions.create().fontEncoding("windows-1250")}
     * @param params  intended as placeholder replacements; NOTE: this implementation
     *                never reads it, so no substitution takes place
     * @throws Exception if the document cannot be read or converted
     */
    public static void wordConverterToPdf(InputStream source, OutputStream target, PdfOptions options,
            Map<String, String> params) throws Exception {
        XWPFDocument docx = new XWPFDocument(source);
        PdfConverter.getInstance().convert(docx, target, options);
    }

    /** Demo entry point: converts a sample .docx on the desktop to PDF. */
    public static void main(String[] args) {
        String filepath = "C:\\Users\\Administrator\\Desktop\\test.docx";
        String outpath = "C:\\Users\\Administrator\\Desktop\\test.pdf";
        // try-with-resources closes both streams whether or not the conversion succeeds.
        try (InputStream source = new FileInputStream(filepath);
                OutputStream target = new FileOutputStream(outpath)) {
            Map<String, String> params = new HashMap<String, String>();
            PdfOptions options = PdfOptions.create();
            wordConverterToPdf(source, target, options, params);
        } catch (Exception e) {
            // Demo code: report the failure (including a missing input file) and exit.
            e.printStackTrace();
        }
    }
}
方案三(itext+POI):这种方案是先用POI解析Word,再用iText将内容写入PDF,是三种方案中最繁琐的选择。但如果你的Word模板不是Word 2007及以上的格式,而线上环境又是Linux,这就是最后的选择了。
word解析
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
/**
 * Demonstrates reading a Word 97-2003 ({@code .doc}) file with Apache POI HWPF:
 * bookmarks, paragraphs, sections, tables and list items.
 */
public class WordUtil {

    /** Demo entry point: runs {@link #testReadByDoc()} and prints any failure. */
    public static void main(String[] args) {
        try {
            // Leftover experiment for reading an image's dimensions:
            // InputStream is = null;
            // BufferedImage src = null;
            // int ret = -1;
            //
            // is = new FileInputStream(new File("C:\\Users\\Administrator\\Desktop\\timg.png"));
            // src = javax.imageio.ImageIO.read(is);
            // System.out.println(src.getHeight());
            // System.out.println(src.getWidth());
            // is.close();
            new WordUtil().testReadByDoc();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a sample .doc file and prints its bookmark information.
     *
     * <p>The input stream is closed in a {@code finally} block so it is released
     * even when parsing or printing fails.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public void testReadByDoc() throws Exception {
        InputStream is = new FileInputStream("C:\\Users\\Administrator\\Desktop\\100007zldlwts.doc");
        try {
            HWPFDocument doc = new HWPFDocument(is);
            // Print bookmark information.
            this.printInfo(doc.getBookmarks(), doc);
            // Other things that can be done with the document:
            // print the full text
            // System.out.println(doc.getDocumentText());
            // Range range = doc.getRange();
            // this.insertInfo(range);
            // this.printInfo(range);
            // // read tables
            // this.readTable(range);
            // // read list items
            // this.readList(range);
            // // delete a range
            // Range r1 = new Range(0, 1048, doc);
            // r1.delete(); // deletes in memory only; write the document back to persist it
            // write the current HWPFDocument to an output stream
            // doc.write(new FileOutputStream("C:\\Users\\Administrator\\Desktop\\100006bzsupdated.doc"));
        } finally {
            this.closeStream(is);
        }
    }

    /**
     * Closes the given input stream, printing (not rethrowing) any I/O error.
     *
     * @param is stream to close; {@code null} is ignored
     */
    private void closeStream(InputStream is) {
        if (is == null) {
            return;
        }
        try {
            is.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the count of bookmarks and, for each one, its name, start and end
     * offsets, and the text it covers with the {@code " FORMTEXT "} marker removed.
     *
     * @param bookmarks bookmarks of the document
     * @param doc       document the bookmark offsets refer to
     */
    private void printInfo(Bookmarks bookmarks, HWPFDocument doc) {
        int count = bookmarks.getBookmarksCount();
        System.out.println("书签数量:" + count);
        for (int i = 0; i < count; i++) {
            Bookmark bookmark = bookmarks.getBookmark(i);
            System.out.println("书签" + (i + 1) + "的名称是:" + bookmark.getName());
            System.out.println("开始位置:" + bookmark.getStart());
            System.out.println("结束位置:" + bookmark.getEnd());
            // NOTE(review): the original chained a second replaceAll("", ""), which is a
            // no-op; its pattern presumably held field control characters that were lost
            // in copy/paste — confirm whether those should be stripped here as well.
            String text = new Range(bookmark.getStart(), bookmark.getEnd(), doc).text();
            System.out.println(text.replace(" FORMTEXT ", ""));
        }
    }

    /**
     * Prints the trimmed text of every cell of every table found in the range.
     *
     * <p>Each carriage return marks a paragraph, so every table cell holds at
     * least one paragraph and every row ends with one.
     *
     * @param range range to scan for tables
     */
    private void readTable(Range range) {
        TableIterator tableIter = new TableIterator(range);
        while (tableIter.hasNext()) {
            Table table = tableIter.next();
            int rowNum = table.numRows();
            for (int j = 0; j < rowNum; j++) {
                TableRow row = table.getRow(j);
                int cellNum = row.numCells();
                for (int k = 0; k < cellNum; k++) {
                    TableCell cell = row.getCell(k);
                    System.out.println(cell.text().trim());
                }
            }
        }
    }

    /**
     * Prints the text of every paragraph in the range that belongs to a list.
     *
     * @param range range whose paragraphs are inspected
     */
    private void readList(Range range) {
        int num = range.numParagraphs();
        for (int i = 0; i < num; i++) {
            Paragraph para = range.getParagraph(i);
            if (para.isInList()) {
                System.out.println("list: " + para.text());
            }
        }
    }

    /**
     * Prints the paragraph count and each paragraph's text, then the section
     * count and each section's margins, page height and text.
     *
     * <p>Side effect: after printing the last paragraph, "Hello" is inserted
     * after it via {@link #insertInfo(Range)}.
     *
     * @param range range to print
     */
    private void printInfo(Range range) {
        int paraNum = range.numParagraphs();
        System.out.println(paraNum);
        for (int i = 0; i < paraNum; i++) {
            //this.insertInfo(range.getParagraph(i));
            System.out.println("段落" + (i + 1) + ":" + range.getParagraph(i).text());
            if (i == (paraNum - 1)) {
                this.insertInfo(range.getParagraph(i));
            }
        }
        int secNum = range.numSections();
        System.out.println(secNum);
        for (int i = 0; i < secNum; i++) {
            Section section = range.getSection(i);
            System.out.println(section.getMarginLeft());
            System.out.println(section.getMarginRight());
            System.out.println(section.getMarginTop());
            System.out.println(section.getMarginBottom());
            System.out.println(section.getPageHeight());
            System.out.println(section.text());
        }
    }

    /**
     * Inserts the text "Hello" after the given range. The change is made in
     * memory only; nothing is written back to the file here.
     *
     * @param range range after which the text is inserted
     */
    private void insertInfo(Range range) {
        range.insertAfter("Hello");
    }
}
pdf生成
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfPageEventHelper;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Demonstrates generating a PDF with iText 5: a Chinese text paragraph, a
 * scaled image, and a page header drawn by {@link HeaderFoot}.
 */
public class PDFUtil {

    /**
     * Writes a sample PDF to the desktop.
     *
     * @throws Exception if the font, the image or the output file cannot be
     *                   loaded or written
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources releases the file handle even when building the PDF fails.
        try (OutputStream out = new FileOutputStream(new File("C:\\Users\\Administrator\\Desktop\\test.pdf"))) {
            BaseFont bfChinese = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            Font normalFont = new Font(bfChinese, 12, Font.NORMAL, BaseColor.BLACK);

            Document document = new Document(PageSize.A4);
            PdfWriter writer = PdfWriter.getInstance(document, out);
            // Register the header event before any page is produced, so that every
            // page (not only the last one) triggers onEndPage.
            writer.setPageEvent(new HeaderFoot("Title"));
            document.open();

            Paragraph paragraphRemark = new Paragraph();
            paragraphRemark.setFirstLineIndent(2f);
            paragraphRemark.add(new Chunk("特别提醒:", normalFont));
            paragraphRemark.add(Chunk.NEWLINE);
            document.add(paragraphRemark);

            Image img = Image.getInstance("C:\\Users\\Administrator\\Desktop\\图片1.png");
            img.scaleToFit(400f, 200f);
            document.add(img);

            // Closing the document finishes the PDF; the writer needs no separate close.
            document.close();
        }
    }
}
/**
 * Page event handler that draws a centered header text at the top of the
 * content area each time a page ends.
 */
class HeaderFoot extends PdfPageEventHelper {

    /** Text drawn as the page header. */
    private final String header;

    /**
     * @param header text to draw as the header of every page
     */
    public HeaderFoot(String header) {
        this.header = header;
    }

    /**
     * Draws the header in 16pt bold black, horizontally centered between the
     * document's left and right margins, at {@code document.top()}.
     *
     * <p>If the Chinese base font cannot be created, the error is printed and a
     * {@code null} base font is handed to {@link Font}, as the original code did.
     *
     * @param writer   writer whose direct content receives the header
     * @param document document supplying the margin coordinates
     */
    @Override
    public void onEndPage(PdfWriter writer, Document document) {
        BaseFont baseFont = null;
        try {
            baseFont = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
        } catch (DocumentException | IOException e) {
            e.printStackTrace();
        }
        Font headerFont = new Font(baseFont, 16, Font.BOLD, BaseColor.BLACK);

        // Compute the center from the margins instead of a fixed "left + 260"
        // offset, so the header stays centered for any page size.
        float centerX = (document.left() + document.right()) / 2f;
        ColumnText.showTextAligned(writer.getDirectContent(),
                Element.ALIGN_CENTER, new Phrase(header, headerFont),
                centerX, document.top(), 0);
    }
}