版本的界面如下:
【A】 还要额外开启一个tomcat,把html放到上面,不然pd4ml无法解析http协议以外的文件,pdf会为空。如果谁有好的方法可以通知我
【B】最终的执行效果
【C】生成的分散文件
查看pdf是否写入:乱码还没有解决呢
【D】
1 下载器通过输入博客人的名字实现自动下载--支持多个目录的生成
现在有几个功能点因为其他原因没有完成,先写个草稿版本,供以后完成
本来要使用多线程map-reduce技术快速生成pdf,实际开发中发现受制于网速带宽,所以不再使用此技术
2 缺少的功能点
a:需要额外开一个项目用tomcat部署,为了pd4ml可以读取http协议,这个有待改进
b:生成的临时pdf为中文乱码,没有实现gbk到utf-8的转换
c:没有写最后一步:合并pdf
所以先写个临时版本吧
3 目录结构
4 主要代码流程:
[1] ui界面设计
package com.blog.csdn.ui;
import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import com.blog.csdn.common.Message;
import com.blog.csdn.download.HtmlBuilder;
import com.blog.csdn.download.HtmlParser;
import com.blog.csdn.download.ProjController;
import com.blog.csdn.pdf.BuildSinglePdf;
/**
* 只看功能,不重视外观
* @author chaigw
*
*/
public class ConfigFrame extends JFrame {
Container container;
public ConfigFrame() {
this.setSize(400, 300);
this.setTitle("CSDN 博客下载器");
Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
Dimension frameSize = this.getSize();
this.setLocation((screenSize.width - frameSize.width) / 2, (screenSize.height - frameSize.height) / 2);
container = this.getContentPane();
this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
}
public ConfigFrame(String blogName)
{
this();
fillBlogName(blogName);
}
public void fillBlogName(String blogName)
{
JLabel lb = new JLabel("");
//此处需添加一点就没的功能,以后再说
final JTextField tf = new JTextField(blogName);
tf.setColumns(6);
tf.setText("qinhl99");
tf.setText(tf.getText().toLowerCase());
JButton bt = new JButton("生成PDF");
JLabel thLb = new JLabel("线程");
final JTextField thTf = new JTextField();
thTf.setText("3");
thTf.setColumns(2);
FlowLayout flow = new FlowLayout();
JPanel req = new JPanel();
req.setLayout(flow);
req.add(lb);
req.add(tf);
req.add(thLb);
req.add(thTf);
req.add(bt);
bt.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent e) {
new Thread(new Runnable() {
public void run() {
System.out.println(tf.getText());
if(null == tf.getText() || "".equals(tf.getText().trim()))
{
JOptionPane.showMessageDialog(ConfigFrame.this, "请输入博客人姓名");
return;
}
Message.bolgName = tf.getText().trim();
if(null == thTf.getText() || "".equals(thTf.getText().trim()))
{
JOptionPane.showMessageDialog(ConfigFrame.this, "线程数未填,默认为1");
Message.threadNum = 1;
thTf.setText("1");
}
else {
try{
Message.threadNum = Integer.parseInt(thTf.getText());
}
catch(Exception e2)
{
JOptionPane.showMessageDialog(ConfigFrame.this, "请输入正确整形数字");
return;
}
}
HtmlBuilder.frame = ConfigFrame.this;
// 此处开始写调用后台处理的代码
Message.bolgName = tf.getText().trim();
HtmlParser parser = new HtmlParser();
parser.setFrame(ConfigFrame.this);
HtmlBuilder builder = new HtmlBuilder();
ProjController controller = new ProjController(parser, builder);
controller.buildMenuFiles(Message.bolgName);
// //放在文件中建立
controller.buildHtmlFiles();
BuildSinglePdf pdf = new BuildSinglePdf();
controller.buildSinglePdf(pdf);
}
}).start();
}
});
JScrollPane scroLog = new JScrollPane();
scroLog.setPreferredSize (new Dimension (320,220));
area = new JTextArea(10, 30);
area.setLineWrap(true);
scroLog.setViewportView(area);
container.add(req,BorderLayout.CENTER);
container.add(scroLog,BorderLayout.SOUTH);
}
private JTextArea area;
public JTextArea getArea() {
return area;
}
public void setArea(JTextArea area) {
this.area = area;
}
public static void main(String[] args) {
ConfigFrame configFrame = new ConfigFrame("");
configFrame.setVisible(true);
}
}
【2】 总的流程控制器,负责获取目录,建立目录,建立html,建立临时pdf,建立总的pdf
package com.blog.csdn.download;
import java.util.List;
import java.util.Map;
import com.blog.csdn.common.Message;
import com.blog.csdn.pdf.BuildSinglePdf;
/**
 * Controller for the overall workflow: fetch the category list, create the
 * category folders, fill in the article html files and convert them to PDF.
 *
 * @author chaigw
 */
public class ProjController {
    private HtmlParser htmlParser;
    private HtmlBuilder htmlBuilder;

    /** Creates a controller without collaborators; only {@link #buildSinglePdf} is usable in that state. */
    public ProjController() {
    }

    /**
     * @param htmlParser  downloads pages and fills the shared maps in {@code Message}
     * @param htmlBuilder creates the folders and files on disk
     */
    public ProjController(HtmlParser htmlParser, HtmlBuilder htmlBuilder) {
        this.htmlParser = htmlParser;
        this.htmlBuilder = htmlBuilder;
    }

    /**
     * Downloads the blog's main page, extracts its categories into
     * {@code Message.menuMap} and creates one folder per category.
     *
     * @param blogName blog owner name, used as the page address to fetch
     * @throws IllegalStateException if the controller was built without parser/builder
     */
    public void buildMenuFiles(String blogName) {
        if (htmlParser == null || htmlBuilder == null) {
            throw new IllegalStateException("htmlParser and htmlBuilder must be set");
        }
        String allContent = htmlParser.parse("" + blogName);
        htmlParser.filtMainMenuContent(allContent);
        for (Map.Entry<String, String> entry : Message.menuMap.entrySet()) {
            htmlBuilder.createFolder(Message.getPrexMenu(entry.getKey()));
        }
    }

    /**
     * Delegates to the parser, which
     * 1. parses the article titles under every category,
     * 2. creates the html file for each article,
     * 3. writes the article content into it.
     *
     * @throws IllegalStateException if the controller was built without a parser
     */
    public void buildHtmlFiles() {
        if (htmlParser == null) {
            throw new IllegalStateException("htmlParser must be set");
        }
        htmlParser.parseHtmls();
    }

    /**
     * Converts the html files of every category folder to PDF.
     *
     * @param singlePdf converter invoked once per category folder
     */
    public void buildSinglePdf(BuildSinglePdf singlePdf) {
        List<String> menus = Message.getMenus();
        // getMenus() reports a missing download directory as null; nothing to convert then.
        if (menus == null) {
            return;
        }
        for (String menuPath : menus) {
            singlePdf.buildSinglgPdf(menuPath);
        }
    }
}
【3】程序中用到的存储所有数据和工作操作方法的类
package com.blog.csdn.common;
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Shared mutable state and naming helpers used by every stage of the downloader.
 * All members are static; nothing here is synchronized.
 */
public class Message {
    public static String bolgName = "";
    public static int threadNum = 1;
    /** Last value returned by {@link #getPrexMenu(String)}. */
    public static String prexMenu = "";
    /** Last value returned by {@link #getPrexHtml(String)}. */
    public static String prexHtml = "";
    /**
     * Download root; the path is hard-coded.
     */
    public static String downPathString = "D:/csdn_pdf";
    public static String prexBuildPdfUrl = "http://localhost:8080/web-csdnblog";
    /**
     * CSDN base url.
     */
    public static String baseUrl = "";
    /**
     * key: category name (with its M-prefix)
     * value: url of the category
     */
    public static Map<String, String> menuMap = new LinkedHashMap<String, String>();
    /**
     * key: article name (with its M/F-prefix)
     * value: url of the article
     */
    public static Map<String, String> alticalMap = new LinkedHashMap<String, String>();
    /**
     * One-to-many relation between a category and its article names.
     */
    public static Map<String, List<String>> menuAlticals = new LinkedHashMap<String, List<String>>();

    /**
     * Returns the category prefix of a key, i.e. its first five characters (for example M0001),
     * and remembers it in {@link #prexMenu}.
     *
     * @param menuKey key that starts with a category prefix
     * @return the first five characters of {@code menuKey}
     */
    public static String getPrexMenu(String menuKey) {
        prexMenu = menuKey.substring(0, 5);
        return prexMenu;
    }

    /**
     * Returns the article prefix of a key, i.e. its first ten characters (for example M0001F0001),
     * and remembers it in {@link #prexHtml}.
     *
     * @param htmlKey key that starts with an article prefix
     * @return the first ten characters of {@code htmlKey}
     */
    public static String getPrexHtml(String htmlKey) {
        prexHtml = htmlKey.substring(0, 10);
        return prexHtml;
    }

    /**
     * Builds a category prefix: "M" followed by the index left-padded with zeros to four digits.
     *
     * @param index category number
     * @return for example M0007, M0042, M12345
     */
    public static String buildPrexMenu(int index) {
        return buildPrefix("M", index);
    }

    /**
     * Builds an article prefix: "F" followed by the index left-padded with zeros to four digits.
     *
     * @param index article number
     * @return for example F0007, F0042, F12345
     */
    public static String buildPrexHtml(int index) {
        return buildPrefix("F", index);
    }

    /** Shared implementation of the two prefix builders. */
    private static String buildPrefix(String letter, int index) {
        String zeros;
        if (index < 10) {
            zeros = "000";
        } else if (index < 100) {
            zeros = "00";
        } else if (index < 1000) {
            zeros = "0";
        } else {
            zeros = "";
        }
        return letter + zeros + index;
    }

    /**
     * Lists the html files directly inside a directory.
     *
     * @param currentPath directory to scan
     * @return absolute paths of the entries whose path ends with ".html"; empty when
     *         {@code currentPath} is null, is not a directory or cannot be read
     */
    public static List<String> getCurrentPathHtmls(String currentPath) {
        List<String> paths = new ArrayList<String>();
        if (currentPath == null) {
            return paths;
        }
        File[] files = new File(currentPath).listFiles(new FileFilter() {
            @Override
            public boolean accept(File pathname) {
                String temp = pathname.getAbsolutePath();
                // endsWith cannot throw on short paths, unlike a fixed-length substring
                if (temp.endsWith(".html")) {
                    System.out.println(temp + "过滤路径");
                    return true;
                }
                return false;
            }
        });
        // listFiles yields null when the path is not a directory or an I/O error occurs
        if (files == null) {
            return paths;
        }
        for (File file : files) {
            paths.add(file.getAbsolutePath());
        }
        return paths;
    }

    /**
     * Lists every entry of the current blog's download directory
     * ({@code downPathString/bolgName}).
     *
     * @return absolute paths of the entries; empty when the directory does not exist
     */
    public static List<String> getMenus() {
        List<String> fileMenus = new ArrayList<String>();
        File[] menuFiles = new File(Message.downPathString + "/" + Message.bolgName).listFiles();
        if (menuFiles == null) {
            // Return an empty list rather than null so callers can iterate unconditionally.
            return fileMenus;
        }
        for (File menuFile : menuFiles) {
            fileMenus.add(menuFile.getAbsolutePath());
        }
        return fileMenus;
    }
}
【4】 从网上获取数据的类,用到了大量的正则
package com.blog.csdn.download;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import b.b.b.c.c;
import com.blog.csdn.common.Message;
import com.blog.csdn.ui.ConfigFrame;
import com.itextpdf.text.pdf.PdfStructTreeController.returnType;
/**
 * Downloads pages over HTTP and cuts the interesting html fragments out of them
 * with regular expressions: the category list of a blog, the article list of a
 * category (following the "next page" links) and the body of each article.
 * Results are stored in the static maps of {@code Message}.
 *
 * @author chaigw
 */
public class HtmlParser {
    /** Article body, from the content div up to the share bar. */
    private static final Pattern ARTICLE_CONTENT =
            Pattern.compile("<div id=\"article_content[\\s\\S]*<div id=\"bdshare");
    /** Pagination bar of an article list page. */
    private static final Pattern PAGE_LIST =
            Pattern.compile("<div id=\"papelist\"[\\s\\S]*?<div class=\"clear\">");
    /** Anchor whose text starts with "next page". */
    private static final Pattern NEXT_PAGE_LINK = Pattern.compile("<a href=\"[\\s\\S]*?\">下一页");
    /** Anchor that carries an article title inside the article list. */
    private static final Pattern ARTICLE_TITLE =
            Pattern.compile("<a href=[\\s\\S]*?>[\\s\\S]*?</a></span>");
    /** Everything from the article list div to the end of the page. */
    private static final Pattern ARTICLE_LIST = Pattern.compile("<div id=\"article_list\"[\\s\\S]*");
    /** Category panel of the blog main page. */
    private static final Pattern MAIN_MENU =
            Pattern.compile("<ul class=\"panel_body\">[\\s\\S]*</ul>[\\s\\S]*panel_Archive");
    /** Category url inside the category panel. */
    private static final Pattern MENU_URL = Pattern.compile("http://blog[\\s\\S]*?/[0-9]+");
    /** Category name (anchor text) inside the category panel. */
    private static final Pattern MENU_NAME = Pattern.compile("\">+[\\s\\S]*?</a>");
    /** Whole article block including surrounding html. */
    private static final Pattern ARTICLE_DETAILS =
            Pattern.compile("<div id=\"article_details\"[\\w\\W]*<!-- Baidu Button END --></div>");

    /** Number of articles already numbered in the category being processed. */
    int currentNum = 0;
    HtmlBuilder htmlBuilder = new HtmlBuilder();
    /** Optional UI whose log area receives progress lines; may be null. */
    private ConfigFrame frame;

    /**
     * Downloads a page and returns it as one string with the line breaks removed.
     *
     * @param url    page address (http)
     * @param encode charset name used to decode the response
     * @return the page text, or null when the request fails or the server answers with an error code
     */
    public String getHtmlContent(URL url, String encode) {
        HttpURLConnection con = null;
        BufferedReader reader = null;
        try {
            con = (HttpURLConnection) url.openConnection();
            // pretend to be IE for the download
            con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            con.setConnectTimeout(60000);
            con.setReadTimeout(60000);
            int responseCode = con.getResponseCode();
            if (responseCode == -1) {
                System.out.println(url.toString() + " : connection is failure...");
                return null;
            }
            if (responseCode >= 400) {
                System.out.println("请求失败:get response code: " + responseCode);
                return null;
            }
            reader = new BufferedReader(new InputStreamReader(con.getInputStream(), encode));
            StringBuilder contentBuffer = new StringBuilder();
            String line;
            // Lines are joined without separators; the regexes in this class expect that form.
            while ((line = reader.readLine()) != null) {
                contentBuffer.append(line);
            }
            return contentBuffer.toString();
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("error: " + url.toString());
            // Report the failure as null instead of dereferencing a discarded buffer.
            return null;
        } finally {
            closeQuietly(reader);
            // openConnection may have thrown before con was assigned
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Downloads a page given its address as text; "http://" is prepended when no
     * http/https scheme is present.
     *
     * @param url    page address, with or without scheme
     * @param encode charset name used to decode the response
     * @return the page text, or null on any failure
     */
    public String getHtmlContent(String url, String encode) {
        if (url == null) {
            return null;
        }
        String lower = url.toLowerCase();
        if (!lower.startsWith("http://") && !lower.startsWith("https://")) {
            url = "http://" + url;
        }
        try {
            URL rUrl = new URL(url);
            return getHtmlContent(rUrl, encode);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Manual test entry: parses one hard-coded category without a UI. */
    public static void main(String argsp[]) {
        HtmlParser parse = new HtmlParser();
        Message.bolgName = "cgwcgw_";
        Message.menuMap.put("M0001abc", "cgwcgw_/article/category/1474691");
        parse.parseHtmls();
    }

    /**
     * Downloads a page as UTF-8.
     *
     * @param alitbaseAllUrl page address
     * @return the page text, or null when the download fails
     */
    public String parse(String alitbaseAllUrl) {
        return getHtmlContent(alitbaseAllUrl, "UTF-8");
    }

    /**
     * For every category in {@code Message.menuMap}: collects its articles across all
     * pages, then writes the article html files of that category.
     */
    public void parseHtmls() {
        for (Map.Entry<String, String> entry : Message.menuMap.entrySet()) {
            pagrationLoop(entry, "", true);
            currentNum = 0;
            // write the html files as soon as one category is complete
            addHtmlsContent(entry);
        }
    }

    /**
     * Downloads and writes the body of every article that belongs to the given category.
     *
     * @param menuEntry category entry (prefixed name to url)
     */
    public void addHtmlsContent(Map.Entry<String, String> menuEntry) {
        for (Map.Entry<String, String> alticalEntry : Message.alticalMap.entrySet()) {
            String articleKey = alticalEntry.getKey();
            // only articles whose key carries this category's prefix
            if (!articleKey.startsWith(Message.getPrexMenu(menuEntry.getKey()))) {
                continue;
            }
            String htmlName = Message.getPrexHtml(articleKey) + ".html";
            String path = Message.downPathString + "/" + Message.bolgName + "/"
                    + Message.getPrexMenu(articleKey) + "/" + htmlName;
            BufferedOutputStream bufferStream = null;
            try {
                bufferStream = new BufferedOutputStream(new FileOutputStream(path));
                // NOTE(review): bytes are written in the platform default charset although the page
                // was decoded as UTF-8; possibly related to the garbled PDF text - confirm before changing.
                byte[] bytes = getHtmlsContent(alticalEntry.getValue()).getBytes();
                bufferStream.write(bytes, 0, bytes.length);
                bufferStream.flush();
                System.out.println("写入临时文件:" + htmlName);
                appendToFrame("写入临时文件:" + htmlName + "\n");
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                closeQuietly(bufferStream);
            }
        }
    }

    public void writeAlticalIo(String content) {
    }

    /**
     * Downloads an article page and extracts the article body.
     *
     * @param alticalUrl article path relative to {@code Message.baseUrl}
     * @return the matched html fragment, or an empty string when the download fails or nothing matches
     */
    public String getHtmlsContent(String alticalUrl) {
        String pageContent = parse(Message.baseUrl + "/" + alticalUrl);
        String content = "";
        // parse() yields null when the download fails
        if (pageContent != null) {
            Matcher matcher = ARTICLE_CONTENT.matcher(pageContent);
            if (matcher.find()) {
                content = matcher.group();
            }
        }
        if ("".equals(content)) {
            System.out.println("写入文章内容为空,匹配错误");
        }
        System.out.println(content);
        return content;
    }

    /**
     * Processes one page of a category's article list and recurses into the next page, if any.
     *
     * @param entry category entry (prefixed name to url)
     * @param url   path of a follow-up page, used when {@code flg} is false
     * @param flg   true for the first page (address taken from {@code entry}), false for follow-up pages
     */
    public void pagrationLoop(Map.Entry<String, String> entry, String url, boolean flg) {
        String pageUrl = flg ? entry.getValue() : Message.baseUrl + url;
        String singleMenuAltibases = parse(pageUrl);
        if (singleMenuAltibases == null) {
            // download failed; there is nothing to extract from this page
            System.out.println("error: " + pageUrl);
            return;
        }
        String articleListHtml = filterSingleMenuAltibases(singleMenuAltibases);
        System.out.println(articleListHtml);
        getSingleMenuAltibases(articleListHtml, Message.getPrexMenu(entry.getKey()));
        getPagration(entry, singleMenuAltibases);
    }

    /**
     * Looks for a "next page" link in the pagination bar and, when it is a site-relative
     * path, continues with that page.
     *
     * @param entry                            category entry being processed
     * @param filtersingleMenuAltibasesContent html of the current list page
     */
    public void getPagration(Map.Entry<String, String> entry, String filtersingleMenuAltibasesContent) {
        String content = "";
        if (filtersingleMenuAltibasesContent != null) {
            Matcher matcher = PAGE_LIST.matcher(filtersingleMenuAltibasesContent);
            if (matcher.find()) {
                content = matcher.group();
            }
        }
        // compare by value; the former reference comparison only worked by accident
        if ("".equals(content)) {
            System.out.println("没有获取分页栏--说明只有一页");
            return;
        }
        Matcher nextMatcher = NEXT_PAGE_LINK.matcher(content);
        if (!nextMatcher.find()) {
            return;
        }
        // strip everything around the href value of the "next page" anchor
        content = nextMatcher.group();
        content = content.replaceAll("\">下一页", "");
        content = content.replaceAll("[\\s\\S]*<a href=\"", "");
        System.out.println(content);
        if (!content.startsWith("/")) {
            System.out.println("达到了分页的最后一页");
        } else {
            pagrationLoop(entry, content, false);
            System.out.println("此处有分页");
        }
    }

    /**
     * Extracts the article urls and titles of one list page, registers them in
     * {@code Message.alticalMap} and creates an empty html file per article.
     *
     * @param filtersingleMenuAltibasesContent html of the article list
     * @param prexMenu                         prefix of the category the articles belong to
     * @return always null
     */
    public String getSingleMenuAltibases(String filtersingleMenuAltibasesContent, String prexMenu) {
        List<String> listKey = new ArrayList<String>();
        List<String> listValue = new ArrayList<String>();

        // quote the blog name so that characters in it are never read as regex syntax
        Pattern urlPattern = Pattern.compile("/" + Pattern.quote(Message.bolgName) + "/article/details/[0-9]*");
        Matcher matcher = urlPattern.matcher(filtersingleMenuAltibasesContent);
        int urlHits = 0;
        while (matcher.find()) {
            String articleUrl = matcher.group();
            System.out.println(articleUrl);
            // only every third hit is recorded
            if (urlHits % 3 == 0) {
                listValue.add(articleUrl);
            }
            urlHits++;
        }
        System.out.println(listValue.size());

        matcher = ARTICLE_TITLE.matcher(filtersingleMenuAltibasesContent);
        int titleIndex = 0;
        while (matcher.find()) {
            String title = matcher.group();
            title = title.replaceAll("[\\s\\S]*</span> <h3> ", "");
            title = title.replaceAll("</a></span>", "");
            title = title.replaceAll("<span class[\\s\\S]*>", "").trim();
            listKey.add(prexMenu + Message.buildPrexHtml(titleIndex + currentNum) + title);
            System.out.println(title);
            titleIndex++;
        }
        currentNum += listKey.size();
        System.out.println(listKey.size());

        if (listKey.size() != listValue.size()) {
            System.out.println("文章路径url和文章数不匹配出错");
        }
        // Pair only as many entries as both lists hold so a mismatch cannot run out of bounds.
        int pairs = Math.min(listKey.size(), listValue.size());
        for (int m = 0; m < pairs; m++) {
            Message.alticalMap.put(listKey.get(m), listValue.get(m));
            htmlBuilder.createHtmlFile(Message.getPrexHtml(listKey.get(m)));
        }
        System.out.println(Message.alticalMap);
        return null;
    }

    public void getCurrentPageNum() {
    }

    /**
     * Cuts the article list (from its div to the end of the page) out of a list page.
     *
     * @param singleMenuAltibases html of the list page
     * @return the matched fragment, or an empty string when the div is absent
     */
    public String filterSingleMenuAltibases(String singleMenuAltibases) {
        String content = "";
        Matcher matcher = ARTICLE_LIST.matcher(singleMenuAltibases);
        while (matcher.find()) {
            content = matcher.group();
            System.out.println(content);
        }
        return content;
    }

    /**
     * Extracts the category panel from the blog main page and stores the categories
     * in {@code Message.menuMap}.
     *
     * @param allContent html of the blog main page, may be null when the download failed
     */
    public void filtMainMenuContent(String allContent) {
        String mainContent = getMainMenuContent(allContent);
        saveMenuContent(mainContent);
    }

    /**
     * Cuts the category panel out of the blog main page.
     *
     * @param allContent html of the blog main page, may be null
     * @return the matched fragment, or an empty string when the page is missing or has no panel
     */
    public String getMainMenuContent(String allContent) {
        if (allContent == null) {
            // a missing page is reported as "blog owner does not exist"
            System.out.println("博客人名字不存在");
            appendToFrame("博客人名字不存在\n");
            return "";
        }
        String content = "";
        Matcher matcher = MAIN_MENU.matcher(allContent);
        if (matcher.find()) {
            content = matcher.group();
            System.out.println("test" + content);
        }
        return content;
    }

    /**
     * Parses category urls and names out of the category panel and stores them in
     * {@code Message.menuMap} under prefixed names.
     *
     * @param mainContent html of the category panel
     */
    public void saveMenuContent(String mainContent) {
        List<String> listKey = new ArrayList<String>();
        List<String> listValue = new ArrayList<String>();

        Matcher matcher = MENU_URL.matcher(mainContent);
        while (matcher.find()) {
            String categoryUrl = matcher.group();
            System.out.println(categoryUrl);
            listKey.add(categoryUrl);
        }
        System.out.println(listKey.size());

        matcher = MENU_NAME.matcher(mainContent);
        while (matcher.find()) {
            String categoryName = matcher.group();
            categoryName = categoryName.replaceAll("[\\s\\S]*\">", "");
            categoryName = categoryName.replaceAll("</a>", "");
            System.out.println(categoryName);
            listValue.add(categoryName);
        }
        System.out.println(listValue.size());

        // there is one more name than urls, so names are taken from the second one on
        if (1 == listValue.size() - listKey.size()) {
            for (int i = 0; i < listKey.size(); i++) {
                Message.menuMap.put(Message.buildPrexMenu(i) + listValue.get(i + 1), listKey.get(i));
            }
            System.out.println(Message.menuMap);
        } else {
            System.out.println("目录匹配出现了异常");
        }
    }

    public ConfigFrame getFrame() {
        return frame;
    }

    public void setFrame(ConfigFrame frame) {
        this.frame = frame;
    }

    /** Prints the article block of a page; debugging aid. */
    public void filtAlticalContent(String allContent) {
        String content = getAlticalAllContent(allContent);
        System.out.println(content);
    }

    /**
     * Returns the whole article block of a page, html included.
     *
     * @param allContent html of an article page, may be null
     * @return the matched fragment, or an empty string when absent
     */
    public String getAlticalAllContent(String allContent) {
        if (allContent == null) {
            return "";
        }
        Matcher mt = ARTICLE_DETAILS.matcher(allContent);
        return mt.find() ? mt.group() : "";
    }

    /**
     * Placeholder for extracting an article's table of contents; not implemented.
     */
    public String getAlticalMenu(String message) {
        return null;
    }

    /** Appends a line to the UI log when a frame is attached. */
    private void appendToFrame(String text) {
        if (frame != null) {
            frame.getArea().append(text);
        }
    }

    /** Closes a resource, ignoring a failure of close itself. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // nothing useful can be done when close fails
        }
    }
}
【5】 html文件生成类
package com.blog.csdn.download;
import java.awt.Frame;
import java.io.File;
import java.io.IOException;
import javax.swing.RootPaneContainer;
import com.blog.csdn.common.Message;
import com.blog.csdn.ui.ConfigFrame;
/**
 * Creates the folders and empty html files the downloader writes into.
 * The html files have to exist first because the PDF is generated from html;
 * there is no direct way from html source to PDF, so html is the intermediate step.
 *
 * @author chaigw
 */
public class HtmlBuilder {
    /** Optional UI whose log area receives a line per created directory; may be null. */
    public static ConfigFrame frame;

    static {
        // download root
        initPathFile(Message.downPathString);
        // directory of the blog owner (name as known when this class is loaded)
        initPathFile(Message.downPathString + "/" + Message.bolgName);
    }

    /**
     * Creates the folder of one category below the blog owner's directory.
     *
     * @param menu folder name; null or blank is reported and ignored
     */
    public void createFolder(String menu) {
        if (null == menu || "".equals(menu.trim())) {
            System.out.println("目录为空--有异常");
            return;
        }
        initPathFile(Message.downPathString + "/" + Message.bolgName + "/" + menu);
    }

    /**
     * Creates the empty html file of one article inside its category folder.
     *
     * @param htmlFile file name without extension; its first five characters name the category folder
     */
    public void createHtmlFile(String htmlFile) {
        initPathHtml(Message.downPathString + "/" + Message.bolgName + "/" + htmlFile.substring(0, 5) + "/"
                + htmlFile + ".html");
    }

    /**
     * Creates an empty file, including missing parent directories, unless it already exists.
     *
     * @param path file to create
     */
    public void initPathHtml(String path) {
        File htmlFile = new File(path);
        if (htmlFile.exists()) {
            return;
        }
        // createNewFile fails when the parent directory is missing, so create it first
        File parent = htmlFile.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        try {
            // log only when the file was really created
            if (htmlFile.createNewFile()) {
                System.out.println("构建文件夹路径" + path);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a directory, including missing parents, unless it already exists.
     *
     * @param path directory to create
     */
    public static void initPathFile(String path) {
        File rootFile = new File(path);
        if (rootFile.exists()) {
            return;
        }
        if (!rootFile.mkdirs()) {
            // do not claim success when the directory could not be created
            System.out.println("failed to create directory: " + path);
            return;
        }
        System.out.println("构建文件夹路径" + path);
        if (frame != null) {
            frame.getArea().append("构建目录路径" + path + "\n");
        }
    }

    /** Manual test entry: an empty folder name is reported and ignored. */
    public static void main(String[] args) {
        HtmlBuilder builder = new HtmlBuilder();
        builder.createFolder("");
    }
}
【6】 pdf生成临时的单独文件的类
package com.blog.csdn.pdf;
import java.awt.Insets;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;
import java.util.List;
import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;
import org.zefer.pd4ml.tools.PD4Browser.Rule;
import com.blog.csdn.common.Message;
/**
 * Converts single html files to PDF with pd4ml. The html is read over http
 * from {@code Message.prexBuildPdfUrl}, so the download directory has to be
 * served under that address.
 *
 * @author chaigw
 */
public class BuildSinglePdf {
    protected int topValue = 10;
    protected int leftValue = 20;
    protected int rightValue = 10;
    protected int bottomValue = 10;
    /** Width of the virtual browser window used for rendering. */
    protected int userSpaceWidth = 1300;

    public static void main(String[] args) {
    }

    /**
     * Converts every html file directly inside a category directory to a PDF
     * with the same base name in the same directory. Failures are printed;
     * a failing file does not stop the remaining ones.
     *
     * @param currentPath absolute path of a category directory below the download root
     */
    public void buildSinglgPdf(String currentPath) {
        List<String> paths;
        try {
            paths = Message.getCurrentPathHtmls(currentPath);
        } catch (Exception e) {
            e.printStackTrace();
            return;
        }
        if (paths == null) {
            return;
        }

        // Path of the directory relative to the download root, with forward slashes,
        // derived from the configured root instead of a hard-coded drive path.
        String root = new File(Message.downPathString).getAbsolutePath().replace("\\", "/");
        String directory = currentPath.replace("\\", "/");
        String relative = directory.startsWith(root) ? directory.substring(root.length()) : directory;
        if (!relative.startsWith("/")) {
            relative = "/" + relative;
        }

        for (String htmlPath : paths) {
            String normalized = htmlPath.replace("\\", "/");
            String fileName = normalized.substring(normalized.lastIndexOf("/") + 1);
            String url = Message.prexBuildPdfUrl + relative + "/" + fileName;
            // swap only the extension; a blanket replace would also hit "html" inside the name
            String baseName = fileName.endsWith(".html")
                    ? fileName.substring(0, fileName.length() - ".html".length())
                    : fileName;
            String output = new File(currentPath, baseName + ".pdf").getPath();
            try {
                doConversion(url, output);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Renders one html page to a landscape A4 PDF.
     *
     * @param url        address of the html page
     * @param outputPath file the PDF is written to
     * @throws MalformedURLException if {@code url} is not a valid URL
     * @throws IOException           if the output file cannot be written
     */
    public void doConversion(String url, String outputPath) throws InvalidParameterException, MalformedURLException, IOException {
        File output = new File(outputPath);
        java.io.FileOutputStream fos = new java.io.FileOutputStream(output);
        try {
            PD4ML pd4ml = new PD4ML();
            // frame width of the "virtual web browser"
            pd4ml.setHtmlWidth(userSpaceWidth);
            // choose target paper format and "rotate" it to landscape orientation
            pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));
            // define PDF page margins
            pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));
            // source HTML document also may have margins, could be suppressed this way
            // (PD4ML *Pro* feature):
            pd4ml.addStyle("BODY {margin: 0}", true);
            // If built-in basic PDF fonts are not sufficient or if you need to output
            // non-Latin texts, TTF embedding feature should help (PD4ML *Pro*)
            pd4ml.useTTF("c:/windows/fonts", true);
            // actual document conversion from URL to file
            pd4ml.render(new URL(url), fos);
        } finally {
            // release the file handle even when rendering fails
            fos.close();
        }
        System.out.println(outputPath + "\ndone.");
    }
}
【7】能够生成带目录的pdf的类,参见本博客的poi的专门一篇关于可以生成目录的类,这块还没有写
下面还缺少把pdf合并的代码,时间有限,先写这么多吧