版本的界面如下:

【A】 还需要额外开启一个tomcat,把html放到上面,不然pd4ml无法解析http协议以外的文件,生成的pdf会为空。如果谁有更好的方法,可以通知我。

java 实现类似下载服务端图片的接口_List


【B】最终的执行效果

java 实现类似下载服务端图片的接口_java_02

【C】生成的分散文件

java 实现类似下载服务端图片的接口_List_03

查看pdf是否写入:乱码还没有解决呢

【D】

java 实现类似下载服务端图片的接口_List_04



1 下载器通过输入博客人的名字实现自动下载--支持多个目录的生成

  现在有几个功能点因为其他原因没有完成,先写个草稿版本,供以后完成

 本来要使用多线程map-reduce技术快速生成pdf,实际开发中发现受制于网速带宽,所以不再使用此技术

2 缺少的功能点

  a:需要额外开一个项目用tomcat部署,为了pd4ml可以读取http协议,这个有待改进

  b:生成的临时pdf为中文乱码,没有实现gbk到utf-8的转换

  c:没有写最后一步:合并pdf

所以先写个临时版本吧

3 目录结构

java 实现类似下载服务端图片的接口_List_05

4 主要代码流程:

[1] ui界面设计

package com.blog.csdn.ui;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;

import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;

import com.blog.csdn.common.Message;
import com.blog.csdn.download.HtmlBuilder;
import com.blog.csdn.download.HtmlParser;
import com.blog.csdn.download.ProjController;
import com.blog.csdn.pdf.BuildSinglePdf;


/**
 * Minimal Swing window for the CSDN blog downloader; function matters here, looks do not.
 * <p>
 * The window holds a blog-name field, a thread-count field, a "生成PDF" button and a
 * log area. Pressing the button validates the inputs, stores them in {@code Message}
 * and then runs the menu / HTML / PDF steps of {@code ProjController} on a
 * background thread so the UI stays responsive.
 *
 * @author chaigw
 */
public class ConfigFrame extends JFrame {

	/** Blog name shown in the text field when the caller supplies none. */
	private static final String DEFAULT_BLOG_NAME = "qinhl99";

	Container container;

	/** Log area at the bottom of the window; other classes append progress lines to it. */
	private JTextArea area;

	/**
	 * Creates an empty 400x300 frame centred on the screen. The input row and the
	 * log area are only added by {@link #fillBlogName(String)}.
	 */
	public ConfigFrame() {
		this.setSize(400, 300);
		this.setTitle("CSDN 博客下载器");
		Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
		Dimension frameSize = this.getSize();
		this.setLocation((screenSize.width - frameSize.width) / 2, (screenSize.height - frameSize.height) / 2);
		container = this.getContentPane();
		this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
	}

	/**
	 * Creates the frame and builds its controls.
	 *
	 * @param blogName blog name to pre-fill; {@code null} or blank selects the default
	 */
	public ConfigFrame(String blogName)
	{
		this();
		fillBlogName(blogName);
	}

	/**
	 * Builds the input row (blog name, thread count, start button) and the log area.
	 *
	 * @param blogName blog name to pre-fill, lower-cased; {@code null} or blank falls
	 *                 back to {@link #DEFAULT_BLOG_NAME}
	 */
	public void fillBlogName(String blogName)
	{
		JLabel lb = new JLabel("");
		// TODO (from the original author): add click-to-clear behaviour here later.
		// Previously the argument was always overwritten by the hard-coded default;
		// now the default is only used when no name was supplied.
		String initialName = (blogName == null || "".equals(blogName.trim()))
				? DEFAULT_BLOG_NAME : blogName.trim();
		final JTextField tf = new JTextField(initialName.toLowerCase());
		tf.setColumns(6);

		JButton bt = new JButton("生成PDF");
		JLabel thLb = new JLabel("线程");
		final JTextField thTf = new JTextField();
		thTf.setText("3");
		thTf.setColumns(2);

		JPanel req = new JPanel();
		req.setLayout(new FlowLayout());
		req.add(lb);
		req.add(tf);
		req.add(thLb);
		req.add(thTf);
		req.add(bt);

		bt.addActionListener(new ActionListener() {
			public void actionPerformed(ActionEvent e) {
				// Validation touches Swing components and shows dialogs, so it runs
				// here on the event dispatch thread; only the slow work is off-loaded.
				if (!readSettings(tf, thTf)) {
					return;
				}
				new Thread(new Runnable() {
					public void run() {
						runDownload();
					}
				}).start();
			}
		});

		JScrollPane scroLog = new JScrollPane();
		scroLog.setPreferredSize(new Dimension(320, 220));
		area = new JTextArea(10, 30);
		area.setLineWrap(true);
		scroLog.setViewportView(area);
		container.add(req, BorderLayout.CENTER);
		container.add(scroLog, BorderLayout.SOUTH);
	}

	/**
	 * Validates both input fields and copies their values into {@code Message}.
	 * Must be called on the event dispatch thread.
	 *
	 * @param nameField   field holding the blog name
	 * @param threadField field holding the thread count
	 * @return {@code true} when the inputs are usable and the download may start
	 */
	private boolean readSettings(JTextField nameField, JTextField threadField)
	{
		String name = nameField.getText();
		System.out.println(name);
		if (name == null || "".equals(name.trim()))
		{
			JOptionPane.showMessageDialog(this, "请输入博客人姓名");
			return false;
		}
		Message.bolgName = name.trim();

		String threads = threadField.getText();
		if (threads == null || "".equals(threads.trim()))
		{
			// An empty thread count is not an error: fall back to a single thread.
			JOptionPane.showMessageDialog(this, "线程数未填,默认为1");
			Message.threadNum = 1;
			threadField.setText("1");
			return true;
		}
		try {
			// Trim before parsing so "3 " is accepted, matching the emptiness check above.
			Message.threadNum = Integer.parseInt(threads.trim());
		} catch (NumberFormatException notANumber) {
			JOptionPane.showMessageDialog(this, "请输入正确整形数字");
			return false;
		}
		return true;
	}

	/**
	 * Runs the whole pipeline for {@code Message.bolgName}: category folders,
	 * per-article HTML files, then one PDF per HTML file. Intended to be called
	 * from a worker thread.
	 */
	private void runDownload()
	{
		HtmlBuilder.frame = this;
		HtmlParser parser = new HtmlParser();
		parser.setFrame(this);
		HtmlBuilder builder = new HtmlBuilder();
		ProjController controller = new ProjController(parser, builder);

		controller.buildMenuFiles(Message.bolgName);
		controller.buildHtmlFiles();

		BuildSinglePdf pdf = new BuildSinglePdf();
		controller.buildSinglePdf(pdf);
	}

	/** @return the log text area, or {@code null} before {@link #fillBlogName(String)} ran */
	public JTextArea getArea() {
		return area;
	}

	/** @param area replacement log text area */
	public void setArea(JTextArea area) {
		this.area = area;
	}

	/** Opens the downloader window with the default blog name. */
	public static void main(String[] args) {
		ConfigFrame configFrame  = new ConfigFrame("");
		configFrame.setVisible(true);
	}
}



【2】 总的流程控制器,负责获取目录,建立目录,建立html,建立临时pdf,建立总的pdf

package com.blog.csdn.download;

import java.util.List;
import java.util.Map;

import com.blog.csdn.common.Message;
import com.blog.csdn.pdf.BuildSinglePdf;

/**
 * Drives the download pipeline step by step: category folders, article HTML
 * files, and one PDF per HTML file.
 *
 * @author chaigw
 */
public class ProjController {

	private HtmlParser htmlParser;

	private HtmlBuilder htmlBuilder;

	/** Creates a controller without collaborators; the build methods need them set. */
	public ProjController() {
	}

	/**
	 * @param htmlParser  fetches and parses the blog pages
	 * @param htmlBuilder creates the folders for the parsed categories
	 */
	public ProjController(HtmlParser htmlParser, HtmlBuilder htmlBuilder) {
		this.htmlParser = htmlParser;
		this.htmlBuilder = htmlBuilder;
	}

	/**
	 * Fetches the blog's main page, extracts its categories into
	 * {@code Message.menuMap} and creates one folder per category.
	 *
	 * @param blogName blog owner's name
	 */
	public void buildMenuFiles(String blogName) {
		// NOTE(review): the literal prefix is empty in this source; presumably the
		// blog host URL was meant to be prepended here - confirm.
		String allContent = htmlParser.parse("" + blogName);
		htmlParser.filtMainMenuContent(allContent);

		for (String menuKey : Message.menuMap.keySet()) {
			htmlBuilder.createFolder(Message.getPrexMenu(menuKey));
		}
	}

	/**
	 * Delegates to {@code HtmlParser.parseHtmls()}, which per category
	 * 1) parses the article titles, 2) creates the HTML file paths and
	 * 3) fills the HTML files.
	 */
	public void buildHtmlFiles() {
		htmlParser.parseHtmls();
	}

	/**
	 * Converts the HTML files of every entry below the blog's download folder.
	 * Does nothing (apart from a log line) when the folder listing is unavailable;
	 * previously that case ended in a {@code NullPointerException}.
	 *
	 * @param singlePdf converter invoked once per listed path
	 */
	public void buildSinglePdf(BuildSinglePdf singlePdf)
	{
		List<String> menus = Message.getMenus();
		if (menus == null)
		{
			System.out.println("未找到下载目录,跳过生成pdf");
			return;
		}
		for (String menuPath : menus) {
			singlePdf.buildSinglgPdf(menuPath);
		}
	}

}



【3】程序中用到的存储所有数据和工作操作方法的类

package com.blog.csdn.common;

import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Global state and small helpers shared by the downloader: the configured blog
 * name, the download locations, the parsed category/article maps, and the
 * {@code Mxxxx} / {@code Fxxxx} prefix scheme used to name folders and files.
 */
public class Message {

	/** Name of the blog owner currently being downloaded. */
	public static String bolgName = "";
	/** Thread count entered in the UI. */
	public static int threadNum = 1;

	/** Category prefix returned by the most recent {@link #getPrexMenu(String)} call. */
	public static String prexMenu ="";
	/** File prefix returned by the most recent {@link #getPrexHtml(String)} call. */
	public static String prexHtml = "";

	/**
	 * Download root directory (hard-coded).
	 */
	public static String downPathString = "D:/csdn_pdf";

	/** URL prefix under which the generated HTML files are reachable over HTTP. */
	public static String prexBuildPdfUrl = "http://localhost:8080/web-csdnblog";

	/**
	 * CSDN base URL.
	 */
	public static String baseUrl = "";
	/**
	 * key: category name
	 * value: URL of that category
	 */
	public static Map<String,String> menuMap = new LinkedHashMap<String,String>();

	/**
	 * key: article name
	 * value: URL of that article
	 */
	public static Map<String,String> alticalMap = new LinkedHashMap<String,String>();

	/**
	 * One-to-many relation between a category and its article names.
	 */
	public static Map<String, List<String>> menuAlticals = new LinkedHashMap<String,List<String>>();

	/**
	 * Extracts the category prefix (first 5 characters, e.g. {@code M0001}) and
	 * remembers it in {@link #prexMenu}.
	 *
	 * @param menuKey key starting with a category prefix
	 * @return the first five characters of {@code menuKey}
	 * @throws StringIndexOutOfBoundsException if the key is shorter than 5 characters
	 */
	public static String getPrexMenu(String menuKey)
	{
		prexMenu = menuKey.substring(0, 5);
		return prexMenu;
	}

	/**
	 * Extracts the category+file prefix (first 10 characters, e.g.
	 * {@code M0001F0001}) and remembers it in {@link #prexHtml}.
	 *
	 * @param htmlKey key starting with a category and file prefix
	 * @return the first ten characters of {@code htmlKey}
	 * @throws StringIndexOutOfBoundsException if the key is shorter than 10 characters
	 */
	public static String getPrexHtml(String htmlKey)
	{
		prexHtml = htmlKey.substring(0,10);
		return prexHtml;
	}

	/**
	 * @param index category index
	 * @return {@code "M"} followed by the index, zero-padded to four digits
	 */
	public static String buildPrexMenu(int index)
	{
		return buildPrex("M", index);
	}

	/**
	 * @param index article index
	 * @return {@code "F"} followed by the index, zero-padded to four digits
	 */
	public static String buildPrexHtml(int index)
	{
		return buildPrex("F", index);
	}

	/**
	 * Shared implementation of the two prefix builders: pads by value range, so
	 * indexes of 1000 and above are appended without padding.
	 */
	private static String buildPrex(String tag, int index)
	{
		String zeros;
		if (index < 10)
		{
			zeros = "000";
		}
		else if (index < 100)
		{
			zeros = "00";
		}
		else if (index < 1000)
		{
			zeros = "0";
		}
		else
		{
			zeros = "";
		}
		return tag + zeros + index;
	}

	/**
	 * Lists the absolute paths of all entries directly inside {@code currentPath}
	 * whose path ends with {@code .html}.
	 *
	 * @param currentPath directory to scan
	 * @return the matching paths; empty when the argument is {@code null}, is not a
	 *         directory, or cannot be listed
	 */
	public static List<String> getCurrentPathHtmls(String currentPath)
	{
		List<String> paths = new ArrayList<String>();
		if (currentPath == null)
		{
			return paths;
		}
		File[] files = new File(currentPath).listFiles(new FileFilter() {
			@Override
			public boolean accept(File pathname) {
				String temp = pathname.getAbsolutePath();
				// endsWith cannot fail on paths shorter than the suffix, unlike substring.
				if (temp.endsWith(".html"))
				{
					System.out.println(temp+"过滤路径");
					return true;
				}
				return false;
			}
		});

		// listFiles returns null for a missing or unreadable directory.
		if (files == null)
		{
			System.out.println("过滤的文件为null");
			return paths;
		}
		for (File file : files) {
			paths.add(file.getAbsolutePath());
		}
		return paths;
	}

	/**
	 * Lists the absolute paths of all entries directly inside the current blog's
	 * download folder ({@code downPathString/bolgName}).
	 *
	 * @return the entry paths; empty (never {@code null}) when the folder does not
	 *         exist or cannot be listed
	 */
	public static List<String> getMenus()
	{
		List<String> fileMenus = new ArrayList<String>();
		File[] menuFiles = new File(Message.downPathString+"/"+Message.bolgName).listFiles();
		if (menuFiles == null)
		{
			return fileMenus;
		}
		for (File menuFile : menuFiles) {
			fileMenus.add(menuFile.getAbsolutePath());
		}
		return fileMenus;
	}
}



【4】 从网上获取数据的类,用到了大量的正则表达式

package com.blog.csdn.download;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import b.b.b.c.c;

import com.blog.csdn.common.Message;
import com.blog.csdn.ui.ConfigFrame;
import com.itextpdf.text.pdf.PdfStructTreeController.returnType;

/**
 * Downloads blog pages over HTTP and cuts the interesting parts (category menu,
 * article lists, pagination links, article bodies) out of the raw HTML with
 * regular expressions. Results are stored in the static maps of {@code Message}
 * and written to disk as temporary HTML files.
 * <p>
 * The original author notes that this class is redundant in places and was left
 * un-refactored.
 *
 * @author chaigw
 */
public class HtmlParser {

	/** Article body: from the {@code article_content} div up to the {@code bdshare} div. */
	private static final Pattern ARTICLE_BODY = Pattern.compile("<div id=\"article_content[\\s\\S]*<div id=\"bdshare");
	/** Pagination bar of an article list page. */
	private static final Pattern PAGE_BAR = Pattern.compile("<div id=\"papelist\"[\\s\\S]*?<div class=\"clear\">");
	/** Anchor whose text starts with "next page". */
	private static final Pattern NEXT_PAGE_LINK = Pattern.compile("<a href=\"[\\s\\S]*?\">下一页");
	/** Anchor holding an article title inside the article list. */
	private static final Pattern ARTICLE_TITLE_LINK = Pattern.compile("<a href=[\\s\\S]*?>[\\s\\S]*?</a></span>");
	/** Everything from the {@code article_list} div to the end of the page. */
	private static final Pattern ARTICLE_LIST = Pattern.compile("<div id=\"article_list\"[\\s\\S]*");
	/** Category panel of the blog's main page. */
	private static final Pattern MAIN_MENU = Pattern.compile("<ul class=\"panel_body\">[\\s\\S]*</ul>[\\s\\S]*panel_Archive");
	/** Category URL inside the category panel. */
	private static final Pattern MENU_URL = Pattern.compile("http://blog[\\s\\S]*?/[0-9]+");
	/** Anchor text (category name) inside the category panel. */
	private static final Pattern MENU_NAME = Pattern.compile("\">+[\\s\\S]*?</a>");
	/** Whole article block including surrounding markup. */
	private static final Pattern ARTICLE_DETAILS = Pattern.compile("<div id=\"article_details\"[\\w\\W]*<!-- Baidu Button END --></div>");

	/** Number of articles already numbered in the category currently being paged through. */
	int currentNum = 0;
	HtmlBuilder htmlBuilder = new HtmlBuilder();

	/** Optional UI window whose log area receives progress lines; may be {@code null}. */
	private ConfigFrame frame;

	/**
	 * Downloads a page and returns its text with all line breaks removed (lines
	 * are concatenated as read; the regular expressions in this class rely on that).
	 *
	 * @param url    page to fetch
	 * @param encode charset name used to decode the response
	 * @return the page text, or {@code null} when the connection fails, the status
	 *         code is -1 or at least 400, or reading fails
	 */
	public String getHtmlContent(URL url, String encode) {
		StringBuffer contentBuffer = new StringBuffer();
		HttpURLConnection con = null;
		BufferedReader reader = null;
		try {
			con = (HttpURLConnection) url.openConnection();
			// Present ourselves as an old IE so the server returns the regular page.
			con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
			con.setConnectTimeout(60000);
			con.setReadTimeout(60000);

			int responseCode = con.getResponseCode();
			if (responseCode == -1) {
				System.out.println(url.toString() + " : connection is failure...");
				return null;
			}
			if (responseCode >= 400) {
				System.out.println("请求失败:get response code: " + responseCode);
				return null;
			}

			reader = new BufferedReader(new InputStreamReader(con.getInputStream(), encode));
			String line;
			while ((line = reader.readLine()) != null) {
				contentBuffer.append(line);
			}
			return contentBuffer.toString();
		} catch (IOException e) {
			e.printStackTrace();
			System.out.println("error: " + url.toString());
			// Previously the buffer was nulled here and then dereferenced.
			return null;
		} finally {
			closeQuietly(reader);
			// openConnection itself may have failed, leaving no connection to close.
			if (con != null) {
				con.disconnect();
			}
		}
	}

	/**
	 * String variant of {@link #getHtmlContent(URL, String)}. A URL without an
	 * {@code http://} or {@code https://} scheme gets {@code http://} prepended.
	 *
	 * @param url    page address, with or without scheme
	 * @param encode charset name used to decode the response
	 * @return the page text, or {@code null} when the address is {@code null} or
	 *         malformed, or the download fails
	 */
	public String getHtmlContent(String url, String encode) {
		if (url == null) {
			return null;
		}
		String lower = url.toLowerCase();
		if (!lower.startsWith("http://") && !lower.startsWith("https://")) {
			url = "http://" + url;
		}
		try {
			return getHtmlContent(new URL(url), encode);
		} catch (IOException e) {
			// MalformedURLException is an IOException.
			e.printStackTrace();
			return null;
		}
	}

	/** Manual smoke test: parses one hard-coded category without a UI window. */
	public static void main(String argsp[]) {
		HtmlParser parse = new HtmlParser();
		Message.bolgName = "cgwcgw_";
		Message.menuMap.put("M0001abc", "cgwcgw_/article/category/1474691");
		parse.parseHtmls();
	}

	/**
	 * Downloads a page as UTF-8.
	 *
	 * @param alitbaseAllUrl page address
	 * @return the page text, or {@code null} on failure
	 */
	public String parse(String alitbaseAllUrl) {
		return getHtmlContent(alitbaseAllUrl, "UTF-8");
	}

	/**
	 * For every category in {@code Message.menuMap}: walks all list pages to
	 * collect the articles, then writes the article HTML files of that category.
	 */
	public void parseHtmls() {
		for (Map.Entry<String, String> entry : Message.menuMap.entrySet()) {
			pagrationLoop(entry,"",true);
			// Article numbering restarts with each category.
			currentNum = 0;
			addHtmlsContent(entry);
		}
	}

	/**
	 * Downloads every article of the given category (matched by key prefix in
	 * {@code Message.alticalMap}) and writes its body to the temporary HTML file.
	 *
	 * @param menuEntry category entry from {@code Message.menuMap}
	 */
	public void addHtmlsContent(Map.Entry<String, String> menuEntry)
	{
		String menuPrefix = Message.getPrexMenu(menuEntry.getKey());
		for(Map.Entry<String, String> alticalEntry : Message.alticalMap.entrySet())
		{
			String articleKey = alticalEntry.getKey();
			if(!articleKey.startsWith(menuPrefix))
			{
				continue;
			}
			String htmlName = Message.getPrexHtml(articleKey)+".html";
			String target = Message.downPathString+"/"+Message.bolgName+"/"+Message.getPrexMenu(articleKey)+"/"+htmlName;
			BufferedOutputStream bufferStream = null;
			try {
				bufferStream = new BufferedOutputStream(new FileOutputStream(target));
				String content = getHtmlsContent(alticalEntry.getValue());
				// NOTE(review): bytes use the platform default charset although pages are
				// fetched as UTF-8; this may relate to the garbled PDF text - confirm which
				// charset the PDF step expects before changing it.
				byte[] bytes = content.getBytes();
				bufferStream.write(bytes,0,bytes.length);
				bufferStream.flush();
				report("写入临时文件:"+htmlName);
			} catch (FileNotFoundException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				// Also reached when the download throws, so the file handle never leaks.
				closeQuietly(bufferStream);
			}
		}
	}

	/** Placeholder; not implemented. */
	public void writeAlticalIo(String content)
	{

	}

	/**
	 * Downloads one article page and extracts its body.
	 *
	 * @param alticalUrl article path appended to {@code Message.baseUrl}
	 * @return the matched body markup, or an empty string when the page could not
	 *         be fetched or the pattern did not match
	 */
	public String getHtmlsContent(String alticalUrl)
	{
		String pageContent = parse(Message.baseUrl+"/"+alticalUrl);
		String content="";
		if(pageContent != null)
		{
			Matcher matcher = ARTICLE_BODY.matcher(pageContent);
			if(matcher.find())
			{
				content=matcher.group();
			}
		}
		if("".equals(content))
		{
			System.out.println("写入文章内容为空,匹配错误");
		}
		System.out.println(content);
		return content;
	}

	/**
	 * Processes one article-list page of a category and follows its "next page"
	 * link recursively via {@link #getPagration(Map.Entry, String)}.
	 *
	 * @param entry category entry (key: prefixed name, value: first page URL)
	 * @param url   path of a follow-up page, used when {@code flg} is {@code false}
	 * @param flg   {@code true} to load the category's first page from the entry value
	 */
	public void pagrationLoop(Map.Entry<String, String> entry, String url, boolean flg)
	{
		String pageUrl = flg? entry.getValue():Message.baseUrl+url;
		String singleMenuAltibases = parse(pageUrl);
		if(singleMenuAltibases == null)
		{
			// A failed download used to end in a NullPointerException inside the matcher.
			System.out.println("获取页面失败:"+pageUrl);
			return;
		}
		String articleListMarkup = filterSingleMenuAltibases(singleMenuAltibases);
		System.out.println(articleListMarkup);
		getSingleMenuAltibases(articleListMarkup, Message.getPrexMenu(entry.getKey()));
		getPagration(entry,singleMenuAltibases);
	}

	/**
	 * Looks for a "next page" link in the pagination bar of a list page and, when
	 * the link target starts with {@code /}, continues with that page.
	 *
	 * @param entry                            category being processed
	 * @param filtersingleMenuAltibasesContent full text of the current list page
	 */
	public void getPagration(Map.Entry<String, String> entry,String filtersingleMenuAltibasesContent)
	{
		String content = "";
		Matcher matcher = PAGE_BAR.matcher(filtersingleMenuAltibasesContent);
		if(matcher.find())
		{
			content = matcher.group();
		}
		// Compare by value; the reference comparison only worked by accident.
		if("".equals(content))
		{
			System.out.println("没有获取分页栏--说明只有一页");
			return;
		}
		matcher = NEXT_PAGE_LINK.matcher(content);
		if (!matcher.find()) {
			return;
		}
		// Strip the anchor text and everything up to the last href to leave the bare path.
		content=matcher.group();
		content=content.replaceAll("\">下一页", "");
		content=content.replaceAll("[\\s\\S]*<a href=\"", "");
		System.out.println(content);
		if(!content.startsWith("/"))
		{
			System.out.println("达到了分页的最后一页");
		}else
		{
			pagrationLoop(entry,content,false);
			System.out.println("此处有分页");
		}
	}

	/**
	 * Extracts article URLs and titles from the article-list markup of one page,
	 * registers them in {@code Message.alticalMap} under prefixed keys and creates
	 * an empty HTML file per article.
	 *
	 * @param filtersingleMenuAltibasesContent article-list markup of one page
	 * @param prexMenu                         category prefix, e.g. {@code M0001}
	 * @return always {@code null}
	 */
	public String getSingleMenuAltibases(String filtersingleMenuAltibasesContent, String prexMenu) {
		List<String> listKey = new ArrayList<String>();
		List<String> listValue = new ArrayList<String>();

		// Quote the blog name so it is always matched literally.
		Pattern urlPattern = Pattern.compile("/" + Pattern.quote(Message.bolgName) + "/article/details/[0-9]*");
		Matcher matcher = urlPattern.matcher(filtersingleMenuAltibasesContent);
		int occurrence = 0;
		while (matcher.find()) {
			String temp = matcher.group();
			System.out.println(temp);
			// Only every third hit is kept - presumably each article URL appears
			// three times in the list markup; verify against a live page.
			if (occurrence % 3 == 0) {
				listValue.add(temp);
			}
			occurrence++;
		}
		System.out.println(listValue.size());

		matcher = ARTICLE_TITLE_LINK.matcher(filtersingleMenuAltibasesContent);
		int indexInPage = 0;
		while (matcher.find()) {
			String temp = matcher.group();
			temp = temp.replaceAll("[\\s\\S]*</span>    <h3>        ", "");
			temp = temp.replaceAll("</a></span>", "");
			temp = temp.replaceAll("<span class[\\s\\S]*>", "").trim();
			// Numbering continues across the pages of one category via currentNum.
			listKey.add(prexMenu + Message.buildPrexHtml(indexInPage+currentNum) + temp);
			System.out.println(temp);
			indexInPage++;
		}
		currentNum += listKey.size();

		System.out.println(listKey.size());
		if (listKey.size() != listValue.size()) {
			System.out.println("文章路径url和文章数不匹配出错");
		}

		// Pair up only what both lists provide so a mismatch cannot run past the end.
		int pairs = Math.min(listKey.size(), listValue.size());
		for (int m = 0; m < pairs; m++) {
			Message.alticalMap.put(listKey.get(m), listValue.get(m));
			htmlBuilder.createHtmlFile(Message.getPrexHtml(listKey.get(m)));
		}
		System.out.println(Message.alticalMap);
		return null;
	}

	/** Placeholder; not implemented. */
	public void getCurrentPageNum()
	{
	}

	/**
	 * Cuts the article-list section out of a list page.
	 *
	 * @param singleMenuAltibases full page text; {@code null} yields an empty result
	 * @return the text from the {@code article_list} div to the end of the page,
	 *         or an empty string when absent
	 */
	public String filterSingleMenuAltibases(String singleMenuAltibases) {
		String content = "";
		if (singleMenuAltibases == null) {
			return content;
		}
		Matcher matcher = ARTICLE_LIST.matcher(singleMenuAltibases);
		while (matcher.find()) {
			content = matcher.group();
			System.out.println(content);
		}
		return content;
	}

	/**
	 * Extracts the category menu from the main page and stores it in
	 * {@code Message.menuMap}.
	 *
	 * @param allContent full text of the blog's main page, may be {@code null}
	 */
	public void filtMainMenuContent(String allContent) {
		String mainContent = getMainMenuContent(allContent);
		saveMenuContent(mainContent);
	}

	/**
	 * Cuts the category panel out of the main page.
	 *
	 * @param allContent full text of the blog's main page
	 * @return the matched panel markup; an empty string when nothing matched or
	 *         the page is {@code null} (reported as "blog owner does not exist")
	 */
	public String getMainMenuContent(String allContent) {
		String content = "";
		if (allContent == null) {
			// Explicit check instead of catching the NullPointerException of the matcher.
			report("博客人名字不存在");
			return content;
		}
		Matcher matcher = MAIN_MENU.matcher(allContent);
		if (matcher.find()) {
			content = matcher.group();
			System.out.println("test" + content);
		}
		return content;
	}

	/**
	 * Parses category URLs and names from the category panel and fills
	 * {@code Message.menuMap} (key: prefix + name, value: URL). Nothing is stored
	 * unless there is exactly one more name than URLs.
	 *
	 * @param mainContent category panel markup
	 */
	public void saveMenuContent(String mainContent) {
		List<String> listKey = new ArrayList<String>();
		List<String> listValue = new ArrayList<String>();

		Matcher matcher = MENU_URL.matcher(mainContent);
		while (matcher.find()) {
			String temp = matcher.group();
			System.out.println(temp);
			listKey.add(temp);
		}
		System.out.println(listKey.size());

		matcher = MENU_NAME.matcher(mainContent);
		while (matcher.find()) {
			String temp = matcher.group();
			temp = temp.replaceAll("[\\s\\S]*\">", "");
			temp = temp.replaceAll("</a>", "");
			System.out.println(temp);
			listValue.add(temp);
		}
		System.out.println(listValue.size());

		// There is one more name than URLs, so names are read starting at index 1.
		if (1 == listValue.size() - listKey.size()) {
			for (int i = 0; i < listKey.size(); i++) {
				Message.menuMap.put(Message.buildPrexMenu(i) + listValue.get(i + 1), listKey.get(i));
			}
			System.out.println(Message.menuMap);
		} else {
			System.out.println("目录匹配出现了异常");
		}

	}

	/** @return the UI window used for progress output, or {@code null} */
	public ConfigFrame getFrame() {
		return frame;
	}

	/** @param frame UI window whose log area should receive progress output */
	public void setFrame(ConfigFrame frame) {
		this.frame = frame;
	}

	/**
	 * Prints the article block of a page to standard output.
	 *
	 * @param allContent full page text
	 */
	public void filtAlticalContent(String allContent) {
		String content = getAlticalAllContent(allContent);
		System.out.println(content);
	}

	/**
	 * Extracts the complete article block (markup included) from a page.
	 *
	 * @param allContent full page text; {@code null} yields an empty result
	 * @return the matched block, or an empty string
	 */
	public String getAlticalAllContent(String allContent) {
		String content = "";
		if (allContent == null) {
			return content;
		}
		Matcher mt = ARTICLE_DETAILS.matcher(allContent);
		if (mt.find()) {
			content = mt.group();
		}
		return content;
	}

	/**
	 * Placeholder for extracting an article's table of contents; not implemented.
	 *
	 * @return always {@code null}
	 */
	public String getAlticalMenu(String message) {
		return null;
	}

	/** Prints a progress line and mirrors it to the UI log area when a window is attached. */
	private void report(String message) {
		System.out.println(message);
		if (frame != null) {
			frame.getArea().append(message + "\n");
		}
	}

	/** Closes a resource, ignoring {@code null} and logging (not propagating) close failures. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}



【5】 html文件生成类

package com.blog.csdn.download;

import java.awt.Frame;
import java.io.File;
import java.io.IOException;

import javax.swing.RootPaneContainer;

import com.blog.csdn.common.Message;
import com.blog.csdn.ui.ConfigFrame;

/**
 * Creates the folder and empty HTML file skeleton below the download root.
 * <p>
 * The original author's note: HTML has to be generated first and only then the
 * PDF, because there is no direct way to turn HTML code into a PDF.
 *
 * @author chaigw
 */
public class HtmlBuilder {

	/** Optional UI window whose log area receives progress lines; may be {@code null}. */
	public static ConfigFrame frame;

	static
	{
		// Download root.
		initPathFile(Message.downPathString);
		// Folder of the blog owner configured at class-load time.
		initPathFile(Message.downPathString+"/"+Message.bolgName);
	}


	/**
	 * Creates the folder of one category below the blog owner's folder.
	 *
	 * @param menu category folder name; {@code null} or blank is reported and ignored
	 */
	public void createFolder(String menu)
	{
		if(null == menu || "".equals(menu.trim()))
		{
			System.out.println("目录为空--有异常");
			return;
		}
		initPathFile(Message.downPathString+"/"+Message.bolgName+"/"+menu);
	}

	/**
	 * Creates the empty HTML file of one article inside its category folder.
	 *
	 * @param htmlFile file name without extension; its first five characters name
	 *                 the category folder
	 */
	public void createHtmlFile(String htmlFile)
	{
		initPathHtml(Message.downPathString+"/"+Message.bolgName+"/"+htmlFile.substring(0,5)+"/"+htmlFile+".html");
	}

	/**
	 * Creates an empty file at {@code path} unless it already exists, creating
	 * missing parent folders first.
	 *
	 * @param path file to create
	 */
	public void initPathHtml(String path)
	{
		File target = new File(path);
		if(target.exists())
		{
			return;
		}
		// createNewFile fails when the parent folder is missing, so create it first.
		File parent = target.getParentFile();
		if(parent != null && !parent.exists())
		{
			parent.mkdirs();
		}
		try {
			// Report only when the file was really created.
			if(target.createNewFile())
			{
				System.out.println("构建文件夹路径"+path);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates the folder {@code path} (including parents) unless it already exists
	 * and reports the outcome on standard output and, when attached, the UI log.
	 *
	 * @param path folder to create
	 */
	public static  void initPathFile(String path)
	{
		File folder = new File(path);
		if(folder.exists())
		{
			return;
		}
		if(!folder.mkdirs())
		{
			// Do not claim success when the folder could not be created.
			System.out.println("构建文件夹失败"+path);
			return;
		}
		System.out.println("构建文件夹路径"+path);
		if(frame!=null)
		{
			frame.getArea().append("构建目录路径"+path+"\n");
		}
	}

	/** Manual check: an empty category name must be rejected. */
	public static void main(String[] args) {
		HtmlBuilder builder = new HtmlBuilder();
		builder.createFolder("");
	}
}



【6】 pdf生成临时的单独文件的类

package com.blog.csdn.pdf;

import java.awt.Insets;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;
import java.util.List;

import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;
import org.zefer.pd4ml.tools.PD4Browser.Rule;

import com.blog.csdn.common.Message;

/**
 * 生成单个的pdf用pd4ml
 * 
 * @author chaigw
 */
public class BuildSinglePdf {
	protected int topValue = 10;
	protected int leftValue = 20;
	protected int rightValue = 10;
	protected int bottomValue = 10;
	protected int userSpaceWidth = 1300;

	public static void main(String[] args) {

	}

	public void buildSinglgPdf(String currentPath) {
		try {
			List<String> paths=Message.getCurrentPathHtmls(currentPath);
			for (int i = 0; i < paths.size(); i++) {
				BuildSinglePdf jt = new BuildSinglePdf();
				// jt.doConversion("http://pd4ml.com/sample.htm", "c:/pd4ml.pdf");
				// jt.doConversion("file:///d:/csdn_pdf/web-csdnblog/estelle_belle/M0000/M0000F0000.html",
				// "c:/pd4ml.pdf");
				String tempPath = paths.get(i);
				tempPath = tempPath.replace("\\", "/");
				String url = Message.prexBuildPdfUrl +currentPath.replace("D:\\csdn_pdf\\", "/")+"\\"+tempPath.substring(tempPath.lastIndexOf("/")+1,tempPath.length());
				url = url.replace("\\","/");
				String output = currentPath+tempPath.substring(tempPath.lastIndexOf("/"),tempPath.length()).replace("/", "\\").replace("html", "pdf");
//				output = output.replace("\\", "/");
				jt.doConversion(url,output);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	public void doConversion(String url, String outputPath) throws InvalidParameterException, MalformedURLException, IOException {
		File output = new File(outputPath);
		java.io.FileOutputStream fos = new java.io.FileOutputStream(output);

		PD4ML pd4ml = new PD4ML();

		pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of
											// "virtual web browser"

		// choose target paper format and "rotate" it to landscape orientation
		pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));

		// define PDF page margins
		pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));

		// source HTML document also may have margins, could be suppressed this
		// way
		// (PD4ML *Pro* feature):
		pd4ml.addStyle("BODY {margin: 0}", true);

		// If built-in basic PDF fonts are not sufficient or
		// if you need to output non-Latin texts,
		// TTF embedding feature should help (PD4ML *Pro*)
		pd4ml.useTTF("c:/windows/fonts", true);

		pd4ml.render(new URL(url), fos); // actual document conversion from URL
											// to file
		fos.close();

		System.out.println(outputPath + "\ndone.");
	}
}



【7】能够生成带目录的pdf的类,参见本博客中专门介绍用poi生成目录的那篇文章,这块还没有写


下面还缺少把pdf合并的代码,时间有限,先写这么多吧