import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.junit.Assert;

/**
 * Fetches a web page and reduces it to text by removing markup with regular expressions.
 *
 * <p>All patterns are compiled once as constants; {@link Pattern} instances are immutable
 * and can be reused across calls.
 */
public class MatcherTest {

    /** Address of the page fetched by {@link #Html()}. */
    private static final String PAGE_URL = "http://www.funshion.com";

    /** A complete {@code <script ...> ... </script>} element, including its content. */
    private static final Pattern SCRIPT_BLOCK = Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    /** A complete {@code <style ...> ... </style>} element, including its content. */
    private static final Pattern STYLE_BLOCK = Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    // The patterns below contain no letters, so no case-insensitive flag is needed.

    /** Any remaining tag: a {@code <}, one or more characters other than {@code >}, then {@code >}. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /**
     * A {@code /} followed by one or more characters other than {@code >}, then {@code >}.
     * NOTE(review): presumably meant for tag fragments that lost their opening {@code <};
     * it also matches ordinary text lying between a slash and a later {@code >}.
     */
    private static final Pattern SLASH_TO_ANGLE = Pattern.compile("/[^>]+>");

    /** Character-entity-like sequences: {@code &}, one or more non-{@code ;} characters, then {@code ;}. */
    private static final Pattern ENTITY = Pattern.compile("\\&[^;]+;");

    /** One or more consecutive space characters. */
    private static final Pattern SPACES = Pattern.compile(" +");

    /** One or more consecutive tab characters. */
    private static final Pattern TABS = Pattern.compile("\t+");

    /** One or more consecutive line-feed characters. */
    private static final Pattern LINE_FEEDS = Pattern.compile("\n+");

    /**
     * Requests {@link #PAGE_URL} through {@code HttpRequest}, strips the markup from the
     * response, prints the resulting text to standard output and returns it.
     *
     * @return the cleaned-up text; an empty string when the request yields {@code null}
     */
    public String Html() {
        HttpRequest hq = new HttpRequest();
        String htmlStr = hq.Request(PAGE_URL); // string that still contains the HTML tags

        String text = stripMarkup(htmlStr);
        System.out.println(text);
        return text;
    }

    /**
     * Removes scripts, styles, tags, entities and whitespace runs from {@code html}.
     *
     * <p>The replacements run in a fixed order because later steps rely on earlier ones:
     * script and style elements must go before the generic tag pattern, otherwise their
     * content would be left behind as text.
     *
     * @param html raw page source; {@code null} is treated as an empty page
     * @return the text left after all replacements, never {@code null}
     */
    private static String stripMarkup(String html) {
        if (html == null) {
            // Pattern.matcher(null) would throw; report "no text" instead.
            return "";
        }

        String text = html;
        text = SCRIPT_BLOCK.matcher(text).replaceAll(" ");
        text = STYLE_BLOCK.matcher(text).replaceAll(" ");
        text = ANY_TAG.matcher(text).replaceAll(" ");
        text = SLASH_TO_ANGLE.matcher(text).replaceAll(" ");
        text = ENTITY.matcher(text).replaceAll(" ");

        // NOTE(review): this deletes every space (including the placeholders inserted
        // above) rather than collapsing runs to a single space -- confirm this is intended.
        text = SPACES.matcher(text).replaceAll("");

        // Each run of tabs becomes one space.
        text = TABS.matcher(text).replaceAll(" ");

        // Line feeds are dropped entirely. NOTE(review): carriage returns are left in place.
        text = LINE_FEEDS.matcher(text).replaceAll("");

        return text;
    }

    /**
     * Runs {@link #Html()} once, which prints the extracted text.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        MatcherTest hl = new MatcherTest();
        hl.Html();
    }
}