/**
 * Compact an HTML fragment and append closing tags for elements left open.
 *
 * The markup is split into tag segments and text segments:
 *  - tags, declarations and comments are copied through unchanged;
 *  - a closing tag is kept only when it matches the innermost open element,
 *    any other closing tag is dropped;
 *  - text has every whitespace character removed, except while an enclosing
 *    element is pre, code, script or style, where it is kept verbatim;
 *  - elements still open at the end of the input are closed innermost-first.
 *
 * Tag detection is purely regex based, so a "<" ... ">" sequence inside a
 * script or style body is treated like a tag.
 *
 * @param string $html HTML markup to process.
 * @return string The compacted markup with the missing closing tags appended.
 */
function subHtml($html){
    $segments = preg_split("~(<[^>]+?>)~si", $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    if ($segments === false) {
        // PCRE failed (e.g. backtrack limit): hand the input back untouched.
        return $html;
    }

    $output = array();
    $stack = array();
    // Void elements never take a closing tag, so they must not be pushed;
    // otherwise they would block the next real closing tag from matching.
    $void_tags = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );
    // Elements whose text content must keep its whitespace.
    $preserve_tags = array('pre', 'code', 'script', 'style');
    // Number of currently open elements that are listed in $preserve_tags.
    $preserve_depth = 0;

    foreach ($segments as $seg) {
        // Whitespace-only segments are noise, unless whitespace is significant here.
        if ($preserve_depth === 0 && trim($seg) === '') {
            continue;
        }

        if (preg_match("~<([a-z0-9]+)[^>]*?/>~si", $seg)) {
            // Self-closing tag <.../>: nothing to track.
            $output[] = $seg;
        } elseif (preg_match("~</([a-z0-9]+)[^>]*?>~si", $seg, $match)) {
            // Closing tag </...>. The capture is [a-z0-9]+ only, so
            // lower-casing is all the normalisation it needs.
            $tag = strtolower($match[1]);
            if (end($stack) === $tag) {
                array_pop($stack);
                if (in_array($tag, $preserve_tags, true)) {
                    $preserve_depth--;
                }
                $output[] = $seg;
            }
        } elseif (preg_match("~<([a-z0-9]+)[^>]*?>~si", $seg, $match)) {
            // Opening tag <...>.
            $tag = strtolower($match[1]);
            if (!in_array($tag, $void_tags, true)) {
                $stack[] = $tag;
                if (in_array($tag, $preserve_tags, true)) {
                    $preserve_depth++;
                }
            }
            $output[] = $seg;
        } elseif (preg_match("~<![^>]*>~", $seg)) {
            // Document declarations and comments; comments are kept because
            // they may be meaningful (e.g. IE conditional comments).
            $output[] = $seg;
        } else {
            // Text: strip whitespace unless an enclosing element forbids it.
            $output[] = $preserve_depth > 0 ? $seg : preg_replace('!\s!', '', $seg);
        }
    }

    // Close whatever is still open, innermost element first.
    while (!empty($stack)) {
        $output[] = '</' . array_pop($stack) . '>';
    }

    return implode('', $output);
}

/**
 * Normalise a tag name: lower-case it and strip surrounding whitespace.
 *
 * @param string $tag Raw tag name.
 * @return string The normalised tag name.
 */
function format_tag($tag){
    return trim(strtolower($tag));
}