diff --git a/Src/包括/dedehtml2.class.php b/Src/包括/dedehtml2.class.php deleted file mode 100755 index 06b47518..00000000 --- a/Src/包括/dedehtml2.class.php +++ /dev/null @@ -1,681 +0,0 @@ -CAtt = ''; - $this->SourceHtml = ''; - $this->Title = ''; - $this->Medias = Array(); - $this->MediaInfos = Array(); - $this->Links = Array(); - $this->BaseUrl = ''; - $this->BaseUrlPath = ''; - $this->Scheme = 'http://'; - $this->HomeUrl = ''; - $this->IsHead = false; - $this->ImgHeight = 30; - $this->ImgWidth = 50; - $this->GetLinkType = 'link'; - } - - function DedeHtml2() - { - $this->__construct(); - } - - /** - * 设置HTML的内容和来源网址 - * - * @access public - * @param string $html html资源 - * @param string $url 地址 - * @param string $linktype 连接类型 - * @return void - */ - function SetSource(&$html, $url = '', $linktype='') - { - $this->__construct(); - $this->CAtt = new DedeAttribute2(); - $url = trim($url); - $this->SourceHtml = $html; - $this->BaseUrl = $url; - //判断文档相对于当前的路径 - $urls = @parse_url($url); - $this->Scheme = $urls['scheme'] . '://'; - $this->HomeUrl = $urls['host']; - $this->BaseUrlPath = $this->HomeUrl.$urls['path']; - $this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/","/",$this->BaseUrlPath); - $this->BaseUrlPath = preg_replace("/\/$/",'',$this->BaseUrlPath); - if($linktype!='') - { - $this->GetLinkType = $linktype; - } - if($html != '') - { - $this->Analyser(); - } - } - - /** - * 解析HTML - * - * @access private - * @return void - */ - function Analyser() - { - $cAtt = new DedeAttribute2(); - $cAtt->IsTagName = false; - $c = ''; - $i = 0; - $startPos = 0; - $endPos = 0; - $wt = 0; - $ht = 0; - $scriptdd = 0; - $attStr = ''; - $tmpValue = ''; - $tmpValue2 = ''; - $tagName = ''; - $hashead = 0; - $slen = strlen($this->SourceHtml); - if($this->GetLinkType=='link' || $this->GetLinkType=='') - { - $needTags = array('a'); - } - if($this->GetLinkType=='media') - { - $needTags = array('img','embed','a'); - $this->IsHead = true; - } - $tagbreaks = array(' ','<','>',"\r","\n","\t"); - for(;isset($this->SourceHtml[$i]);$i++) - { - if($this->SourceHtml[$i]=='<') - { - $tagName = ''; - $j = 0; - for($i=$i+1; isset($this->SourceHtml[$i]); $i++) - { - if($j>10) - { - break; - } - $j++; - if( in_array($this->SourceHtml[$i],$tagbreaks) ) - { - break; - } - else - { - $tagName .= $this->SourceHtml[$i]; - } - } - $tagName = strtolower($tagName); - - //标记为注解 - if($tagName=='!--') - { - $endPos = strpos($this->SourceHtml,'-->',$i); - if($endPos !== false) - { - $i=$endPos+3; - } - continue; - } - - //标记在指定集合内 - else if( in_array($tagName,$needTags) ) - { - $startPos = $i; - $endPos = strpos($this->SourceHtml,'>',$i+1); - if($endPos===false) - { - break; - } - $attStr = substr($this->SourceHtml,$i+1,$endPos-$startPos-1); - $cAtt->SetSource($attStr); - if($tagName=='img') - { - $this->InsertMedia($cAtt->GetAtt('src'),'img'); - } - else if($tagName=='embed') - { - $rurl = $this->InsertMedia($cAtt->GetAtt('src'),'embed'); - if($rurl != '') - { - $this->MediaInfos[$rurl][0] = $cAtt->GetAtt('width'); - $this->MediaInfos[$rurl][1] = $cAtt->GetAtt('height'); - } - } - else if($tagName=='a') - { - $this->InsertLink($this->FillUrl($cAtt->GetAtt('href')),$this->GetInnerText($i,'a')); - } - } - else - { - continue; - } - $i--; - }//End if char - - }//End for - - if($this->Title == '') - { - $this->Title = $this->BaseUrl; - } - } - - /** - * 重置资源 - * - * @access private - * @return void - */ - function Clear() - { - $this->CAtt = ''; - $this->SourceHtml = ''; - $this->Title = ''; - $this->Links = ''; - $this->Medias = ''; - $this->BaseUrl = ''; - $this->BaseUrlPath = ''; - } - - /** - * 分析链接 - * - * @access public - * @param string $url 地址 - * @param string $mtype 媒体类型 - * @return string - */ - function InsertMedia($url, $mtype) - { - if( preg_match("/^(javascript:|#|'|\")/", $url) ) - { - return ''; - } - if($url == '') - { - return ''; - } - $this->Medias[$url]=$mtype; - return $url; - } - - /** - * 分析链接 - * - * @access public - * @param string $url 地址 - * @param string $atitle 文档 - * @return string - */ - function InsertLink($url, $atitle) - { - if( preg_match("/^(javascript:|#|'|\")/", $url) ) - { - return ''; - } - if($url == '') - { - return ''; - } - if( preg_match('/^img:/', $atitle) ) - { - list($aimg, $atitle) = explode(':txt:', $atitle); - if(!isset($this->Links[$url])) - { - if($atitle != '') - { - $this->Links[$url]['title'] = cn_substr($atitle,50); - } - else - { - $this->Links[$url]['title'] = preg_replace('/img:/', '', $aimg); - } - $this->Links[$url]['link'] = $url; - } - $this->Links[$url]['image'] = preg_replace('/img:/', '', $aimg); - $this->InsertMedia($this->Links[$url]['image'], 'img'); - } - else - { - if(!isset($this->Links[$url])) - { - $this->Links[$url]['image'] = ''; - $this->Links[$url]['title'] = $atitle; - $this->Links[$url]['link'] = $url; - } - else - { - if(strlen($this->Links[$url]['title']) < strlen($atitle)) $this->Links[$url]['title'] = $atitle; - } - } - return $url; - } - - /** - * 分析content-type中的字符类型 - * - * @access public - * @param string $att 属性字符串 - * @return string - */ - function ParCharSet($att) - { - $startdd=0; - $taglen=0; - $startdd = strpos($att,'='); - if($startdd===false) - { - return ''; - } - else - { - $taglen = strlen($att)-$startdd-1; - if($taglen<=0) - { - return ''; - } - return trim(substr($att, $startdd+1, $taglen)); - } - } - - /** - * 补全相对网址 - * - * @access public - * @param string $surl 地址 - * @return string - */ - function FillUrl($surl) - { - $i = $pathStep = 0; - $dstr = $pstr = $okurl = ''; - - $surl = trim($surl); - if($surl == '') - { - return ''; - } - $pos = strpos($surl,'#'); - if($pos>0) - { - $surl = substr($surl,0,$pos); - } - if($surl[0]=='/') - { - $okurl = $this->HomeUrl.'/'.$surl; - } - else if($surl[0]=='.') - { - if(!isset($surl[2])) - { - return ''; - } - else if($surl[0]=='/') - { - $okurl = $this->BaseUrlPath."/".substr($surl,2,strlen($surl)-2); - } - else - { - $urls = explode('/',$surl); - foreach($urls as $u) - { - if($u=='..') - { - $pathStep++; - } - else if($iBaseUrlPath); - if(count($urls) <= $pathStep) - { - return ''; - } - else - { - $pstr = ''; - for($i=0;$iBaseUrlPath.'/'.$surl; - } - else if( strtolower(substr($surl,0,7))=='http://' ) - { - $okurl = preg_replace('/^http:\/\//i', '', $surl); - } - else if( strtolower(substr($surl,0,8))=='https://' ) - { - $okurl = preg_replace('/^https:\/\//i', '', $surl); - } - else - { - $okurl = $this->BaseUrlPath.'/'.$surl; - } - } - $okurl = preg_replace('/\/{1,}/i', '/', $okurl); - return $this->Scheme . $okurl; - } - - /** - * 获得和下一个标记之间的文本内容 - * - * @access public - * @param string $pos 位置地址 - * @param string $tagname 标签名称 - * @return string - */ - function GetInnerText(&$pos,$tagname) - { - $startPos=0; - $endPos=0; - $textLen=0; - $str = ''; - $startPos = strpos($this->SourceHtml,'>',$pos); - - if($tagname=='title') - { - $endPos = strpos($this->SourceHtml,'<',$startPos); - } - else - { - $endPos1 = strpos($this->SourceHtml,'SourceHtml,' $startPos) - { - $textLen = $endPos-$startPos; - $str = substr($this->SourceHtml,$startPos+1,$textLen-1); - } - $pos = $startPos + $textLen + strlen(" \r\n\t]{1,}/isU",$str,$imgs); - if(isset($imgs[2][0])) - { - $txt = trim(Html2Text($str)); - $imgs[2][0] = preg_replace("/[\"']/",'',$imgs[2][0]); - return "img:".$this->FillUrl($imgs[2][0]).':txt:'.$txt; - } - else - { - $str = strip_tags($str); - //$str = preg_replace('/<\/(.*)$/i', '', $str); - //$str = trim(preg_replace('/^(.*)>/i','',$str)); - return $str; - } - } - } -}//End class - -/******************************* -//属性解析器 -function c____DedeAttribute2(); -********************************/ -class DedeAttribute2 -{ - var $SourceString = ''; - var $SourceMaxSize = 1024; - var $CharToLow = FALSE; //属性值是否不分大小写(属性名统一为小写) - var $IsTagName = TRUE; //是否解析标记名称 - var $Count = -1; - var $Items = array(); //属性元素的集合 - - //设置属性解析器源字符串 - function SetSource($str = '') - { - $this->Count = -1; - $this->Items =array(); - $strLen = 0; - $this->SourceString = trim(preg_replace("/[ \t\r\n]{1,}/"," ",$str)); - $strLen = strlen($this->SourceString); - $this->SourceString .= " "; //增加一个空格结尾,以方便处理没有属性的标记 - if($strLen>0&&$strLen<=$this->SourceMaxSize) - { - $this->PrivateAttParse(); - } - } - - //获得某个属性 - function GetAtt($str) - { - if($str == '') - { - return ''; - } - $str = strtolower($str); - if(isset($this->Items[$str])) - { - return $this->Items[$str]; - } - else - { - return ''; - } - } - - //判断属性是否存在 - function IsAtt($str) - { - if($str == '') - { - return false; - } - $str = strtolower($str); - if(isset($this->Items[$str])) - { - return true; - } - else - { - return false; - } - } - - //获得标记名称 - function GetTagName() - { - return $this->GetAtt("tagname"); - } - - // 获得属性个数 - function GetCount() - { - return $this->Count+1; - } - - //解析属性(仅给SetSource调用) - function PrivateAttParse() - { - $d = ''; - $tmpatt = ''; - $tmpvalue = ''; - $startdd = -1; - $ddtag = ''; - $strLen = strlen($this->SourceString); - $j = 0; - - //这里是获得标记的名称 - if($this->IsTagName) - { - //如果属性是注解,不再解析里面的内容,直接返回 - if(isset($this->SourceString[2])) - { - if($this->SourceString[0].$this->SourceString[1].$this->SourceString[2]=='!--') - { - $this->Items['tagname'] = '!--'; - return ; - } - } - for($i=0;$i<$strLen;$i++) - { - $d = $this->SourceString[$i]; - $j++; - if(preg_match("/[ '\"\r\n\t]/i", $d)) - { - $this->Count++; - $this->Items["tagname"]=strtolower(trim($tmpvalue)); - $tmpvalue = ''; break; - } - else - { - $tmpvalue .= $d; - } - } - if($j>0) - { - $j = $j-1; - } - } - - //遍历源字符串,获得各属性 - for($i=$j;$i<$strLen;$i++) - { - $d = $this->SourceString[$i]; - //获得属性的键 - if($startdd==-1) - { - if($d!='=') - { - $tmpatt .= $d; - } - else - { - $tmpatt = strtolower(trim($tmpatt)); - $startdd=0; - } - } - - //检测属性值是用什么包围的,允许使用 '' '' 或空白 - else if($startdd==0) - { - switch($d) - { - case ' ': - continue; - break; - case '\'': - $ddtag='\''; - $startdd=1; - break; - case '"': - $ddtag='"'; - $startdd=1; - break; - default: - $tmpvalue.=$d; - $ddtag=' '; - $startdd=1; - break; - } - } - - //获得属性的值 - else if($startdd==1) - { - if($d==$ddtag) - { - $this->Count++; - if($this->CharToLow) - { - $this->Items[$tmpatt] = strtolower(trim($tmpvalue)); - } - else - { - $this->Items[$tmpatt] = trim($tmpvalue); - } - $tmpatt = ''; - $tmpvalue = ''; - $startdd=-1; - } - else - { - $tmpvalue.=$d; - } - } - }//End for - - //处理没有值的属性(必须放在结尾才有效)如:"input type=radio name=t1 value=aaa checked" - if($tmpatt != '') - { - $this->Items[$tmpatt] = ''; - } - }//End Function PrivateAttParse - -}//End Class DedeAttribute2 - -?>