100) {
            break;
        }
        // Split the header line at the first ":" into a key and a value.
        $hkey = "";
        $hvalue = "";
        $v = 0;
        for ($i = 0; $i < strlen($line); $i++) {
            if ($v == 1) {
                $hvalue .= $line[$i];
            }
            if ($line[$i] == ":") {
                $v = 1;
            }
            if ($v == 0) {
                $hkey .= $line[$i];
            }
        }
        $hkey = trim($hkey);
        if ($hkey != "") {
            $m_httphead[strtolower($hkey)] = trim($hvalue);
        }
    }
    // Analyse the response status.
    // NOTE(review): the result of the recursive call below is discarded and
    // execution falls through to the 2xx check, which fails for a 3xx status.
    // The head of this function is outside this chunk -- confirm before changing.
    if (preg_match("/^3/", $m_httphead["http-state"])) {
        if (isset($m_httphead["location"]) && $JumpCount < 3) {
            $JumpCount++;
            DownImageKeep($gurl, $rfurl, $filename, $gcookie, $JumpCount);
        } else {
            return FALSE;
        }
    }
    if (!preg_match("/^2/", $m_httphead["http-state"])) {
        return FALSE;
    }
    if (!isset($m_httphead)) {
        return FALSE;
    }
    $contentLength = $m_httphead['content-length'];
    // Save the file
    $fp = fopen($filename, "w") or die("写入文件:{$filename} 失败!");
    $i = 0;
    $okdata = "";
    $starttime = time();
    while (!feof($m_fp)) {
        $okdata .= fgetc($m_fp);
        $i++;
        // Stop on timeout
        if (time() - $starttime > $maxtime) {
            break;
        }
        // Stop once the announced size has been read
        if ($i >= $contentLength) {
            break;
        }
    }
    if ($okdata != "") {
        fwrite($fp, $okdata);
    }
    fclose($fp);
    if ($okdata == "") {
        @unlink($filename);
        fclose($m_fp);
        return FALSE;
    }
    fclose($m_fp);
    return TRUE;
}

/**
 *  Get the cookie header line returned by a page
 *
 *  The result is cached in the global $gcookie and reused while the same
 *  URL (global $lastRfurl) is requested again.
 *
 * @access    public
 * @param     string  $gurl  address to request
 * @return    string  the matching header line, or '' when none was found
 */
function RefurlCookie($gurl)
{
    global $gcookie, $lastRfurl;
    $gurl = trim($gurl);
    if (!empty($gcookie) && $lastRfurl == $gurl) {
        return $gcookie;
    }
    $lastRfurl = $gurl;
    if ($gurl == '') {
        return '';
    }

    $urlinfos = GetHostInfo($gurl);
    $ghost = $urlinfos['host'];
    $gquery = $urlinfos['query'];
    $sessionQuery = "GET $gquery HTTP/1.1\r\n";
    $sessionQuery .= "Host: $ghost\r\n";
    $sessionQuery .= "Accept: */*\r\n";
    $sessionQuery .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
    $sessionQuery .= "Connection: Close\r\n\r\n";
    $errno = "";
    $errstr = "";
    $m_fp = fsockopen($ghost, 80, $errno, $errstr, 10) or die($ghost . '
');
    fwrite($m_fp, $sessionQuery);

    // Read the response headers; stop at the blank line that ends them or
    // after 100 lines, whichever comes first.
    $lnum = 0;
    $gcookie = "";
    while (!feof($m_fp)) {
        $line = trim((string) fgets($m_fp, 256));
        // The counter was never advanced before, which made the 100-line cap dead code.
        $lnum++;
        if ($line == "" || $lnum > 100) {
            break;
        }
        // NOTE(review): only lines starting with "cookie" match; a
        // "Set-Cookie:" response header does not -- confirm the intent.
        if (preg_match("/^cookie/i", $line)) {
            $gcookie = $line;
            break;
        }
    }
    fclose($m_fp);
    return $gcookie;
}

/**
 *  Split an address into its host and query (path) parts
 *
 * @access    public
 * @param     string  $gurl  address, with or without a leading http://
 * @return    array   array('host' => ..., 'query' => ...); query always starts with "/"
 */
function GetHostInfo($gurl)
{
    $gurl = preg_replace("/^http:\/\//i", "", trim($gurl));
    $garr = array();
    $garr['host'] = preg_replace("/\/(.*)$/i", "", $gurl);
    // Everything from the first "/" onwards is the query. An address without
    // any path previously produced "/<host>" here instead of "/".
    $slashPos = strpos($gurl, '/');
    $garr['query'] = ($slashPos === false) ? '/' : substr($gurl, $slashPos);
    return $garr;
}

/**
 *  Convert the images found in HTML into DEDE image tags
 *
 * @access    public
 * @param     string  $body  article content (read only, not modified)
 * @return    string  a pagestyle/comments header followed by one {dede:img} tag per image
 */
function TurnImageTag(&$body)
{
    global $cfg_album_width, $cfg_ddimg_width;
    if (empty($cfg_album_width)) {
        $cfg_album_width = 800;
    }
    if (empty($cfg_ddimg_width)) {
        $cfg_ddimg_width = 150;
    }

    // Both variables were used without being defined; initialise them to the
    // values they implicitly had so the generated markup stays the same.
    $ttx = '';
    $litpicname = '';

    // Group 2 holds a quoted src value, group 3 an unquoted one.
    $patten = "/<\\s*img\\s.*?src\\s*=\\s*([\"\\'])?(?(1)(.*?)\\1|([^\\s\\>\"\\']+))/isx";
    preg_match_all($patten, $body, $images);
    $returnArray1 = isset($images[2]) ? $images[2] : array();
    $returnArray2 = isset($images[3]) ? $images[3] : array();
    foreach ($returnArray1 as $key => $value) {
        $src = $value ? $value : $returnArray2[$key];
        $ttx .= "{dede:img ddimg='$litpicname' text='图 " . ($key + 1) . "'}" . $src . "{/dede:img}" . "\r\n";
    }
    $ttx = "\r\n{dede:pagestyle maxwidth='{$cfg_album_width}' ddmaxwidth='{$cfg_ddimg_width}' row='3' col='3' value='2'/}\r\n{dede:comments}图集类型会采集时生成此配置是正常的,不过如果后面没有跟着img标记则表示规则无效{/dede:comments}\r\n" . $ttx;
    return $ttx;
}

/**
 *  Convert the links found in HTML into DEDE link tags
 *
 * @access    public
 * @param     string  $body  article content (read only, not modified)
 * @return    string  one {dede:link} tag per anchor found
 */
function TurnLinkTag(&$body)
{
    $ttx = '';
    $handid = '服务器';
    // Group 1 = href value, group 2 = remaining attributes, group 3 = link text.
    // NOTE(review): the pattern in the reviewed text was truncated (unbalanced
    // parenthesis); this form is reconstructed from how the groups are used below.
    preg_match_all("/<a href=['\"](.+?)['\"]([^>]+?)>(.+?)<\/a>/is", $body, $match);
    if (empty($match[1]) || !is_array($match[1])) {
        return $ttx;
    }
    for ($i = 0; isset($match[1][$i]); $i++) {
        $servername = (isset($match[3][$i]) ? str_replace("'", "`", $match[3][$i]) : $handid . ($i + 1));
        // Fall back to a generated name when the text contains markup or is too long.
        if (preg_match("/[<>]/", $servername) || strlen($servername) > 40) {
            $servername = $handid . ($i + 1);
        }
        $ttx .= "{dede:link text='$servername'} {$match[1][$i]} {/dede:link}\r\n";
    }
    return $ttx;
}

/**
 *  Remove the XML CDATA wrapper from a string
 *
 * @access    public
 * @param     string  $str  text that may be wrapped in a CDATA section
 * @return    string  the text without the CDATA opening and closing markers
 */
function RpCdata($str)
{
    // The previous call replaced an empty string with an empty string, i.e. did nothing.
    return str_replace(array('<![CDATA[', ']]>'), '', $str);
}

/**
 *  Extract the item links from an RSS feed
 *
 * @access    public
 * @param     string  $rssurl  address of the RSS feed
 * @return    mixed   list of array('link', 'title', 'image') entries, or '' when no link matched
 */
function GetRssLinks($rssurl)
{
    global $cfg_soft_lang;
    $dhd = new DedeHttpDown();
    $dhd->OpenUrl($rssurl);
    $rsshtml = $dhd->GetHtml();

    // Detect the feed encoding; fall back to the site encoding.
    preg_match("/encoding=[\"']([^\"']*)[\"']/is", $rsshtml, $infos);
    if (isset($infos[1])) {
        $pcode = strtolower(trim($infos[1]));
    } else {
        $pcode = strtolower($cfg_soft_lang);
    }

    // Convert the feed to the site encoding.
    if ($cfg_soft_lang == 'gb2312') {
        if ($pcode == 'utf-8') {
            $rsshtml = utf82gb($rsshtml);
        } else if ($pcode == 'big5') {
            $rsshtml = big52gb($rsshtml);
        }
    } else if ($cfg_soft_lang == 'utf-8') {
        if ($pcode == 'gbk' || $pcode == 'gb2312') {
            $rsshtml = gb2utf8($rsshtml);
        } else if ($pcode == 'big5') {
            $rsshtml = gb2utf8(big52gb($rsshtml));
        }
    }

    // Group 1 swallows everything between "<item" and the tag, group 2 is the
    // tag content, which is why index 2 is read below.
    // NOTE(review): the patterns in the reviewed text had a single group, so
    // index 2 could never be set; these are reconstructed -- confirm.
    $rsarr = array();
    preg_match_all("/<item(.*)<title>(.*)<\/title>/isU", $rsshtml, $titles);
    preg_match_all("/<item(.*)<link>(.*)<\/link>/isU", $rsshtml, $links);
    preg_match_all("/<item(.*)<description>(.*)<\/description>/isU", $rsshtml, $descriptions);
    if (!isset($links[2])) {
        return '';
    }
    foreach ($links[2] as $k => $v) {
        $rsarr[$k]['link'] = RpCdata($v);
        if (isset($titles[2][$k])) {
            $rsarr[$k]['title'] = RpCdata($titles[2][$k]);
        } else {
            // No title for this item: use the last path segment of its link.
            // (This branch used to read the very index found to be unset.)
            $rsarr[$k]['title'] = preg_replace("/^(.*)\//i", "", RpCdata($v));
        }
        if (isset($descriptions[2][$k])) {
            $rsarr[$k]['image'] = GetddImgFromRss($descriptions[2][$k], $rssurl);
        } else {
            $rsarr[$k]['image'] = '';
        }
    }
    return $rsarr;
}

/**
 *  Get the first image address from an RSS item description
 *
 * @access    public
 * @param     string  $descriptions  item description (HTML)
 * @param     string  $refurl  address the feed came from, used to complete relative addresses
 * @return    string  completed image address, or '' when no image was found
 */
function GetddImgFromRss($descriptions, $refurl)
{
    if ($descriptions == '') {
        return '';
    }
    // Group 2 is the src value, possibly still carrying a trailing quote.
    // NOTE(review): the pattern in the reviewed text had no groups at all;
    // this form is reconstructed from the use of index 2 -- confirm.
    preg_match_all("/<img(.*)src=[\"']{0,1}(.*)[\"']{0,1}[> \r\n\t]{1,}/isU", $descriptions, $imgs);
    if (!isset($imgs[2][0])) {
        return '';
    }
    $src = preg_replace("/[\"']/", '', $imgs[2][0]);
    // Collapse repeated slashes, but keep the "//" after a scheme: collapsing
    // it turned "http://" into "http:/", which FillUrl then treated as relative.
    $src = preg_replace("/(?<!:)\/{2,}/", '/', $src);
    return FillUrl($refurl, $src);
}

/**
 *  Complete a (possibly relative) address against the page it was found on
 *
 * @access    public
 * @param     string  $refurl  address of the referring page
 * @param     string  $surl  address to complete
 * @return    string  absolute http:// address; the base path without scheme when
 *                    $surl is empty; '' for a bare "." or "./"
 */
function FillUrl($refurl, $surl)
{
    $refurl = trim($refurl);
    $surl = trim($surl);
    $urls = @parse_url($refurl);
    if (!is_array($urls)) {
        $urls = array();
    }
    $host = isset($urls['host']) ? $urls['host'] : '';
    $basehost = ((!isset($urls['port']) || $urls['port'] == '80') ? $host : $host . ':' . $urls['port']);

    // The path is rebuilt from the raw address instead of parse_url()'s path,
    // which misbehaves for addresses such as http://xxxx/nnn/aaa?fdsafd
    $basepath = $basehost;
    $paths = explode('/', preg_replace("/^http:\/\//i", "", $refurl));
    $n = count($paths);
    for ($i = 1; $i < ($n - 1); $i++) {
        if (!preg_match("/[\?]/", $paths[$i])) {
            $basepath .= '/' . $paths[$i];
        }
    }
    // The last segment is a directory only when it has no "?" and no ".".
    // With a single segment that segment is the host itself and must not be
    // appended again.
    if ($n > 1 && !preg_match("/[\?\.]/", $paths[$n - 1])) {
        $basepath .= '/' . $paths[$n - 1];
    }
    if ($surl == '') {
        return $basepath;
    }

    // Drop the fragment.
    $pos = strpos($surl, "#");
    if ($pos > 0) {
        $surl = substr($surl, 0, $pos);
    }

    if ($surl[0] == '/') {
        // A leading "/" means relative to the site root.
        $okurl = $basehost . $surl;
    } else if ($surl[0] == '.') {
        if (strlen($surl) <= 2) {
            return '';
        } else if ($surl[1] == '/') {
            $okurl = $basepath . substr($surl, 1);
        } else {
            $okurl = $basepath . '/' . $surl;
        }
    } else if (preg_match("/^http:\/\//i", $surl)) {
        $okurl = $surl;
    } else {
        $okurl = $basepath . '/' . $surl;
    }

    // Normalise: exactly one scheme prefix and no repeated slashes.
    $okurl = preg_replace("/^http:\/\//i", '', $okurl);
    $okurl = 'http://' . preg_replace("/\/{1,}/", '/', $okurl);
    return $okurl;
}

/**
 *  Build the list of list-page addresses from the matching rules
 *
 * @access    public
 * @param     string  $regxurl  address pattern; may contain (*) for a number and (#) for a per-rule value
 * @param     string  $handurl  manually entered addresses, one per line
 * @param     string  $startid  first number substituted for (*)
 * @param     string  $endid  last number substituted for (*)
 * @param     string  $addv  step between numbers
 * @param     string  $usemore  non-zero to use the multi-column rules in $batchrule
 * @param     string  $batchrule  rules of the form
 *                    [(#)=>value; (*)=>1-20; typeid=>column; addurl=>url|url]
 * @return    array   list of array(0 => address, 1 => column id)
 */
function GetUrlFromListRule($regxurl = '', $handurl = '', $startid = 0, $endid = 0, $addv = 1, $usemore = 0, $batchrule = '')
{
    global $dsql, $islisten;
    $lists = array();
    $n = 0;
    $islisten = (empty($islisten) ? 0 : $islisten);

    // Manually entered addresses come first.
    if ($handurl != '') {
        foreach (explode("\n", $handurl) as $manualUrl) {
            $manualUrl = trim($manualUrl);
            if (preg_match("/^http:\/\//i", $manualUrl)) {
                $lists[$n][0] = $manualUrl;
                $lists[$n][1] = 0;
                $n++;
                if ($islisten == 1) {
                    break;
                }
            }
        }
    }
    if ($regxurl == '') {
        return $lists;
    }

    // Neither (#) nor (*) present: the pattern is a plain address.
    if (!preg_match("/\(\*\)/i", $regxurl) && !preg_match("/\(#\)/", $regxurl)) {
        $lists[$n][0] = $regxurl;
        $lists[$n][1] = 0;
        return $lists;
    }
    if ($addv <= 0) {
        $addv = 1;
    }

    // No multi-column rules: just expand (*) over the given range.
    if ($usemore == 0) {
        while ($startid <= $endid) {
            // The width of the current value is kept so leading zeros survive.
            $width = strlen((string) $startid);
            $lists[$n][0] = str_replace("(*)", sprintf('%0' . $width . 'd', $startid), $regxurl);
            $lists[$n][1] = 0;
            $startid = sprintf('%0' . $width . 'd', (int) $startid + (int) $addv);
            $n++;
            if ($n > 2000 || $islisten == 1) {
                break;
            }
        }
        return $lists;
    }

    // Multi-column rules. The inner loops used to reuse the outer loop's
    // variable names; they now have their own.
    foreach (explode(']', trim($batchrule)) as $rule) {
        $rule = preg_replace("/^\[|\]$/", '', trim($rule));
        $fields = explode(';', $rule);
        if (count($fields) < 3) {
            continue;
        }
        $brtag = '';
        $startid = 0;
        $endid = 0;
        $typeid = 0;
        $addurls = array();
        foreach ($fields as $field) {
            // Pad so that an entry without "=>" no longer raises an undefined offset.
            list($k, $v) = array_pad(explode('=>', trim($field)), 2, '');
            $k = trim($k);
            if ($k == '(#)') {
                $brtag = trim($v);
            } else if ($k == 'typeid') {
                $typeid = trim($v);
            } else if ($k == 'addurl') {
                $addurls = explode('|', trim($v));
            } else if ($k == '(*)') {
                $v = preg_replace("/[ \r\n\t]/", '', trim($v));
                // Pad so that a range without "-" yields an empty range.
                list($startid, $endid) = array_pad(explode('-', $v), 2, 0);
            }
        }

        // The column may be given by name instead of id.
        // NOTE(review): $typeid is interpolated into the SQL as written in the
        // rule -- confirm that rules only come from trusted input.
        if (preg_match('/[^0-9]/', $typeid)) {
            $arr = $dsql->GetOne("SELECT id FROM `#@__arctype` WHERE typename LIKE '$typeid' ");
            if (is_array($arr)) {
                $typeid = $arr['id'];
            } else {
                $typeid = 0;
            }
        }

        // Additional addresses take priority.
        $mjj = 0;
        foreach ($addurls as $addurl) {
            $addurl = trim($addurl);
            if ($addurl == '') {
                continue;
            }
            $lists[$n][0] = $addurl;
            $lists[$n][1] = $typeid;
            $n++;
            $mjj++;
            if ($islisten == 1) {
                break;
            }
        }

        // Expand the pattern unless listening mode already got an additional
        // address. The (#) value may itself contain (*).
        if ($islisten != 1 || $mjj == 0) {
            while ($startid <= $endid) {
                $width = strlen((string) $startid);
                $lists[$n][0] = str_replace("(#)", $brtag, $regxurl);
                $lists[$n][0] = str_replace("(*)", sprintf('%0' . $width . 'd', $startid), $lists[$n][0]);
                $lists[$n][1] = $typeid;
                $startid = sprintf('%0' . $width . 'd', (int) $startid + (int) $addv);
                $n++;
                if ($islisten == 1) {
                    break;
                }
                if ($n > 20000) {
                    break;
                }
            }
        }
    }
    return $lists;
}//End