国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

632 lines
19KB

  1. <?php
  2. if (!defined('DEDEINC')) exit('dedebiz');
  3. /**
  4. * 织梦HTTP下载类
  5. *
  6. * @version $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $
  7. * @package DedeBIZ.Libraries
  8. * @copyright Copyright (c) 2022, DedeBIZ.COM
  9. * @license https://www.dedebiz.com/license
  10. * @link https://www.dedebiz.com
  11. */
  12. @set_time_limit(0);
  13. class DedeHttpDown
  14. {
  15. var $m_ch = null;
  16. var $m_url = '';
  17. var $m_urlpath = '';
  18. var $m_scheme = 'http';
  19. var $m_host = '';
  20. var $m_port = '80';
  21. var $m_user = '';
  22. var $m_pass = '';
  23. var $m_path = '/';
  24. var $m_query = '';
  25. var $m_fp = '';
  26. var $m_error = '';
  27. var $m_httphead = array();
  28. var $m_html = '';
  29. var $m_puthead = array();
  30. var $m_cookies = '';
  31. var $BaseUrlPath = '';
  32. var $HomeUrl = '';
  33. var $reTry = 0;
  34. var $JumpCount = 0;
  35. /**
  36. * 初始化系统
  37. *
  38. * @access public
  39. * @param string $url 需要下载的地址
  40. * @return string
  41. */
  42. function PrivateInit($url)
  43. {
  44. if ($url == '') {
  45. return;
  46. }
  47. $urls = '';
  48. $urls = @parse_url($url);
  49. $this->m_url = $url;
  50. if (is_array($urls)) {
  51. $this->m_host = $urls["host"];
  52. if (!empty($urls["scheme"])) {
  53. $this->m_scheme = $urls["scheme"];
  54. }
  55. if (!empty($urls["user"])) {
  56. $this->m_user = $urls["user"];
  57. }
  58. if (!empty($urls["pass"])) {
  59. $this->m_pass = $urls["pass"];
  60. }
  61. if (!empty($urls["port"])) {
  62. $this->m_port = $urls["port"];
  63. }
  64. if (!empty($urls["path"])) {
  65. $this->m_path = $urls["path"];
  66. }
  67. $this->m_urlpath = $this->m_path;
  68. if (!empty($urls["query"])) {
  69. $this->m_query = $urls["query"];
  70. $this->m_urlpath .= "?".$this->m_query;
  71. }
  72. $this->HomeUrl = $urls["host"];
  73. $this->BaseUrlPath = $this->HomeUrl.$urls["path"];
  74. $this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/", "/", $this->BaseUrlPath);
  75. $this->BaseUrlPath = preg_replace("/\/$/", "", $this->BaseUrlPath);
  76. }
  77. }
  78. /**
  79. * 重设各参数
  80. *
  81. * @access public
  82. * @return void
  83. */
  84. function ResetAny()
  85. {
  86. $this->m_ch = "";
  87. $this->m_url = "";
  88. $this->m_urlpath = "";
  89. $this->m_scheme = "http";
  90. $this->m_host = "";
  91. $this->m_port = "80";
  92. $this->m_user = "";
  93. $this->m_pass = "";
  94. $this->m_path = "/";
  95. $this->m_query = "";
  96. $this->m_cookies = "";
  97. $this->m_error = "";
  98. }
  99. /**
  100. * 打开指定网址
  101. *
  102. * @access public
  103. * @param string $url 地址
  104. * @param string $requestType 请求类型
  105. * @return string
  106. */
  107. function OpenUrl($url, $requestType = "GET")
  108. {
  109. $this->ResetAny();
  110. $this->JumpCount = 0;
  111. $this->m_httphead = array();
  112. $this->m_html = '';
  113. $this->reTry = 0;
  114. $this->Close();
  115. //初始化系统
  116. $this->PrivateInit($url);
  117. $this->PrivateStartSession($requestType);
  118. }
  119. /**
  120. * 转到303重定向网址
  121. *
  122. * @access public
  123. * @param string $url 地址
  124. * @return string
  125. */
  126. function JumpOpenUrl($url)
  127. {
  128. $this->ResetAny();
  129. $this->JumpCount++;
  130. $this->m_httphead = array();
  131. $this->m_html = "";
  132. $this->Close();
  133. //初始化系统
  134. $this->PrivateInit($url);
  135. $this->PrivateStartSession('GET');
  136. }
  137. /**
  138. * 获得某操作错误的原因
  139. *
  140. * @access public
  141. * @return void
  142. */
  143. function printError()
  144. {
  145. echo "错误信息:".$this->m_error;
  146. echo "<br>具体返回头:<br>";
  147. foreach ($this->m_httphead as $k => $v) {
  148. echo "$k => $v <br>\r\n";
  149. }
  150. }
  151. /**
  152. * 判别用Get方法发送的头的应答结果是否正确
  153. *
  154. * @access public
  155. * @return bool
  156. */
  157. function IsGetOK()
  158. {
  159. if (preg_match("/^2/", $this->GetHead("http-state"))) {
  160. return TRUE;
  161. } else {
  162. $this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br>";
  163. return FALSE;
  164. }
  165. }
  166. /**
  167. * 看看返回的网页是否是text类型
  168. *
  169. * @access public
  170. * @return bool
  171. */
  172. function IsText()
  173. {
  174. if (preg_match("/^2/", $this->GetHead("http-state")) && preg_match("/text|xml/i", $this->GetHead("content-type"))) {
  175. return TRUE;
  176. } else {
  177. $this->m_error .= "内容为非文本类型或网址重定向<br>";
  178. return FALSE;
  179. }
  180. }
  181. /**
  182. * 判断返回的网页是否是特定的类型
  183. *
  184. * @access public
  185. * @param string $ctype 内容类型
  186. * @return string
  187. */
  188. function IsContentType($ctype)
  189. {
  190. if (
  191. preg_match("/^2/", $this->GetHead("http-state"))
  192. && $this->GetHead("content-type") == strtolower($ctype)
  193. ) {
  194. return TRUE;
  195. } else {
  196. $this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br>";
  197. return FALSE;
  198. }
  199. }
  200. /**
  201. * 用Http协议下载文件
  202. *
  203. * @access public
  204. * @param string $savefilename 保存文件名称
  205. * @return string
  206. */
  207. function SaveToBin($savefilename)
  208. {
  209. if (!$this->IsGetOK()) {
  210. return FALSE;
  211. }
  212. if (function_exists('curl_init') && function_exists('curl_exec')) {
  213. file_put_contents($savefilename, $this->m_html);
  214. return TRUE;
  215. }
  216. if (@feof($this->m_fp)) {
  217. $this->m_error = "连接已经关闭";
  218. return FALSE;
  219. }
  220. $fp = fopen($savefilename, "w");
  221. while (!feof($this->m_fp)) {
  222. fwrite($fp, fread($this->m_fp, 1024));
  223. }
  224. fclose($this->m_fp);
  225. fclose($fp);
  226. return TRUE;
  227. }
  228. /**
  229. * 保存网页内容为Text文件
  230. *
  231. * @access public
  232. * @param string $savefilename 保存文件名称
  233. * @return string
  234. */
  235. function SaveToText($savefilename)
  236. {
  237. if ($this->IsText()) {
  238. $this->SaveBinFile($savefilename);
  239. } else {
  240. return "";
  241. }
  242. }
  243. function SaveBinFile($filename)
  244. {
  245. return $this->SaveBinFile($filename);
  246. }
  247. /**
  248. * 用Http协议获得一个网页的内容
  249. *
  250. * @access public
  251. * @return string
  252. */
  253. function GetHtml()
  254. {
  255. if ($this->m_html != '') {
  256. return $this->m_html;
  257. }
  258. if (!$this->IsText()) {
  259. return '';
  260. }
  261. if (!$this->m_fp || @feof($this->m_fp)) {
  262. return '';
  263. }
  264. while (!feof($this->m_fp)) {
  265. $this->m_html .= fgets($this->m_fp, 256);
  266. }
  267. @fclose($this->m_fp);
  268. return $this->m_html;
  269. }
  270. /**
  271. * 获取请求解析后的JSON数据
  272. *
  273. * @access public
  274. * @return mixed
  275. */
  276. function GetJSON()
  277. {
  278. if ($this->m_html != '') {
  279. return json_decode($this->m_html);
  280. }
  281. if (!$this->IsText()) {
  282. return '';
  283. }
  284. if (!$this->m_fp || @feof($this->m_fp)) {
  285. return '';
  286. }
  287. while (!feof($this->m_fp)) {
  288. $this->m_html .= fgets($this->m_fp, 256);
  289. }
  290. @fclose($this->m_fp);
  291. return json_decode($this->m_html);
  292. }
  293. /**
  294. * 判断当前是否是https站点
  295. *
  296. * @access public
  297. * @return bool
  298. */
  299. function IsSSL()
  300. {
  301. if ($_SERVER['HTTPS'] && ('1' == $_SERVER['HTTPS'] || 'on' == strtolower($_SERVER['HTTPS']))) {
  302. return true;
  303. } elseif ('https' == $_SERVER['REQUEST_SCHEME']) {
  304. return true;
  305. } elseif ('443' == $_SERVER['SERVER_PORT']) {
  306. return true;
  307. } elseif ('https' == $_SERVER['HTTP_X_FORWARDED_PROTO']) {
  308. return true;
  309. }
  310. return false;
  311. }
  312. /**
  313. * 开始HTTP会话
  314. *
  315. * @access public
  316. * @param string $requestType 请求类型
  317. * @return string
  318. */
  319. function PrivateStartSession($requestType = "GET")
  320. {
  321. if ($this->m_scheme == "https") {
  322. $this->m_port = "443";
  323. }
  324. if (function_exists('curl_init') && function_exists('curl_exec')) {
  325. $this->m_ch = curl_init();
  326. curl_setopt($this->m_ch, CURLOPT_URL, $this->m_scheme.'://'.$this->m_host.':'.$this->m_port.$this->m_path.'?'.$this->m_query);
  327. curl_setopt($this->m_ch, CURLOPT_RETURNTRANSFER, 1);
  328. curl_setopt($this->m_ch, CURLOPT_FOLLOWLOCATION, 1);
  329. if ($requestType == "POST") {
  330. curl_setopt($this->m_ch, CURLOPT_POST, 1);
  331. //$content = is_array($post) ? http_build_query($post) : $post;
  332. //curl_setopt($this->m_ch, CURLOPT_POSTFIELDS, urldecode($content));
  333. }
  334. if (!empty($this->m_cookies)) {
  335. curl_setopt($this->m_ch, CURLOPT_COOKIE, $this->m_cookies);
  336. }
  337. if ($this->m_scheme == "https") {
  338. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYPEER, false);
  339. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYHOST, false);
  340. }
  341. $this->m_puthead = array();
  342. $this->m_puthead["Host"] = $this->m_host;
  343. //发送用户自定义的请求头
  344. if (!isset($this->m_puthead["Accept"])) {
  345. $this->m_puthead["Accept"] = "*/*";
  346. }
  347. if (!isset($this->m_puthead["User-Agent"])) {
  348. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  349. }
  350. if (!isset($this->m_puthead["Refer"])) {
  351. $this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"];
  352. }
  353. $headers = array();
  354. foreach ($this->m_puthead as $k => $v) {
  355. $k = trim($k);
  356. $v = trim($v);
  357. if ($k != "" && $v != "") {
  358. $headers[] = "$k: $v";
  359. }
  360. }
  361. if (count($headers) > 0) {
  362. curl_setopt($this->m_ch, CURLOPT_HTTPHEADER, $headers);
  363. }
  364. curl_setopt($this->m_ch, CURLOPT_CONNECTTIMEOUT, 20);
  365. curl_setopt($this->m_ch, CURLOPT_TIMEOUT, 900);
  366. $this->m_html = curl_exec($this->m_ch);
  367. $status = curl_getinfo($this->m_ch);
  368. if (count($status) > 0) {
  369. foreach ($status as $key => $value) {
  370. $key = str_replace("_", "-", $key);
  371. if ($key == "http-code") {
  372. $this->m_httphead["http-state"] = $value;
  373. }
  374. $this->m_httphead[$key] = $value;
  375. }
  376. }
  377. $this->m_error = curl_errno($this->m_ch);
  378. return TRUE;
  379. }
  380. if (!$this->PrivateOpenHost()) {
  381. $this->m_error .= "打开远程主机出错!";
  382. return FALSE;
  383. }
  384. $this->reTry++;
  385. if ($this->GetHead("http-edition") == "HTTP/1.1") {
  386. $httpv = "HTTP/1.1";
  387. } else {
  388. $httpv = "HTTP/1.0";
  389. }
  390. $ps = explode('?', $this->m_urlpath);
  391. $headString = '';
  392. //发送固定的起始请求头GET、Host信息
  393. if ($requestType == "GET") {
  394. $headString .= "GET ".$this->m_urlpath." $httpv\r\n";
  395. } else {
  396. $headString .= "POST ".$ps[0]." $httpv\r\n";
  397. }
  398. $this->m_puthead["Host"] = $this->m_host;
  399. //发送用户自定义的请求头
  400. if (!isset($this->m_puthead["Accept"])) {
  401. $this->m_puthead["Accept"] = "*/*";
  402. }
  403. if (!isset($this->m_puthead["User-Agent"])) {
  404. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  405. }
  406. if (!isset($this->m_puthead["Refer"])) {
  407. $this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"];
  408. }
  409. foreach ($this->m_puthead as $k => $v) {
  410. $k = trim($k);
  411. $v = trim($v);
  412. if ($k != "" && $v != "") {
  413. $headString .= "$k: $v\r\n";
  414. }
  415. }
  416. fputs($this->m_fp, $headString);
  417. if ($requestType == "POST") {
  418. $postdata = "";
  419. if (count($ps) > 1) {
  420. for ($i = 1; $i < count($ps); $i++) {
  421. $postdata .= $ps[$i];
  422. }
  423. } else {
  424. $postdata = "OK";
  425. }
  426. $plen = strlen($postdata);
  427. fputs($this->m_fp, "Content-Type: application/x-www-form-urlencoded\r\n");
  428. fputs($this->m_fp, "Content-Length: $plen\r\n");
  429. }
  430. //发送固定的结束请求头
  431. //HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
  432. if ($httpv == "HTTP/1.1") {
  433. fputs($this->m_fp, "Connection: Close\r\n\r\n");
  434. } else {
  435. fputs($this->m_fp, "\r\n");
  436. }
  437. if ($requestType == "POST") {
  438. fputs($this->m_fp, $postdata);
  439. }
  440. //获取应答头状态信息
  441. $httpstas = explode(" ", fgets($this->m_fp, 256));
  442. $this->m_httphead["http-edition"] = trim($httpstas[0]);
  443. $this->m_httphead["http-state"] = trim($httpstas[1]);
  444. $this->m_httphead["http-describe"] = "";
  445. for ($i = 2; $i < count($httpstas); $i++) {
  446. $this->m_httphead["http-describe"] .= " ".trim($httpstas[$i]);
  447. }
  448. //获取详细应答头
  449. while (!feof($this->m_fp)) {
  450. $line = trim(fgets($this->m_fp, 256));
  451. if ($line == "") {
  452. break;
  453. }
  454. $hkey = "";
  455. $hvalue = "";
  456. $v = 0;
  457. for ($i = 0; $i < strlen($line); $i++) {
  458. if ($v == 1) {
  459. $hvalue .= $line[$i];
  460. }
  461. if ($line[$i] == ":") {
  462. $v = 1;
  463. }
  464. if ($v == 0) {
  465. $hkey .= $line[$i];
  466. }
  467. }
  468. $hkey = trim($hkey);
  469. if ($hkey != "") {
  470. $this->m_httphead[strtolower($hkey)] = trim($hvalue);
  471. }
  472. }
  473. //如果连接被不正常关闭,重试
  474. if (feof($this->m_fp)) {
  475. if ($this->reTry > 10) {
  476. return FALSE;
  477. }
  478. $this->PrivateStartSession($requestType);
  479. }
  480. //判断是否是3xx开头的应答
  481. if (preg_match("/^3/", $this->m_httphead["http-state"])) {
  482. if ($this->JumpCount > 3) {
  483. return;
  484. }
  485. if (isset($this->m_httphead["location"])) {
  486. $newurl = $this->m_httphead["location"];
  487. if (preg_match("/^http/i", $newurl)) {
  488. $this->JumpOpenUrl($newurl);
  489. } else {
  490. $newurl = $this->FillUrl($newurl);
  491. $this->JumpOpenUrl($newurl);
  492. }
  493. } else {
  494. $this->m_error = "无法识别的答复";
  495. }
  496. }
  497. }
  498. /**
  499. * 获得一个Http头的值
  500. *
  501. * @access public
  502. * @param string $headname 头文件名称
  503. * @return string
  504. */
  505. function GetHead($headname)
  506. {
  507. $headname = strtolower($headname);
  508. return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
  509. }
  510. function SetCookie($cookie)
  511. {
  512. $this->m_cookies = $cookie;
  513. }
  514. /**
  515. * 设置Http头的值
  516. *
  517. * @access public
  518. * @param string $skey 键
  519. * @param string $svalue 值
  520. * @return string
  521. */
  522. function SetHead($skey, $svalue)
  523. {
  524. $this->m_puthead[$skey] = $svalue;
  525. }
  526. /**
  527. * 打开连接
  528. *
  529. * @access public
  530. * @return bool
  531. */
  532. function PrivateOpenHost()
  533. {
  534. if ($this->m_host == "") {
  535. return FALSE;
  536. }
  537. $errno = "";
  538. $errstr = "";
  539. $this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr, 10);
  540. if (!$this->m_fp) {
  541. $this->m_error = $errstr;
  542. return FALSE;
  543. } else {
  544. return TRUE;
  545. }
  546. }
  547. /**
  548. * 关闭连接
  549. *
  550. * @access public
  551. * @return void
  552. */
  553. function Close()
  554. {
  555. if (function_exists('curl_init') && function_exists('curl_exec') && $this->m_ch) {
  556. @curl_close($this->m_ch);
  557. }
  558. if ($this->m_fp) {
  559. @fclose($this->m_fp);
  560. }
  561. }
  562. /**
  563. * 补全相对网址
  564. *
  565. * @access public
  566. * @param string $surl 需要不全的地址
  567. * @return string
  568. */
  569. function FillUrl($surl)
  570. {
  571. $i = 0;
  572. $dstr = "";
  573. $pstr = "";
  574. $okurl = "";
  575. $pathStep = 0;
  576. $surl = trim($surl);
  577. if ($surl == "") {
  578. return "";
  579. }
  580. $pos = strpos($surl, "#");
  581. $proto = $this->IsSSL()? "https://" : "http://";
  582. if ($pos > 0) {
  583. $surl = substr($surl, 0, $pos);
  584. }
  585. if ($surl[0] == "/") {
  586. $okurl = $proto .$this->HomeUrl.$surl;
  587. } else if ($surl[0] == ".") {
  588. if (strlen($surl) <= 1) {
  589. return "";
  590. } else if ($surl[1] == "/") {
  591. $okurl = $proto.$this->BaseUrlPath."/".substr($surl, 2, strlen($surl) - 2);
  592. } else {
  593. $urls = explode("/", $surl);
  594. foreach ($urls as $u) {
  595. if ($u == "..") {
  596. $pathStep++;
  597. } else if ($i < count($urls) - 1) {
  598. $dstr .= $urls[$i]."/";
  599. } else {
  600. $dstr .= $urls[$i];
  601. }
  602. $i++;
  603. }
  604. $urls = explode("/", $this->BaseUrlPath);
  605. if (count($urls) <= $pathStep) {
  606. return "";
  607. } else {
  608. $pstr = $proto;
  609. for ($i = 0; $i < count($urls) - $pathStep; $i++) {
  610. $pstr .= $urls[$i]."/";
  611. }
  612. $okurl = $pstr.$dstr;
  613. }
  614. }
  615. } else {
  616. if (strlen($surl) < 7) {
  617. $okurl = $proto .$this->BaseUrlPath."/".$surl;
  618. } else if (strtolower(substr($surl, 0, 7)) == "http://") {
  619. $okurl = $surl;
  620. } else if (strtolower(substr($surl, 0, 8)) == "https://") {
  621. $okurl = $surl;
  622. } else {
  623. $okurl = $proto.$this->BaseUrlPath."/".$surl;
  624. }
  625. }
  626. $okurl = preg_replace("/^((http|https):\/\/)/i", "", $okurl);
  627. $okurl = preg_replace("/\/{1,}/", "/", $okurl);
  628. return $proto.$okurl;
  629. }
  630. }//End Class