国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

641 lines
18KB

  1. <?php
  2. if (!defined('DEDEINC')) exit('dedebiz');
  3. /**
  4. * 织梦HTTP下载类
  5. *
  6. * @version $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $
  7. * @package DedeBIZ.Libraries
  8. * @copyright Copyright (c) 2022, DedeBIZ.COM
  9. * @license https://www.dedebiz.com/license
  10. * @link https://www.dedebiz.com
  11. */
  12. @set_time_limit(0);
  13. class DedeHttpDown
  14. {
  15. var $m_ch = '';
  16. var $m_url = '';
  17. var $m_urlpath = '';
  18. var $m_scheme = 'http';
  19. var $m_host = '';
  20. var $m_port = '80';
  21. var $m_user = '';
  22. var $m_pass = '';
  23. var $m_path = '/';
  24. var $m_query = '';
  25. var $m_fp = '';
  26. var $m_error = '';
  27. var $m_httphead = array();
  28. var $m_html = '';
  29. var $m_puthead = array();
  30. var $m_cookies = '';
  31. var $BaseUrlPath = '';
  32. var $HomeUrl = '';
  33. var $reTry = 0;
  34. var $JumpCount = 0;
  35. /**
  36. * 初始化系统
  37. *
  38. * @access public
  39. * @param string $url 需要下载的地址
  40. * @return string
  41. */
  42. function PrivateInit($url)
  43. {
  44. if ($url == '') {
  45. return;
  46. }
  47. $urls = '';
  48. $urls = @parse_url($url);
  49. $this->m_url = $url;
  50. if (is_array($urls)) {
  51. $this->m_host = $urls["host"];
  52. if (!empty($urls["scheme"])) {
  53. $this->m_scheme = $urls["scheme"];
  54. }
  55. if (!empty($urls["user"])) {
  56. $this->m_user = $urls["user"];
  57. }
  58. if (!empty($urls["pass"])) {
  59. $this->m_pass = $urls["pass"];
  60. }
  61. if (!empty($urls["port"])) {
  62. $this->m_port = $urls["port"];
  63. }
  64. if (!empty($urls["path"])) {
  65. $this->m_path = $urls["path"];
  66. }
  67. $this->m_urlpath = $this->m_path;
  68. if (!empty($urls["query"])) {
  69. $this->m_query = $urls["query"];
  70. $this->m_urlpath .= "?".$this->m_query;
  71. }
  72. $this->HomeUrl = $urls["host"];
  73. $this->BaseUrlPath = $this->HomeUrl.$urls["path"];
  74. $this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/", "/", $this->BaseUrlPath);
  75. $this->BaseUrlPath = preg_replace("/\/$/", "", $this->BaseUrlPath);
  76. }
  77. }
  78. /**
  79. * 重设各参数
  80. *
  81. * @access public
  82. * @return void
  83. */
  84. function ResetAny()
  85. {
  86. $this->m_ch = "";
  87. $this->m_url = "";
  88. $this->m_urlpath = "";
  89. $this->m_scheme = "http";
  90. $this->m_host = "";
  91. $this->m_port = "80";
  92. $this->m_user = "";
  93. $this->m_pass = "";
  94. $this->m_path = "/";
  95. $this->m_query = "";
  96. $this->m_cookies = "";
  97. $this->m_error = "";
  98. }
  99. /**
  100. * 打开指定网址
  101. *
  102. * @access public
  103. * @param string $url 地址
  104. * @param string $requestType 请求类型
  105. * @return string
  106. */
  107. function OpenUrl($url, $requestType = "GET")
  108. {
  109. $this->ResetAny();
  110. $this->JumpCount = 0;
  111. $this->m_httphead = array();
  112. $this->m_html = '';
  113. $this->reTry = 0;
  114. $this->Close();
  115. //初始化系统
  116. $this->PrivateInit($url);
  117. $this->PrivateStartSession($requestType);
  118. }
  119. /**
  120. * 转到303重定向网址
  121. *
  122. * @access public
  123. * @param string $url 地址
  124. * @return string
  125. */
  126. function JumpOpenUrl($url)
  127. {
  128. $this->ResetAny();
  129. $this->JumpCount++;
  130. $this->m_httphead = array();
  131. $this->m_html = "";
  132. $this->Close();
  133. //初始化系统
  134. $this->PrivateInit($url);
  135. $this->PrivateStartSession('GET');
  136. }
  137. /**
  138. * 获得某操作错误的原因
  139. *
  140. * @access public
  141. * @return void
  142. */
  143. function printError()
  144. {
  145. echo "错误信息:".$this->m_error;
  146. echo "<br/>具体返回头:<br/>";
  147. foreach ($this->m_httphead as $k => $v) {
  148. echo "$k => $v <br/>\r\n";
  149. }
  150. }
  151. /**
  152. * 判别用Get方法发送的头的应答结果是否正确
  153. *
  154. * @access public
  155. * @return bool
  156. */
  157. function IsGetOK()
  158. {
  159. if (preg_match("/^2/", $this->GetHead("http-state"))) {
  160. return TRUE;
  161. } else {
  162. $this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br/>";
  163. return FALSE;
  164. }
  165. }
  166. /**
  167. * 看看返回的网页是否是text类型
  168. *
  169. * @access public
  170. * @return bool
  171. */
  172. function IsText()
  173. {
  174. if (preg_match("/^2/", $this->GetHead("http-state")) && preg_match("/text|xml/i", $this->GetHead("content-type"))) {
  175. return TRUE;
  176. } else {
  177. $this->m_error .= "内容为非文本类型或网址重定向<br/>";
  178. return FALSE;
  179. }
  180. }
  181. /**
  182. * 判断返回的网页是否是特定的类型
  183. *
  184. * @access public
  185. * @param string $ctype 内容类型
  186. * @return string
  187. */
  188. function IsContentType($ctype)
  189. {
  190. if (
  191. preg_match("/^2/", $this->GetHead("http-state"))
  192. && $this->GetHead("content-type") == strtolower($ctype)
  193. ) {
  194. return TRUE;
  195. } else {
  196. $this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br/>";
  197. return FALSE;
  198. }
  199. }
  200. /**
  201. * 用Http协议下载文件
  202. *
  203. * @access public
  204. * @param string $savefilename 保存文件名称
  205. * @return string
  206. */
  207. function SaveToBin($savefilename)
  208. {
  209. if (!$this->IsGetOK()) {
  210. return FALSE;
  211. }
  212. if (function_exists('curl_init') && function_exists('curl_exec')) {
  213. file_put_contents($savefilename, $this->m_html);
  214. return TRUE;
  215. }
  216. if (@feof($this->m_fp)) {
  217. $this->m_error = "连接已经关闭";
  218. return FALSE;
  219. }
  220. $fp = fopen($savefilename, "w");
  221. while (!feof($this->m_fp)) {
  222. fwrite($fp, fread($this->m_fp, 1024));
  223. }
  224. fclose($this->m_fp);
  225. fclose($fp);
  226. return TRUE;
  227. }
  228. /**
  229. * 保存网页内容为Text文件
  230. *
  231. * @access public
  232. * @param string $savefilename 保存文件名称
  233. * @return string
  234. */
  235. function SaveToText($savefilename)
  236. {
  237. if ($this->IsText()) {
  238. $this->SaveBinFile($savefilename);
  239. } else {
  240. return "";
  241. }
  242. }
  243. function SaveBinFile($filename)
  244. {
  245. return $this->SaveBinFile($filename);
  246. }
  247. /**
  248. * 用Http协议获得一个网页的内容
  249. *
  250. * @access public
  251. * @return string
  252. */
  253. function GetHtml()
  254. {
  255. if ($this->m_html != '') {
  256. return $this->m_html;
  257. }
  258. if (!$this->IsText()) {
  259. return '';
  260. }
  261. if (!$this->m_fp || @feof($this->m_fp)) {
  262. return '';
  263. }
  264. while (!feof($this->m_fp)) {
  265. $this->m_html .= fgets($this->m_fp, 256);
  266. }
  267. @fclose($this->m_fp);
  268. return $this->m_html;
  269. }
  270. function GetJSON()
  271. {
  272. if ($this->m_html != '') {
  273. return json_decode($this->m_html);
  274. }
  275. if (!$this->IsText()) {
  276. return '';
  277. }
  278. if (!$this->m_fp || @feof($this->m_fp)) {
  279. return '';
  280. }
  281. while (!feof($this->m_fp)) {
  282. $this->m_html .= fgets($this->m_fp, 256);
  283. }
  284. @fclose($this->m_fp);
  285. return json_decode($this->m_html);
  286. }
  287. /**
  288. * 开始HTTP会话
  289. *
  290. * @access public
  291. * @param string $requestType 请求类型
  292. * @return string
  293. */
  294. function PrivateStartSession($requestType = "GET")
  295. {
  296. if ($this->m_scheme == "https") {
  297. $this->m_port = "443";
  298. }
  299. if (function_exists('curl_init') && function_exists('curl_exec')) {
  300. $this->m_ch = curl_init();
  301. curl_setopt($this->m_ch, CURLOPT_URL, $this->m_scheme.'://'.$this->m_host.':'.$this->m_port.$this->m_path);
  302. curl_setopt($this->m_ch, CURLOPT_RETURNTRANSFER, 1);
  303. curl_setopt($this->m_ch, CURLOPT_FOLLOWLOCATION, 1);
  304. if ($requestType == "POST") {
  305. curl_setopt($this->m_ch, CURLOPT_POST, 1);
  306. //$content = is_array($post) ? http_build_query($post) : $post;
  307. //curl_setopt($this->m_ch, CURLOPT_POSTFIELDS, urldecode($content));
  308. }
  309. if (!empty($this->m_cookies)) {
  310. curl_setopt($this->m_ch, CURLOPT_COOKIE, $this->m_cookies);
  311. }
  312. if ($this->m_scheme == "https") {
  313. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYPEER, false);
  314. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYHOST, false);
  315. }
  316. $this->m_puthead = array();
  317. $this->m_puthead["Host"] = $this->m_host;
  318. //发送用户自定义的请求头
  319. if (!isset($this->m_puthead["Accept"])) {
  320. $this->m_puthead["Accept"] = "*/*";
  321. }
  322. if (!isset($this->m_puthead["User-Agent"])) {
  323. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  324. }
  325. if (!isset($this->m_puthead["Refer"])) {
  326. $this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"];
  327. }
  328. $headers = array();
  329. foreach ($this->m_puthead as $k => $v) {
  330. $k = trim($k);
  331. $v = trim($v);
  332. if ($k != "" && $v != "") {
  333. $headers[] = "$k: $v";
  334. }
  335. }
  336. if (count($headers) > 0) {
  337. curl_setopt($this->m_ch, CURLOPT_HTTPHEADER, $headers);
  338. }
  339. curl_setopt($this->m_ch, CURLOPT_CONNECTTIMEOUT, 20);
  340. curl_setopt($this->m_ch, CURLOPT_TIMEOUT, 900);
  341. $this->m_html = curl_exec($this->m_ch);
  342. $status = curl_getinfo($this->m_ch);
  343. if (count($status) > 0) {
  344. foreach ($status as $key => $value) {
  345. $key = str_replace("_", "-", $key);
  346. if ($key == "http-code") {
  347. $this->m_httphead["http-state"] = $value;
  348. }
  349. $this->m_httphead[$key] = $value;
  350. }
  351. }
  352. $this->m_error = curl_errno($this->m_ch);
  353. return TRUE;
  354. }
  355. if (!$this->PrivateOpenHost()) {
  356. $this->m_error .= "打开远程主机出错!";
  357. return FALSE;
  358. }
  359. $this->reTry++;
  360. if ($this->GetHead("http-edition") == "HTTP/1.1") {
  361. $httpv = "HTTP/1.1";
  362. } else {
  363. $httpv = "HTTP/1.0";
  364. }
  365. $ps = explode('?', $this->m_urlpath);
  366. $headString = '';
  367. //发送固定的起始请求头GET、Host信息
  368. if ($requestType == "GET") {
  369. $headString .= "GET ".$this->m_urlpath." $httpv\r\n";
  370. } else {
  371. $headString .= "POST ".$ps[0]." $httpv\r\n";
  372. }
  373. $this->m_puthead["Host"] = $this->m_host;
  374. //发送用户自定义的请求头
  375. if (!isset($this->m_puthead["Accept"])) {
  376. $this->m_puthead["Accept"] = "*/*";
  377. }
  378. if (!isset($this->m_puthead["User-Agent"])) {
  379. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  380. }
  381. if (!isset($this->m_puthead["Refer"])) {
  382. $this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"];
  383. }
  384. foreach ($this->m_puthead as $k => $v) {
  385. $k = trim($k);
  386. $v = trim($v);
  387. if ($k != "" && $v != "") {
  388. $headString .= "$k: $v\r\n";
  389. }
  390. }
  391. fputs($this->m_fp, $headString);
  392. if ($requestType == "POST") {
  393. $postdata = "";
  394. if (count($ps) > 1) {
  395. for ($i = 1; $i < count($ps); $i++) {
  396. $postdata .= $ps[$i];
  397. }
  398. } else {
  399. $postdata = "OK";
  400. }
  401. $plen = strlen($postdata);
  402. fputs($this->m_fp, "Content-Type: application/x-www-form-urlencoded\r\n");
  403. fputs($this->m_fp, "Content-Length: $plen\r\n");
  404. }
  405. //发送固定的结束请求头
  406. //HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
  407. if ($httpv == "HTTP/1.1") {
  408. fputs($this->m_fp, "Connection: Close\r\n\r\n");
  409. } else {
  410. fputs($this->m_fp, "\r\n");
  411. }
  412. if ($requestType == "POST") {
  413. fputs($this->m_fp, $postdata);
  414. }
  415. //获取应答头状态信息
  416. $httpstas = explode(" ", fgets($this->m_fp, 256));
  417. $this->m_httphead["http-edition"] = trim($httpstas[0]);
  418. $this->m_httphead["http-state"] = trim($httpstas[1]);
  419. $this->m_httphead["http-describe"] = "";
  420. for ($i = 2; $i < count($httpstas); $i++) {
  421. $this->m_httphead["http-describe"] .= " ".trim($httpstas[$i]);
  422. }
  423. //获取详细应答头
  424. while (!feof($this->m_fp)) {
  425. $line = trim(fgets($this->m_fp, 256));
  426. if ($line == "") {
  427. break;
  428. }
  429. $hkey = "";
  430. $hvalue = "";
  431. $v = 0;
  432. for ($i = 0; $i < strlen($line); $i++) {
  433. if ($v == 1) {
  434. $hvalue .= $line[$i];
  435. }
  436. if ($line[$i] == ":") {
  437. $v = 1;
  438. }
  439. if ($v == 0) {
  440. $hkey .= $line[$i];
  441. }
  442. }
  443. $hkey = trim($hkey);
  444. if ($hkey != "") {
  445. $this->m_httphead[strtolower($hkey)] = trim($hvalue);
  446. }
  447. }
  448. //如果连接被不正常关闭,重试
  449. if (feof($this->m_fp)) {
  450. if ($this->reTry > 10) {
  451. return FALSE;
  452. }
  453. $this->PrivateStartSession($requestType);
  454. }
  455. //判断是否是3xx开头的应答
  456. if (preg_match("/^3/", $this->m_httphead["http-state"])) {
  457. if ($this->JumpCount > 3) {
  458. return;
  459. }
  460. if (isset($this->m_httphead["location"])) {
  461. $newurl = $this->m_httphead["location"];
  462. if (preg_match("/^http/i", $newurl)) {
  463. $this->JumpOpenUrl($newurl);
  464. } else {
  465. $newurl = $this->FillUrl($newurl);
  466. $this->JumpOpenUrl($newurl);
  467. }
  468. } else {
  469. $this->m_error = "无法识别的答复";
  470. }
  471. }
  472. }
  473. /**
  474. * 获得一个Http头的值
  475. *
  476. * @access public
  477. * @param string $headname 头文件名称
  478. * @return string
  479. */
  480. function GetHead($headname)
  481. {
  482. $headname = strtolower($headname);
  483. return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
  484. }
  485. function SetCookie($cookie)
  486. {
  487. $this->m_cookies = $cookie;
  488. }
  489. /**
  490. * 设置Http头的值
  491. *
  492. * @access public
  493. * @param string $skey 键
  494. * @param string $svalue 值
  495. * @return string
  496. */
  497. function SetHead($skey, $svalue)
  498. {
  499. $this->m_puthead[$skey] = $svalue;
  500. }
  501. /**
  502. * 打开连接
  503. *
  504. * @access public
  505. * @return bool
  506. */
  507. function PrivateOpenHost()
  508. {
  509. if ($this->m_host == "") {
  510. return FALSE;
  511. }
  512. $errno = "";
  513. $errstr = "";
  514. $this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr, 10);
  515. if (!$this->m_fp) {
  516. $this->m_error = $errstr;
  517. return FALSE;
  518. } else {
  519. return TRUE;
  520. }
  521. }
  522. /**
  523. * 关闭连接
  524. *
  525. * @access public
  526. * @return void
  527. */
  528. function Close()
  529. {
  530. if (function_exists('curl_init') && function_exists('curl_exec') && $this->m_ch) {
  531. @curl_close($this->m_ch);
  532. }
  533. if ($this->m_fp) {
  534. @fclose($this->m_fp);
  535. }
  536. }
  537. /**
  538. * 补全相对网址
  539. *
  540. * @access public
  541. * @param string $surl 需要不全的地址
  542. * @return string
  543. */
  544. function FillUrl($surl)
  545. {
  546. $i = 0;
  547. $dstr = "";
  548. $pstr = "";
  549. $okurl = "";
  550. $pathStep = 0;
  551. $surl = trim($surl);
  552. if ($surl == "") {
  553. return "";
  554. }
  555. $pos = strpos($surl, "#");
  556. if ($pos > 0) {
  557. $surl = substr($surl, 0, $pos);
  558. }
  559. if ($surl[0] == "/") {
  560. $okurl = "http://".$this->HomeUrl.$surl;
  561. } else if ($surl[0] == ".") {
  562. if (strlen($surl) <= 1) {
  563. return "";
  564. } else if ($surl[1] == "/") {
  565. $okurl = "http://".$this->BaseUrlPath."/".substr($surl, 2, strlen($surl) - 2);
  566. } else {
  567. $urls = explode("/", $surl);
  568. foreach ($urls as $u) {
  569. if ($u == "..") {
  570. $pathStep++;
  571. } else if ($i < count($urls) - 1) {
  572. $dstr .= $urls[$i]."/";
  573. } else {
  574. $dstr .= $urls[$i];
  575. }
  576. $i++;
  577. }
  578. $urls = explode("/", $this->BaseUrlPath);
  579. if (count($urls) <= $pathStep) {
  580. return "";
  581. } else {
  582. $pstr = "http://";
  583. for ($i = 0; $i < count($urls) - $pathStep; $i++) {
  584. $pstr .= $urls[$i]."/";
  585. }
  586. $okurl = $pstr.$dstr;
  587. }
  588. }
  589. } else {
  590. if (strlen($surl) < 7) {
  591. $okurl = "http://".$this->BaseUrlPath."/".$surl;
  592. } else if (strtolower(substr($surl, 0, 7)) == "http://") {
  593. $okurl = $surl;
  594. } else {
  595. $okurl = "http://".$this->BaseUrlPath."/".$surl;
  596. }
  597. }
  598. $okurl = preg_replace("/^(http:\/\/)/i", "", $okurl);
  599. $okurl = preg_replace("/\/{1,}/", "/", $okurl);
  600. return "http://".$okurl;
  601. }
  602. }//End Class