国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

641 lines
18KB

<?php if (!defined('DEDEINC')) exit("Request Error!");
// The guard above aborts the request when the DEDEINC constant is not defined,
// i.e. when this file is requested directly instead of being included
// (presumably DEDEINC is defined by the CMS bootstrap -- confirm against the caller).
/**
 * DedeCMS HTTP download class
 *
 * @version $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $
 * @package DedeCMS.Libraries
 * @copyright Copyright (c) 2020, DedeBIZ.COM
 * @license https://www.dedebiz.com/license
 * @link https://www.dedebiz.com
 */
// Remove the script execution time limit as soon as this file is included;
// "@" silences the warning on hosts where set_time_limit() is disabled.
@set_time_limit(0);
  12. class DedeHttpDown
  13. {
  14. var $m_ch = '';
  15. var $m_url = '';
  16. var $m_urlpath = '';
  17. var $m_scheme = 'http';
  18. var $m_host = '';
  19. var $m_port = '80';
  20. var $m_user = '';
  21. var $m_pass = '';
  22. var $m_path = '/';
  23. var $m_query = '';
  24. var $m_fp = '';
  25. var $m_error = '';
  26. var $m_httphead = array();
  27. var $m_html = '';
  28. var $m_puthead = array();
  29. var $m_cookies = '';
  30. var $BaseUrlPath = '';
  31. var $HomeUrl = '';
  32. var $reTry = 0;
  33. var $JumpCount = 0;
  34. /**
  35. * 初始化系统
  36. *
  37. * @access public
  38. * @param string $url 需要下载的地址
  39. * @return string
  40. */
  41. function PrivateInit($url)
  42. {
  43. if ($url == '') {
  44. return;
  45. }
  46. $urls = '';
  47. $urls = @parse_url($url);
  48. $this->m_url = $url;
  49. if (is_array($urls)) {
  50. $this->m_host = $urls["host"];
  51. if (!empty($urls["scheme"])) {
  52. $this->m_scheme = $urls["scheme"];
  53. }
  54. if (!empty($urls["user"])) {
  55. $this->m_user = $urls["user"];
  56. }
  57. if (!empty($urls["pass"])) {
  58. $this->m_pass = $urls["pass"];
  59. }
  60. if (!empty($urls["port"])) {
  61. $this->m_port = $urls["port"];
  62. }
  63. if (!empty($urls["path"])) {
  64. $this->m_path = $urls["path"];
  65. }
  66. $this->m_urlpath = $this->m_path;
  67. if (!empty($urls["query"])) {
  68. $this->m_query = $urls["query"];
  69. $this->m_urlpath .= "?" . $this->m_query;
  70. }
  71. $this->HomeUrl = $urls["host"];
  72. $this->BaseUrlPath = $this->HomeUrl . $urls["path"];
  73. $this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/", "/", $this->BaseUrlPath);
  74. $this->BaseUrlPath = preg_replace("/\/$/", "", $this->BaseUrlPath);
  75. }
  76. }
  77. /**
  78. * 重设各参数
  79. *
  80. * @access public
  81. * @return void
  82. */
  83. function ResetAny()
  84. {
  85. $this->m_ch = "";
  86. $this->m_url = "";
  87. $this->m_urlpath = "";
  88. $this->m_scheme = "http";
  89. $this->m_host = "";
  90. $this->m_port = "80";
  91. $this->m_user = "";
  92. $this->m_pass = "";
  93. $this->m_path = "/";
  94. $this->m_query = "";
  95. $this->m_cookies = "";
  96. $this->m_error = "";
  97. }
  98. /**
  99. * 打开指定网址
  100. *
  101. * @access public
  102. * @param string $url 地址
  103. * @param string $requestType 请求类型
  104. * @return string
  105. */
  106. function OpenUrl($url, $requestType = "GET")
  107. {
  108. $this->ResetAny();
  109. $this->JumpCount = 0;
  110. $this->m_httphead = array();
  111. $this->m_html = '';
  112. $this->reTry = 0;
  113. $this->Close();
  114. //初始化系统
  115. $this->PrivateInit($url);
  116. $this->PrivateStartSession($requestType);
  117. }
  118. /**
  119. * 转到303重定向网址
  120. *
  121. * @access public
  122. * @param string $url 地址
  123. * @return string
  124. */
  125. function JumpOpenUrl($url)
  126. {
  127. $this->ResetAny();
  128. $this->JumpCount++;
  129. $this->m_httphead = array();
  130. $this->m_html = "";
  131. $this->Close();
  132. //初始化系统
  133. $this->PrivateInit($url);
  134. $this->PrivateStartSession('GET');
  135. }
  136. /**
  137. * 获得某操作错误的原因
  138. *
  139. * @access public
  140. * @return void
  141. */
  142. function printError()
  143. {
  144. echo "错误信息:" . $this->m_error;
  145. echo "<br/>具体返回头:<br/>";
  146. foreach ($this->m_httphead as $k => $v) {
  147. echo "$k => $v <br/>\r\n";
  148. }
  149. }
  150. /**
  151. * 判别用Get方法发送的头的应答结果是否正确
  152. *
  153. * @access public
  154. * @return bool
  155. */
  156. function IsGetOK()
  157. {
  158. if (preg_match("/^2/", $this->GetHead("http-state"))) {
  159. return TRUE;
  160. } else {
  161. $this->m_error .= $this->GetHead("http-state") . " - " . $this->GetHead("http-describe") . "<br/>";
  162. return FALSE;
  163. }
  164. }
  165. /**
  166. * 看看返回的网页是否是text类型
  167. *
  168. * @access public
  169. * @return bool
  170. */
  171. function IsText()
  172. {
  173. if (preg_match("/^2/", $this->GetHead("http-state")) && preg_match("/text|xml/i", $this->GetHead("content-type"))) {
  174. return TRUE;
  175. } else {
  176. $this->m_error .= "内容为非文本类型或网址重定向<br/>";
  177. return FALSE;
  178. }
  179. }
  180. /**
  181. * 判断返回的网页是否是特定的类型
  182. *
  183. * @access public
  184. * @param string $ctype 内容类型
  185. * @return string
  186. */
  187. function IsContentType($ctype)
  188. {
  189. if (
  190. preg_match("/^2/", $this->GetHead("http-state"))
  191. && $this->GetHead("content-type") == strtolower($ctype)
  192. ) {
  193. return TRUE;
  194. } else {
  195. $this->m_error .= "类型不对 " . $this->GetHead("content-type") . "<br/>";
  196. return FALSE;
  197. }
  198. }
  199. /**
  200. * 用Http协议下载文件
  201. *
  202. * @access public
  203. * @param string $savefilename 保存文件名称
  204. * @return string
  205. */
  206. function SaveToBin($savefilename)
  207. {
  208. if (!$this->IsGetOK()) {
  209. return FALSE;
  210. }
  211. if (function_exists('curl_init') && function_exists('curl_exec')) {
  212. file_put_contents($savefilename, $this->m_html);
  213. return TRUE;
  214. }
  215. if (@feof($this->m_fp)) {
  216. $this->m_error = "连接已经关闭!";
  217. return FALSE;
  218. }
  219. $fp = fopen($savefilename, "w");
  220. while (!feof($this->m_fp)) {
  221. fwrite($fp, fread($this->m_fp, 1024));
  222. }
  223. fclose($this->m_fp);
  224. fclose($fp);
  225. return TRUE;
  226. }
  227. /**
  228. * 保存网页内容为Text文件
  229. *
  230. * @access public
  231. * @param string $savefilename 保存文件名称
  232. * @return string
  233. */
  234. function SaveToText($savefilename)
  235. {
  236. if ($this->IsText()) {
  237. $this->SaveBinFile($savefilename);
  238. } else {
  239. return "";
  240. }
  241. }
  242. function SaveBinFile($filename)
  243. {
  244. return $this->SaveBinFile($filename);
  245. }
  246. /**
  247. * 用Http协议获得一个网页的内容
  248. *
  249. * @access public
  250. * @return string
  251. */
  252. function GetHtml()
  253. {
  254. if ($this->m_html != '') {
  255. return $this->m_html;
  256. }
  257. if (!$this->IsText()) {
  258. return '';
  259. }
  260. if (!$this->m_fp || @feof($this->m_fp)) {
  261. return '';
  262. }
  263. while (!feof($this->m_fp)) {
  264. $this->m_html .= fgets($this->m_fp, 256);
  265. }
  266. @fclose($this->m_fp);
  267. return $this->m_html;
  268. }
  269. function GetJSON()
  270. {
  271. if ($this->m_html != '') {
  272. return json_decode($this->m_html);
  273. }
  274. if (!$this->IsText()) {
  275. return '';
  276. }
  277. if (!$this->m_fp || @feof($this->m_fp)) {
  278. return '';
  279. }
  280. while (!feof($this->m_fp)) {
  281. $this->m_html .= fgets($this->m_fp, 256);
  282. }
  283. @fclose($this->m_fp);
  284. return json_decode($this->m_html);
  285. }
  286. /**
  287. * 开始HTTP会话
  288. *
  289. * @access public
  290. * @param string $requestType 请求类型
  291. * @return string
  292. */
  293. function PrivateStartSession($requestType = "GET")
  294. {
  295. if ($this->m_scheme == "https") {
  296. $this->m_port = "443";
  297. }
  298. if (function_exists('curl_init') && function_exists('curl_exec')) {
  299. $this->m_ch = curl_init();
  300. curl_setopt($this->m_ch, CURLOPT_URL, $this->m_scheme . '://' . $this->m_host . ':' . $this->m_port . $this->m_path);
  301. curl_setopt($this->m_ch, CURLOPT_RETURNTRANSFER, 1);
  302. curl_setopt($this->m_ch, CURLOPT_FOLLOWLOCATION, 1);
  303. if ($requestType == "POST") {
  304. curl_setopt($this->m_ch, CURLOPT_POST, 1);
  305. // $content = is_array($post) ? http_build_query($post) : $post;
  306. // curl_setopt($this->m_ch, CURLOPT_POSTFIELDS, urldecode($content));
  307. }
  308. if (!empty($this->m_cookies)) {
  309. curl_setopt($this->m_ch, CURLOPT_COOKIE, $this->m_cookies);
  310. }
  311. if ($this->m_scheme == "https") {
  312. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYPEER, false);
  313. curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYHOST, false);
  314. }
  315. $this->m_puthead = array();
  316. $this->m_puthead["Host"] = $this->m_host;
  317. //发送用户自定义的请求头
  318. if (!isset($this->m_puthead["Accept"])) {
  319. $this->m_puthead["Accept"] = "*/*";
  320. }
  321. if (!isset($this->m_puthead["User-Agent"])) {
  322. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  323. }
  324. if (!isset($this->m_puthead["Refer"])) {
  325. $this->m_puthead["Refer"] = "http://" . $this->m_puthead["Host"];
  326. }
  327. $headers = array();
  328. foreach ($this->m_puthead as $k => $v) {
  329. $k = trim($k);
  330. $v = trim($v);
  331. if ($k != "" && $v != "") {
  332. $headers[] = "$k: $v";
  333. }
  334. }
  335. if (count($headers) > 0) {
  336. curl_setopt($this->m_ch, CURLOPT_HTTPHEADER, $headers);
  337. }
  338. curl_setopt($this->m_ch, CURLOPT_CONNECTTIMEOUT, 20);
  339. curl_setopt($this->m_ch, CURLOPT_TIMEOUT, 900);
  340. $this->m_html = curl_exec($this->m_ch);
  341. $status = curl_getinfo($this->m_ch);
  342. if (count($status) > 0) {
  343. foreach ($status as $key => $value) {
  344. $key = str_replace("_", "-", $key);
  345. if ($key == "http-code") {
  346. $this->m_httphead["http-state"] = $value;
  347. }
  348. $this->m_httphead[$key] = $value;
  349. }
  350. }
  351. $this->m_error = curl_errno($this->m_ch);
  352. return TRUE;
  353. }
  354. if (!$this->PrivateOpenHost()) {
  355. $this->m_error .= "打开远程主机出错!";
  356. return FALSE;
  357. }
  358. $this->reTry++;
  359. if ($this->GetHead("http-edition") == "HTTP/1.1") {
  360. $httpv = "HTTP/1.1";
  361. } else {
  362. $httpv = "HTTP/1.0";
  363. }
  364. $ps = explode('?', $this->m_urlpath);
  365. $headString = '';
  366. //发送固定的起始请求头GET、Host信息
  367. if ($requestType == "GET") {
  368. $headString .= "GET " . $this->m_urlpath . " $httpv\r\n";
  369. } else {
  370. $headString .= "POST " . $ps[0] . " $httpv\r\n";
  371. }
  372. $this->m_puthead["Host"] = $this->m_host;
  373. //发送用户自定义的请求头
  374. if (!isset($this->m_puthead["Accept"])) {
  375. $this->m_puthead["Accept"] = "*/*";
  376. }
  377. if (!isset($this->m_puthead["User-Agent"])) {
  378. $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
  379. }
  380. if (!isset($this->m_puthead["Refer"])) {
  381. $this->m_puthead["Refer"] = "http://" . $this->m_puthead["Host"];
  382. }
  383. foreach ($this->m_puthead as $k => $v) {
  384. $k = trim($k);
  385. $v = trim($v);
  386. if ($k != "" && $v != "") {
  387. $headString .= "$k: $v\r\n";
  388. }
  389. }
  390. fputs($this->m_fp, $headString);
  391. if ($requestType == "POST") {
  392. $postdata = "";
  393. if (count($ps) > 1) {
  394. for ($i = 1; $i < count($ps); $i++) {
  395. $postdata .= $ps[$i];
  396. }
  397. } else {
  398. $postdata = "OK";
  399. }
  400. $plen = strlen($postdata);
  401. fputs($this->m_fp, "Content-Type: application/x-www-form-urlencoded\r\n");
  402. fputs($this->m_fp, "Content-Length: $plen\r\n");
  403. }
  404. //发送固定的结束请求头
  405. //HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
  406. if ($httpv == "HTTP/1.1") {
  407. fputs($this->m_fp, "Connection: Close\r\n\r\n");
  408. } else {
  409. fputs($this->m_fp, "\r\n");
  410. }
  411. if ($requestType == "POST") {
  412. fputs($this->m_fp, $postdata);
  413. }
  414. //获取应答头状态信息
  415. $httpstas = explode(" ", fgets($this->m_fp, 256));
  416. $this->m_httphead["http-edition"] = trim($httpstas[0]);
  417. $this->m_httphead["http-state"] = trim($httpstas[1]);
  418. $this->m_httphead["http-describe"] = "";
  419. for ($i = 2; $i < count($httpstas); $i++) {
  420. $this->m_httphead["http-describe"] .= " " . trim($httpstas[$i]);
  421. }
  422. //获取详细应答头
  423. while (!feof($this->m_fp)) {
  424. $line = trim(fgets($this->m_fp, 256));
  425. if ($line == "") {
  426. break;
  427. }
  428. $hkey = "";
  429. $hvalue = "";
  430. $v = 0;
  431. for ($i = 0; $i < strlen($line); $i++) {
  432. if ($v == 1) {
  433. $hvalue .= $line[$i];
  434. }
  435. if ($line[$i] == ":") {
  436. $v = 1;
  437. }
  438. if ($v == 0) {
  439. $hkey .= $line[$i];
  440. }
  441. }
  442. $hkey = trim($hkey);
  443. if ($hkey != "") {
  444. $this->m_httphead[strtolower($hkey)] = trim($hvalue);
  445. }
  446. }
  447. //如果连接被不正常关闭,重试
  448. if (feof($this->m_fp)) {
  449. if ($this->reTry > 10) {
  450. return FALSE;
  451. }
  452. $this->PrivateStartSession($requestType);
  453. }
  454. //判断是否是3xx开头的应答
  455. if (preg_match("/^3/", $this->m_httphead["http-state"])) {
  456. if ($this->JumpCount > 3) {
  457. return;
  458. }
  459. if (isset($this->m_httphead["location"])) {
  460. $newurl = $this->m_httphead["location"];
  461. if (preg_match("/^http/i", $newurl)) {
  462. $this->JumpOpenUrl($newurl);
  463. } else {
  464. $newurl = $this->FillUrl($newurl);
  465. $this->JumpOpenUrl($newurl);
  466. }
  467. } else {
  468. $this->m_error = "无法识别的答复!";
  469. }
  470. }
  471. }
  472. /**
  473. * 获得一个Http头的值
  474. *
  475. * @access public
  476. * @param string $headname 头文件名称
  477. * @return string
  478. */
  479. function GetHead($headname)
  480. {
  481. $headname = strtolower($headname);
  482. return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
  483. }
  484. function SetCookie($cookie)
  485. {
  486. $this->m_cookies = $cookie;
  487. }
  488. /**
  489. * 设置Http头的值
  490. *
  491. * @access public
  492. * @param string $skey 键
  493. * @param string $svalue 值
  494. * @return string
  495. */
  496. function SetHead($skey, $svalue)
  497. {
  498. $this->m_puthead[$skey] = $svalue;
  499. }
  500. /**
  501. * 打开连接
  502. *
  503. * @access public
  504. * @return bool
  505. */
  506. function PrivateOpenHost()
  507. {
  508. if ($this->m_host == "") {
  509. return FALSE;
  510. }
  511. $errno = "";
  512. $errstr = "";
  513. $this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr, 10);
  514. if (!$this->m_fp) {
  515. $this->m_error = $errstr;
  516. return FALSE;
  517. } else {
  518. return TRUE;
  519. }
  520. }
  521. /**
  522. * 关闭连接
  523. *
  524. * @access public
  525. * @return void
  526. */
  527. function Close()
  528. {
  529. if (function_exists('curl_init') && function_exists('curl_exec') && $this->m_ch) {
  530. @curl_close($this->m_ch);
  531. }
  532. if ($this->m_fp) {
  533. @fclose($this->m_fp);
  534. }
  535. }
  536. /**
  537. * 补全相对网址
  538. *
  539. * @access public
  540. * @param string $surl 需要不全的地址
  541. * @return string
  542. */
  543. function FillUrl($surl)
  544. {
  545. $i = 0;
  546. $dstr = "";
  547. $pstr = "";
  548. $okurl = "";
  549. $pathStep = 0;
  550. $surl = trim($surl);
  551. if ($surl == "") {
  552. return "";
  553. }
  554. $pos = strpos($surl, "#");
  555. if ($pos > 0) {
  556. $surl = substr($surl, 0, $pos);
  557. }
  558. if ($surl[0] == "/") {
  559. $okurl = "http://" . $this->HomeUrl . $surl;
  560. } else if ($surl[0] == ".") {
  561. if (strlen($surl) <= 1) {
  562. return "";
  563. } else if ($surl[1] == "/") {
  564. $okurl = "http://" . $this->BaseUrlPath . "/" . substr($surl, 2, strlen($surl) - 2);
  565. } else {
  566. $urls = explode("/", $surl);
  567. foreach ($urls as $u) {
  568. if ($u == "..") {
  569. $pathStep++;
  570. } else if ($i < count($urls) - 1) {
  571. $dstr .= $urls[$i] . "/";
  572. } else {
  573. $dstr .= $urls[$i];
  574. }
  575. $i++;
  576. }
  577. $urls = explode("/", $this->BaseUrlPath);
  578. if (count($urls) <= $pathStep) {
  579. return "";
  580. } else {
  581. $pstr = "http://";
  582. for ($i = 0; $i < count($urls) - $pathStep; $i++) {
  583. $pstr .= $urls[$i] . "/";
  584. }
  585. $okurl = $pstr . $dstr;
  586. }
  587. }
  588. } else {
  589. if (strlen($surl) < 7) {
  590. $okurl = "http://" . $this->BaseUrlPath . "/" . $surl;
  591. } else if (strtolower(substr($surl, 0, 7)) == "http://") {
  592. $okurl = $surl;
  593. } else {
  594. $okurl = "http://" . $this->BaseUrlPath . "/" . $surl;
  595. }
  596. }
  597. $okurl = preg_replace("/^(http:\/\/)/i", "", $okurl);
  598. $okurl = preg_replace("/\/{1,}/", "/", $okurl);
  599. return "http://" . $okurl;
  600. }
  601. }//End Class