国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

620 lines
18KB

  1. <?php if (!defined('DEDEINC')) exit("Request Error!"); // refuse direct access: stop unless the DEDEINC constant is defined
  2. /**
  3. * DedeCMS HTTP download class
  4. *
  5. * @version $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $
  6. * @package DedeCMS.Libraries
  7. * @copyright Copyright (c) 2007 - 2020, DesDev, Inc.
  8. * @license http://help.dedecms.com/usersguide/license.html
  9. * @link http://www.dedecms.com
  10. */
  11. @set_time_limit(0); // remove the script execution time limit; any warning from the call is suppressed
  /**
   * Minimal HTTP client used to download pages and files.
   *
   * Uses cURL when curl_init()/curl_exec() exist, otherwise a raw socket opened
   * with fsockopen(). Typical use: OpenUrl($url), then GetHtml() or
   * SaveToBin($file), then Close().
   */
  class DedeHttpDown
  {
      /** cURL handle of the current session, '' when there is none. */
      public $m_ch = '';
      /** URL passed to PrivateInit(). */
      public $m_url = '';
      /** Path plus "?query" (when a query exists) of the current URL. */
      public $m_urlpath = '';
      public $m_scheme = 'http';
      public $m_host = '';
      public $m_port = '80';
      public $m_user = '';
      public $m_pass = '';
      public $m_path = '/';
      public $m_query = '';
      /** Socket of the current session (socket transport only). */
      public $m_fp = '';
      public $m_error = '';
      /** Response information, keyed by lower-case name ("http-state", "content-type", ...). */
      public $m_httphead = array();
      /** Response body. */
      public $m_html = '';
      /** Request headers registered through SetHead(). */
      public $m_puthead = array();
      public $m_cookies = '';
      /** Host plus directory of the current URL, without scheme or trailing slash. */
      public $BaseUrlPath = '';
      /** Host of the current URL. */
      public $HomeUrl = '';
      public $reTry = 0;
      public $JumpCount = 0;

      /**
       * Split a URL into the m_* fields and derive HomeUrl / BaseUrlPath.
       *
       * An empty or unparsable URL leaves the connection fields untouched.
       * When the URL has no explicit port, the port defaults to 443 for https
       * and 80 otherwise; an explicit port is always kept.
       *
       * @access public
       * @param string $url address to download
       * @return void
       */
      public function PrivateInit($url)
      {
          if ($url == '') {
              return;
          }
          $urls = @parse_url($url);
          $this->m_url = $url;
          if (!is_array($urls)) {
              return;
          }
          // parse_url() omits keys for missing components, so never index blindly.
          $host = isset($urls["host"]) ? $urls["host"] : '';
          $path = isset($urls["path"]) ? $urls["path"] : '';

          $this->m_host = $host;
          if (!empty($urls["scheme"])) {
              $this->m_scheme = $urls["scheme"];
          }
          if (!empty($urls["user"])) {
              $this->m_user = $urls["user"];
          }
          if (!empty($urls["pass"])) {
              $this->m_pass = $urls["pass"];
          }
          if (!empty($urls["port"])) {
              $this->m_port = (string) $urls["port"];
          } else if (strtolower($this->m_scheme) == "https") {
              $this->m_port = "443";
          } else {
              $this->m_port = "80";
          }
          if (!empty($path)) {
              $this->m_path = $path;
          }
          $this->m_urlpath = $this->m_path;
          if (!empty($urls["query"])) {
              $this->m_query = $urls["query"];
              $this->m_urlpath .= "?" . $this->m_query;
          }
          $this->HomeUrl = $host;
          // Drop the last path segment when it looks like a file name (contains a dot).
          // Only the final segment is inspected so dotted directories ("/v1.2/") survive.
          $base = preg_replace("/\/[^\/]*\.[^\/]*$/", "/", $host . $path);
          $this->BaseUrlPath = preg_replace("/\/$/", "", $base);
      }

      /**
       * Reset the per-request connection fields to their defaults.
       *
       * Note that this also clears the cookie string and the error text.
       *
       * @access public
       * @return void
       */
      public function ResetAny()
      {
          $this->m_ch = "";
          $this->m_url = "";
          $this->m_urlpath = "";
          $this->m_scheme = "http";
          $this->m_host = "";
          $this->m_port = "80";
          $this->m_user = "";
          $this->m_pass = "";
          $this->m_path = "/";
          $this->m_query = "";
          $this->m_cookies = "";
          $this->m_error = "";
      }

      /**
       * Open a URL and perform the request.
       *
       * @access public
       * @param string $url address
       * @param string $requestType "GET" or "POST"
       * @return bool result of PrivateStartSession()
       */
      public function OpenUrl($url, $requestType = "GET")
      {
          // Release the previous session before ResetAny() forgets its handle.
          $this->Close();
          $this->ResetAny();
          $this->JumpCount = 0;
          $this->m_httphead = array();
          $this->m_html = '';
          $this->reTry = 0;
          $this->PrivateInit($url);
          return $this->PrivateStartSession($requestType);
      }

      /**
       * Follow a redirect: open the new URL with GET and count the jump.
       *
       * @access public
       * @param string $url address
       * @return bool result of PrivateStartSession()
       */
      public function JumpOpenUrl($url)
      {
          $this->Close();
          $this->ResetAny();
          $this->JumpCount++;
          $this->m_httphead = array();
          $this->m_html = "";
          $this->PrivateInit($url);
          return $this->PrivateStartSession('GET');
      }

      /**
       * Print the accumulated error text followed by the response headers.
       *
       * m_error is printed verbatim because the class itself appends "<br/>"
       * markup to it. Header names and values come from the remote server, so
       * they are HTML-escaped; array values are shown as JSON.
       *
       * @access public
       * @return void
       */
      public function printError()
      {
          // Byte-wise escaping: independent of the page charset.
          $escape = array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#039;');
          echo "错误信息:" . $this->m_error;
          echo "<br/>具体返回头:<br/>";
          foreach ($this->m_httphead as $k => $v) {
              if (is_array($v) || is_object($v)) {
                  $v = json_encode($v);
              }
              $k = strtr((string) $k, $escape);
              $v = strtr((string) $v, $escape);
              echo "$k => $v <br/>\r\n";
          }
      }

      /**
       * Tell whether the response status is 2xx.
       *
       * On failure the status and its description are appended to m_error.
       *
       * @access public
       * @return bool
       */
      public function IsGetOK()
      {
          if (preg_match("/^2/", $this->GetHead("http-state"))) {
              return TRUE;
          }
          $this->m_error .= $this->GetHead("http-state") . " - " . $this->GetHead("http-describe") . "<br/>";
          return FALSE;
      }

      /**
       * Tell whether the response is 2xx and its content type mentions text or xml.
       *
       * On failure a message is appended to m_error.
       *
       * @access public
       * @return bool
       */
      public function IsText()
      {
          $ok = preg_match("/^2/", $this->GetHead("http-state"))
              && preg_match("/text|xml/i", $this->GetHead("content-type"));
          if ($ok) {
              return TRUE;
          }
          $this->m_error .= "内容为非文本类型或网址重定向<br/>";
          return FALSE;
      }

      /**
       * Tell whether the response is 2xx and has the given content type.
       *
       * The comparison ignores case and any parameters after ";" in the header,
       * so "text/html" matches "Text/HTML; charset=utf-8". A header that equals
       * $ctype as a whole also matches.
       *
       * @access public
       * @param string $ctype content type
       * @return bool
       */
      public function IsContentType($ctype)
      {
          $actual = strtolower(trim((string) $this->GetHead("content-type")));
          $wanted = strtolower(trim((string) $ctype));
          $semi = strpos($actual, ';');
          $media = ($semi === false) ? $actual : trim(substr($actual, 0, $semi));

          if (
              preg_match("/^2/", $this->GetHead("http-state"))
              && ($actual == $wanted || $media == $wanted)
          ) {
              return TRUE;
          }
          $this->m_error .= "类型不对 " . $this->GetHead("content-type") . "<br/>";
          return FALSE;
      }

      /**
       * Save the response body to a file.
       *
       * With cURL available the already-downloaded body (m_html) is written;
       * otherwise the rest of the socket stream is copied to the file and the
       * socket is closed.
       *
       * @access public
       * @param string $savefilename target file name
       * @return bool FALSE when the status is not 2xx, the connection is gone,
       *              or the file cannot be written
       */
      public function SaveToBin($savefilename)
      {
          if (!$this->IsGetOK()) {
              return FALSE;
          }
          if (function_exists('curl_init') && function_exists('curl_exec')) {
              return file_put_contents($savefilename, $this->m_html) !== FALSE;
          }
          // is_resource() is FALSE for '' and for closed streams; calling feof()
          // on either throws a TypeError on PHP 8.
          if (!is_resource($this->m_fp) || feof($this->m_fp)) {
              $this->m_error = "连接已经关闭!";
              return FALSE;
          }
          $out = @fopen($savefilename, "wb");
          if (!$out) {
              return FALSE;
          }
          while (!feof($this->m_fp)) {
              $chunk = fread($this->m_fp, 8192);
              if ($chunk === FALSE) {
                  break; // read error: stop instead of looping forever
              }
              fwrite($out, $chunk);
          }
          fclose($this->m_fp);
          fclose($out);
          return TRUE;
      }

      /**
       * Save the response body to a file, but only when it is a text response.
       *
       * @access public
       * @param string $savefilename target file name
       * @return bool|string result of SaveToBin(), or "" when IsText() fails
       */
      public function SaveToText($savefilename)
      {
          if ($this->IsText()) {
              return $this->SaveBinFile($savefilename);
          }
          return "";
      }

      /**
       * Alias of SaveToBin().
       *
       * @access public
       * @param string $filename target file name
       * @return bool
       */
      public function SaveBinFile($filename)
      {
          return $this->SaveToBin($filename);
      }

      /**
       * Return the response body as a string.
       *
       * A body that is already in m_html is returned as is. Otherwise, for a
       * text response on the socket transport, the rest of the stream is read
       * into m_html and the socket is closed.
       *
       * @access public
       * @return string '' when there is nothing to return
       */
      public function GetHtml()
      {
          if ($this->m_html != '') {
              return $this->m_html;
          }
          if (!$this->IsText()) {
              return '';
          }
          if (!is_resource($this->m_fp) || feof($this->m_fp)) {
              return '';
          }
          while (!feof($this->m_fp)) {
              $chunk = fread($this->m_fp, 8192);
              if ($chunk === FALSE) {
                  break;
              }
              $this->m_html .= $chunk;
          }
          fclose($this->m_fp);
          return $this->m_html;
      }

      /**
       * Perform the request for the URL prepared by PrivateInit().
       *
       * Dispatches to the cURL transport when curl_init()/curl_exec() exist and
       * to the socket transport otherwise.
       *
       * @access public
       * @param string $requestType "GET" or "POST"
       * @return bool FALSE when there is no host or the session could not be started
       */
      public function PrivateStartSession($requestType = "GET")
      {
          if ($this->m_host == "") {
              $this->m_error .= "打开远程主机出错!";
              return FALSE;
          }
          if (function_exists('curl_init') && function_exists('curl_exec')) {
              return $this->PrivateCurlSession($requestType);
          }
          return $this->PrivateSocketSession($requestType);
      }

      /**
       * Build the "Name: value" request header lines shared by both transports.
       *
       * Headers set with SetHead() win over the defaults; Host is always the
       * current host. CR/LF is stripped so a value cannot inject extra headers.
       *
       * @return array list of header lines without line terminators
       */
      private function PrivateBuildHeaders()
      {
          $heads = $this->m_puthead;
          $heads["Host"] = $this->m_host;
          if (!isset($heads["Accept"])) {
              $heads["Accept"] = "*/*";
          }
          if (!isset($heads["User-Agent"])) {
              $heads["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
          }
          // "Refer" is honoured as a caller-supplied legacy key; the default uses
          // the real header name.
          if (!isset($heads["Referer"]) && !isset($heads["Refer"])) {
              $heads["Referer"] = $this->PrivateBaseScheme() . "://" . $this->m_host;
          }
          $lines = array();
          foreach ($heads as $k => $v) {
              $k = trim(str_replace(array("\r", "\n"), "", (string) $k));
              $v = trim(str_replace(array("\r", "\n"), "", (string) $v));
              if ($k != "" && $v != "") {
                  $lines[] = "$k: $v";
              }
          }
          return $lines;
      }

      /**
       * Body sent with a POST request: the URL's query string, or "OK" when
       * the URL has none.
       *
       * @return string
       */
      private function PrivatePostBody()
      {
          return ($this->m_query != "") ? $this->m_query : "OK";
      }

      /**
       * Scheme used when building URLs from the current one: "https" when the
       * current URL is https, "http" otherwise.
       *
       * @return string
       */
      private function PrivateBaseScheme()
      {
          return (strtolower($this->m_scheme) == "https") ? "https" : "http";
      }

      /**
       * cURL transport: run the request and store body and transfer info.
       *
       * Every curl_getinfo() entry is copied into m_httphead with "_" replaced
       * by "-"; "http-code" is additionally stored as "http-state". m_error
       * receives curl_errno().
       *
       * @param string $requestType "POST" sends the query string as the body; anything else is a GET
       * @return bool always TRUE
       */
      private function PrivateCurlSession($requestType)
      {
          $isPost = ($requestType == "POST");
          // A POST goes to the bare path with the query as its body; a GET keeps the query.
          $target = $isPost ? $this->m_path : $this->m_urlpath;

          $this->m_ch = curl_init();
          curl_setopt($this->m_ch, CURLOPT_URL, $this->m_scheme . '://' . $this->m_host . ':' . $this->m_port . $target);
          curl_setopt($this->m_ch, CURLOPT_RETURNTRANSFER, 1);
          curl_setopt($this->m_ch, CURLOPT_FOLLOWLOCATION, 1);
          if ($isPost) {
              curl_setopt($this->m_ch, CURLOPT_POST, 1);
              curl_setopt($this->m_ch, CURLOPT_POSTFIELDS, $this->PrivatePostBody());
          }
          if (!empty($this->m_cookies)) {
              curl_setopt($this->m_ch, CURLOPT_COOKIE, $this->m_cookies);
          }
          if (strtolower($this->m_scheme) == "https") {
              // NOTE(review): certificate and host verification are disabled, as in
              // the original code; downloads are open to man-in-the-middle attacks.
              curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYPEER, false);
              curl_setopt($this->m_ch, CURLOPT_SSL_VERIFYHOST, false);
          }
          $headers = $this->PrivateBuildHeaders();
          if (count($headers) > 0) {
              curl_setopt($this->m_ch, CURLOPT_HTTPHEADER, $headers);
          }
          curl_setopt($this->m_ch, CURLOPT_CONNECTTIMEOUT, 20);
          curl_setopt($this->m_ch, CURLOPT_TIMEOUT, 900);

          $body = curl_exec($this->m_ch);
          $this->m_html = ($body === FALSE) ? '' : $body;

          $status = curl_getinfo($this->m_ch);
          if (is_array($status)) {
              foreach ($status as $key => $value) {
                  $key = str_replace("_", "-", $key);
                  if ($key == "http-code") {
                      $this->m_httphead["http-state"] = $value;
                  }
                  $this->m_httphead[$key] = $value;
              }
          }
          $this->m_error = curl_errno($this->m_ch);
          return TRUE;
      }

      /**
       * Socket transport: send the request, parse the status line and headers,
       * and follow a 3xx redirect (the jump is refused once JumpCount exceeds 3).
       *
       * The body is left unread on the socket for GetHtml() / SaveToBin().
       *
       * @param string $requestType "GET" sends a GET; anything else sends a POST
       *                            whose body is the query string
       * @return bool FALSE when the host cannot be opened, the server keeps
       *              closing the connection, or the redirect limit is hit
       */
      private function PrivateSocketSession($requestType)
      {
          if (!$this->PrivateOpenHost()) {
              $this->m_error .= "打开远程主机出错!";
              return FALSE;
          }
          $this->reTry++;
          $httpv = ($this->GetHead("http-edition") == "HTTP/1.1") ? "HTTP/1.1" : "HTTP/1.0";
          $isGet = ($requestType == "GET");

          if ($isGet) {
              $request = "GET " . $this->m_urlpath . " $httpv\r\n";
          } else {
              $request = "POST " . $this->m_path . " $httpv\r\n";
          }
          foreach ($this->PrivateBuildHeaders() as $line) {
              $request .= $line . "\r\n";
          }
          $postdata = "";
          if (!$isGet) {
              $postdata = $this->PrivatePostBody();
              $request .= "Content-Type: application/x-www-form-urlencoded\r\n";
              $request .= "Content-Length: " . strlen($postdata) . "\r\n";
          }
          // HTTP/1.1 must ask the server to close the connection, otherwise
          // feof() cannot be used to detect the end of the document.
          if ($httpv == "HTTP/1.1") {
              $request .= "Connection: Close\r\n";
          }
          $request .= "\r\n" . $postdata;
          fwrite($this->m_fp, $request);

          // Status line: "<version> <code> <reason...>"
          $statusLine = fgets($this->m_fp);
          $parts = ($statusLine === FALSE) ? array() : explode(" ", $statusLine);
          $this->m_httphead["http-edition"] = isset($parts[0]) ? trim($parts[0]) : "";
          $this->m_httphead["http-state"] = isset($parts[1]) ? trim($parts[1]) : "";
          $this->m_httphead["http-describe"] = "";
          for ($i = 2; $i < count($parts); $i++) {
              $this->m_httphead["http-describe"] .= " " . trim($parts[$i]);
          }

          // Response headers, up to the first blank line. Whole lines are read so
          // that long header values are not split into bogus extra headers.
          while (!feof($this->m_fp)) {
              $line = fgets($this->m_fp);
              if ($line === FALSE) {
                  break;
              }
              $line = trim($line);
              if ($line == "") {
                  break;
              }
              $colon = strpos($line, ":");
              if ($colon === FALSE) {
                  $hkey = $line;
                  $hvalue = "";
              } else {
                  $hkey = substr($line, 0, $colon);
                  $hvalue = (string) substr($line, $colon + 1);
              }
              $hkey = trim($hkey);
              if ($hkey != "") {
                  $this->m_httphead[strtolower($hkey)] = trim($hvalue);
              }
          }

          // Retry only when the connection dropped before any status was received;
          // a response that merely has no body is a normal close.
          if ($this->m_httphead["http-state"] == "" && feof($this->m_fp)) {
              fclose($this->m_fp);
              if ($this->reTry > 10) {
                  return FALSE;
              }
              return $this->PrivateSocketSession($requestType);
          }

          if (preg_match("/^3/", $this->m_httphead["http-state"])) {
              if ($this->JumpCount > 3) {
                  return FALSE;
              }
              if (isset($this->m_httphead["location"])) {
                  $newurl = $this->m_httphead["location"];
                  if (!preg_match("/^http/i", $newurl)) {
                      $newurl = $this->FillUrl($newurl);
                  }
                  return $this->JumpOpenUrl($newurl);
              }
              $this->m_error = "无法识别的答复!";
          }
          return TRUE;
      }

      /**
       * Return the value of a response header (name is case-insensitive).
       *
       * @access public
       * @param string $headname header name
       * @return string '' when the header is absent
       */
      public function GetHead($headname)
      {
          $headname = strtolower($headname);
          return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
      }

      /**
       * Store the cookie string sent by the cURL transport.
       *
       * Note: OpenUrl() and JumpOpenUrl() call ResetAny(), which clears this
       * value before the session starts, and the socket transport never sends it.
       *
       * @access public
       * @param string $cookie cookie string
       * @return void
       */
      public function SetCookie($cookie)
      {
          $this->m_cookies = $cookie;
      }

      /**
       * Register a request header for subsequent requests.
       *
       * @access public
       * @param string $skey header name
       * @param string $svalue header value
       * @return void
       */
      public function SetHead($skey, $svalue)
      {
          $this->m_puthead[$skey] = $svalue;
      }

      /**
       * Open the socket to the current host and port (10 second connect timeout).
       *
       * https hosts are opened through the ssl:// transport. On failure the
       * socket error text is stored in m_error.
       *
       * @access public
       * @return bool
       */
      public function PrivateOpenHost()
      {
          if ($this->m_host == "") {
              return FALSE;
          }
          $errno = 0;
          $errstr = "";
          $prefix = (strtolower($this->m_scheme) == "https") ? "ssl://" : "";
          $this->m_fp = @fsockopen($prefix . $this->m_host, (int) $this->m_port, $errno, $errstr, 10);
          if (!$this->m_fp) {
              $this->m_error = $errstr;
              return FALSE;
          }
          return TRUE;
      }

      /**
       * Release the cURL handle and the socket of the current session.
       *
       * Safe to call at any time, including before the first request and more
       * than once.
       *
       * @access public
       * @return void
       */
      public function Close()
      {
          // Before PHP 8 the handle is a resource and must be closed explicitly;
          // from PHP 8 on it is an object that is freed when the reference is dropped.
          if (is_resource($this->m_ch)) {
              curl_close($this->m_ch);
          }
          $this->m_ch = '';
          if (is_resource($this->m_fp)) {
              fclose($this->m_fp);
          }
      }

      /**
       * Turn a link found on the current page into an absolute URL.
       *
       * Handles absolute http(s) URLs, protocol-relative ("//host/x"),
       * root-relative ("/x"), "./x", "../x" and plain relative links. Relative
       * links get the scheme of the current page (https or http). Runs of
       * slashes after the scheme are collapsed to one. A fragment is removed
       * unless the link starts with "#".
       *
       * NOTE(review): HomeUrl/BaseUrlPath hold no port, so links resolved
       * against a page on a non-default port lose that port.
       *
       * @access public
       * @param string $surl link to complete
       * @return string absolute URL, or "" when it cannot be resolved
       */
      public function FillUrl($surl)
      {
          $surl = trim($surl);
          if ($surl == "") {
              return "";
          }
          $pos = strpos($surl, "#");
          if ($pos > 0) {
              $surl = substr($surl, 0, $pos);
          }
          $scheme = $this->PrivateBaseScheme();

          // Already absolute: keep its own scheme.
          if (preg_match("/^(https?):\/\/(.*)$/is", $surl, $m)) {
              return strtolower($m[1]) . "://" . preg_replace("/\/{1,}/", "/", $m[2]);
          }
          // Protocol-relative: inherit the scheme of the current page.
          if (preg_match("/^\/\/[^\/]/", $surl)) {
              return $scheme . "://" . preg_replace("/\/{1,}/", "/", substr($surl, 2));
          }

          if ($surl[0] == "/") {
              $okurl = $this->HomeUrl . $surl;
          } else if ($surl[0] == ".") {
              if (strlen($surl) <= 1) {
                  return "";
              }
              if ($surl[1] == "/") {
                  $okurl = $this->BaseUrlPath . "/" . substr($surl, 2);
              } else {
                  // Count ".." segments and keep the remaining ones in order.
                  $pathStep = 0;
                  $dstr = "";
                  $segments = explode("/", $surl);
                  $last = count($segments) - 1;
                  foreach ($segments as $i => $u) {
                      if ($u == "..") {
                          $pathStep++;
                      } else if ($i < $last) {
                          $dstr .= $u . "/";
                      } else {
                          $dstr .= $u;
                      }
                  }
                  $baseParts = explode("/", $this->BaseUrlPath);
                  if (count($baseParts) <= $pathStep) {
                      return ""; // climbs above the host
                  }
                  $kept = array_slice($baseParts, 0, count($baseParts) - $pathStep);
                  $okurl = implode("/", $kept) . "/" . $dstr;
              }
          } else {
              $okurl = $this->BaseUrlPath . "/" . $surl;
          }
          return $scheme . "://" . preg_replace("/\/{1,}/", "/", $okurl);
      }
  }//End Class