国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

125 lines
3.5KB

  1. <?php
  2. /**
  3. * 采集指定页面作为文章发布源
  4. *
  5. * @version $Id: inc_coonepage.php 1 10:32 2010年7月21日Z tianya $
  6. * @package DedeCMS.Administrator
  7. * @copyright Copyright (c) 2007 - 2018, DesDev, Inc.
  8. * @copyright Copyright (c) 2020, DedeBIZ.COM
  9. * @license https://www.dedebiz.com/license/v6
  10. * @link https://www.dedebiz.com
  11. */
  12. require_once(DEDEINC.'/charset.func.php');
  /**
   * Fetch a single remote page and extract article fields from it.
   *
   * The host part of $gurl is looked up in the `#@__co_onepage` table. When a
   * row is found, its `rule` column (expected form: "<start>{@body}<end>")
   * supplies the markers used to cut the article body out of the page, its
   * `lang` column drives the charset conversion, and, if `issource` is 1,
   * its `title` replaces the host as the reported source.
   *
   * @access    public
   * @param     string  $gurl  address of the page to fetch (http:// or https://)
   * @return    array   associative array with the keys 'title', 'body',
   *                    'source', 'writer', 'description' and 'keywords';
   *                    fields that could not be extracted are empty strings
   */
  function CoOnePage($gurl)
  {
      global $dsql, $cfg_auot_description, $cfg_soft_lang;
      $redatas = array('title' => '','body' => '','source' => '','writer' => '','description' => '','keywords' => '');

      // Reduce the URL to its host part. Both http:// and https:// are removed;
      // stripping only "http://" used to turn every https URL into "https:".
      $host = preg_replace('/^\s*https?:\/\//i', '', $gurl);
      // Drop everything from the first path, query or fragment delimiter on.
      $host = preg_replace('/[\/?#].*$/s', '', $host);
      $redatas['source'] = $host;

      // The host originates from an external URL, so escape it before it is
      // placed inside the quoted SQL literal.
      $row = $dsql->GetOne("SELECT * FROM `#@__co_onepage` WHERE url LIKE '".addslashes($host)."' ");

      $s = $e = '';
      $lang = '';
      if(is_array($row))
      {
          // Pad to two parts so a rule without the {@body} marker yields an
          // empty end marker instead of an undefined-offset warning.
          $parts = array_pad(explode('{@body}', (string)$row['rule']), 2, '');
          $s = trim($parts[0]);
          $e = trim($parts[1]);
          if(isset($row['lang']))
          {
              $lang = $row['lang'];
          }
          if($row['issource']==1)
          {
              $redatas['source'] = $row['title'];
          }
      }

      $htd = new DedeHttpDown();
      $htd->OpenUrl($gurl);
      $body = $htd->GetHtml();
      if($body!='')
      {
          // Automatic charset conversion: only done when the rule declares a
          // page charset that differs from the site charset.
          if($cfg_soft_lang=='utf-8')
          {
              if($lang=='gb2312')
              {
                  $body = gb2utf8($body);
              }
          }
          else if($cfg_soft_lang=='gb2312')
          {
              if($lang=='utf-8')
              {
                  $body = utf82gb($body);
              }
          }

          // Title
          $inarr = array();
          preg_match("/<title>(.*)<\/title>/isU",$body,$inarr);
          if(isset($inarr[1]))
          {
              $redatas['title'] = $inarr[1];
          }

          // Keywords
          $inarr = array();
          preg_match("/<meta[\s]+name=['\"]keywords['\"] content=['\"](.*)['\"]/isU",$body,$inarr);
          if(isset($inarr[1]))
          {
              $redatas['keywords'] = cn_substr(html2text($inarr[1]),30);
          }

          // Description
          $inarr = array();
          preg_match("/<meta[\s]+name=['\"]description['\"] content=['\"](.*)['\"]/isU",$body,$inarr);
          if(isset($inarr[1]))
          {
              $redatas['description'] = cn_substr(html2text($inarr[1]),$cfg_auot_description);
          }

          // Body: only extracted when the rule supplied both markers.
          if($s!='' && $e!='')
          {
              $redatas['body'] = GetHtmlAreaA($s,$e,$body);
              // Fall back to a description derived from the body text when the
              // page carried no usable description meta tag.
              if($redatas['body']!='' && $redatas['description']=='')
              {
                  $redatas['description'] = cn_substr(html2text($redatas['body']),$cfg_auot_description);
              }
          }
      }
      return $redatas;
  }
  /**
   * Return the part of $html that lies between two marker strings.
   *
   * The first occurrence of $s is located, then the first occurrence of $e
   * that starts after the end of $s. The text between the two markers (the
   * markers themselves excluded) is returned.
   *
   * @access    public
   * @param     string  $s     start marker
   * @param     string  $e     end marker
   * @param     string  $html  document to search (taken by reference, not modified)
   * @return    string  the enclosed text, or '' when the document or a marker
   *                    is empty or when either marker cannot be found
   */
  function GetHtmlAreaA($s, $e, &$html)
  {
      if($html=="" || $s=="" || $e=="")
      {
          return "";
      }
      $posstart = strpos($html, $s);
      if($posstart === FALSE)
      {
          return "";
      }
      // Look for the end marker only after the start marker. Searching from
      // the beginning of the start marker could match text inside $s, which
      // produced a negative substr() length or a spurious empty result.
      $contentstart = $posstart + strlen($s);
      $posend = strpos($html, $e, $contentstart);
      if($posend === FALSE)
      {
          return '';
      }
      return substr($html, $contentstart, $posend - $contentstart);
  }