国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

124 lines
3.5KB

  1. <?php
  2. /**
  3. * 采集指定页面作为文章发布源
  4. *
  5. * @version $Id: inc_coonepage.php 1 10:32 2010年7月21日Z tianya $
  6. * @package DedeCMS.Administrator
  7. * @copyright Copyright (c) 2007 - 2019, DesDev, Inc.
  8. * @license http://help.dedecms.com/usersguide/license.html
  9. * @link http://www.dedecms.com
  10. */
  11. require_once(DEDEINC.'/charset.func.php');
  /**
   * Download a single page and extract article fields from it.
   *
   * The host part of $gurl is looked up in the `#@__co_onepage` table. When a
   * rule row is found, its `rule` column (start marker, the literal `{@body}`,
   * end marker) supplies the delimiters used to cut the article body out of
   * the downloaded HTML, and its `lang` column drives charset conversion.
   * Title, keywords and description are read from the page's <title> and
   * <meta> tags regardless of whether a rule row exists.
   *
   * @access public
   * @param string $gurl address of the page to collect
   * @return array associative array with the keys title, body, source,
   *               writer, description and keywords (empty strings when a
   *               field could not be determined)
   */
  function CoOnePage($gurl)
  {
      global $dsql, $cfg_auot_description, $cfg_soft_lang;

      $redatas = array('title' => '','body' => '','source' => '','writer' => '','description' => '','keywords' => '');

      // Reduce the URL to its host: drop the scheme (http or https), then
      // everything from the first slash onwards.
      $redatas['source'] = preg_replace("/https?:\/\//i", "", $gurl);
      $redatas['source'] = preg_replace("/\/(.*)$/i", "", $redatas['source']);

      // Escape only the value placed into the query; the returned source
      // keeps the plain host. A valid host contains nothing addslashes
      // would alter, so this only affects malformed input.
      $row = $dsql->GetOne("SELECT * FROM `#@__co_onepage` WHERE url LIKE '".addslashes($redatas['source'])."' ");

      $s = $e = '';
      $pagelang = '';
      if(is_array($row))
      {
          // Pad to two parts so a rule without the {@body} marker yields an
          // empty end marker instead of an undefined offset.
          $ruleparts = array_pad(explode('{@body}', $row['rule']), 2, '');
          $s = trim($ruleparts[0]);
          $e = trim($ruleparts[1]);
          if($row['issource']==1)
          {
              $redatas['source'] = $row['title'];
          }
          $pagelang = isset($row['lang']) ? $row['lang'] : '';
      }

      $htd = new DedeHttpDown();
      $htd->OpenUrl($gurl);
      $body = $htd->GetHtml();
      if($body=='')
      {
          return $redatas;
      }

      // Convert the page to the site charset when the rule declares the other one.
      if($cfg_soft_lang=='utf-8' && $pagelang=='gb2312')
      {
          $body = gb2utf8($body);
      }
      else if($cfg_soft_lang=='gb2312' && $pagelang=='utf-8')
      {
          $body = utf82gb($body);
      }

      // Title
      $inarr = array();
      preg_match("/<title>(.*)<\/title>/isU", $body, $inarr);
      if(isset($inarr[1]))
      {
          $redatas['title'] = $inarr[1];
      }

      // Keywords
      $inarr = array();
      preg_match("/<meta[\s]+name=['\"]keywords['\"] content=['\"](.*)['\"]/isU", $body, $inarr);
      if(isset($inarr[1]))
      {
          $redatas['keywords'] = cn_substr(html2text($inarr[1]), 30);
      }

      // Description
      $inarr = array();
      preg_match("/<meta[\s]+name=['\"]description['\"] content=['\"](.*)['\"]/isU", $body, $inarr);
      if(isset($inarr[1]))
      {
          $redatas['description'] = cn_substr(html2text($inarr[1]), $cfg_auot_description);
      }

      // Body: only attempted when the rule provided both markers.
      if($s!='' && $e!='')
      {
          $redatas['body'] = GetHtmlAreaA($s, $e, $body);
          // Fall back to a description derived from the body text.
          if($redatas['body']!='' && $redatas['description']=='')
          {
              $redatas['description'] = cn_substr(html2text($redatas['body']), $cfg_auot_description);
          }
      }

      return $redatas;
  }
  /**
   * Return the HTML located between a start marker and the next end marker.
   *
   * The first occurrence of $s is located, then the first occurrence of $e
   * that begins after the end of that start marker. The text strictly
   * between the two markers is returned; the markers are not included.
   *
   * @access public
   * @param string $s start marker
   * @param string $e end marker
   * @param string $html document to search (taken by reference, not modified)
   * @return string the enclosed HTML, or an empty string when the document
   *                or either marker is empty, or a marker is not found
   */
  function GetHtmlAreaA($s, $e, &$html)
  {
      if($html=="" || $s=="" || $e=="")
      {
          return "";
      }

      $posstart = strpos($html, $s);
      if($posstart === FALSE)
      {
          return "";
      }

      // Search for the end marker only after the start marker. Searching
      // from $posstart could match text inside the start marker itself and
      // produce a negative length for substr().
      $areastart = $posstart + strlen($s);
      $posend = strpos($html, $e, $areastart);
      if($posend === FALSE)
      {
          return '';
      }

      return substr($html, $areastart, $posend - $areastart);
  }
  118. }