国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

102 lines
3.3KB

  1. <?php
  2. /**
  3. * 采集指定页面作为文档发布源
  4. *
  5. * @version $id:inc_coonepage.php 10:32 2010年7月21日 tianya $
  6. * @package DedeBIZ.Administrator
  7. * @copyright Copyright (c) 2022 DedeBIZ.COM
  8. * @license https://www.dedebiz.com/license
  9. * @link https://www.dedebiz.com
  10. */
  11. require_once(DEDEINC.'/charset.func.php');
  /**
   * Download one page and extract the fields used as a document publishing source.
   *
   * The host part of $gurl is looked up in the `#@__co_onepage` table. When a
   * rule row is found, its `rule` column (a start marker and an end marker
   * separated by "{@body}") is used to cut the body out of the downloaded HTML,
   * its `lang` column decides whether the page is transcoded to the site
   * charset, and, if `issource` is 1, its `title` replaces the host as source.
   *
   * @access public
   * @param  string $gurl URL of the page to collect
   * @return array  associative array with the keys title, body, source, writer,
   *                description and keywords; fields that could not be extracted
   *                stay empty strings ('writer' is never filled by this function)
   */
  function CoOnePage($gurl)
  {
      global $dsql, $cfg_auot_description, $cfg_soft_lang;

      $redatas = array('title' => '', 'body' => '', 'source' => '', 'writer' => '', 'description' => '', 'keywords' => '');

      // Reduce the URL to its host: drop the scheme, then everything from the first slash on.
      $redatas['source'] = preg_replace("/(http|https):\/\//i", "", $gurl);
      $redatas['source'] = preg_replace("/\/(.*)$/i", "", $redatas['source']);

      // The host comes straight from the caller-supplied URL, so escape it before
      // embedding it in the SQL string literal.
      // NOTE(review): $dsql exposes no parameter binding in this file; switch to it if the wrapper offers one.
      $row = $dsql->GetOne("SELECT * FROM `#@__co_onepage` WHERE url LIKE '".addslashes($redatas['source'])."' ");

      $s = $e = '';   // start / end markers delimiting the body
      $lang = '';     // charset declared by the matching rule, if any
      if (is_array($row)) {
          // array_pad keeps list() safe when the stored rule has no "{@body}" separator.
          list($s, $e) = array_pad(explode('{@body}', (string)$row['rule']), 2, '');
          $s = trim($s);
          $e = trim($e);
          if ($row['issource'] == 1) {
              $redatas['source'] = $row['title'];
          }
          $lang = isset($row['lang']) ? $row['lang'] : '';
      }

      $htd = new DedeHttpDown();
      $htd->OpenUrl($gurl);
      $body = $htd->GetHtml();
      if ($body == '') {
          // Nothing downloaded: only 'source' can be reported.
          return $redatas;
      }

      // Automatic charset conversion; only possible when a rule row declared the page charset.
      if ($cfg_soft_lang == 'utf-8' && $lang == 'gb2312') {
          $body = gb2utf8($body);
      } else if ($cfg_soft_lang == 'gb2312' && $lang == 'utf-8') {
          $body = utf82gb($body);
      }

      // Title: content of the first <title> element.
      $inarr = array();
      preg_match("/<title>(.*)<\/title>/isU", $body, $inarr);
      if (isset($inarr[1])) {
          $redatas['title'] = $inarr[1];
      }

      // Keywords: content of the keywords meta tag, passed through html2text() and cn_substr(…, 30).
      $inarr = array();
      preg_match("/<meta[\s]+name=['\"]keywords['\"] content=['\"](.*)['\"]/isU", $body, $inarr);
      if (isset($inarr[1])) {
          $redatas['keywords'] = cn_substr(html2text($inarr[1]), 30);
      }

      // Description: content of the description meta tag, cut to the configured length.
      $inarr = array();
      preg_match("/<meta[\s]+name=['\"]description['\"] content=['\"](.*)['\"]/isU", $body, $inarr);
      if (isset($inarr[1])) {
          $redatas['description'] = cn_substr(html2text($inarr[1]), $cfg_auot_description);
      }

      // Body: only extracted when the rule supplied both markers.
      if ($s != '' && $e != '') {
          $redatas['body'] = GetHtmlAreaA($s, $e, $body);
          // Fall back to a description derived from the body when the page has no description meta tag.
          if ($redatas['body'] != '' && $redatas['description'] == '') {
              $redatas['description'] = cn_substr(html2text($redatas['body']), $cfg_auot_description);
          }
      }

      return $redatas;
  }
  /**
   * Return the part of $html that lies between a start marker and an end marker.
   *
   * The first occurrence of $s is located, then the first occurrence of $e that
   * begins after the end of that start marker. The text between the two markers
   * (excluding both) is returned.
   *
   * @access public
   * @param  string $s    start marker
   * @param  string $e    end marker
   * @param  string $html document to search (taken by reference, not modified)
   * @return string the enclosed text, or an empty string when the document or a
   *                marker is empty or when either marker cannot be found
   */
  function GetHtmlAreaA($s, $e, &$html)
  {
      // An empty end marker can never delimit anything; guard it like the other two inputs.
      if ($html == "" || $s == "" || $e == "") {
          return "";
      }

      $posstart = strpos($html, $s);
      if ($posstart === false) {
          return "";
      }

      // Search for the end marker only after the start marker has ended. Starting
      // at $posstart would let an end marker that occurs inside (or equals) the
      // start marker match there and yield a negative or zero slice length.
      $bodystart = $posstart + strlen($s);
      $posend = strpos($html, $e, $bodystart);
      if ($posend === false) {
          return '';
      }

      return substr($html, $bodystart, $posend - $bodystart);
  }
  102. ?>