国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

166 lines
4.1KB

  1. <?php
  2. if (!defined('DEDEINC')) exit ('dedebiz');
  3. /*
  4. * This file is part of Crawler Detect - the web crawler detection library.
  5. *
  6. * (c) Mark Beech <m@rkbee.ch>
  7. *
  8. * This source file is subject to the MIT license that is bundled
  9. * with this source code in the file LICENSE.
  10. */
  11. require_once(DEDEINC."/libraries/fixtures/crawlers.php");
  12. require_once(DEDEINC."/libraries/fixtures/exclusions.php");
  13. require_once(DEDEINC."/libraries/fixtures/headers.php");
  /**
   * Decides whether a user agent string belongs to a web crawler.
   *
   * The crawler and exclusion pattern lists are each joined into one
   * alternation regex at construction time. isCrawler() strips every
   * exclusion match from the user agent and then tests what is left
   * against the crawler regex.
   */
  class CrawlerDetect
  {
      /**
       * The user agent.
       *
       * @var string|null
       */
      protected $userAgent;

      /**
       * Headers that contain a user agent.
       *
       * Only entries whose key starts with "HTTP_" are stored here.
       *
       * @var array
       */
      protected $httpHeaders = array();

      /**
       * Regex matches captured by the most recent isCrawler() call.
       *
       * @var array
       */
      protected $matches = array();

      /**
       * Crawlers object (presumably declared in libraries/fixtures/crawlers.php).
       *
       * @var Crawlers
       */
      protected $crawlers;

      /**
       * Exclusions object (presumably declared in libraries/fixtures/exclusions.php).
       *
       * @var Exclusions
       */
      protected $exclusions;

      /**
       * Headers object (presumably declared in libraries/fixtures/headers.php).
       *
       * @var Headers
       */
      protected $uaHttpHeaders;

      /**
       * The compiled regex string.
       *
       * @var string
       */
      protected $compiledRegex;

      /**
       * The compiled exclusions regex string.
       *
       * @var string
       */
      protected $compiledExclusions;

      /**
       * Class constructor.
       *
       * Builds both regexes, then records the headers and the user agent.
       * Headers are set first because setUserAgent() reads them when no
       * explicit user agent is given.
       *
       * The implicit-nullable form `array $headers = null` is kept on purpose
       * so the signature stays unchanged for existing callers.
       *
       * @param array|null  $headers   Header map; $_SERVER is used when null or empty.
       * @param string|null $userAgent Explicit user agent; derived from the headers when null.
       */
      public function __construct(array $headers = null, $userAgent = null)
      {
          $this->crawlers = new Crawlers();
          $this->exclusions = new Exclusions();
          $this->uaHttpHeaders = new Headers();

          $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
          $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

          $this->setHttpHeaders($headers);
          $this->setUserAgent($userAgent);
      }

      /**
       * Compile the regex patterns into one regex string.
       *
       * The patterns are joined with "|" and wrapped in a single capture
       * group. No delimiters or escaping are added here; isCrawler() wraps
       * the result in "/.../i".
       *
       * @param array $patterns Regex fragments.
       *
       * @return string
       */
      public function compileRegex($patterns)
      {
          return '('.implode('|', $patterns).')';
      }

      /**
       * Set HTTP headers.
       *
       * Any previously stored headers are discarded.
       *
       * @param array|null $httpHeaders
       */
      public function setHttpHeaders($httpHeaders)
      {
          // Use global _SERVER if $httpHeaders aren't defined.
          if (! is_array($httpHeaders) || ! count($httpHeaders)) {
              $httpHeaders = $_SERVER;
          }

          // Clear existing headers.
          $this->httpHeaders = array();

          // Only save HTTP headers. In PHP land, that means
          // only _SERVER vars that start with HTTP_.
          foreach ($httpHeaders as $key => $value) {
              if (strpos($key, 'HTTP_') === 0) {
                  $this->httpHeaders[$key] = $value;
              }
          }
      }

      /**
       * Return user agent headers.
       *
       * @return array
       */
      public function getUaHttpHeaders()
      {
          return $this->uaHttpHeaders->getAll();
      }

      /**
       * Set the user agent.
       *
       * When $userAgent is null, the value of every stored header named by
       * getUaHttpHeaders() is appended, each followed by a single space.
       * If none of those headers is present the user agent stays null.
       *
       * @param string|null $userAgent
       *
       * @return string|null The user agent that was stored.
       */
      public function setUserAgent($userAgent)
      {
          if (is_null($userAgent)) {
              foreach ($this->getUaHttpHeaders() as $altHeader) {
                  if (isset($this->httpHeaders[$altHeader])) {
                      $userAgent .= $this->httpHeaders[$altHeader].' ';
                  }
              }
          }

          return $this->userAgent = $userAgent;
      }

      /**
       * Check user agent string against the regex.
       *
       * A falsy $userAgent (null, '', '0') falls back to the stored user
       * agent. Exclusion matches are removed first; if nothing but
       * whitespace remains, the agent is not treated as a crawler.
       *
       * @param string|null $userAgent
       *
       * @return bool
       */
      public function isCrawler($userAgent = null)
      {
          // Drop the previous result up front so that the early return below
          // cannot leave getMatches() reporting a match from an earlier call.
          $this->matches = array();

          // preg_replace() yields null on a PCRE failure; the cast keeps
          // trim() from receiving null while still ending in "not a crawler".
          $agent = trim((string) preg_replace(
              "/{$this->compiledExclusions}/i",
              '',
              $userAgent ?: $this->userAgent ?: ''
          ));

          if ($agent === '') {
              return false;
          }

          return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
      }

      /**
       * Return the text matched by the most recent isCrawler() call.
       *
       * @return string|null The full match, or null when there was none.
       */
      public function getMatches()
      {
          return isset($this->matches[0]) ? $this->matches[0] : null;
      }
  }
  163. ?>