Mirror of https://git.friendi.ca/friendica/friendica-addons.git (synced 2025-07-12 11:28:49 +00:00)
Rename botdetection to blockbot
Adding composer/vendor to blockbot
This commit is contained in: parent f1839f23e6, commit 34fc60be77
27 changed files with 3768 additions and 7 deletions
blockbot/vendor/jaybizzle/crawler-detect/src/CrawlerDetect.php (vendored, new file, 193 lines)
@@ -0,0 +1,193 @@
<?php

/*
 * This file is part of Crawler Detect - the web crawler detection library.
 *
 * (c) Mark Beech <m@rkbee.ch>
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect;

use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
use Jaybizzle\CrawlerDetect\Fixtures\Headers;

class CrawlerDetect
{
    /**
     * The user agent.
     *
     * @var null
     */
    protected $userAgent = null;

    /**
     * Headers that contain a user agent.
     *
     * @var array
     */
    protected $httpHeaders = array();

    /**
     * Store regex matches.
     *
     * @var array
     */
    protected $matches = array();

    /**
     * Crawlers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
     */
    protected $crawlers;

    /**
     * Exclusions object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
     */
    protected $exclusions;

    /**
     * Headers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
     */
    protected $uaHttpHeaders;

    /**
     * The compiled regex string.
     *
     * @var string
     */
    protected $compiledRegex;

    /**
     * The compiled exclusions regex string.
     *
     * @var string
     */
    protected $compiledExclusions;

    /**
     * Class constructor.
     */
    public function __construct(array $headers = null, $userAgent = null)
    {
        $this->crawlers = new Crawlers();
        $this->exclusions = new Exclusions();
        $this->uaHttpHeaders = new Headers();

        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

        $this->setHttpHeaders($headers);
        $this->setUserAgent($userAgent);
    }

    /**
     * Compile the regex patterns into one regex string.
     *
     * @param array
     *
     * @return string
     */
    public function compileRegex($patterns)
    {
        return '('.implode('|', $patterns).')';
    }

    /**
     * Set HTTP headers.
     *
     * @param array|null $httpHeaders
     */
    public function setHttpHeaders($httpHeaders)
    {
        // Use global _SERVER if $httpHeaders aren't defined.
        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // Clear existing headers.
        $this->httpHeaders = array();

        // Only save HTTP headers. In PHP land, that means
        // only _SERVER vars that start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            if (strpos($key, 'HTTP_') === 0) {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        return $this->uaHttpHeaders->getAll();
    }

    /**
     * Set the user agent.
     *
     * @param string $userAgent
     */
    public function setUserAgent($userAgent)
    {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader].' ';
                }
            }
        }

        return $this->userAgent = $userAgent;
    }

    /**
     * Check user agent string against the regex.
     *
     * @param string|null $userAgent
     *
     * @return bool
     */
    public function isCrawler($userAgent = null)
    {
        $agent = trim(preg_replace(
            "/{$this->compiledExclusions}/i",
            '',
            $userAgent ?: $this->userAgent
        ));

        if ($agent == '') {
            return false;
        }

        $result = preg_match("/{$this->compiledRegex}/i", $agent, $matches);

        if ($matches) {
            $this->matches = $matches;
        }

        return (bool) $result;
    }

    /**
     * Return the matches.
     *
     * @return string|null
     */
    public function getMatches()
    {
        return isset($this->matches[0]) ? $this->matches[0] : null;
    }
}
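Not part of the diff itself, but for orientation: a minimal sketch of how the vendored CrawlerDetect class above is typically driven. The Composer autoload path and the sample user agent are assumptions for illustration; the blockbot addon's own call site does not appear in this commit.

<?php

use Jaybizzle\CrawlerDetect\CrawlerDetect;

// Illustrative sketch only; the autoload path and sample UA are assumptions.
require __DIR__ . '/blockbot/vendor/autoload.php';

// With no arguments the constructor reads $_SERVER and assembles the user
// agent from the header names listed in Fixtures\Headers.
$detector = new CrawlerDetect();

if ($detector->isCrawler()) {
    // getMatches() returns the first crawler pattern that matched.
    error_log('blockbot: crawler detected: ' . $detector->getMatches());
}

// A user agent string can also be checked explicitly:
$isBot = $detector->isCrawler('Googlebot/2.1 (+http://www.google.com/bot.html)'); // expected: true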
blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php (vendored, new file, 32 lines)
@@ -0,0 +1,32 @@
<?php

/*
 * This file is part of Crawler Detect - the web crawler detection library.
 *
 * (c) Mark Beech <m@rkbee.ch>
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect\Fixtures;

abstract class AbstractProvider
{
    /**
     * The data set.
     *
     * @var array
     */
    protected $data;

    /**
     * Return the data set.
     *
     * @return array
     */
    public function getAll()
    {
        return $this->data;
    }
}
blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php (vendored, new file, 1240 lines)
File diff suppressed because it is too large.
blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php (vendored, new file, 72 lines)
@@ -0,0 +1,72 @@
<?php

/*
 * This file is part of Crawler Detect - the web crawler detection library.
 *
 * (c) Mark Beech <m@rkbee.ch>
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect\Fixtures;

class Exclusions extends AbstractProvider
{
    /**
     * List of strings to remove from the user agent before running the crawler regex
     * Over a large list of user agents, this gives us about a 55% speed increase!
     *
     * @var array
     */
    protected $data = array(
        'Safari.[\d\.]*',
        'Firefox.[\d\.]*',
        ' Chrome.[\d\.]*',
        'Chromium.[\d\.]*',
        'MSIE.[\d\.]',
        'Opera\/[\d\.]*',
        'Mozilla.[\d\.]*',
        'AppleWebKit.[\d\.]*',
        'Trident.[\d\.]*',
        'Windows NT.[\d\.]*',
        'Android [\d\.]*',
        'Macintosh.',
        'Ubuntu',
        'Linux',
        '[ ]Intel',
        'Mac OS X [\d_]*',
        '(like )?Gecko(.[\d\.]*)?',
        'KHTML,',
        'CriOS.[\d\.]*',
        'CPU iPhone OS ([0-9_])* like Mac OS X',
        'CPU OS ([0-9_])* like Mac OS X',
        'iPod',
        'compatible',
        'x86_..',
        'i686',
        'x64',
        'X11',
        'rv:[\d\.]*',
        'Version.[\d\.]*',
        'WOW64',
        'Win64',
        'Dalvik.[\d\.]*',
        ' \.NET CLR [\d\.]*',
        'Presto.[\d\.]*',
        'Media Center PC',
        'BlackBerry',
        'Build',
        'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
        'Opera',
        ' \.NET[\d\.]*',
        'cubot',
        '; M bot',
        '; CRONO',
        '; B bot',
        '; IDbot',
        '; ID bot',
        '; POWER BOT',
        ';', // Remove the following characters ;
    );
}
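These exclusion patterns are what CrawlerDetect::compileRegex() (above) joins into a single alternation and strips from the user agent before the crawler regex runs. A small sketch of that step in isolation; the sample user agent is an assumption, and the Composer autoloader from this vendor directory is assumed to be registered.

<?php

use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;

// Rebuild the exclusion regex the same way compileRegex() does.
$compiledExclusions = '(' . implode('|', (new Exclusions())->getAll()) . ')';

// Sample browser user agent (illustrative, not from the diff).
$ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36';

// Browser boilerplate is stripped; whatever remains is what the crawler
// regex is matched against. If nothing remains at all, isCrawler()
// returns false without running the crawler regex.
$stripped = trim(preg_replace("/{$compiledExclusions}/i", '', $ua));
var_dump($stripped);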
blockbot/vendor/jaybizzle/crawler-detect/src/Fixtures/Headers.php (vendored, new file, 37 lines)
@@ -0,0 +1,37 @@
<?php

/*
 * This file is part of Crawler Detect - the web crawler detection library.
 *
 * (c) Mark Beech <m@rkbee.ch>
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect\Fixtures;

class Headers extends AbstractProvider
{
    /**
     * All possible HTTP headers that represent the user agent string.
     *
     * @var array
     */
    protected $data = array(
        // The default User-Agent string.
        'HTTP_USER_AGENT',
        // Header can occur on devices using Opera Mini.
        'HTTP_X_OPERAMINI_PHONE_UA',
        // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
        'HTTP_X_DEVICE_USER_AGENT',
        'HTTP_X_ORIGINAL_USER_AGENT',
        'HTTP_X_SKYFIRE_PHONE',
        'HTTP_X_BOLT_PHONE_UA',
        'HTTP_DEVICE_STOCK_UA',
        'HTTP_X_UCBROWSER_DEVICE_UA',
        // Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address
        'HTTP_FROM',
        'HTTP_X_SCANNER', // Seen in use by Netsparker
    );
}
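When no user agent is passed to the constructor, setUserAgent() (shown earlier) walks these header names in order and concatenates every value present in the request. A sketch of that behaviour with a hand-built headers array; the header values below are made up for illustration.

<?php

use Jaybizzle\CrawlerDetect\CrawlerDetect;

// Illustrative only: these header values are invented.
$headers = array(
    'HTTP_USER_AGENT' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
    'HTTP_FROM'       => 'googlebot(at)googlebot.com',
    'SERVER_NAME'     => 'example.net', // dropped: setHttpHeaders() keeps only HTTP_* keys
);

// With $userAgent left null, the user agent is assembled by concatenating
// every header from Fixtures\Headers that is present in $headers.
$detector = new CrawlerDetect($headers);

var_dump($detector->isCrawler()); // expected to match on the bot tokens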