Merge pull request #846 from nupplaphil/features/6948-bot_detection
New Addon Bot detectionpull/847/head
commit
cf741fe3e9
|
@ -0,0 +1,32 @@
|
|||
<?php
|
||||
/**
|
||||
* Name: blockbot
|
||||
* Description: Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.
|
||||
* Version: 0.1
|
||||
* Author: Philipp Holzer <admin@philipp.info>
|
||||
*
|
||||
*/
|
||||
|
||||
use Friendica\App;
|
||||
use Friendica\Core\Hook;
|
||||
use Friendica\Core\System;
|
||||
use Jaybizzle\CrawlerDetect\CrawlerDetect;
|
||||
|
||||
require_once __DIR__ . DIRECTORY_SEPARATOR . 'vendor' . DIRECTORY_SEPARATOR . 'autoload.php';
|
||||
|
||||
function blockbot_install() {
|
||||
Hook::register('init_1', __FILE__, 'blockbot_init_1');
|
||||
}
|
||||
|
||||
|
||||
function blockbot_uninstall() {
|
||||
Hook::unregister('init_1', __FILE__, 'blockbot_init_1');
|
||||
}
|
||||
|
||||
function blockbot_init_1(App $a) {
|
||||
$crawlerDetect = new CrawlerDetect();
|
||||
|
||||
if ($crawlerDetect->isCrawler()) {
|
||||
System::httpExit(403, 'Bots are not allowed');
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "friendica-addons/blockbot",
|
||||
"description": "Blocking bots based on detecting bots/crawlers/spiders via the user agent and http_from header.",
|
||||
"type": "friendica-addon",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Philipp Holzer",
|
||||
"email": "admin@philipp.info",
|
||||
"homepage": "https://friendica.philipp.info/profile/nupplaphil",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.6.0",
|
||||
"jaybizzle/crawler-detect": "1.*"
|
||||
},
|
||||
"license": "3-clause BSD license",
|
||||
"minimum-stability": "stable",
|
||||
"config": {
|
||||
"optimize-autoloader": true,
|
||||
"autoloader-suffix": "BlockBotAddon",
|
||||
"preferred-install": "dist"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
{
|
||||
"_readme": [
|
||||
"This file locks the dependencies of your project to a known state",
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "814fd867d00e99f84d12304e8e244aae",
|
||||
"packages": [
|
||||
{
|
||||
"name": "jaybizzle/crawler-detect",
|
||||
"version": "v1.2.80",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/JayBizzle/Crawler-Detect.git",
|
||||
"reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
|
||||
"reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8|^5.5|^6.5",
|
||||
"satooshi/php-coveralls": "1.*"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Jaybizzle\\CrawlerDetect\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Mark Beech",
|
||||
"email": "m@rkbee.ch",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
|
||||
"homepage": "https://github.com/JayBizzle/Crawler-Detect/",
|
||||
"keywords": [
|
||||
"crawler",
|
||||
"crawler detect",
|
||||
"crawler detector",
|
||||
"crawlerdetect",
|
||||
"php crawler detect"
|
||||
],
|
||||
"time": "2019-04-05T19:52:02+00:00"
|
||||
}
|
||||
],
|
||||
"packages-dev": [],
|
||||
"aliases": [],
|
||||
"minimum-stability": "stable",
|
||||
"stability-flags": [],
|
||||
"prefer-stable": false,
|
||||
"prefer-lowest": false,
|
||||
"platform": {
|
||||
"php": ">=5.6.0"
|
||||
},
|
||||
"platform-dev": []
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
<?php
|
||||
|
||||
// autoload.php @generated by Composer
|
||||
|
||||
require_once __DIR__ . '/composer/autoload_real.php';
|
||||
|
||||
return ComposerAutoloaderInitBlockBotAddon::getLoader();
|
|
@ -0,0 +1,445 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Composer.
|
||||
*
|
||||
* (c) Nils Adermann <naderman@naderman.de>
|
||||
* Jordi Boggiano <j.boggiano@seld.be>
|
||||
*
|
||||
* For the full copyright and license information, please view the LICENSE
|
||||
* file that was distributed with this source code.
|
||||
*/
|
||||
|
||||
namespace Composer\Autoload;
|
||||
|
||||
/**
|
||||
* ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
|
||||
*
|
||||
* $loader = new \Composer\Autoload\ClassLoader();
|
||||
*
|
||||
* // register classes with namespaces
|
||||
* $loader->add('Symfony\Component', __DIR__.'/component');
|
||||
* $loader->add('Symfony', __DIR__.'/framework');
|
||||
*
|
||||
* // activate the autoloader
|
||||
* $loader->register();
|
||||
*
|
||||
* // to enable searching the include path (eg. for PEAR packages)
|
||||
* $loader->setUseIncludePath(true);
|
||||
*
|
||||
* In this example, if you try to use a class in the Symfony\Component
|
||||
* namespace or one of its children (Symfony\Component\Console for instance),
|
||||
* the autoloader will first look for the class under the component/
|
||||
* directory, and it will then fallback to the framework/ directory if not
|
||||
* found before giving up.
|
||||
*
|
||||
* This class is loosely based on the Symfony UniversalClassLoader.
|
||||
*
|
||||
* @author Fabien Potencier <fabien@symfony.com>
|
||||
* @author Jordi Boggiano <j.boggiano@seld.be>
|
||||
* @see http://www.php-fig.org/psr/psr-0/
|
||||
* @see http://www.php-fig.org/psr/psr-4/
|
||||
*/
|
||||
class ClassLoader
|
||||
{
|
||||
// PSR-4
|
||||
private $prefixLengthsPsr4 = array();
|
||||
private $prefixDirsPsr4 = array();
|
||||
private $fallbackDirsPsr4 = array();
|
||||
|
||||
// PSR-0
|
||||
private $prefixesPsr0 = array();
|
||||
private $fallbackDirsPsr0 = array();
|
||||
|
||||
private $useIncludePath = false;
|
||||
private $classMap = array();
|
||||
private $classMapAuthoritative = false;
|
||||
private $missingClasses = array();
|
||||
private $apcuPrefix;
|
||||
|
||||
public function getPrefixes()
|
||||
{
|
||||
if (!empty($this->prefixesPsr0)) {
|
||||
return call_user_func_array('array_merge', $this->prefixesPsr0);
|
||||
}
|
||||
|
||||
return array();
|
||||
}
|
||||
|
||||
public function getPrefixesPsr4()
|
||||
{
|
||||
return $this->prefixDirsPsr4;
|
||||
}
|
||||
|
||||
public function getFallbackDirs()
|
||||
{
|
||||
return $this->fallbackDirsPsr0;
|
||||
}
|
||||
|
||||
public function getFallbackDirsPsr4()
|
||||
{
|
||||
return $this->fallbackDirsPsr4;
|
||||
}
|
||||
|
||||
public function getClassMap()
|
||||
{
|
||||
return $this->classMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $classMap Class to filename map
|
||||
*/
|
||||
public function addClassMap(array $classMap)
|
||||
{
|
||||
if ($this->classMap) {
|
||||
$this->classMap = array_merge($this->classMap, $classMap);
|
||||
} else {
|
||||
$this->classMap = $classMap;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a set of PSR-0 directories for a given prefix, either
|
||||
* appending or prepending to the ones previously set for this prefix.
|
||||
*
|
||||
* @param string $prefix The prefix
|
||||
* @param array|string $paths The PSR-0 root directories
|
||||
* @param bool $prepend Whether to prepend the directories
|
||||
*/
|
||||
public function add($prefix, $paths, $prepend = false)
|
||||
{
|
||||
if (!$prefix) {
|
||||
if ($prepend) {
|
||||
$this->fallbackDirsPsr0 = array_merge(
|
||||
(array) $paths,
|
||||
$this->fallbackDirsPsr0
|
||||
);
|
||||
} else {
|
||||
$this->fallbackDirsPsr0 = array_merge(
|
||||
$this->fallbackDirsPsr0,
|
||||
(array) $paths
|
||||
);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
$first = $prefix[0];
|
||||
if (!isset($this->prefixesPsr0[$first][$prefix])) {
|
||||
$this->prefixesPsr0[$first][$prefix] = (array) $paths;
|
||||
|
||||
return;
|
||||
}
|
||||
if ($prepend) {
|
||||
$this->prefixesPsr0[$first][$prefix] = array_merge(
|
||||
(array) $paths,
|
||||
$this->prefixesPsr0[$first][$prefix]
|
||||
);
|
||||
} else {
|
||||
$this->prefixesPsr0[$first][$prefix] = array_merge(
|
||||
$this->prefixesPsr0[$first][$prefix],
|
||||
(array) $paths
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a set of PSR-4 directories for a given namespace, either
|
||||
* appending or prepending to the ones previously set for this namespace.
|
||||
*
|
||||
* @param string $prefix The prefix/namespace, with trailing '\\'
|
||||
* @param array|string $paths The PSR-4 base directories
|
||||
* @param bool $prepend Whether to prepend the directories
|
||||
*
|
||||
* @throws \InvalidArgumentException
|
||||
*/
|
||||
public function addPsr4($prefix, $paths, $prepend = false)
|
||||
{
|
||||
if (!$prefix) {
|
||||
// Register directories for the root namespace.
|
||||
if ($prepend) {
|
||||
$this->fallbackDirsPsr4 = array_merge(
|
||||
(array) $paths,
|
||||
$this->fallbackDirsPsr4
|
||||
);
|
||||
} else {
|
||||
$this->fallbackDirsPsr4 = array_merge(
|
||||
$this->fallbackDirsPsr4,
|
||||
(array) $paths
|
||||
);
|
||||
}
|
||||
} elseif (!isset($this->prefixDirsPsr4[$prefix])) {
|
||||
// Register directories for a new namespace.
|
||||
$length = strlen($prefix);
|
||||
if ('\\' !== $prefix[$length - 1]) {
|
||||
throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
|
||||
}
|
||||
$this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
|
||||
$this->prefixDirsPsr4[$prefix] = (array) $paths;
|
||||
} elseif ($prepend) {
|
||||
// Prepend directories for an already registered namespace.
|
||||
$this->prefixDirsPsr4[$prefix] = array_merge(
|
||||
(array) $paths,
|
||||
$this->prefixDirsPsr4[$prefix]
|
||||
);
|
||||
} else {
|
||||
// Append directories for an already registered namespace.
|
||||
$this->prefixDirsPsr4[$prefix] = array_merge(
|
||||
$this->prefixDirsPsr4[$prefix],
|
||||
(array) $paths
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a set of PSR-0 directories for a given prefix,
|
||||
* replacing any others previously set for this prefix.
|
||||
*
|
||||
* @param string $prefix The prefix
|
||||
* @param array|string $paths The PSR-0 base directories
|
||||
*/
|
||||
public function set($prefix, $paths)
|
||||
{
|
||||
if (!$prefix) {
|
||||
$this->fallbackDirsPsr0 = (array) $paths;
|
||||
} else {
|
||||
$this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a set of PSR-4 directories for a given namespace,
|
||||
* replacing any others previously set for this namespace.
|
||||
*
|
||||
* @param string $prefix The prefix/namespace, with trailing '\\'
|
||||
* @param array|string $paths The PSR-4 base directories
|
||||
*
|
||||
* @throws \InvalidArgumentException
|
||||
*/
|
||||
public function setPsr4($prefix, $paths)
|
||||
{
|
||||
if (!$prefix) {
|
||||
$this->fallbackDirsPsr4 = (array) $paths;
|
||||
} else {
|
||||
$length = strlen($prefix);
|
||||
if ('\\' !== $prefix[$length - 1]) {
|
||||
throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
|
||||
}
|
||||
$this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
|
||||
$this->prefixDirsPsr4[$prefix] = (array) $paths;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns on searching the include path for class files.
|
||||
*
|
||||
* @param bool $useIncludePath
|
||||
*/
|
||||
public function setUseIncludePath($useIncludePath)
|
||||
{
|
||||
$this->useIncludePath = $useIncludePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can be used to check if the autoloader uses the include path to check
|
||||
* for classes.
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function getUseIncludePath()
|
||||
{
|
||||
return $this->useIncludePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Turns off searching the prefix and fallback directories for classes
|
||||
* that have not been registered with the class map.
|
||||
*
|
||||
* @param bool $classMapAuthoritative
|
||||
*/
|
||||
public function setClassMapAuthoritative($classMapAuthoritative)
|
||||
{
|
||||
$this->classMapAuthoritative = $classMapAuthoritative;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should class lookup fail if not found in the current class map?
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isClassMapAuthoritative()
|
||||
{
|
||||
return $this->classMapAuthoritative;
|
||||
}
|
||||
|
||||
/**
|
||||
* APCu prefix to use to cache found/not-found classes, if the extension is enabled.
|
||||
*
|
||||
* @param string|null $apcuPrefix
|
||||
*/
|
||||
public function setApcuPrefix($apcuPrefix)
|
||||
{
|
||||
$this->apcuPrefix = function_exists('apcu_fetch') && ini_get('apc.enabled') ? $apcuPrefix : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* The APCu prefix in use, or null if APCu caching is not enabled.
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
public function getApcuPrefix()
|
||||
{
|
||||
return $this->apcuPrefix;
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers this instance as an autoloader.
|
||||
*
|
||||
* @param bool $prepend Whether to prepend the autoloader or not
|
||||
*/
|
||||
public function register($prepend = false)
|
||||
{
|
||||
spl_autoload_register(array($this, 'loadClass'), true, $prepend);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregisters this instance as an autoloader.
|
||||
*/
|
||||
public function unregister()
|
||||
{
|
||||
spl_autoload_unregister(array($this, 'loadClass'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the given class or interface.
|
||||
*
|
||||
* @param string $class The name of the class
|
||||
* @return bool|null True if loaded, null otherwise
|
||||
*/
|
||||
public function loadClass($class)
|
||||
{
|
||||
if ($file = $this->findFile($class)) {
|
||||
includeFile($file);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the path to the file where the class is defined.
|
||||
*
|
||||
* @param string $class The name of the class
|
||||
*
|
||||
* @return string|false The path if found, false otherwise
|
||||
*/
|
||||
public function findFile($class)
|
||||
{
|
||||
// class map lookup
|
||||
if (isset($this->classMap[$class])) {
|
||||
return $this->classMap[$class];
|
||||
}
|
||||
if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
|
||||
return false;
|
||||
}
|
||||
if (null !== $this->apcuPrefix) {
|
||||
$file = apcu_fetch($this->apcuPrefix.$class, $hit);
|
||||
if ($hit) {
|
||||
return $file;
|
||||
}
|
||||
}
|
||||
|
||||
$file = $this->findFileWithExtension($class, '.php');
|
||||
|
||||
// Search for Hack files if we are running on HHVM
|
||||
if (false === $file && defined('HHVM_VERSION')) {
|
||||
$file = $this->findFileWithExtension($class, '.hh');
|
||||
}
|
||||
|
||||
if (null !== $this->apcuPrefix) {
|
||||
apcu_add($this->apcuPrefix.$class, $file);
|
||||
}
|
||||
|
||||
if (false === $file) {
|
||||
// Remember that this class does not exist.
|
||||
$this->missingClasses[$class] = true;
|
||||
}
|
||||
|
||||
return $file;
|
||||
}
|
||||
|
||||
private function findFileWithExtension($class, $ext)
|
||||
{
|
||||
// PSR-4 lookup
|
||||
$logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
|
||||
|
||||
$first = $class[0];
|
||||
if (isset($this->prefixLengthsPsr4[$first])) {
|
||||
$subPath = $class;
|
||||
while (false !== $lastPos = strrpos($subPath, '\\')) {
|
||||
$subPath = substr($subPath, 0, $lastPos);
|
||||
$search = $subPath . '\\';
|
||||
if (isset($this->prefixDirsPsr4[$search])) {
|
||||
$pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
|
||||
foreach ($this->prefixDirsPsr4[$search] as $dir) {
|
||||
if (file_exists($file = $dir . $pathEnd)) {
|
||||
return $file;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PSR-4 fallback dirs
|
||||
foreach ($this->fallbackDirsPsr4 as $dir) {
|
||||
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
|
||||
return $file;
|
||||
}
|
||||
}
|
||||
|
||||
// PSR-0 lookup
|
||||
if (false !== $pos = strrpos($class, '\\')) {
|
||||
// namespaced class name
|
||||
$logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
|
||||
. strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
|
||||
} else {
|
||||
// PEAR-like class name
|
||||
$logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
|
||||
}
|
||||
|
||||
if (isset($this->prefixesPsr0[$first])) {
|
||||
foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
|
||||
if (0 === strpos($class, $prefix)) {
|
||||
foreach ($dirs as $dir) {
|
||||
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
|
||||
return $file;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PSR-0 fallback dirs
|
||||
foreach ($this->fallbackDirsPsr0 as $dir) {
|
||||
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
|
||||
return $file;
|
||||
}
|
||||
}
|
||||
|
||||
// PSR-0 include paths.
|
||||
if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
|
||||
return $file;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scope isolated include.
|
||||
*
|
||||
* Prevents access to $this/self from included files.
|
||||
*/
|
||||
function includeFile($file)
|
||||
{
|
||||
include $file;
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
Copyright (c) Nils Adermann, Jordi Boggiano
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is furnished
|
||||
to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
|
||||
// autoload_classmap.php @generated by Composer
|
||||
|
||||
$vendorDir = dirname(dirname(__FILE__));
|
||||
$baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Jaybizzle\\CrawlerDetect\\CrawlerDetect' => $vendorDir . '/jaybizzle/crawler-detect/src/CrawlerDetect.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => $vendorDir . '/jaybizzle/crawler-detect/src/Fixtures/Headers.php',
|
||||
);
|
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
// autoload_namespaces.php @generated by Composer
|
||||
|
||||
$vendorDir = dirname(dirname(__FILE__));
|
||||
$baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
);
|
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
// autoload_psr4.php @generated by Composer
|
||||
|
||||
$vendorDir = dirname(dirname(__FILE__));
|
||||
$baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Jaybizzle\\CrawlerDetect\\' => array($vendorDir . '/jaybizzle/crawler-detect/src'),
|
||||
);
|
|
@ -0,0 +1,52 @@
|
|||
<?php
|
||||
|
||||
// autoload_real.php @generated by Composer
|
||||
|
||||
class ComposerAutoloaderInitBlockBotAddon
|
||||
{
|
||||
private static $loader;
|
||||
|
||||
public static function loadClassLoader($class)
|
||||
{
|
||||
if ('Composer\Autoload\ClassLoader' === $class) {
|
||||
require __DIR__ . '/ClassLoader.php';
|
||||
}
|
||||
}
|
||||
|
||||
public static function getLoader()
|
||||
{
|
||||
if (null !== self::$loader) {
|
||||
return self::$loader;
|
||||
}
|
||||
|
||||
spl_autoload_register(array('ComposerAutoloaderInitBlockBotAddon', 'loadClassLoader'), true, true);
|
||||
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInitBlockBotAddon', 'loadClassLoader'));
|
||||
|
||||
$useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
|
||||
if ($useStaticLoader) {
|
||||
require_once __DIR__ . '/autoload_static.php';
|
||||
|
||||
call_user_func(\Composer\Autoload\ComposerStaticInitBlockBotAddon::getInitializer($loader));
|
||||
} else {
|
||||
$map = require __DIR__ . '/autoload_namespaces.php';
|
||||
foreach ($map as $namespace => $path) {
|
||||
$loader->set($namespace, $path);
|
||||
}
|
||||
|
||||
$map = require __DIR__ . '/autoload_psr4.php';
|
||||
foreach ($map as $namespace => $path) {
|
||||
$loader->setPsr4($namespace, $path);
|
||||
}
|
||||
|
||||
$classMap = require __DIR__ . '/autoload_classmap.php';
|
||||
if ($classMap) {
|
||||
$loader->addClassMap($classMap);
|
||||
}
|
||||
}
|
||||
|
||||
$loader->register(true);
|
||||
|
||||
return $loader;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
|
||||
// autoload_static.php @generated by Composer
|
||||
|
||||
namespace Composer\Autoload;
|
||||
|
||||
class ComposerStaticInitBlockBotAddon
|
||||
{
|
||||
public static $prefixLengthsPsr4 = array (
|
||||
'J' =>
|
||||
array (
|
||||
'Jaybizzle\\CrawlerDetect\\' => 24,
|
||||
),
|
||||
);
|
||||
|
||||
public static $prefixDirsPsr4 = array (
|
||||
'Jaybizzle\\CrawlerDetect\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src',
|
||||
),
|
||||
);
|
||||
|
||||
public static $classMap = array (
|
||||
'Jaybizzle\\CrawlerDetect\\CrawlerDetect' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/CrawlerDetect.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\AbstractProvider' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/AbstractProvider.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Crawlers' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Crawlers.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Exclusions' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Exclusions.php',
|
||||
'Jaybizzle\\CrawlerDetect\\Fixtures\\Headers' => __DIR__ . '/..' . '/jaybizzle/crawler-detect/src/Fixtures/Headers.php',
|
||||
);
|
||||
|
||||
public static function getInitializer(ClassLoader $loader)
|
||||
{
|
||||
return \Closure::bind(function () use ($loader) {
|
||||
$loader->prefixLengthsPsr4 = ComposerStaticInitBlockBotAddon::$prefixLengthsPsr4;
|
||||
$loader->prefixDirsPsr4 = ComposerStaticInitBlockBotAddon::$prefixDirsPsr4;
|
||||
$loader->classMap = ComposerStaticInitBlockBotAddon::$classMap;
|
||||
|
||||
}, null, ClassLoader::class);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
[
|
||||
{
|
||||
"name": "jaybizzle/crawler-detect",
|
||||
"version": "v1.2.80",
|
||||
"version_normalized": "1.2.80.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/JayBizzle/Crawler-Detect.git",
|
||||
"reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
|
||||
"reference": "af6a36e6d69670df3f0a3ed8e21d4b8cc67a7847",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8|^5.5|^6.5",
|
||||
"satooshi/php-coveralls": "1.*"
|
||||
},
|
||||
"time": "2019-04-05T19:52:02+00:00",
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Jaybizzle\\CrawlerDetect\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Mark Beech",
|
||||
"email": "m@rkbee.ch",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
|
||||
"homepage": "https://github.com/JayBizzle/Crawler-Detect/",
|
||||
"keywords": [
|
||||
"crawler",
|
||||
"crawler detect",
|
||||
"crawler detector",
|
||||
"crawlerdetect",
|
||||
"php crawler detect"
|
||||
]
|
||||
}
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015-2018 Mark Beech
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
<p align="center"><a href="http://crawlerdetect.io/" target="_blank"><img src="https://cloud.githubusercontent.com/assets/340752/23082173/1bd1a396-f550-11e6-8aba-4d3c75edea2f.png" width="321" height="219" /></a><br><br>
|
||||
<a href="http://crawlerdetect.io/" target="_blank">crawlerdetect.io</a>
|
||||
<br><br>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://travis-ci.org/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/travis/JayBizzle/Crawler-Detect/master.svg?style=flat-square" /></a>
|
||||
<a href="https://packagist.org/packages/jaybizzle/crawler-detect"><img src="https://img.shields.io/packagist/dm/JayBizzle/Crawler-Detect.svg?style=flat-square" /></a>
|
||||
<a href="https://scrutinizer-ci.com/g/JayBizzle/Crawler-Detect/?branch=master"><img src="https://img.shields.io/scrutinizer/g/JayBizzle/Crawler-Detect.svg?style=flat-square" /></a>
|
||||
<a href="https://github.com/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/badge/license-MIT-ff69b4.svg?style=flat-square" /></a>
|
||||
<a href="https://packagist.org/packages/jaybizzle/crawler-detect"><img src="https://img.shields.io/packagist/v/jaybizzle/Crawler-Detect.svg?style=flat-square" /></a>
|
||||
<a href="https://styleci.io/repos/32755917"><img src="https://styleci.io/repos/32755917/shield" /></a>
|
||||
<a href="https://coveralls.io/github/JayBizzle/Crawler-Detect"><img src="https://img.shields.io/coveralls/JayBizzle/Crawler-Detect/master.svg?style=flat-square" /></a>
|
||||
</p>
|
||||
|
||||
## About CrawlerDetect
|
||||
|
||||
CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent and http_from header. Currently able to detect 1,000's of bots/spiders/crawlers.
|
||||
|
||||
### Installation
|
||||
Run `composer require jaybizzle/crawler-detect 1.*` or add `"jaybizzle/crawler-detect" :"1.*"` to your `composer.json`.
|
||||
|
||||
### Usage
|
||||
```PHP
|
||||
use Jaybizzle\CrawlerDetect\CrawlerDetect;
|
||||
|
||||
$CrawlerDetect = new CrawlerDetect;
|
||||
|
||||
// Check the user agent of the current 'visitor'
|
||||
if($CrawlerDetect->isCrawler()) {
|
||||
// true if crawler user agent detected
|
||||
}
|
||||
|
||||
// Pass a user agent as a string
|
||||
if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) {
|
||||
// true if crawler user agent detected
|
||||
}
|
||||
|
||||
// Output the name of the bot that matched (if any)
|
||||
echo $CrawlerDetect->getMatches();
|
||||
```
|
||||
|
||||
### Contributing
|
||||
If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`.
|
||||
|
||||
Failing that, just create an issue with the user agent you have found, and we'll take it from there :)
|
||||
|
||||
### Laravel Package
|
||||
If you would like to use this with Laravel 4/5, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect)
|
||||
|
||||
### Symfony Bundle
|
||||
To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle).
|
||||
|
||||
### YII2 Extension
|
||||
To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect).
|
||||
|
||||
### ES6 Library
|
||||
To use this library with NodeJS or any ES6 application based, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect).
|
||||
|
||||
### .NET Library
|
||||
To use this library in a .net standard (including .net core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect).
|
||||
|
||||
### Nette Extension
|
||||
To use this library with the Nette framework, checkout [NetteCrawlerDetect](https://github.com/JanGalek/Crawler-Detect).
|
||||
|
||||
### Ruby Gem
|
||||
|
||||
To use this library with Ruby on Rails or any Ruby-based application, check out [crawler_detect](https://github.com/loadkpi/crawler_detect) gem.
|
||||
|
||||
_Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)_
|
||||
|
||||
[![Analytics](https://ga-beacon.appspot.com/UA-72430465-1/Crawler-Detect/readme?pixel)](https://github.com/JayBizzle/Crawler-Detect)
|
|
@ -0,0 +1,30 @@
|
|||
{
|
||||
"name": "jaybizzle/crawler-detect",
|
||||
"type": "library",
|
||||
"description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
|
||||
"keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"],
|
||||
"homepage": "https://github.com/JayBizzle/Crawler-Detect/",
|
||||
"license": "MIT",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Mark Beech",
|
||||
"email": "m@rkbee.ch",
|
||||
"role": "Developer"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8|^5.5|^6.5",
|
||||
"satooshi/php-coveralls": "1.*"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Jaybizzle\\CrawlerDetect\\": "src/"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"test": "vendor/bin/phpunit"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Crawler Detect - the web crawler detection library.
|
||||
*
|
||||
* (c) Mark Beech <m@rkbee.ch>
|
||||
*
|
||||
* This source file is subject to the MIT license that is bundled
|
||||
* with this source code in the file LICENSE.
|
||||
*/
|
||||
|
||||
require 'src/Fixtures/AbstractProvider.php';
|
||||
require 'src/Fixtures/Crawlers.php';
|
||||
require 'src/Fixtures/Exclusions.php';
|
||||
require 'src/Fixtures/Headers.php';
|
||||
|
||||
$src = array(
|
||||
'Crawlers',
|
||||
'Exclusions',
|
||||
'Headers',
|
||||
);
|
||||
|
||||
foreach ($src as $class) {
|
||||
$class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class";
|
||||
$object = new $class;
|
||||
|
||||
outputJson($object);
|
||||
outputTxt($object);
|
||||
}
|
||||
|
||||
function outputJson($object)
|
||||
{
|
||||
$className = (new ReflectionClass($object))->getShortName();
|
||||
file_put_contents("raw/$className.json", json_encode($object->getAll()));
|
||||
}
|
||||
|
||||
function outputTxt($object)
|
||||
{
|
||||
$className = (new ReflectionClass($object))->getShortName();
|
||||
file_put_contents("raw/$className.txt", implode($object->getAll(), PHP_EOL));
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT",";"]
|
|
@ -0,0 +1,48 @@
|
|||
Safari.[\d\.]*
|
||||
Firefox.[\d\.]*
|
||||
Chrome.[\d\.]*
|
||||
Chromium.[\d\.]*
|
||||
MSIE.[\d\.]
|
||||
Opera\/[\d\.]*
|
||||
Mozilla.[\d\.]*
|
||||
AppleWebKit.[\d\.]*
|
||||
Trident.[\d\.]*
|
||||
Windows NT.[\d\.]*
|
||||
Android [\d\.]*
|
||||
Macintosh.
|
||||
Ubuntu
|
||||
Linux
|
||||
[ ]Intel
|
||||
Mac OS X [\d_]*
|
||||
(like )?Gecko(.[\d\.]*)?
|
||||
KHTML,
|
||||
CriOS.[\d\.]*
|
||||
CPU iPhone OS ([0-9_])* like Mac OS X
|
||||
CPU OS ([0-9_])* like Mac OS X
|
||||
iPod
|
||||
compatible
|
||||
x86_..
|
||||
i686
|
||||
x64
|
||||
X11
|
||||
rv:[\d\.]*
|
||||
Version.[\d\.]*
|
||||
WOW64
|
||||
Win64
|
||||
Dalvik.[\d\.]*
|
||||
\.NET CLR [\d\.]*
|
||||
Presto.[\d\.]*
|
||||
Media Center PC
|
||||
BlackBerry
|
||||
Build
|
||||
Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.
|
||||
Opera
|
||||
\.NET[\d\.]*
|
||||
cubot
|
||||
; M bot
|
||||
; CRONO
|
||||
; B bot
|
||||
; IDbot
|
||||
; ID bot
|
||||
; POWER BOT
|
||||
;
|
|
@ -0,0 +1 @@
|
|||
["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER"]
|
|
@ -0,0 +1,10 @@
|
|||
HTTP_USER_AGENT
|
||||
HTTP_X_OPERAMINI_PHONE_UA
|
||||
HTTP_X_DEVICE_USER_AGENT
|
||||
HTTP_X_ORIGINAL_USER_AGENT
|
||||
HTTP_X_SKYFIRE_PHONE
|
||||
HTTP_X_BOLT_PHONE_UA
|
||||
HTTP_DEVICE_STOCK_UA
|
||||
HTTP_X_UCBROWSER_DEVICE_UA
|
||||
HTTP_FROM
|
||||
HTTP_X_SCANNER
|
|
@ -0,0 +1,193 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Crawler Detect - the web crawler detection library.
|
||||
*
|
||||
* (c) Mark Beech <m@rkbee.ch>
|
||||
*
|
||||
* This source file is subject to the MIT license that is bundled
|
||||
* with this source code in the file LICENSE.
|
||||
*/
|
||||
|
||||
namespace Jaybizzle\CrawlerDetect;
|
||||
|
||||
use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
|
||||
use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
|
||||
use Jaybizzle\CrawlerDetect\Fixtures\Headers;
|
||||
|
||||
class CrawlerDetect
|
||||
{
|
||||
/**
|
||||
* The user agent.
|
||||
*
|
||||
* @var null
|
||||
*/
|
||||
protected $userAgent = null;
|
||||
|
||||
/**
|
||||
* Headers that contain a user agent.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $httpHeaders = array();
|
||||
|
||||
/**
|
||||
* Store regex matches.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $matches = array();
|
||||
|
||||
/**
|
||||
* Crawlers object.
|
||||
*
|
||||
* @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
|
||||
*/
|
||||
protected $crawlers;
|
||||
|
||||
/**
|
||||
* Exclusions object.
|
||||
*
|
||||
* @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
|
||||
*/
|
||||
protected $exclusions;
|
||||
|
||||
/**
|
||||
* Headers object.
|
||||
*
|
||||
* @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
|
||||
*/
|
||||
protected $uaHttpHeaders;
|
||||
|
||||
/**
|
||||
* The compiled regex string.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $compiledRegex;
|
||||
|
||||
/**
|
||||
* The compiled exclusions regex string.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $compiledExclusions;
|
||||
|
||||
/**
|
||||
* Class constructor.
|
||||
*/
|
||||
public function __construct(array $headers = null, $userAgent = null)
|
||||
{
|
||||
$this->crawlers = new Crawlers();
|
||||
$this->exclusions = new Exclusions();
|
||||
$this->uaHttpHeaders = new Headers();
|
||||
|
||||
$this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
|
||||
$this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());
|
||||
|
||||
$this->setHttpHeaders($headers);
|
||||
$this->setUserAgent($userAgent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compile the regex patterns into one regex string.
|
||||
*
|
||||
* @param array
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function compileRegex($patterns)
|
||||
{
|
||||
return '('.implode('|', $patterns).')';
|
||||
}
|
||||
|
||||
/**
|
||||
* Set HTTP headers.
|
||||
*
|
||||
* @param array|null $httpHeaders
|
||||
*/
|
||||
public function setHttpHeaders($httpHeaders)
|
||||
{
|
||||
// Use global _SERVER if $httpHeaders aren't defined.
|
||||
if (! is_array($httpHeaders) || ! count($httpHeaders)) {
|
||||
$httpHeaders = $_SERVER;
|
||||
}
|
||||
|
||||
// Clear existing headers.
|
||||
$this->httpHeaders = array();
|
||||
|
||||
// Only save HTTP headers. In PHP land, that means
|
||||
// only _SERVER vars that start with HTTP_.
|
||||
foreach ($httpHeaders as $key => $value) {
|
||||
if (strpos($key, 'HTTP_') === 0) {
|
||||
$this->httpHeaders[$key] = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return user agent headers.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getUaHttpHeaders()
|
||||
{
|
||||
return $this->uaHttpHeaders->getAll();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the user agent.
|
||||
*
|
||||
* @param string $userAgent
|
||||
*/
|
||||
public function setUserAgent($userAgent)
|
||||
{
|
||||
if (is_null($userAgent)) {
|
||||
foreach ($this->getUaHttpHeaders() as $altHeader) {
|
||||
if (isset($this->httpHeaders[$altHeader])) {
|
||||
$userAgent .= $this->httpHeaders[$altHeader].' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $this->userAgent = $userAgent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check user agent string against the regex.
|
||||
*
|
||||
* @param string|null $userAgent
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isCrawler($userAgent = null)
|
||||
{
|
||||
$agent = trim(preg_replace(
|
||||
"/{$this->compiledExclusions}/i",
|
||||
'',
|
||||
$userAgent ?: $this->userAgent
|
||||
));
|
||||
|
||||
if ($agent == '') {
|
||||
return false;
|
||||
}
|
||||
|
||||
$result = preg_match("/{$this->compiledRegex}/i", $agent, $matches);
|
||||
|
||||
if ($matches) {
|
||||
$this->matches = $matches;
|
||||
}
|
||||
|
||||
return (bool) $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the matches.
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
public function getMatches()
|
||||
{
|
||||
return isset($this->matches[0]) ? $this->matches[0] : null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Crawler Detect - the web crawler detection library.
|
||||
*
|
||||
* (c) Mark Beech <m@rkbee.ch>
|
||||
*
|
||||
* This source file is subject to the MIT license that is bundled
|
||||
* with this source code in the file LICENSE.
|
||||
*/
|
||||
|
||||
namespace Jaybizzle\CrawlerDetect\Fixtures;
|
||||
|
||||
abstract class AbstractProvider
|
||||
{
|
||||
/**
|
||||
* The data set.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $data;
|
||||
|
||||
/**
|
||||
* Return the data set.
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function getAll()
|
||||
{
|
||||
return $this->data;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,72 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Crawler Detect - the web crawler detection library.
|
||||
*
|
||||
* (c) Mark Beech <m@rkbee.ch>
|
||||
*
|
||||
* This source file is subject to the MIT license that is bundled
|
||||
* with this source code in the file LICENSE.
|
||||
*/
|
||||
|
||||
namespace Jaybizzle\CrawlerDetect\Fixtures;
|
||||
|
||||
class Exclusions extends AbstractProvider
|
||||
{
|
||||
/**
|
||||
* List of strings to remove from the user agent before running the crawler regex
|
||||
* Over a large list of user agents, this gives us about a 55% speed increase!
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $data = array(
|
||||
'Safari.[\d\.]*',
|
||||
'Firefox.[\d\.]*',
|
||||
' Chrome.[\d\.]*',
|
||||
'Chromium.[\d\.]*',
|
||||
'MSIE.[\d\.]',
|
||||
'Opera\/[\d\.]*',
|
||||
'Mozilla.[\d\.]*',
|
||||
'AppleWebKit.[\d\.]*',
|
||||
'Trident.[\d\.]*',
|
||||
'Windows NT.[\d\.]*',
|
||||
'Android [\d\.]*',
|
||||
'Macintosh.',
|
||||
'Ubuntu',
|
||||
'Linux',
|
||||
'[ ]Intel',
|
||||
'Mac OS X [\d_]*',
|
||||
'(like )?Gecko(.[\d\.]*)?',
|
||||
'KHTML,',
|
||||
'CriOS.[\d\.]*',
|
||||
'CPU iPhone OS ([0-9_])* like Mac OS X',
|
||||
'CPU OS ([0-9_])* like Mac OS X',
|
||||
'iPod',
|
||||
'compatible',
|
||||
'x86_..',
|
||||
'i686',
|
||||
'x64',
|
||||
'X11',
|
||||
'rv:[\d\.]*',
|
||||
'Version.[\d\.]*',
|
||||
'WOW64',
|
||||
'Win64',
|
||||
'Dalvik.[\d\.]*',
|
||||
' \.NET CLR [\d\.]*',
|
||||
'Presto.[\d\.]*',
|
||||
'Media Center PC',
|
||||
'BlackBerry',
|
||||
'Build',
|
||||
'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
|
||||
'Opera',
|
||||
' \.NET[\d\.]*',
|
||||
'cubot',
|
||||
'; M bot',
|
||||
'; CRONO',
|
||||
'; B bot',
|
||||
'; IDbot',
|
||||
'; ID bot',
|
||||
'; POWER BOT',
|
||||
';', // Remove the following characters ;
|
||||
);
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* This file is part of Crawler Detect - the web crawler detection library.
|
||||
*
|
||||
* (c) Mark Beech <m@rkbee.ch>
|
||||
*
|
||||
* This source file is subject to the MIT license that is bundled
|
||||
* with this source code in the file LICENSE.
|
||||
*/
|
||||
|
||||
namespace Jaybizzle\CrawlerDetect\Fixtures;
|
||||
|
||||
class Headers extends AbstractProvider
|
||||
{
|
||||
/**
|
||||
* All possible HTTP headers that represent the user agent string.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $data = array(
|
||||
// The default User-Agent string.
|
||||
'HTTP_USER_AGENT',
|
||||
// Header can occur on devices using Opera Mini.
|
||||
'HTTP_X_OPERAMINI_PHONE_UA',
|
||||
// Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
|
||||
'HTTP_X_DEVICE_USER_AGENT',
|
||||
'HTTP_X_ORIGINAL_USER_AGENT',
|
||||
'HTTP_X_SKYFIRE_PHONE',
|
||||
'HTTP_X_BOLT_PHONE_UA',
|
||||
'HTTP_DEVICE_STOCK_UA',
|
||||
'HTTP_X_UCBROWSER_DEVICE_UA',
|
||||
// Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address
|
||||
'HTTP_FROM',
|
||||
'HTTP_X_SCANNER', // Seen in use by Netsparker
|
||||
);
|
||||
}
|
Loading…
Reference in New Issue