From 6eb9dff807b65729363ff094eba62746f4301c28 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 6 Sep 2022 06:04:41 +0000 Subject: [PATCH] Pagecache for frequently fetched pages --- database.sql | 13 ++++- doc/database.md | 1 + doc/database/db_pagecache.md | 24 +++++++++ src/Model/Contact.php | 2 +- src/Model/ItemURI.php | 26 +++------ src/Module/ActivityPub/Objects.php | 12 +++++ src/Module/Settings/Account.php | 3 +- src/Protocol/ActivityPub/PageCache.php | 73 ++++++++++++++++++++++++++ src/Worker/OptimizeTables.php | 1 + static/dbstructure.config.php | 16 +++++- static/defaults.config.php | 6 ++- 11 files changed, 151 insertions(+), 26 deletions(-) create mode 100644 doc/database/db_pagecache.md create mode 100644 src/Protocol/ActivityPub/PageCache.php diff --git a/database.sql b/database.sql index b76a4e0206..d50e0bbd06 100644 --- a/database.sql +++ b/database.sql @@ -1,6 +1,6 @@ -- ------------------------------------------ -- Friendica 2022.09-rc (Giant Rhubarb) --- DB_UPDATE_VERSION 1482 +-- DB_UPDATE_VERSION 1483 -- ------------------------------------------ @@ -1007,6 +1007,17 @@ CREATE TABLE IF NOT EXISTS `openwebauth-token` ( FOREIGN KEY (`uid`) REFERENCES `user` (`uid`) ON UPDATE RESTRICT ON DELETE CASCADE ) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Store OpenWebAuth token to verify contacts'; +-- +-- TABLE pagecache +-- +CREATE TABLE IF NOT EXISTS `pagecache` ( + `page` varbinary(255) NOT NULL COMMENT 'Page', + `content` mediumtext COMMENT 'Page content', + `fetched` datetime COMMENT 'date when the page had been fetched', + PRIMARY KEY(`page`), + INDEX `fetched` (`fetched`) +) DEFAULT COLLATE utf8mb4_general_ci COMMENT='Stores temporary data'; + -- -- TABLE parsed_url -- diff --git a/doc/database.md b/doc/database.md index 5aed24e91a..1a01df337c 100644 --- a/doc/database.md +++ b/doc/database.md @@ -47,6 +47,7 @@ Database Tables | [notify-threads](help/database/db_notify-threads) | | | [oembed](help/database/db_oembed) | cache for OEmbed queries | | 
[openwebauth-token](help/database/db_openwebauth-token) | Store OpenWebAuth token to verify contacts | +| [pagecache](help/database/db_pagecache) | Stores temporary data | | [parsed_url](help/database/db_parsed_url) | cache for 'parse_url' queries | | [pconfig](help/database/db_pconfig) | personal (per user) configuration storage | | [permissionset](help/database/db_permissionset) | | diff --git a/doc/database/db_pagecache.md b/doc/database/db_pagecache.md new file mode 100644 index 0000000000..6cc3edf0fd --- /dev/null +++ b/doc/database/db_pagecache.md @@ -0,0 +1,24 @@ +Table pagecache +=========== + +Stores temporary data + +Fields +------ + +| Field | Description | Type | Null | Key | Default | Extra | +| ------- | ----------------------------------- | -------------- | ---- | --- | ------- | ----- | +| page | Page | varbinary(255) | NO | PRI | NULL | | +| content | Page content | mediumtext | YES | | NULL | | +| fetched | date when the page had been fetched | datetime | YES | | NULL | | + +Indexes +------------ + +| Name | Fields | +| ------- | ------- | +| PRIMARY | page | +| fetched | fetched | + + +Return to [database documentation](help/database) diff --git a/src/Model/Contact.php b/src/Model/Contact.php index 0408ea78a7..6a57693991 100644 --- a/src/Model/Contact.php +++ b/src/Model/Contact.php @@ -3321,7 +3321,7 @@ class Contact continue; } $contact = self::getByURL($url, false, ['id', 'updated']); - if (empty($contact['id'])) { + if (empty($contact['id']) && Network::isValidHttpUrl($url)) { Worker::add(PRIORITY_LOW, 'AddContact', 0, $url); ++$added; } elseif ($contact['updated'] < DateTimeFormat::utc('now -7 days')) { diff --git a/src/Model/ItemURI.php b/src/Model/ItemURI.php index 75d9cdc700..a196627352 100644 --- a/src/Model/ItemURI.php +++ b/src/Model/ItemURI.php @@ -21,8 +21,10 @@ namespace Friendica\Model; +use Friendica\Core\Logger; use Friendica\Database\Database; use Friendica\Database\DBA; +use Friendica\DI; class ItemURI { @@ -35,14 +37,16 @@ 
class ItemURI */ public static function insert(array $fields) { + $fields = DI::dbaDefinition()->truncateFieldsForTable('item-uri', $fields); + if (!DBA::exists('item-uri', ['uri' => $fields['uri']])) { - DBA::insert('item-uri', $fields, Database::INSERT_UPDATE); + DBA::insert('item-uri', $fields, Database::INSERT_IGNORE); } $itemuri = DBA::selectFirst('item-uri', ['id', 'guid'], ['uri' => $fields['uri']]); - if (!DBA::isResult($itemuri)) { // This shouldn't happen + Logger::warning('Item-uri not found', $fields); return null; } @@ -77,22 +81,4 @@ class ItemURI return $itemuri['id'] ?? 0; } - - /** - * Searched for an id of a given guid. - * - * @param string $guid - * @return integer item-uri id - * @throws \Exception - */ - public static function getIdByGUID(string $guid): int - { - $itemuri = DBA::selectFirst('item-uri', ['id'], ['guid' => $guid]); - - if (!DBA::isResult($itemuri)) { - return 0; - } - - return $itemuri['id']; - } } diff --git a/src/Module/ActivityPub/Objects.php b/src/Module/ActivityPub/Objects.php index d52c02bef2..dc9eaaf60c 100644 --- a/src/Module/ActivityPub/Objects.php +++ b/src/Module/ActivityPub/Objects.php @@ -31,6 +31,7 @@ use Friendica\Model\Item; use Friendica\Model\Post; use Friendica\Network\HTTPException; use Friendica\Protocol\ActivityPub; +use Friendica\Protocol\ActivityPub\PageCache; use Friendica\Util\HTTPSignature; use Friendica\Util\Network; use Friendica\Util\Strings; @@ -50,6 +51,13 @@ class Objects extends BaseModule DI::baseUrl()->redirect(str_replace('objects/', 'display/', DI::args()->getQueryString())); } + $data = PageCache::fetch($_SERVER['REQUEST_URI']); + if (!empty($data)) { + header('Access-Control-Allow-Origin: *'); + + System::jsonExit($data, 'application/activity+json'); + } + $itemuri = DBA::selectFirst('item-uri', ['id'], ['guid' => $this->parameters['guid']]); if (DBA::isResult($itemuri)) { @@ -127,6 +135,10 @@ class Objects extends BaseModule throw new HTTPException\NotFoundException(); } + if 
(in_array($item['private'], [Item::PUBLIC, Item::UNLISTED])) { + PageCache::add($_SERVER['REQUEST_URI'], $data); + } + // Relaxed CORS header for public items header('Access-Control-Allow-Origin: *'); diff --git a/src/Module/Settings/Account.php b/src/Module/Settings/Account.php index dcbb1861f2..373f1614d7 100644 --- a/src/Module/Settings/Account.php +++ b/src/Module/Settings/Account.php @@ -38,6 +38,7 @@ use Friendica\Model\Verb; use Friendica\Module\BaseSettings; use Friendica\Network\HTTPException; use Friendica\Protocol\Activity; +use Friendica\Util\Network; use Friendica\Util\Temporal; use Friendica\Worker\Delivery; @@ -373,7 +374,7 @@ class Account extends BaseSettings // or the handle of the account, therefore we check for either // "http" or "@" to be present in the string. // All other fields from the row will be ignored - if ((strpos($csvRow[0], '@') !== false) || in_array(parse_url($csvRow[0], PHP_URL_SCHEME), ['http', 'https'])) { + if ((strpos($csvRow[0], '@') !== false) || Network::isValidHttpUrl($csvRow[0])) { Worker::add(PRIORITY_MEDIUM, 'AddContact', local_user(), $csvRow[0]); } else { Logger::notice('Invalid account', ['url' => $csvRow[0]]); diff --git a/src/Protocol/ActivityPub/PageCache.php b/src/Protocol/ActivityPub/PageCache.php new file mode 100644 index 0000000000..d34283b152 --- /dev/null +++ b/src/Protocol/ActivityPub/PageCache.php @@ -0,0 +1,73 @@ +. 
+ * + */ + +namespace Friendica\Protocol\ActivityPub; + +use Friendica\Core\Logger; +use Friendica\Database\Database; +use Friendica\Database\DBA; +use Friendica\DI; +use Friendica\Util\DateTimeFormat; + +/** + * This class handles the page cache + */ +class PageCache +{ + /** + * Add content to the page cache + * + * @param string $page + * @param mixed $content + * @return void + */ + public static function add(string $page, $content) + { + if (!DI::config()->get('system', 'pagecache')) { + return; + } + + DBA::delete('pagecache', ["`fetched` < ?", DateTimeFormat::utc('now - 5 minutes')]); + DBA::insert('pagecache', ['page' => $page, 'content' => serialize($content), 'fetched' => DateTimeFormat::utcNow()], Database::INSERT_UPDATE); + + Logger::debug('Page added', ['page' => $page]); + } + + /** + * Fetch data from the page cache + * + * @param string $page + * @return mixed + */ + public static function fetch(string $page) + { + $pagecache = DBA::selectFirst('pagecache', [], ['page' => $page]); + if (empty($pagecache['content'])) { + return null; + } + + DBA::update('pagecache', ['fetched' => DateTimeFormat::utcNow()], ['page' => $page]); + + Logger::debug('Page fetched', ['page' => $page]); + + return unserialize($pagecache['content']); + } +} diff --git a/src/Worker/OptimizeTables.php b/src/Worker/OptimizeTables.php index cfbdca7fe5..cfe8295aac 100644 --- a/src/Worker/OptimizeTables.php +++ b/src/Worker/OptimizeTables.php @@ -43,6 +43,7 @@ class OptimizeTables DBA::e("OPTIMIZE TABLE `cache`"); DBA::e("OPTIMIZE TABLE `locks`"); DBA::e("OPTIMIZE TABLE `oembed`"); + DBA::e("OPTIMIZE TABLE `pagecache`"); DBA::e("OPTIMIZE TABLE `parsed_url`"); DBA::e("OPTIMIZE TABLE `session`"); diff --git a/static/dbstructure.config.php b/static/dbstructure.config.php index bd5cdd93e8..01c593f68d 100644 --- a/static/dbstructure.config.php +++ b/static/dbstructure.config.php @@ -55,7 +55,7 @@ use Friendica\Database\DBA; if (!defined('DB_UPDATE_VERSION')) { - 
define('DB_UPDATE_VERSION', 1482); + define('DB_UPDATE_VERSION', 1483); } return [ @@ -795,7 +795,7 @@ return [ "conversation" => ["type" => "varbinary(383)", "comment" => ""], "type" => ["type" => "varchar(64)", "comment" => "Type of the activity"], "object-type" => ["type" => "varchar(64)", "comment" => "Type of the object activity"], - "object-object-type" => ["type" => "varchar(64)", "comment" => "Type of the object's object activity"], + "object-object-type" => ["type" => "varchar(64)", "comment" => "Type of the object's object activity"], "received" => ["type" => "datetime", "comment" => "Receiving date"], "activity" => ["type" => "mediumtext", "comment" => "The JSON activity"], "signer" => ["type" => "varchar(255)", "comment" => ""], @@ -1051,6 +1051,18 @@ return [ "uid" => ["uid"], ] ], + "pagecache" => [ + "comment" => "Stores temporary data", + "fields" => [ + "page" => ["type" => "varbinary(255)", "not null" => "1", "primary" => "1", "comment" => "Page"], + "content" => ["type" => "mediumtext", "comment" => "Page content"], + "fetched" => ["type" => "datetime", "comment" => "date when the page had been fetched"], + ], + "indexes" => [ + "PRIMARY" => ["page"], + "fetched" => ["fetched"], + ], + ], "parsed_url" => [ "comment" => "cache for 'parse_url' queries", "fields" => [ diff --git a/static/defaults.config.php b/static/defaults.config.php index 926cdd7d39..bfd2f8d44c 100644 --- a/static/defaults.config.php +++ b/static/defaults.config.php @@ -478,6 +478,10 @@ return [ // Don't show smilies. 'no_smilies' => false, + // pagecache (Boolean) + // Cache frequently fetched pages. + 'pagecache' => false, + // paranoia (Boolean) // Log out users if their IP address changed. 'paranoia' => false, @@ -643,7 +647,7 @@ return [ 'worker_load_exponent' => 3, // worker_processes_cooldown (Integer) - // Maximum number pro processes that causes a cooldown before each worker function call. 
+ // Maximum number of processes that causes a cooldown before each worker function call. 'worker_processes_cooldown' => 0, // worker_multiple_fetch (Boolean)