diff --git a/src/Content/Text/BBCode.php b/src/Content/Text/BBCode.php index bacfe27bb1..3112ce169f 100644 --- a/src/Content/Text/BBCode.php +++ b/src/Content/Text/BBCode.php @@ -1317,10 +1317,8 @@ class BBCode Hook::callAll('bbcode', $text); - $a = DI::app(); - - $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a, $uriid) { - $text = self::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $a, $uriid) { + $text = self::performWithEscapedTags($text, ['code'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $uriid) { + $text = self::performWithEscapedTags($text, ['noparse', 'nobb', 'pre'], function ($text) use ($try_oembed, $simple_html, $for_plaintext, $uriid) { /* * preg_match_callback function to replace potential Oembed tags with Oembed content * @@ -1341,534 +1339,66 @@ class BBCode return $return; }; - // Remove the abstract element. It is a non visible element. - $text = self::stripAbstract($text); - - // Line ending normalisation - $text = str_replace("\r\n", "\n", $text); - - // Move new lines outside of tags - $text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text); - $text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text); - // Extract the private images which use data urls since preg has issues with // large data sizes. Stash them away while we do bbcode conversion, and then put them back // in after we've done all the regex matching. We cannot use any preg functions to do this. - $extracted = self::extractImagesFromItemBody($text); - $text = $extracted['body']; $saved_image = $extracted['images']; - // If we find any event code, turn it into an event. - // After we're finished processing the bbcode we'll - // replace all of the event code with a reformatted version. 
+ // General clean up of the content, for example unneeded blanks and new lines + $text = self::normaliseInput($extracted['body']); - $ev = Event::fromBBCode($text); - - // Replace any html brackets with HTML Entities to prevent executing HTML or script - // Don't use strip_tags here because it breaks [url] search by replacing & with amp - - $text = str_replace("<", "<", $text); - $text = str_replace(">", ">", $text); - - // remove some newlines before the general conversion - $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1]$2[/share]\n", $text); - $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "\n[quote$1]$2[/quote]\n", $text); - - // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems - if (!$try_oembed) { - $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); - } - - // Remove linefeeds inside of the table elements. 
See issue #6799 - $search = [ - "\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", - "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", - "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", - "\n[hr]", "[hr]\n", " [hr]", "[hr] ", - "\n[attachment ", " [attachment ", "\n[/attachment]", "[/attachment]\n", " [/attachment]", "[/attachment] ", - "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] ", - " \n", "\t\n", "[/li]\n", "\n[li]", "\n[*]", - ]; - $replace = [ - "[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", - "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", - "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", - "[hr]", "[hr]", "[hr]", "[hr]", - "[attachment ", "[attachment ", "[/attachment]", "[/attachment]", "[/attachment]", "[/attachment]", - "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]", - "\n", "\n", "[/li]", "[li]", "[*]", - ]; - do { - $oldtext = $text; - $text = str_replace($search, $replace, $text); - } while ($oldtext != $text); - - // Replace these here only once - $search = ["\n[table]", "[/table]\n"]; - $replace = ["[table]", "[/table]"]; - $text = str_replace($search, $replace, $text); - - // Trim new lines regardless of the system.remove_multiplicated_lines config value - $text = trim($text, "\n"); - - // removing multiplicated newlines - if (DI::config()->get('system', 'remove_multiplicated_lines')) { - $search = [ - "\n\n\n", "[/quote]\n\n", "\n[/quote]", "\n[ul]", "[/ul]\n", "\n[ol]", "[/ol]\n", "\n\n[share ", "[/attachment]\n", - "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n" - ]; - $replace = [ - "\n\n", "[/quote]\n", "[/quote]", "[ul]", "[/ul]", "[ol]", "[/ol]", "\n[share ", "[/attachment]", - "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]" - ]; - do { - $oldtext = $text; - $text = str_replace($search, 
$replace, $text); - } while ($oldtext != $text); - } - - /// @todo Have a closer look at the different html modes - // Handle attached links or videos - if ($simple_html == self::NPF) { - $text = self::removeAttachment($text); - } elseif (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) { - $text = self::replaceAttachment($text); - } elseif (!in_array($simple_html, [self::INTERNAL, self::EXTERNAL, self::CONNECTORS])) { - $text = self::replaceAttachment($text, true); - } else { - $text = self::convertAttachment($text, $simple_html, $try_oembed, [], $uriid); - } - - $nosmile = strpos($text, '[nosmile]') !== false; - $text = str_replace('[nosmile]', '', $text); - - // Replace non graphical smilies for external posts - if (!$nosmile) { - $text = self::performWithEscapedTags($text, ['url', 'img', 'audio', 'video', 'youtube', 'vimeo', 'share', 'attachment', 'iframe', 'bookmark'], function ($text) use ($simple_html, $for_plaintext) { - return Smilies::replace($text, ($simple_html != self::INTERNAL) || $for_plaintext); - }); - } - - // leave open the possibility of [map=something] - // this is replaced in Item::prepareBody() which has knowledge of the item location - if (strpos($text, '[/map]') !== false) { - $text = preg_replace_callback( - "/\[map\](.*?)\[\/map\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byLocation($match[1], $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map=') !== false) { - $text = preg_replace_callback( - "/\[map=(.*?)\]/ism", - function ($match) use ($simple_html) { - return str_replace($match[0], '

' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

', $match[0]); - }, - $text - ); - } - - if (strpos($text, '[map]') !== false) { - $text = preg_replace("/\[map\]/", '

', $text); - } - - // Check for headers - - if ($simple_html == self::INTERNAL) { - //Ensure to always start with

if possible - $heading_count = 0; - for ($level = 6; $level > 0; $level--) { - if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { - $heading_count++; - } + // Now the structural elements are converted + $text = self::convertHeaderToHtml($text, $simple_html); + $text = self::convertStylesToHtml($text, $simple_html); + $text = self::convertListsToHtml($text); + $text = self::convertTablesToHtml($text); + $text = self::convertSpoilersToHtml($text); + $text = self::convertStructuresToHtml($text); + + // We wrap bare URLs (those without a surrounding [url] element) at this point; an earlier stage would have been too early, + // since the regular expression used won't touch URLs inside of BBCode elements, but inside the structural ones it should. + // At a later stage we won't be able to exclude certain parts of the code. + $text = self::performWithEscapedTags($text, ['url', 'img', 'audio', 'video', 'youtube', 'vimeo', 'share', 'attachment', 'iframe', 'bookmark', 'map', 'oembed'], function ($text) use ($simple_html, $for_plaintext) { + if (!$for_plaintext) { + $text = preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); } - if ($heading_count > 0) { - $heading = min($heading_count + 3, 6); - for ($level = 6; $level > 0; $level--) { - if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { - $text = preg_replace("(\[h$level\](.*?)\[\/h$level\])ism", "

$1

", $text); - $heading--; - } - } - } - } else { - $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

$1

', $text); - $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

$1

', $text); - $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

$1

', $text); - $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

$1

', $text); - $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '

$1

', $text); - $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '

$1

', $text); - } + return self::convertSmileysToHtml($text, $simple_html, $for_plaintext); + }); + + // Now for some more complex BBCode elements (mostly non standard ones) + $text = self::convertAttachmentsToHtml($text, $simple_html, $try_oembed, $uriid); + $text = self::convertMapsToHtml($text, $simple_html); + $text = self::convertQuotesToHtml($text); + $text = self::convertVideoPlatformsToHtml($text, $try_oembed); + $text = self::convertOEmbedToHtml($text, $uriid); + $text = self::convertEventsToHtml($text, $simple_html, $uriid); - // Check for paragraph - $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

$1

', $text); - - // Check for bold text - $text = preg_replace("(\[b\](.*?)\[\/b\])ism", '$1', $text); - - // Check for Italics text - $text = preg_replace("(\[i\](.*?)\[\/i\])ism", '$1', $text); - - // Check for Underline text - $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '$1', $text); - - // Check for strike-through text - $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '$1', $text); - - // Check for over-line text - $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); - - // Check for colored text - $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); - - // Check for sized text - // [size=50] --> font-size: 50px (with the unit). - if ($simple_html != self::DIASPORA) { - $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", '$2', $text); - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", '$2', $text); - } else { - // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element - $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); - } - - - // Check for centered text - $text = preg_replace("(\[center\](.*?)\[\/center\])ism", '
$1
', $text); - - // Check for block-level custom CSS - $text = preg_replace('#(?<=^|\n)\[style=(.*?)](.*?)\[/style](?:\n|$)#ism', '
$2
', $text); - - // Check for inline custom CSS - $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '$2', $text); - - // Mastodon Emoji (internal tag, do not document for users) - if ($simple_html == self::MASTODON_API) { - $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); - } else { - $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); - } - - // Check for CSS classes - // @deprecated since 2021.12, left for backward-compatibility reasons - $text = preg_replace("(\[class=(.*?)\](.*?)\[\/class\])ism", '$2', $text); - // Add HTML new lines - $text = str_replace("\n\n", '

', $text); - $text = str_replace("\n", '
', $text); - - // handle nested lists - $endlessloop = 0; - - while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || - ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || - ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || - ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { - $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '

', $text); - $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '

', $text); - $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '

    $1

', $text); - $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '

  • $1
  • ', $text); - } - - // Check for list text - $text = str_replace("[*]", "
  • ", $text); - $text = str_replace("[li]", "
  • ", $text); - - $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); - $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); - $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); - $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '

    $1

    ', $text); - - $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '

    $1

    ', $text); - $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '

    $1

    ', $text); - - $text = str_replace('[hr]', '


    ', $text); - - if (!$for_plaintext) { - $text = self::performWithEscapedTags($text, ['url', 'img', 'audio', 'video', 'youtube', 'vimeo', 'share', 'attachment', 'iframe', 'bookmark'], function ($text) { - return preg_replace(Strings::autoLinkRegEx(), '[url]$1[/url]', $text); - }); - } - - // Check for font change text - $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); - - // Declare the format for [spoiler] layout - $SpoilerLayout = '

    ' . DI::l10n()->t('Click to open/close') . '$1
    '; - - // Check for [spoiler] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); - } - - // Check for [spoiler=Title] text - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace( - "/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", - '
    $1$2
    ', - $text - ); - } - - // Declare the format for [quote] layout - $QuoteLayout = '

    $1

    '; - - // Check for [quote] text - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { - $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); - } - - // Check for [quote=Author] text - - $t_wrote = DI::l10n()->t('$1 wrote:'); - - // handle nested quotes - $endlessloop = 0; - while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { - $text = preg_replace( - "/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", - "

    " . $t_wrote . "

    $2
    ", - $text - ); - } - - - // [img=widthxheight]image source[/img] - $text = preg_replace_callback( - "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", - function ($matches) use ($simple_html, $uriid) { - if (strpos($matches[3], "data:image/") === 0) { - return $matches[0]; - } - - $matches[3] = self::proxyUrl($matches[3], $simple_html, $uriid); - return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; - }, - $text - ); - - $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); - $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); - - $text = preg_replace_callback( - "/\[[iz]mg\=(.*?)\](.*?)\[\/[iz]mg\]/ism", - function ($matches) use ($simple_html, $uriid) { - $matches[1] = self::proxyUrl($matches[1], $simple_html, $uriid); - $alt = htmlspecialchars($matches[2], ENT_COMPAT); - // Fix for Markdown problems with Diaspora, see issue #12701 - if (($simple_html != self::DIASPORA) || strpos($matches[2], '"') === false) { - return '' . $alt . ''; - } else { - return '' . $alt . ''; - } - }, - $text - ); - - // Images - // [img]pathtoimage[/img] - $text = preg_replace_callback( - "/\[[iz]mg\](.*?)\[\/[iz]mg\]/ism", - function ($matches) use ($simple_html, $uriid) { - if (strpos($matches[1], "data:image/") === 0) { - return $matches[0]; - } - - $matches[1] = self::proxyUrl($matches[1], $simple_html, $uriid); - return "[img]" . $matches[1] . "[/img]"; - }, - $text - ); - - $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); - - $text = self::convertImages($text, $simple_html, $uriid); - - $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - //$text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); - - // Simplify "video" element - $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text); - - $text = preg_replace_callback("/\[(video)\](.*?)\[\/video\]/ism", [self::class, 'sanitizeLinksCallback'], $text); - $text = preg_replace_callback("/\[(audio)\](.*?)\[\/audio\]/ism", [self::class, 'sanitizeLinksCallback'], $text); - - if ($simple_html == self::NPF) { - $text = preg_replace( - "/\[video\](.*?)\[\/video\]/ism", - '

    ', - $text - ); - $text = preg_replace( - "/\[audio\](.*?)\[\/audio\]/ism", - '

    ', - $text - ); - } elseif ($try_oembed) { - // html5 video and audio - $text = preg_replace( - "/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", - '', - $text - ); - - $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); - $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); - - $text = preg_replace( - "/\[video\](.*?)\[\/video\]/ism", - '$1', - $text - ); - $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); - } else { - $text = preg_replace( - "/\[video\](.*?)\[\/video\]/ism", - '$1', - $text - ); - $text = preg_replace( - "/\[audio\](.*?)\[\/audio\]/ism", - '$1', - $text - ); - } - - // Backward compatibility, [iframe] support has been removed in version 2020.12 - $text = preg_replace_callback("/\[(iframe)\](.*?)\[\/iframe\]/ism", [self::class, 'sanitizeLinksCallback'], $text); - $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '$1', $text); - - $text = self::normalizeVideoLinks($text); - - // Youtube extensions - if ($try_oembed && OEmbed::isAllowedURL('https://www.youtube.com/embed/')) { - $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); - } else { - $text = preg_replace( - "/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", - 'https://www.youtube.com/watch?v=$1', - $text - ); - } - - // Vimeo extensions - if ($try_oembed && OEmbed::isAllowedURL('https://player.vimeo.com/video')) { - $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); - } else { - $text = preg_replace( - "/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", - 'https://vimeo.com/$1', - $text - ); - } - - // oembed tag - $text = OEmbed::BBCode2HTML($text, $uriid); - - // Avoid triple linefeeds through oembed - $text = str_replace("


    ", "

    ", $text); - - // If we found an event earlier, strip out all the event code and replace with a reformatted version. - // Replace the event-start section with the entire formatted event. The other bbcode is stripped. - // Summary (e.g. title) is required, earlier revisions only required description (in addition to - // start which is always required). Allow desc with a missing summary for compatibility. - - if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { - $sub = Event::getHTML($ev, $simple_html, $uriid); - - $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); - $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); - $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); - $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); - $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); - $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); - } + // Some simpler non standard elements + $text = self::convertEmojisToHtml($text, $simple_html); + $text = self::convertCryptToHtml($text); + $text = self::convertIFramesToHtml($text); + $text = self::convertMailToHtml($text); + $text = self::convertAudioVideoToHtml($text, $simple_html, $try_oembed, $try_oembed_callback); + + // At last, some standard elements. URL has to go last, + // since some previous conversions use URL elements. + $text = self::convertImagesToHtml($text, $simple_html, $uriid); + $text = self::convertUrlToHtml($text, $simple_html, $for_plaintext, $try_oembed, $try_oembed_callback); + // If the post only consists of an emoji, we display it larger than normal. if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA) && Smilies::isEmojiPost($text)) { $text = '' . $text . 
''; } - $text = self::convertUrlToHtml($text, $simple_html, $for_plaintext, $try_oembed, $try_oembed_callback); + // Sanitize the created HTML. + $text = self::cleanupHtml($text); - // we may need to restrict this further if it picks up too many strays - // link acct:user@host to a webfinger profile redirector + // This needs to be called after the cleanup, since otherwise some links are invalidated + $text = self::convertSharesToHtml($text, $simple_html, $try_oembed, $uriid); - $text = preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); - - // Perform MAIL Search - $text = preg_replace_callback("/\[(mail)\](.*?)\[\/mail\]/ism", [self::class, 'sanitizeLinksCallback'], $text); - $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); - $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); - - /// @todo What is the meaning of these lines? - $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); - $text = preg_replace('/\&\#039\;/', '\'', $text); - - // Currently deactivated, it made problems with " inside of alt texts. - //$text = preg_replace('/\"\;/', '"', $text); - - // fix any escaped ampersands that may have been converted into links - $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); - - // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) - $allowed_src_protocols = ['//', 'http://', 'https://', 'contact/redir/', 'cid:']; - - array_walk($allowed_src_protocols, function (&$value) { - $value = preg_quote($value, '#'); - }); - - $text = preg_replace( - '#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', - '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . 
'">', - $text - ); - - // sanitize href attributes (only allowlisted protocols URLs) - // default value for backward compatibility - $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); - - // Always allowed protocol even if config isn't set or not including it - $allowed_link_protocols[] = '//'; - $allowed_link_protocols[] = 'http://'; - $allowed_link_protocols[] = 'https://'; - $allowed_link_protocols[] = 'contact/redir/'; - - array_walk($allowed_link_protocols, function (&$value) { - $value = preg_quote($value, '#'); - }); - - $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; - $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . '">', $text); - - // Shared content - $text = self::convertShare( - $text, - function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { - return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); - }, - $uriid - ); - - $text = self::interpolateSavedImagesIntoItemBody($uriid, $text, $saved_image); - - return $text; + // Insert the previously extracted embedded image again. + return self::interpolateSavedImagesIntoItemBody($uriid, $text, $saved_image); }); // Escaped noparse, nobb, pre // Remove escaping tags and replace new lines that remain @@ -1918,6 +1448,523 @@ class BBCode return trim($text); } + private static function normaliseInput(string $text): string + { + // Remove the abstract element. It is a non visible element. 
+ $text = self::stripAbstract($text); + + // Line ending normalisation + $text = str_replace("\r\n", "\n", $text); + + // Move new lines outside of tags + $text = preg_replace("#\[(\w*)](\n*)#ism", '$2[$1]', $text); + $text = preg_replace("#(\n*)\[/(\w*)]#ism", '[/$2]$1', $text); + + // Replace any html brackets with HTML Entities to prevent executing HTML or script + // Don't use strip_tags here because it breaks [url] search by replacing & with amp + + $text = str_replace("<", "<", $text); + $text = str_replace(">", ">", $text); + + // remove some newlines before the general conversion + $text = preg_replace("/\s?\[share(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1]$2[/share]\n", $text); + $text = preg_replace("/\s?\[quote(.*?)\]\s?(.*?)\s?\[\/quote\]\s?/ism", "\n[quote$1]$2[/quote]\n", $text); + + // Remove linefeeds inside of the table elements. See issue #6799 + $search = [ + "\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ", + "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ", + "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ", + "\n[hr]", "[hr]\n", " [hr]", "[hr] ", + "\n[attachment ", " [attachment ", "\n[/attachment]", "[/attachment]\n", " [/attachment]", "[/attachment] ", + "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] ", + " \n", "\t\n", "[/li]\n", "\n[li]", "\n[*]", + ]; + $replace = [ + "[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]", + "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]", + "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]", + "[hr]", "[hr]", "[hr]", "[hr]", + "[attachment ", "[attachment ", "[/attachment]", "[/attachment]", "[/attachment]", "[/attachment]", + "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]", + "\n", "\n", "[/li]", "[li]", "[*]", + ]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); + + // Replace these here only once + $search = ["\n[table]", 
"[/table]\n"]; + $replace = ["[table]", "[/table]"]; + $text = str_replace($search, $replace, $text); + + // Trim new lines regardless of the system.remove_multiplicated_lines config value + $text = trim($text, "\n"); + + // removing multiplicated newlines + if (DI::config()->get('system', 'remove_multiplicated_lines')) { + $search = [ + "\n\n\n", "[/quote]\n\n", "\n[/quote]", "\n[ul]", "[/ul]\n", "\n[ol]", "[/ol]\n", "\n\n[share ", "[/attachment]\n", + "\n[h1]", "[/h1]\n", "\n[h2]", "[/h2]\n", "\n[h3]", "[/h3]\n", "\n[h4]", "[/h4]\n", "\n[h5]", "[/h5]\n", "\n[h6]", "[/h6]\n" + ]; + $replace = [ + "\n\n", "[/quote]\n", "[/quote]", "[ul]", "[/ul]", "[ol]", "[/ol]", "\n[share ", "[/attachment]", + "[h1]", "[/h1]", "[h2]", "[/h2]", "[h3]", "[/h3]", "[h4]", "[/h4]", "[h5]", "[/h5]", "[h6]", "[/h6]" + ]; + do { + $oldtext = $text; + $text = str_replace($search, $replace, $text); + } while ($oldtext != $text); + } + + return $text; + } + + private static function convertEventsToHtml(string $text, int $simple_html, int $uriid): string + { + // If we find any event code, turn it into an event. + // After we're finished processing the bbcode we'll + // replace all of the event code with a reformatted version. + + $ev = Event::fromBBCode($text); + + // If we found an event earlier, strip out all the event code and replace with a reformatted version. + // Replace the event-start section with the entire formatted event. The other bbcode is stripped. + // Summary (e.g. title) is required, earlier revisions only required description (in addition to + // start which is always required). Allow desc with a missing summary for compatibility. 
+ + if ((!empty($ev['desc']) || !empty($ev['summary'])) && !empty($ev['start'])) { + $sub = Event::getHTML($ev, $simple_html, $uriid); + + $text = preg_replace("/\[event\-summary\](.*?)\[\/event\-summary\]/ism", '', $text); + $text = preg_replace("/\[event\-description\](.*?)\[\/event\-description\]/ism", '', $text); + $text = preg_replace("/\[event\-start\](.*?)\[\/event\-start\]/ism", $sub, $text); + $text = preg_replace("/\[event\-finish\](.*?)\[\/event\-finish\]/ism", '', $text); + $text = preg_replace("/\[event\-location\](.*?)\[\/event\-location\]/ism", '', $text); + $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text); + } + + return $text; + } + + private static function convertAttachmentsToHtml(string $text, int $simple_html, bool $try_oembed, int $uriid): string + { + /// @todo Have a closer look at the different html modes + // Handle attached links or videos + if ($simple_html == self::NPF) { + $text = self::removeAttachment($text); + } elseif (in_array($simple_html, [self::MASTODON_API, self::TWITTER_API, self::ACTIVITYPUB])) { + $text = self::replaceAttachment($text); + } elseif (!in_array($simple_html, [self::INTERNAL, self::EXTERNAL, self::CONNECTORS])) { + $text = self::replaceAttachment($text, true); + } else { + $text = self::convertAttachment($text, $simple_html, $try_oembed, [], $uriid); + } + + return $text; + } + + private static function convertMapsToHtml(string $text, int $simple_html): string + { + // leave open the possibility of [map=something] + // this is replaced in Item::prepareBody() which has knowledge of the item location + if (strpos($text, '[/map]') !== false) { + $text = preg_replace_callback( + "/\[map\](.*?)\[\/map\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byLocation($match[1], $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map=') !== false) { + $text = preg_replace_callback( + "/\[map=(.*?)\]/ism", + function ($match) use ($simple_html) { + return str_replace($match[0], '

    ' . Map::byCoordinates(str_replace('/', ' ', $match[1]), $simple_html) . '

    ', $match[0]); + }, + $text + ); + } + + if (strpos($text, '[map]') !== false) { + $text = preg_replace("/\[map\]/", '

    ', $text); + } + + return $text; + } + + private static function convertHeaderToHtml(string $text, int $simple_html): string + { + // Check for headers + + if ($simple_html == self::INTERNAL) { + //Ensure to always start with

    if possible + $heading_count = 0; + for ($level = 6; $level > 0; $level--) { + if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { + $heading_count++; + } + } + if ($heading_count > 0) { + $heading = min($heading_count + 3, 6); + for ($level = 6; $level > 0; $level--) { + if (preg_match("(\[h$level\].*?\[\/h$level\])ism", $text)) { + $text = preg_replace("(\[h$level\](.*?)\[\/h$level\])ism", "

    $1

    ", $text); + $heading--; + } + } + } + } else { + $text = preg_replace("(\[h1\](.*?)\[\/h1\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h2\](.*?)\[\/h2\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h3\](.*?)\[\/h3\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h4\](.*?)\[\/h4\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h5\](.*?)\[\/h5\])ism", '

    $1

    ', $text); + $text = preg_replace("(\[h6\](.*?)\[\/h6\])ism", '

    $1

    ', $text); + } + + return $text; + } + + private static function convertEmojisToHtml(string $text, int $simple_html): string + { + // Mastodon Emoji (internal tag, do not document for users) + if ($simple_html == self::MASTODON_API) { + $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); + } else { + $text = preg_replace("(\[emoji=(.*?)](.*?)\[/emoji])ism", '$2', $text); + } + return $text; + } + + private static function convertStylesToHtml(string $text, int $simple_html): string + { + // Markdown is designed to pass through HTML elements that it can't handle itself, + // so that the other system would parse the original HTML element. + // But Diaspora has chosen not to do this and doesn't parse HTML elements. + // So we need to make some changes here. + if ($simple_html == BBCode::DIASPORA) { + $elements = ['big', 'small']; + foreach ($elements as $bbcode) { + $text = preg_replace("(\[" . $bbcode . "\](.*?)\[\/" . $bbcode . "\])ism", '$1', $text); + } + + $elements = ['del' => 's', 'ins' => 'em', 'kbd' => 'code', 'mark' => 'strong', + 'samp' => 'code', 'u' => 'em', 'var' => 'em']; + foreach ($elements as $bbcode => $html) { + $text = preg_replace("(\[" . $bbcode . "\](.*?)\[\/" . $bbcode . "\])ism", '<' . $html . '>$1', $text); + } + } + + // Several easy to replace HTML elements + // @todo add the new elements to the documentation by the end of 2024 so that most systems will support them. + $elements = ['b', 'del', 'em', 'i', 'ins', 'kbd', 'mark', + 's', 'samp', 'small', 'strong', 'sub', 'sup', 'u', 'var']; + foreach ($elements as $element) { + $text = preg_replace("(\[" . $element . "\](.*?)\[\/" . $element . "\])ism", '<' . $element . 
'>$1', $text); + } + + $text = preg_replace("(\[big\](.*?)\[\/big\])ism", "$1", $text); + + // Check for over-line text + $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '$1', $text); + + // Check for colored text + $text = preg_replace("(\[color=(.*?)\](.*?)\[\/color\])ism", "$2", $text); + + // Check for sized text + // [size=50] --> font-size: 50px (with the unit). + if ($simple_html != self::DIASPORA) { + $text = preg_replace("(\[size=(\d*?)\](.*?)\[\/size\])ism", '$2', $text); + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", '$2', $text); + } else { + // Issue 2199: Diaspora doesn't interpret the construct above, nor the or element + $text = preg_replace("(\[size=(.*?)\](.*?)\[\/size\])ism", "$2", $text); + } + + // Check for centered text + $text = preg_replace("(\[center\](.*?)\[\/center\])ism", '

    $1
    ', $text); + + // Check for block-level custom CSS + $text = preg_replace('#(?<=^|\n)\[style=(.*?)](.*?)\[/style](?:\n|$)#ism', '
    $2
    ', $text); + + // Check for inline custom CSS + $text = preg_replace("(\[style=(.*?)\](.*?)\[\/style\])ism", '$2', $text); + + // Check for CSS classes + // @deprecated since 2021.12, left for backward-compatibility reasons + $text = preg_replace("(\[class=(.*?)\](.*?)\[\/class\])ism", '$2', $text); + // Add HTML new lines + $text = str_replace("\n\n", '

    ', $text); + $text = str_replace("\n", '
    ', $text); + + // Check for font change text + $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "$2", $text); + + return $text; + } + + private static function convertTablesToHtml(string $text): string + { + $text = preg_replace("/\[th\](.*?)\[\/th\]/sm", '$1', $text); + $text = preg_replace("/\[td\](.*?)\[\/td\]/sm", '$1', $text); + $text = preg_replace("/\[tr\](.*?)\[\/tr\]/sm", '$1', $text); + $text = preg_replace("/\[table\](.*?)\[\/table\]/sm", '

    $1

    ', $text); + + $text = preg_replace("/\[table border=1\](.*?)\[\/table\]/sm", '

    $1

    ', $text); + $text = preg_replace("/\[table border=0\](.*?)\[\/table\]/sm", '

    $1

    ', $text); + + return $text; + } + + private static function convertListsToHtml(string $text): string + { + // handle nested lists + $endlessloop = 0; + + while ((((strpos($text, "[/list]") !== false) && (strpos($text, "[list") !== false)) || + ((strpos($text, "[/ol]") !== false) && (strpos($text, "[ol]") !== false)) || + ((strpos($text, "[/ul]") !== false) && (strpos($text, "[ul]") !== false)) || + ((strpos($text, "[/li]") !== false) && (strpos($text, "[li]") !== false))) && (++$endlessloop < 20)) { + $text = preg_replace("/\[list\](.*?)\[\/list\]/ism", '

      $1

    ', $text); + $text = preg_replace("/\[list=\](.*?)\[\/list\]/ism", '

      $1

    ', $text); + $text = preg_replace("/\[list=1\](.*?)\[\/list\]/ism", '

      $1

    ', $text); + $text = preg_replace("/\[list=((?-i)i)\](.*?)\[\/list\]/ism", '

      $2

    ', $text); + $text = preg_replace("/\[list=((?-i)I)\](.*?)\[\/list\]/ism", '

      $2

    ', $text); + $text = preg_replace("/\[list=((?-i)a)\](.*?)\[\/list\]/ism", '

      $2

    ', $text); + $text = preg_replace("/\[list=((?-i)A)\](.*?)\[\/list\]/ism", '

      $2

    ', $text); + $text = preg_replace("/\[ul\](.*?)\[\/ul\]/ism", '

      $1

    ', $text); + $text = preg_replace("/\[ol\](.*?)\[\/ol\]/ism", '

      $1

    ', $text); + $text = preg_replace("/\[li\](.*?)\[\/li\]/ism", '

  • $1
  • ', $text); + } + + // Check for list text + $text = str_replace("[*]", "
  • ", $text); + $text = str_replace("[li]", "
  • ", $text); + + return $text; + } + + private static function convertSpoilersToHtml(string $text): string + { + // Declare the format for [spoiler] layout + $SpoilerLayout = '
    ' . DI::l10n()->t('Click to open/close') . '$1
    '; + + // Check for [spoiler] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[spoiler\](.*?)\[\/spoiler\]/ism", $SpoilerLayout, $text); + } + + // Check for [spoiler=Title] text + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/spoiler]") !== false) && (strpos($text, "[spoiler=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace( + "/\[spoiler=[\"\']*(.*?)[\"\']*\](.*?)\[\/spoiler\]/ism", + '
    $1$2
    ', + $text + ); + } + + return $text; + } + + private static function convertStructuresToHtml(string $text): string + { + $text = preg_replace("(\[p\](.*?)\[\/p\])ism", '

    $1

    ', $text); + // Check for paragraph + return str_replace('[hr]', '


    ', $text); + } + + private static function convertSmileysToHtml(string $text, int $simple_html, bool $for_plaintext): string + { + if (strpos($text, '[nosmile]') !== false) { + $text = str_replace('[nosmile]', '', $text); + return $text; + } + + return Smilies::replace($text, ($simple_html != self::INTERNAL) || $for_plaintext); + } + + private static function convertQuotesToHtml(string $text): string + { + // Declare the format for [quote] layout + $QuoteLayout = '

    $1

    '; + + // Check for [quote] text + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote]") !== false) && (++$endlessloop < 20)) { + $text = preg_replace("/\[quote\](.*?)\[\/quote\]/ism", "$QuoteLayout", $text); + } + + // Check for [quote=Author] text + + $t_wrote = DI::l10n()->t('$1 wrote:'); + + // handle nested quotes + $endlessloop = 0; + while ((strpos($text, "[/quote]") !== false) && (strpos($text, "[quote=") !== false) && (++$endlessloop < 20)) { + $text = preg_replace( + "/\[quote=[\"\']*(.*?)[\"\']*\](.*?)\[\/quote\]/ism", + "

    " . $t_wrote . "

    $2
    ", + $text + ); + } + + return $text; + } + + private static function convertImagesToHtml(string $text, int $simple_html, int $uriid): string + { + // [img=widthxheight]image source[/img] + $text = preg_replace_callback( + "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", + function ($matches) use ($simple_html, $uriid) { + if (strpos($matches[3], "data:image/") === 0) { + return $matches[0]; + } + + $matches[3] = self::proxyUrl($matches[3], $simple_html, $uriid); + return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '', $text); + $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '', $text); + + $text = preg_replace_callback( + "/\[[iz]mg\=(.*?)\](.*?)\[\/[iz]mg\]/ism", + function ($matches) use ($simple_html, $uriid) { + $matches[1] = self::proxyUrl($matches[1], $simple_html, $uriid); + $alt = htmlspecialchars($matches[2], ENT_COMPAT); + // Fix for Markdown problems with Diaspora, see issue #12701 + if (($simple_html != self::DIASPORA) || strpos($matches[2], '"') === false) { + return '' . $alt . ''; + } else { + return '' . $alt . ''; + } + }, + $text + ); + + // Images + // [img]pathtoimage[/img] + $text = preg_replace_callback( + "/\[[iz]mg\](.*?)\[\/[iz]mg\]/ism", + function ($matches) use ($simple_html, $uriid) { + if (strpos($matches[1], "data:image/") === 0) { + return $matches[0]; + } + + $matches[1] = self::proxyUrl($matches[1], $simple_html, $uriid); + return "[img]" . $matches[1] . "[/img]"; + }, + $text + ); + + $text = preg_replace("/\[img\](.*?)\[\/img\]/ism", '' . DI::l10n()->t('Image/photo') . '', $text); + $text = preg_replace("/\[zmg\](.*?)\[\/zmg\]/ism", '' . DI::l10n()->t('Image/photo') . 
'', $text); + + $text = self::convertImages($text, $simple_html, $uriid); + + return $text; + } + + private static function convertCryptToHtml(string $text): string + { + $text = preg_replace("/\[crypt\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + $text = preg_replace("/\[crypt(.*?)\](.*?)\[\/crypt\]/ism", '
    ' . DI::l10n()->t('Encrypted content') . '
    ', $text); + return $text; + } + + private static function convertAudioVideoToHtml(string $text, int $simple_html, bool $try_oembed, \Closure $try_oembed_callback): string + { + // Simplify "video" element + $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text); + + $text = preg_replace_callback("/\[(video)\](.*?)\[\/video\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + $text = preg_replace_callback("/\[(audio)\](.*?)\[\/audio\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + + if ($simple_html == self::NPF) { + $text = preg_replace( + "/\[video\](.*?)\[\/video\]/ism", + '

    ', + $text + ); + $text = preg_replace( + "/\[audio\](.*?)\[\/audio\]/ism", + '

    ', + $text + ); + } elseif ($try_oembed) { + // html5 video and audio + $text = preg_replace( + "/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", + '', + $text + ); + + $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text); + $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text); + + $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", '[url]$1[/url]', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '', $text); + } else { + $text = preg_replace("/\[video\](.*?)\[\/video\]/ism", '[url]$1[/url]', $text); + $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '[url]$1[/url]', $text); + } + return $text; + } + + private static function convertIFramesToHtml(string $text): string + { + // Backward compatibility, [iframe] support has been removed in version 2020.12 + $text = preg_replace_callback("/\[(iframe)\](.*?)\[\/iframe\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + $text = preg_replace("/\[iframe\](.*?)\[\/iframe\]/ism", '[url]$1[/url]', $text); + + return $text; + } + + private static function convertVideoPlatformsToHtml(string $text, bool $try_oembed): string + { + $a = DI::app(); + $text = self::normalizeVideoLinks($text); + + // Youtube extensions + if ($try_oembed && OEmbed::isAllowedURL('https://www.youtube.com/embed/')) { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '', $text); + } else { + $text = preg_replace("/\[youtube\]([A-Za-z0-9\-_=]+)(.*?)\[\/youtube\]/ism", '[url]https://www.youtube.com/watch?v=$1[/url]', $text); + } + + // Vimeo extensions + if ($try_oembed && OEmbed::isAllowedURL('https://player.vimeo.com/video')) { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '', $text); + } else { + $text = preg_replace("/\[vimeo\]([0-9]+)(.*?)\[\/vimeo\]/ism", '[url]https://vimeo.com/$1[/url]', $text); + } + return $text; + } + + private static function convertOEmbedToHtml(string 
$text, int $uriid): string + { + // oembed tag + $text = OEmbed::BBCode2HTML($text, $uriid); + + // Avoid triple linefeeds through oembed + $text = str_replace("


    ", "

    ", $text); + + return $text; + } + private static function convertUrlToHtml(string $text, int $simple_html, bool $for_plaintext, bool $try_oembed, \Closure $try_oembed_callback): string { $text = preg_replace_callback("/\[(url)\](.*?)\[\/url\]/ism", [self::class, 'sanitizeLinksCallback'], $text); @@ -2046,7 +2093,10 @@ class BBCode $text = preg_replace("/\[url\=(" . preg_quote(DI::baseUrl(), '/') . ".*?)\](.*?)\[\/url\]/ism", '$2', $text); $text = preg_replace("/\[url\=(.*?)\](.*?)\[\/url\]/ism", '$2', $text); - return $text; + + // we may need to restrict this further if it picks up too many strays + // link acct:user@host to a webfinger profile redirector + return preg_replace('/acct:([^@]+)@((?!\-)(?:[a-zA-Z\d\-]{0,62}[a-zA-Z\d]\.){1,126}(?!\d+)[a-zA-Z\d]{1,63})/', 'acct:$1@$2', $text); } private static function escapeUrl(string $url): string @@ -2088,6 +2138,78 @@ class BBCode return $text; } + private static function convertMailToHtml(string $text): string + { + $text = preg_replace_callback("/\[(mail)\](.*?)\[\/mail\]/ism", [self::class, 'sanitizeLinksCallback'], $text); + $text = preg_replace("/\[mail\](.*?)\[\/mail\]/", '$1', $text); + $text = preg_replace("/\[mail\=(.*?)\](.*?)\[\/mail\]/", '$2', $text); + return $text; + } + + private static function convertSharesToHtml(string $text, int $simple_html, bool $try_oembed, int $uriid): string + { + // Shared content + // when the content is meant exporting to other systems then remove the avatar picture since this doesn't really look good on these systems + if (!$try_oembed) { + $text = preg_replace("/\[share(.*?)avatar\s?=\s?'.*?'\s?(.*?)\]\s?(.*?)\s?\[\/share\]\s?/ism", "\n[share$1$2]$3[/share]", $text); + } + + $text = self::convertShare( + $text, + function (array $attributes, array $author_contact, $content, $is_quote_share) use ($simple_html) { + return self::convertShareCallback($attributes, $author_contact, $content, $is_quote_share, $simple_html); + }, + $uriid + ); + + return $text; + } + + 
private static function cleanupHtml(string $text): string + { + /// @todo What is the meaning of these lines? + $text = preg_replace('/\[\&\;([#a-z0-9]+)\;\]/', '&$1;', $text); + $text = preg_replace('/\&\#039\;/', '\'', $text); + + // Currently deactivated, it made problems with " inside of alt texts. + //$text = preg_replace('/\"\;/', '"', $text); + + // fix any escaped ampersands that may have been converted into links + $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&\;(.*?)\>/ism', '<$1$2=$3&$4>', $text); + + // sanitizes src attributes (http and redir URLs for displaying in a web page, cid used for inline images in emails) + $allowed_src_protocols = ['//', 'http://', 'https://', 'contact/redir/', 'cid:']; + + array_walk($allowed_src_protocols, function (&$value) { + $value = preg_quote($value, '#'); + }); + + $text = preg_replace( + '#<([^>]*?)(src)="(?!' . implode('|', $allowed_src_protocols) . ')(.*?)"(.*?)>#ism', + '<$1$2=""$4 data-original-src="$3" class="invalid-src" title="' . DI::l10n()->t('Invalid source protocol') . '">', + $text + ); + + // sanitize href attributes (only allowlisted protocols URLs) + // default value for backward compatibility + $allowed_link_protocols = DI::config()->get('system', 'allowed_link_protocols', []); + + // Always allowed protocol even if config isn't set or not including it + $allowed_link_protocols[] = '//'; + $allowed_link_protocols[] = 'http://'; + $allowed_link_protocols[] = 'https://'; + $allowed_link_protocols[] = 'contact/redir/'; + + array_walk($allowed_link_protocols, function (&$value) { + $value = preg_quote($value, '#'); + }); + + $regex = '#<([^>]*?)(href)="(?!' . implode('|', $allowed_link_protocols) . ')(.*?)"(.*?)>#ism'; + $text = preg_replace($regex, '<$1$2="javascript:void(0)"$4 data-original-href="$3" class="invalid-href" title="' . DI::l10n()->t('Invalid link protocol') . 
'">', $text); + + return $text; + } + /** * Strips the "abstract" tag from the provided text * diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index fd4a4b1d24..209b12eec4 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -253,13 +253,12 @@ class HTML self::tagToBBCode($doc, 'span', ['class' => 'type-link'], '[class=type-link]', '[/class]'); self::tagToBBCode($doc, 'span', ['class' => 'type-video'], '[class=type-video]', '[/class]'); - self::tagToBBCode($doc, 'strong', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'em', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'b', [], '[b]', '[/b]'); - self::tagToBBCode($doc, 'i', [], '[i]', '[/i]'); - self::tagToBBCode($doc, 'u', [], '[u]', '[/u]'); - self::tagToBBCode($doc, 's', [], '[s]', '[/s]'); - self::tagToBBCode($doc, 'del', [], '[s]', '[/s]'); + $elements = ['b', 'del', 'em', 'i', 'ins', 'kbd', 'mark', + 's', 'samp', 'strong', 'sub', 'sup', 'u', 'var']; + foreach ($elements as $element) { + self::tagToBBCode($doc, $element, [], '[' . $element . ']', '[/' . $element . 
']'); + } + self::tagToBBCode($doc, 'strike', [], '[s]', '[/s]'); self::tagToBBCode($doc, 'big', [], "[size=large]", "[/size]"); diff --git a/tests/src/Factory/Api/Twitter/DirectMessageTest.php b/tests/src/Factory/Api/Twitter/DirectMessageTest.php index 4e9d94cafb..03f15f55e0 100644 --- a/tests/src/Factory/Api/Twitter/DirectMessageTest.php +++ b/tests/src/Factory/Api/Twitter/DirectMessageTest.php @@ -67,7 +67,7 @@ class DirectMessageTest extends FixtureTest ->toArray(); self::assertEquals('item_title', $directMessage['title']); - self::assertEquals('item_body', $directMessage['text']); + self::assertEquals('item_body', $directMessage['text']); } /** diff --git a/view/lang/C/messages.po b/view/lang/C/messages.po index 9fa16e1e5b..9955eab35b 100644 --- a/view/lang/C/messages.po +++ b/view/lang/C/messages.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: 2024.06-dev\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2024-04-07 16:31+0000\n" +"POT-Creation-Date: 2024-04-13 11:02+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -957,7 +957,7 @@ msgstr "" msgid "Enter user nickname: " msgstr "" -#: src/Console/User.php:182 src/Model/User.php:820 +#: src/Console/User.php:182 src/Model/User.php:822 #: src/Module/Api/Twitter/ContactEndpoint.php:74 #: src/Module/Moderation/Users/Active.php:71 #: src/Module/Moderation/Users/Blocked.php:71 @@ -1381,7 +1381,7 @@ msgstr "" msgid "Public post" msgstr "" -#: src/Content/Conversation.php:424 src/Content/Widget/VCard.php:130 +#: src/Content/Conversation.php:424 src/Content/Widget/VCard.php:131 #: src/Model/Profile.php:482 src/Module/Admin/Logs/View.php:92 #: src/Module/Post/Edit.php:181 msgid "Message" @@ -1733,7 +1733,7 @@ msgstr "" #: src/Content/Feature.php:130 src/Content/GroupManager.php:147 #: src/Content/Nav.php:278 src/Content/Text/HTML.php:881 -#: src/Content/Widget.php:538 src/Model/User.php:1386 +#: src/Content/Widget.php:538 
src/Model/User.php:1388 msgid "Groups" msgstr "" @@ -2266,39 +2266,39 @@ msgstr "" msgid "last" msgstr "" -#: src/Content/Text/BBCode.php:767 src/Content/Text/BBCode.php:1764 -#: src/Content/Text/BBCode.php:1765 +#: src/Content/Text/BBCode.php:701 src/Content/Text/BBCode.php:1843 +#: src/Content/Text/BBCode.php:1844 msgid "Image/photo" msgstr "" -#: src/Content/Text/BBCode.php:985 +#: src/Content/Text/BBCode.php:919 #, php-format msgid "" "%2$s %3$s" msgstr "" -#: src/Content/Text/BBCode.php:1010 src/Model/Item.php:4014 -#: src/Model/Item.php:4020 src/Model/Item.php:4021 +#: src/Content/Text/BBCode.php:944 src/Model/Item.php:4021 +#: src/Model/Item.php:4027 src/Model/Item.php:4028 msgid "Link to source" msgstr "" -#: src/Content/Text/BBCode.php:1671 src/Content/Text/HTML.php:905 +#: src/Content/Text/BBCode.php:1724 src/Content/Text/HTML.php:905 msgid "Click to open/close" msgstr "" -#: src/Content/Text/BBCode.php:1704 +#: src/Content/Text/BBCode.php:1779 msgid "$1 wrote:" msgstr "" -#: src/Content/Text/BBCode.php:1769 src/Content/Text/BBCode.php:1770 +#: src/Content/Text/BBCode.php:1853 src/Content/Text/BBCode.php:1854 msgid "Encrypted content" msgstr "" -#: src/Content/Text/BBCode.php:2033 +#: src/Content/Text/BBCode.php:2159 msgid "Invalid source protocol" msgstr "" -#: src/Content/Text/BBCode.php:2052 +#: src/Content/Text/BBCode.php:2178 msgid "Invalid link protocol" msgstr "" @@ -2310,7 +2310,7 @@ msgstr "" msgid "The end" msgstr "" -#: src/Content/Text/HTML.php:860 src/Content/Widget/VCard.php:126 +#: src/Content/Text/HTML.php:860 src/Content/Widget/VCard.php:127 #: src/Model/Profile.php:476 src/Module/Contact/Profile.php:477 msgid "Follow" msgstr "" @@ -2483,27 +2483,27 @@ msgstr[1] "" msgid "More Trending Tags" msgstr "" -#: src/Content/Widget/VCard.php:104 src/Model/Contact.php:1205 +#: src/Content/Widget/VCard.php:105 src/Model/Contact.php:1205 #: src/Model/Profile.php:461 msgid "Post to group" msgstr "" -#: src/Content/Widget/VCard.php:109 
src/Model/Contact.php:1209 +#: src/Content/Widget/VCard.php:110 src/Model/Contact.php:1209 #: src/Model/Profile.php:465 src/Module/Moderation/Item/Source.php:85 msgid "Mention" msgstr "" -#: src/Content/Widget/VCard.php:119 src/Model/Profile.php:380 +#: src/Content/Widget/VCard.php:120 src/Model/Profile.php:380 #: src/Module/Contact/Profile.php:413 src/Module/Profile/Profile.php:199 msgid "XMPP:" msgstr "" -#: src/Content/Widget/VCard.php:120 src/Model/Profile.php:381 +#: src/Content/Widget/VCard.php:121 src/Model/Profile.php:381 #: src/Module/Contact/Profile.php:415 src/Module/Profile/Profile.php:203 msgid "Matrix:" msgstr "" -#: src/Content/Widget/VCard.php:121 src/Model/Event.php:82 +#: src/Content/Widget/VCard.php:122 src/Model/Event.php:82 #: src/Model/Event.php:109 src/Model/Event.php:471 src/Model/Event.php:960 #: src/Model/Profile.php:375 src/Module/Contact/Profile.php:411 #: src/Module/Directory.php:147 src/Module/Notifications/Introductions.php:187 @@ -2511,18 +2511,18 @@ msgstr "" msgid "Location:" msgstr "" -#: src/Content/Widget/VCard.php:124 src/Model/Profile.php:489 +#: src/Content/Widget/VCard.php:125 src/Model/Profile.php:489 #: src/Module/Notifications/Introductions.php:201 msgid "Network:" msgstr "" -#: src/Content/Widget/VCard.php:128 src/Model/Contact.php:1237 +#: src/Content/Widget/VCard.php:129 src/Model/Contact.php:1237 #: src/Model/Contact.php:1249 src/Model/Profile.php:478 #: src/Module/Contact/Profile.php:469 msgid "Unfollow" msgstr "" -#: src/Content/Widget/VCard.php:134 src/Model/Contact.php:1207 +#: src/Content/Widget/VCard.php:135 src/Model/Contact.php:1207 #: src/Model/Profile.php:463 msgid "View group" msgstr "" @@ -3568,7 +3568,7 @@ msgstr[1] "" msgid "Poll end: %s" msgstr "" -#: src/Model/Item.php:3997 src/Model/Item.php:3998 +#: src/Model/Item.php:4004 src/Model/Item.php:4005 msgid "View on separate page" msgstr "" @@ -3726,145 +3726,145 @@ msgstr "" msgid "Contact information and Social Networks" msgstr "" -#: 
src/Model/User.php:229 src/Model/User.php:1299 +#: src/Model/User.php:231 src/Model/User.php:1301 msgid "SERIOUS ERROR: Generation of security keys failed." msgstr "" -#: src/Model/User.php:729 src/Model/User.php:762 +#: src/Model/User.php:731 src/Model/User.php:764 msgid "Login failed" msgstr "" -#: src/Model/User.php:794 +#: src/Model/User.php:796 msgid "Not enough information to authenticate" msgstr "" -#: src/Model/User.php:919 +#: src/Model/User.php:921 msgid "Password can't be empty" msgstr "" -#: src/Model/User.php:961 +#: src/Model/User.php:963 msgid "Empty passwords are not allowed." msgstr "" -#: src/Model/User.php:965 +#: src/Model/User.php:967 msgid "" "The new password has been exposed in a public data dump, please choose " "another." msgstr "" -#: src/Model/User.php:969 +#: src/Model/User.php:971 msgid "The password length is limited to 72 characters." msgstr "" -#: src/Model/User.php:973 +#: src/Model/User.php:975 msgid "The password can't contain white spaces nor accentuated letters" msgstr "" -#: src/Model/User.php:1182 +#: src/Model/User.php:1184 msgid "Passwords do not match. Password unchanged." msgstr "" -#: src/Model/User.php:1189 +#: src/Model/User.php:1191 msgid "An invitation is required." msgstr "" -#: src/Model/User.php:1193 +#: src/Model/User.php:1195 msgid "Invitation could not be verified." msgstr "" -#: src/Model/User.php:1201 +#: src/Model/User.php:1203 msgid "Invalid OpenID url" msgstr "" -#: src/Model/User.php:1214 src/Security/Authentication.php:230 +#: src/Model/User.php:1216 src/Security/Authentication.php:230 msgid "" "We encountered a problem while logging in with the OpenID you provided. " "Please check the correct spelling of the ID." msgstr "" -#: src/Model/User.php:1214 src/Security/Authentication.php:230 +#: src/Model/User.php:1216 src/Security/Authentication.php:230 msgid "The error message was:" msgstr "" -#: src/Model/User.php:1220 +#: src/Model/User.php:1222 msgid "Please enter the required information." 
msgstr "" -#: src/Model/User.php:1234 +#: src/Model/User.php:1236 #, php-format msgid "" "system.username_min_length (%s) and system.username_max_length (%s) are " "excluding each other, swapping values." msgstr "" -#: src/Model/User.php:1241 +#: src/Model/User.php:1243 #, php-format msgid "Username should be at least %s character." msgid_plural "Username should be at least %s characters." msgstr[0] "" msgstr[1] "" -#: src/Model/User.php:1245 +#: src/Model/User.php:1247 #, php-format msgid "Username should be at most %s character." msgid_plural "Username should be at most %s characters." msgstr[0] "" msgstr[1] "" -#: src/Model/User.php:1253 +#: src/Model/User.php:1255 msgid "That doesn't appear to be your full (First Last) name." msgstr "" -#: src/Model/User.php:1258 +#: src/Model/User.php:1260 msgid "Your email domain is not among those allowed on this site." msgstr "" -#: src/Model/User.php:1262 +#: src/Model/User.php:1264 msgid "Not a valid email address." msgstr "" -#: src/Model/User.php:1265 +#: src/Model/User.php:1267 msgid "The nickname was blocked from registration by the nodes admin." msgstr "" -#: src/Model/User.php:1269 src/Model/User.php:1275 +#: src/Model/User.php:1271 src/Model/User.php:1277 msgid "Cannot use that email." msgstr "" -#: src/Model/User.php:1281 +#: src/Model/User.php:1283 msgid "Your nickname can only contain a-z, 0-9 and _." msgstr "" -#: src/Model/User.php:1289 src/Model/User.php:1346 +#: src/Model/User.php:1291 src/Model/User.php:1348 msgid "Nickname is already registered. Please choose another." msgstr "" -#: src/Model/User.php:1333 src/Model/User.php:1337 +#: src/Model/User.php:1335 src/Model/User.php:1339 msgid "An error occurred during registration. Please try again." msgstr "" -#: src/Model/User.php:1360 +#: src/Model/User.php:1362 msgid "An error occurred creating your default profile. Please try again." msgstr "" -#: src/Model/User.php:1367 +#: src/Model/User.php:1369 msgid "An error occurred creating your self contact. 
Please try again." msgstr "" -#: src/Model/User.php:1372 +#: src/Model/User.php:1374 msgid "Friends" msgstr "" -#: src/Model/User.php:1376 +#: src/Model/User.php:1378 msgid "" "An error occurred creating your default contact circle. Please try again." msgstr "" -#: src/Model/User.php:1418 +#: src/Model/User.php:1420 msgid "Profile Photos" msgstr "" -#: src/Model/User.php:1600 +#: src/Model/User.php:1602 #, php-format msgid "" "\n" @@ -3872,7 +3872,7 @@ msgid "" "\t\t\tthe administrator of %2$s has set up an account for you." msgstr "" -#: src/Model/User.php:1603 +#: src/Model/User.php:1605 #, php-format msgid "" "\n" @@ -3908,12 +3908,12 @@ msgid "" "\t\tThank you and welcome to %4$s." msgstr "" -#: src/Model/User.php:1635 src/Model/User.php:1741 +#: src/Model/User.php:1637 src/Model/User.php:1743 #, php-format msgid "Registration details for %s" msgstr "" -#: src/Model/User.php:1655 +#: src/Model/User.php:1657 #, php-format msgid "" "\n" @@ -3929,12 +3929,12 @@ msgid "" "\t\t" msgstr "" -#: src/Model/User.php:1674 +#: src/Model/User.php:1676 #, php-format msgid "Registration at %s" msgstr "" -#: src/Model/User.php:1698 +#: src/Model/User.php:1700 #, php-format msgid "" "\n" @@ -3943,7 +3943,7 @@ msgid "" "\t\t\t" msgstr "" -#: src/Model/User.php:1706 +#: src/Model/User.php:1708 #, php-format msgid "" "\n" @@ -3981,7 +3981,7 @@ msgid "" "\t\t\tThank you and welcome to %2$s." msgstr "" -#: src/Model/User.php:1768 +#: src/Model/User.php:1770 msgid "" "User with delegates can't be removed, please remove delegate users first" msgstr "" @@ -8920,11 +8920,11 @@ msgstr "" msgid "Show unread" msgstr "" -#: src/Module/Notifications/Ping.php:223 +#: src/Module/Notifications/Ping.php:220 msgid "{0} requested registration" msgstr "" -#: src/Module/Notifications/Ping.php:232 +#: src/Module/Notifications/Ping.php:229 #, php-format msgid "{0} and %d others requested registration" msgstr ""