README updated, code restructured

2025-10-07 23:43:01 +00:00 · 2019-11-25 22:23:19 +00:00 · 2019-11-25 22:23:19 +00:00 · 9bc1f9cee6
commit 9bc1f9cee6
parent 0465de62dc
2 changed files with 60 additions and 128 deletions
--- a/discourse/README
+++ b/discourse/README
@ -1,97 +1,28 @@
-NSFW
+Discourse connector
+===================

-"Not safe for work"
+The Discourse connector detects incoming mails from Discourse and
+improves them by fetching the content and user data via API.

-Scans the message content for the string 'nsfw' 
-(case insensitive) and if found replaces the content
-with a "click to open/close" link, default is closed.
+Prerequisites
+-------------
+The user has to configure the mail interface so that the user's mails
+can be fetched via Friendica. Then the user has to activate the
+mailing list mode in Discourse.

-If you click on the 'Not safe for work' addon under
-/settings/addon a text field appears, where you can
-extend the list of search terms. The terms must be
-seperated by commas.
+The mailing list mode in Discourse offers two different options:
+1. Get all posts - including your own. This will create duplicates
+   if you post via Friendica.
+2. Don't get your own posts. Then you will miss all your posts
+   that you made directly on Discourse. Since you cannot create
+   a new post via this connector (only comments are possible)
+   this is not a good choice either.

-It is also possible to enter profile URLs as values.
-This is quite useful for the case, that you perhaps
-don't want to see postings by person_A, but person_B
-is one of your contacts and person_B used to reshare
-postings by person_A.
-
-You can also make use of regular expressions.
-They also have to be seperated by commas and the
-regex itself has to be enclosed with slashes:
-
-	... nsfw, /<REGEX>/, politics,...
-
---------------
-A few examples:
---------------
-
-1)
-Let's say you don't want to see postings which contain
-the term 'fake news'
-
-The term could appear in several ways:
-
-fakenews, fake news, fake_news, fake-news, f@ke news,
-f4ke news, f4k3 n3ws, and so on and so on and so on.
-
-You could write every possible version of it as single
-item into your NSFW-filter list, but this can also be
-done with a single regex, which matches all of them:
-
-	/f[@4a]k[3e][-_ ]n[3e]w[sz]/
-
-
-2)
-Another use case could be, that you are simply not
-interested in postings about christmas.
-
-	/christmas(?:[-_ ]?(?:tree|time|eve|pudding))?/
-
-
-ATTENTION:
-
-It is absolutely important, that you use grouping
-parentheses instead of capturing parentheses!!
-
-Grouping parentheses are:
-
-	(?: )
-
-If you use capturing parentheses, which are
-
-	( )
-
-it will produce errors and the regex won't work and
-at least your targets will not get collapsed.
-
-
-
-3)
-Another possibility is the usage of a so called
-'lookbehind' construct. I'll give an example followed
-by a descripton:
-
-	/(?<!the )\badvent\b/
-
-The \b is a word boundary, what matches the beginning
-and the end of a word. The simple pattern of 'advent'
-would match advent iteself, but also adventure.
-This can be prevented by
-
-	/\badvent\b/
-
-The first part of the regex above
-
-	(?<!the )
-
-is a negative lookbehind. It makes \badvent\b only
-match, if there is no 'the ' before \badvent\b or in
-words:
-
-It looks for 'advent', but doesn't match 'the advent'.
-
-
-For more informations take a look at the PCRE regex
-dialect.
+Known problems
+--------------
+- You can't create starting posts
+- Either you don't get your own posts you made directly on Discourse
+  or you do get duplicates for every post you made via Friendica.
+- Non-public categories currently only work via a workaround
+  without the API, which most likely will cause some content problems.
+- Links to Discourse profiles in the posts are invalid.
--- a/discourse/discourse.php
+++ b/discourse/discourse.php
@ -25,18 +25,16 @@ Use Friendica\Util\DateTimeFormat;

 function discourse_install()
 {
-	Hook::register('email_getmessage',     __FILE__, 'discourse_email_getmessage');
-	Hook::register('email_getmessage_end', __FILE__, 'discourse_email_getmessage_end');
-	Hook::register('addon_settings',       __FILE__, 'discourse_addon_settings');
-	Hook::register('addon_settings_post',  __FILE__, 'discourse_addon_settings_post');
+	Hook::register('email_getmessage',    __FILE__, 'discourse_email_getmessage');
+	Hook::register('addon_settings',      __FILE__, 'discourse_settings');
+	Hook::register('addon_settings_post', __FILE__, 'discourse_settings_post');
 }

 function discourse_uninstall()
 {
-	Hook::unregister('email_getmessage',     __FILE__, 'discourse_email_getmessage');
-	Hook::unregister('email_getmessage_end', __FILE__, 'discourse_email_getmessage_end');
-	Hook::unregister('addon_settings',       __FILE__, 'discourse_addon_settings');
-	Hook::unregister('addon_settings_post',  __FILE__, 'discourse_addon_settings_post');
+	Hook::unregister('email_getmessage',        __FILE__, 'discourse_email_getmessage');
+	Hook::unregister('connector_settings',      __FILE__, 'discourse_settings');
+	Hook::unregister('connector_settings_post', __FILE__, 'discourse_settings_post');
 }

 function discourse_addon_settings(App $a, &$s)
@ -51,28 +49,35 @@ function discourse_email_getmessage(App $a, &$message)
 {
 //	Logger::info('Got raw message', $message);

-/*	if (preg_match('=topic/(.*)/(.*)@(.*)=', $message['item']['uri'], $matches)) {
-		Logger::info('Got post data', ['topic' => $matches[1], 'post' => $matches[2], 'host' => $matches[3]]);
-		if (discourse_fetch_post_from_api($message, $matches[2], $matches[3])) {
-			return;
-		}
+	// We do assume that all Discourse servers are running with SSL
+	if (preg_match('=topic/(.*\d)/(.*\d)@(.*)=', $message['item']['uri'], $matches) &&
+		discourse_fetch_post_from_api($message, $matches[2], $matches[3])) {
+		Logger::info('Fetched comment via API', ['host' => $matches[3], 'topic' => $matches[1], 'post' => $matches[2]]);
+		return;
 	}
-*/
+
+	if (preg_match('=topic/(.*\d)@(.*)=', $message['item']['uri'], $matches) &&
+		discourse_fetch_topic_from_api($message, 'https://' . $matches[2], $matches[1], 1)) {
+		discourse_fetch_post_from_api($message, $matches[2], $matches[3]);
+		Logger::info('Fetched starting post via API', ['host' => $matches[2], 'topic' => $matches[1]]);
+		return;
+	}
+
 	// Search in the text part for the link to the discourse entry and the text body
-	// The text body is used as alternative, if the fetched HTML isn't working
 	if (!empty($message['text'])) {
 		$message = discourse_get_text($message);
 	}

-	if (!empty($message['item']['plink'])) {
-		if (preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) {
-			if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) {
-				return;
-			}
-		}
+	if (empty($message['item']['plink']) || !preg_match('=(http.*)/t/.*/(.*\d)/(.*\d)=', $message['item']['plink'], $matches)) {
+		Logger::info('This is no Discourse post');
 	}
-	Logger::info('Stop');
-die('Test');
+
+	if (discourse_fetch_topic_from_api($message, $matches[1], $matches[2], $matches[3])) {
+		Logger::info('Fetched post from via API', ['host' => $matches[1], 'topic' => $matches[2], 'id' => $matches[3]]);
+		return;
+	}
+
+	Logger::info('Fallback mode');
 	// Search in the HTML part for the discourse entry and the author profile
 	if (!empty($message['html'])) {
 		$message = discourse_get_html($message);
@ -98,8 +103,7 @@ function discourse_fetch_post($host, $topic, $pid)
 	$posts = $data['post_stream']['posts'];
 	foreach($posts as $post) {
 		if ($post['post_number'] != $pid) {
-			// Test
-			discourse_get_user($post, $host);
+			/// @todo Possibly fetch missing posts here
 			continue;
 		}
 		Logger::info('Got post data from topic', $post);
@ -146,9 +150,11 @@ function discourse_get_user($post, $hostaddr)
 {
 	$host = parse_url($hostaddr, PHP_URL_HOST);

+	// Currently unused contact fields:
+	// - display_username
+	// - user_id
+
 	$contact = [];
-	// display_username
-	// user_id
 	$contact['uid'] = 0;
 	$contact['network'] = Protocol::DISCOURSE;
 	$contact['name'] = $contact['nick'] = $post['username'];
@ -198,7 +204,7 @@ function discourse_process_post($message, $post, $hostaddr)

 	if ($post['post_number'] == 1) {
 		$message['item']['parent-uri'] = $message['item']['uri'] = 'topic/' . $post['topic_id'] . '@' . $host;
-		// To-Do: Thread information
+		/// @ToDo Fetch thread information
 	} else {
 		$message['item']['uri'] = 'topic/' . $post['topic_id'] . '/' . $post['id'] . '@' . $host;
 		unset($message['item']['title']);
@ -224,7 +230,7 @@ function discourse_get_html($message)

 	$xpath = new DomXPath($doc);

-	// Fetch the first 'div' before the 'hr' -hopefully this fits for all systems
+	// Fetch the first 'div' before the 'hr' - hopefully this fits for all systems
 	$result = $xpath->query("//hr//preceding::div[1]");
 	$div = $doc2->importNode($result->item(0), true);
 	$doc2->appendChild($div);
@ -232,7 +238,7 @@ function discourse_get_html($message)
 	Logger::info('Found html body', ['html' => $message['html']]);

 	$profile = discourse_get_profile($xpath);
-	if (!empty($profile)) {
+	if (!empty($profile['url'])) {
 		Logger::info('Found profile', $profile);
 		$message['item']['author-id'] = Contact::getIdForURL($profile['url'], 0, true, $profile);
 		$message['item']['author-link'] = $profile['url'];
@ -301,8 +307,3 @@ function discourse_get_profile($xpath)
 	}
 	return $profile;
 }
-
-function discourse_email_getmessage_end(App $a, &$message)
-{
-//	Logger::info('Got converted message', $message);
-}