2014-11-13 20:56:23 +00:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* Name: Leistungsschutzrecht
|
|
|
|
* Description: Only useful in germany: Remove data from snippets from members of the VG Media
|
|
|
|
* Version: 0.1
|
|
|
|
* Author: Michael Vogel <https://pirati.ca/profile/heluecht>
|
|
|
|
*/
|
|
|
|
|
2017-11-06 23:55:24 +00:00
|
|
|
use Friendica\Core\Config;
|
|
|
|
|
2014-11-13 20:56:23 +00:00
|
|
|
/**
 * Registers the hook handlers of this addon.
 *
 * The 'cron' hook is bound to leistungsschutzrecht_cron(); the 'getsiteinfo'
 * and 'page_info_data' hooks are both bound to leistungsschutzrecht_getsiteinfo().
 */
function leistungsschutzrecht_install() {
	$addonfile = 'addon/leistungsschutzrecht/leistungsschutzrecht.php';

	// hook name => handler function, registered in this order
	$handlers = array(
		'cron'           => 'leistungsschutzrecht_cron',
		'getsiteinfo'    => 'leistungsschutzrecht_getsiteinfo',
		'page_info_data' => 'leistungsschutzrecht_getsiteinfo',
	);

	foreach ($handlers as $hook => $callback) {
		register_hook($hook, $addonfile, $callback);
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Removes the hook handlers of this addon again.
 *
 * Unregisters the same three hook/handler pairs that
 * leistungsschutzrecht_install() registers.
 */
function leistungsschutzrecht_uninstall() {
	$addonfile = 'addon/leistungsschutzrecht/leistungsschutzrecht.php';

	// hook name => handler function, unregistered in this order
	$handlers = array(
		'cron'           => 'leistungsschutzrecht_cron',
		'getsiteinfo'    => 'leistungsschutzrecht_getsiteinfo',
		'page_info_data' => 'leistungsschutzrecht_getsiteinfo',
	);

	foreach ($handlers as $hook => $callback) {
		unregister_hook($hook, $addonfile, $callback);
	}
}
|
|
|
|
|
|
|
|
/**
 * Hook handler that strips preview data for pages of listed sites.
 *
 * Nothing happens unless $siteinfo contains a "url" for which
 * leistungsschutzrecht_is_member_site() returns true. In that case the
 * "text" entry is shortened with leistungsschutzrecht_cuttext() and the
 * "image", "images" and "keywords" entries are removed.
 *
 * @param mixed $a        First hook argument; not used by this handler.
 * @param array $siteinfo Page information; modified in place.
 */
function leistungsschutzrecht_getsiteinfo($a, &$siteinfo) {
	if (!isset($siteinfo["url"])) {
		return;
	}

	if (!leistungsschutzrecht_is_member_site($siteinfo["url"])) {
		return;
	}

	// Replacing the title by the URL is currently disabled.
	//$siteinfo["title"] = $siteinfo["url"];

	// A missing "text" entry is handled as an empty text. The stored result is
	// an empty string either way, but no undefined-index notice is raised and
	// no null value is handed to the string functions.
	$text = isset($siteinfo["text"]) ? $siteinfo["text"] : "";
	$siteinfo["text"] = leistungsschutzrecht_cuttext($text);

	unset($siteinfo["image"]);
	unset($siteinfo["images"]);
	unset($siteinfo["keywords"]);
}
|
|
|
|
|
2015-09-26 09:55:59 +00:00
|
|
|
/**
 * Shortens a text to its first seven words.
 *
 * Spaces, carriage returns and line feeds all act as word separators. Runs
 * of separators count as a single separator, and leading or trailing
 * separators are ignored, so empty pieces never count against the limit.
 *
 * @param string $text Text to shorten.
 *
 * @return string The first seven words joined by single spaces, followed by
 *                " ..." when the text contained more than seven words.
 */
function leistungsschutzrecht_cuttext($text) {
	$limit = 7;

	// Split on runs of separators and drop empty pieces; a plain explode(" ")
	// would turn "a\r\nb" into three pieces and count the empty one as a word.
	$words = preg_split('/[\r\n ]+/', (string)$text, -1, PREG_SPLIT_NO_EMPTY);

	if (count($words) <= $limit) {
		return implode(" ", $words);
	}

	return implode(" ", array_slice($words, 0, $limit)) . " ...";
}
|
|
|
|
|
2014-11-13 20:56:23 +00:00
|
|
|
/**
 * Downloads the list of affected sites and stores it in the configuration.
 *
 * The list is fetched as one comma separated string. Every entry that is not
 * on the local whitelist is stored under the config key
 * 'leistungsschutzrecht' / 'sites' as an array that uses the site name as
 * both key and value. When the download yields nothing usable, the stored
 * list is left untouched.
 */
function leistungsschutzrecht_fetchsites() {
	require_once("include/network.php");

	// This list works - but question is how current it is
	$url = "http://leistungsschutzrecht-stoppen.d-64.org/blacklist.txt";
	$sitelist = fetch_url($url);

	// A failed or empty download must not replace a previously stored list.
	// Without this check explode() would yield a single empty entry, which
	// would pass the size check below and overwrite the good data.
	if (!is_string($sitelist) || trim($sitelist) === "") {
		return;
	}

	$whitelist = array('tagesschau.de', 'heute.de', 'wdr.de');

	$sites = array();
	foreach (explode(',', $sitelist) as $site) {
		// Remove surrounding whitespace and line breaks (e.g. the newline at
		// the end of the file) so that the entry can match a host name.
		$site = trim($site);

		if ($site === "" || in_array($site, $whitelist, true)) {
			continue;
		}

		$sites[$site] = $site;
	}

	// I would prefer parsing the list from the original site, but I haven't found a list.
	// The following stays here to possibly reenable it in the future without having to reinvent the wheel completely.
	/*
	$sites = array();

	$url = "http://www.vg-media.de/lizenzen/digitale-verlegerische-angebote/wahrnehmungsberechtigte-digitale-verlegerische-angebote.html";

	$site = fetch_url($url);

	$doc = new DOMDocument();
	@$doc->loadHTML($site);

	$xpath = new DomXPath($doc);
	$list = $xpath->query("//td/a");
	foreach ($list as $node) {
		$attr = array();
		if ($node->attributes->length)
			foreach ($node->attributes as $attribute)
				$attr[$attribute->name] = $attribute->value;

		if (isset($attr["href"])) {
			$urldata = parse_url($attr["href"]);

			if (isset($urldata["host"]) && !isset($urldata["path"])) {
				$cleanedurlpart = explode("%", $urldata["host"]);

				$hostname = explode(".", $cleanedurlpart[0]);
				$site = $hostname[sizeof($hostname) - 2].".".$hostname[sizeof($hostname) - 1];
				$sites[$site] = $site;
			}
		}
	}
	*/

	if (count($sites)) {
		Config::set('leistungsschutzrecht','sites',$sites);
	}
}
|
|
|
|
|
|
|
|
/**
 * Checks whether a URL belongs to one of the stored sites.
 *
 * The last two labels of the URL's host name (e.g. "example.de" for
 * "www.example.de") are looked up as a key in the array stored under the
 * config key 'leistungsschutzrecht' / 'sites'.
 *
 * @param string $url URL to check.
 *
 * @return bool True when the site is a key of the stored list; false when it
 *              is not, when no list is stored, or when the URL has no host
 *              name with at least two labels.
 */
function leistungsschutzrecht_is_member_site($url) {
	$sites = Config::get('leistungsschutzrecht','sites');

	// Covers "nothing stored yet" as well as an empty list; count() must not
	// be called on a non-array value.
	if (!is_array($sites) || count($sites) == 0) {
		return false;
	}

	$urldata = parse_url($url);

	if (!isset($urldata["host"])) {
		return false;
	}

	// Only the part of the host name in front of a "%" is used
	$cleanedurlpart = explode("%", $urldata["host"]);

	// Host names are case-insensitive, so compare in lower case.
	// NOTE(review): assumes the stored list holds lower-case names, as the
	// whitelist entries in leistungsschutzrecht_fetchsites() do - confirm.
	$hostname = explode(".", strtolower($cleanedurlpart[0]));

	// Single-label hosts such as "localhost" cannot be matched and would
	// otherwise cause an undefined offset below.
	$labels = count($hostname);
	if ($labels < 2) {
		return false;
	}

	$site = $hostname[$labels - 2].".".$hostname[$labels - 1];

	return isset($sites[$site]);
}
|
|
|
|
|
|
|
|
/**
 * Cron hook handler that refreshes the site list at most once per day.
 *
 * Reads the time of the last poll from the config key
 * 'leistungsschutzrecht' / 'last_poll'. When less than 86400 seconds have
 * passed since then, a log message is written and nothing else happens.
 * Otherwise leistungsschutzrecht_fetchsites() is called and the current time
 * is stored as the new poll time.
 *
 * @param mixed $a First hook argument; not used by this handler.
 * @param mixed $b Second hook argument; not used by this handler.
 */
function leistungsschutzrecht_cron($a,$b) {
	$lastpoll = Config::get('leistungsschutzrecht','last_poll');

	// 86400 seconds = one day between two polls
	if ($lastpoll && (($lastpoll + 86400) > time())) {
		logger('poll intervall not reached');
		return;
	}

	leistungsschutzrecht_fetchsites();

	Config::set('leistungsschutzrecht','last_poll', time());
}
|
|
|
|
?>
|