Merge pull request #114 from mexon/retriever

Added retriever plugin
pull/115/merge
friendica 2013-04-03 17:37:32 -07:00
commit 3864d115c8
11 changed files with 1417 additions and 0 deletions

BIN
retriever.tgz Normal file

Binary file not shown.

35
retriever/database.sql Normal file
View File

@ -0,0 +1,35 @@
-- Schema for the retriever addon.
-- retriever_install() reads this file, splits it on the statement terminator
-- character and passes each piece to the sprintf-style q() helper, so comments
-- here must never contain that terminator character or a percent sign.

-- One retrieval rule per (uid, contact-id) pair.  `data` holds the rule
-- settings as a JSON document.
CREATE TABLE IF NOT EXISTS `retriever_rule` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`uid` int(11) NOT NULL,
`contact-id` int(11) NOT NULL,
`data` mediumtext NOT NULL,
PRIMARY KEY (`id`),
KEY `uid` (`uid`),
KEY `contact-id` (`contact-id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
-- Links a feed item (identified by uri, uid and contact) to a resource it is
-- waiting for.  `finished` is set to 1 once the resource has been applied.
-- NOTE(review): `parent` has no default and is not written by the visible
-- PHP code - confirm whether it is still needed.
CREATE TABLE IF NOT EXISTS `retriever_item` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
`item-uid` int(10) unsigned NOT NULL DEFAULT '0',
`contact-id` int(10) unsigned NOT NULL DEFAULT '0',
`resource` int(11) NOT NULL,
`parent` int(11) NOT NULL,
`finished` tinyint(1) unsigned NOT NULL DEFAULT '0',
KEY `resource` (`resource`),
KEY `all` (`item-uri`, `item-uid`, `contact-id`),
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
-- A URL to download.  `completed` stays NULL until a fetch succeeds.
-- `last-try` and `num-tries` drive the retry schedule in
-- retriever_retrieve_items().  `type` receives the content type reported by
-- the fetch and `data` the downloaded body.
CREATE TABLE IF NOT EXISTS `retriever_resource` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`type` char(255) NOT NULL,
`binary` int(1) NOT NULL DEFAULT 0,
`url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL,
`created` timestamp NOT NULL DEFAULT now(),
`completed` timestamp NULL DEFAULT NULL,
`last-try` timestamp NULL DEFAULT NULL,
`num-tries` int(11) NOT NULL DEFAULT 0,
`data` mediumtext NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin

738
retriever/retriever.php Normal file
View File

@ -0,0 +1,738 @@
<?php
/**
* Name: Retrieve Feed Content
* Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
* Version: 0.2
* Author: Matthew Exon <http://mat.exon.name>
*/
/**
 * Translate a legacy XPath union (the pre-0.8 "match"/"remove" rule settings)
 * into the list of element/attribute/value clauses stored since dbversion 0.8.
 *
 * Two component shapes are recognised: elem[@attr='value'], and the
 * "class contains token" form built around normalize-space(@class).
 * Components in any other shape are dropped.
 *
 * @param string $xpath Components separated by '|'.
 * @return array List of array('element' => ..., 'attribute' => ..., 'value' => ...).
 */
function retriever_legacy_xpath_to_clauses($xpath) {
    $clauses = array();
    foreach (explode('|', $xpath) as $component) {
        $matches = array();
        if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
            $clauses[] = array(
                'element' => $matches[1],
                'attribute' => $matches[2],
                'value' => $matches[3]);
        }
        // The parentheses of the XPath functions must be escaped, otherwise
        // PCRE treats them as groups and the literal expression never
        // matches.  This form only ever tests @class, so the attribute is
        // fixed and the single capture after the element is the class token.
        if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains\(concat\(' ',\s*normalize-space\(@class\),\s*' '\),\s*' ([^ ']+) '\)\]/", $component, $matches)) {
            $clauses[] = array(
                'element' => $matches[1],
                'attribute' => 'class',
                'value' => $matches[2]);
        }
    }
    return $clauses;
}

/**
 * Install the addon: register its hooks, create the tables described in
 * database.sql and migrate data left behind by older versions.
 *
 * NOTE(review): the stored dbversion is read once and only the migration
 * step matching exactly that version runs before the version is set to
 * '0.8'.  Steps are not chained (an install at '0.2' does not get the
 * '0.3'..'0.7' steps) - confirm whether that is intended.
 */
function retriever_install() {
    register_hook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
    register_hook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
    register_hook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
    register_hook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
    register_hook('cron', 'addon/retriever/retriever.php', 'retriever_cron');

    // database.sql holds several statements separated by ';'.  Run them one
    // at a time, skipping blank fragments (e.g. after a trailing terminator
    // or when the file could not be read).
    $schema = file_get_contents(dirname(__file__).'/database.sql');
    foreach (explode(';', $schema) as $statement) {
        if (strlen(trim($statement))) {
            q($statement);
        }
    }

    // Oldest layout: rule settings lived in pconfig under categories named
    // 'retriever' followed by the contact id.
    $legacy = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
    $dbversion = get_config('retriever', 'dbversion');
    if ((is_array($legacy) && count($legacy)) || ($dbversion == '0.1')) {
        $retrievers = array();
        $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
        if (is_array($r)) {
            foreach ($r as $rr) {
                $retrievers[$rr['contact']][$rr['k']] = $rr['v'];
            }
        }
        foreach ($retrievers as $k => $v) {
            $owner = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
            if (!is_array($owner) || !count($owner)) {
                // No such contact any more: a rule without an owner could
                // never be looked up, so do not create one.
                logger('retriever_install: no contact ' . $k . ' for legacy rule, skipping', LOGGER_NORMAL);
                continue;
            }
            $v['images'] = 'on';
            q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
              intval($owner[0]['uid']), intval($k), dbesc(json_encode($v)));
        }
        q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
    }
    if ($dbversion == '0.2') {
        q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
    }
    if ($dbversion == '0.3') {
        q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
        q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
    }
    if ($dbversion == '0.4') {
        q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
    }
    if ($dbversion == '0.5') {
        q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
        q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
        q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
        q('ALTER TABLE `retriever_item` DROP KEY `all`');
        q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
    }
    if ($dbversion == '0.6') {
        q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
        q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
        q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
        q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
        q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
    }
    if ($dbversion == '0.7') {
        // 0.7 rules stored a delimited regex in 'pattern' and raw XPath in
        // 'match'/'remove'.  Convert them to the bare pattern and the
        // structured 'include'/'exclude' clause lists.
        $rules = q("SELECT `id`, `data` FROM `retriever_rule`");
        if (!is_array($rules)) {
            $rules = array();
        }
        foreach ($rules as $rule) {
            logger('retriever_install: retriever ' . $rule['id'] . ' old config ' . $rule['data'], LOGGER_DATA);
            $data = json_decode($rule['data'], true);
            if (!is_array($data)) {
                // Nothing to convert; leave the stored value untouched.
                continue;
            }
            if (!empty($data['pattern'])) {
                $matches = array();
                if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
                    $data['pattern'] = $matches[1];
                }
            }
            if (!empty($data['match'])) {
                $data['include'] = retriever_legacy_xpath_to_clauses($data['match']);
                unset($data['match']);
            }
            if (!empty($data['remove'])) {
                $data['exclude'] = retriever_legacy_xpath_to_clauses($data['remove']);
                unset($data['remove']);
            }
            q("UPDATE `retriever_rule` SET `data` = '%s' WHERE `id` = %d",
              dbesc(json_encode($data)), intval($rule['id']));
            logger('retriever_install: retriever ' . $rule['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
        }
    }
    set_config('retriever', 'dbversion', '0.8');
}
/**
 * Uninstall the addon by unregistering every hook that retriever_install()
 * registered.  Each hook is unregistered exactly once.
 */
function retriever_uninstall() {
    // hook name => callback, in the same order retriever_install() uses
    $hooks = array(
        'plugin_settings' => 'retriever_plugin_settings',
        'plugin_settings_post' => 'retriever_plugin_settings_post',
        'post_remote' => 'retriever_post_remote_hook',
        'contact_photo_menu' => 'retriever_contact_photo_menu',
        'cron' => 'retriever_cron');
    foreach ($hooks as $hook => $callback) {
        unregister_hook($hook, 'addon/retriever/retriever.php', $callback);
    }
}
/**
 * Intentionally empty.
 * NOTE(review): presumably its mere existence tells the host application
 * that this addon serves its own page (see retriever_content) - confirm
 * against the plugin loader.
 */
function retriever_module() {}
/**
 * Cron hook: fetch a batch of pending resources, then purge stale rows.
 *
 * @param mixed $a Unused.
 * @param mixed $b Unused.
 */
function retriever_cron($a, $b) {
    // 100 is a nice sane number.  Maybe this should be configurable.
    $batch_size = 100;

    retriever_retrieve_items($batch_size);
    retriever_tidy();
}
// Number of resources fetched so far; retriever_retrieve_items() increments it
// and subtracts it from its $max_items budget.
$retriever_item_count = 0;
/**
 * Fetch pending resources, then apply already-completed resources to items
 * that are still waiting for them.
 *
 * A resource is due when it has never been tried, or when its number of
 * tries selects a step of the back-off schedule below and that step's delay
 * has elapsed since the last try.
 *
 * @param int $max_items Maximum number of resources to fetch, counted
 *                       against the global $retriever_item_count.
 */
function retriever_retrieve_items($max_items) {
    global $retriever_item_count;

    // Delay before try number (index + 1), expressed as TIMESTAMPADD units.
    $retriever_schedule = array(array(1,'minute'),
                                array(10,'minute'),
                                array(1,'hour'),
                                array(1,'day'),
                                array(2,'day'),
                                array(1,'week'),
                                array(1,'month'));

    $schedule_clauses = array();
    foreach ($retriever_schedule as $tries => $step) {
        $schedule_clauses[] =
            '(`num-tries` = ' . intval($tries) . ' AND TIMESTAMPADD(' . dbesc($step[1]) .
            ', ' . intval($step[0]) . ', `last-try`) < now())';
    }
    // Separator first: the reversed argument order is rejected by PHP 8.
    // The result is SQL built from the constants above, not data, so it must
    // not be passed through dbesc().
    $schedule_sql = implode(' OR ', $schedule_clauses);

    $retrieve_items = $max_items - $retriever_item_count;
    while ($retrieve_items > 0) {
        $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
               $schedule_sql,
               intval($retrieve_items));
        if (!is_array($r) || (count($r) == 0)) {
            break;
        }
        foreach ($r as $rr) {
            retrieve_resource($rr);
            $retriever_item_count++;
        }
        $retrieve_items = $max_items - $retriever_item_count;
    }

    /* Look for items that are waiting even though the resource has
     * completed.  This usually happens because we've been asked to
     * retrospectively apply a config change.  It could also happen
     * due to a cron job dying or something. */
    $r = array();
    if ($retrieve_items > 0) {
        // Only query while budget remains; a zero or negative LIMIT would
        // return nothing (or fail) anyway.
        $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
               intval($retrieve_items));
        if (!is_array($r)) {
            $r = array();
        }
    }
    foreach ($r as $rr) {
        $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", intval($rr['resource']));
        if (!is_array($resource) || !count($resource)) {
            logger('retriever_retrieve_items: no resource with id ' . $rr['resource']);
            continue;
        }
        $retriever_item = retriever_get_retriever_item($rr['item']);
        if (!$retriever_item) {
            logger('retriever_retrieve_items: no retriever item with id ' . $rr['item']);
            continue;
        }
        $item = retriever_get_item($retriever_item);
        if (!$item) {
            logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri']);
            continue;
        }
        $retriever = get_retriever($item['contact-id'], $item['uid']);
        if (!$retriever) {
            // retriever_item rows carry the owner in 'item-uid', not 'uid'.
            logger('retriever_retrieve_items: no retriever for item ' .
                   $retriever_item['item-uri'] . ' ' . $retriever_item['item-uid'] . ' ' . $item['contact-id']);
            continue;
        }
        retriever_apply_completed_resource_to_item($retriever, $item, $resource[0]);
        q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
          intval($retriever_item['id']));
        retriever_check_item_completed($item);
    }
}
/**
 * Housekeeping: delete resources completed more than a week ago, resources
 * still incomplete three months after creation, and retriever_item rows whose
 * resource no longer exists.
 */
function retriever_tidy() {
    q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
    q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");

    $orphans = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
    if (!is_array($orphans)) {
        // Query failed: nothing we can safely iterate over.
        return;
    }
    foreach ($orphans as $orphan) {
        q('DELETE FROM retriever_item WHERE id = %d', intval($orphan['id']));
    }
}
/**
 * Make one download attempt for a resource row.
 *
 * The attempt is recorded (last-try, num-tries) before fetching.  When data
 * comes back, it is stored together with the reported content type, the
 * resource is marked completed, and waiting items are processed.
 *
 * @param array $resource Row from retriever_resource.
 */
function retrieve_resource($resource) {
    $attempt = $resource['num-tries'] + 1;
    logger('retrieve_resource: ' . $attempt .
           ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);

    q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1 WHERE id = %d",
      intval($resource['id']));

    $body = fetch_url($resource['url'], $resource['binary'], $resource['type']);
    // Read the content type straight after the fetch, whether or not it
    // produced any data.
    $resource['type'] = get_app()->get_curl_content_type();
    if (!$body) {
        return;
    }

    $resource['data'] = $body;
    q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
      dbesc($body), dbesc($resource['type']), intval($resource['id']));
    retriever_resource_completed($resource);
}
/**
 * Look up the retrieval rule for a contact, optionally creating it.
 *
 * @param int  $contact_id Contact the rule applies to.
 * @param int  $uid        Owner of the rule.
 * @param bool $create     Insert an empty rule when none exists.
 * @return array|null The retriever_rule row with 'data' decoded from JSON
 *                    into an array, or null when there is no rule (and none
 *                    was created).
 */
function get_retriever($contact_id, $uid, $create = false) {
    $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
           intval($contact_id), intval($uid));
    if ((!is_array($r) || !count($r)) && $create) {
        // `data` is NOT NULL without a default, so give it an explicit empty
        // JSON value instead of relying on the server's SQL mode.
        q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
          intval($uid), intval($contact_id), dbesc(json_encode(array())));
        $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
               intval($contact_id), intval($uid));
    }
    if (!is_array($r) || !count($r)) {
        return null;
    }
    // Decode on every path so callers always see the same shape; rows whose
    // data is empty or not valid JSON yield an empty settings array.
    $data = json_decode($r[0]['data'], true);
    $r[0]['data'] = is_array($data) ? $data : array();
    return $r[0];
}
/**
 * Load a single retriever_item row by id.
 *
 * @param int $id Primary key of the retriever_item row.
 * @return array|null The row, or null (after logging) unless exactly one row
 *                    matched.
 */
function retriever_get_retriever_item($id) {
    $rows = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
    if (count($rows) == 1) {
        return $rows[0];
    }
    logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_NORMAL);
    return;
}
/**
 * Load the item row that a retriever_item refers to.
 *
 * @param array $retriever_item Row from retriever_item; its 'item-uri',
 *                              'item-uid' and 'contact-id' identify the item.
 * @return array|null The item row, or null (after logging) unless exactly one
 *                    item matched.
 */
function retriever_get_item($retriever_item) {
    $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
               dbesc($retriever_item['item-uri']),
               intval($retriever_item['item-uid']),
               intval($retriever_item['contact-id']));
    $found = is_array($items) ? count($items) : 0;
    if ($found != 1) {
        // Report the actual search keys; the message used to interpolate
        // variables that do not exist in this function.
        logger('retriever_get_item: unexpected number of results ' .
               $found . ' when searching for item ' .
               $retriever_item['item-uri'] . ' ' .
               $retriever_item['item-uid'] . ' ' .
               $retriever_item['contact-id'], LOGGER_NORMAL);
        return;
    }
    return $items[0];
}
/**
 * Apply a freshly completed resource to one waiting retriever_item.
 *
 * Bails out silently when the retriever_item, its rule or its item cannot be
 * found.  Otherwise the resource is applied, the retriever_item is marked
 * finished and the item's visibility is re-evaluated.
 *
 * @param int   $retriever_item_id Id of the retriever_item row.
 * @param array $resource          Completed retriever_resource row.
 */
function retriever_item_completed($retriever_item_id, $resource) {
    logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);

    // Each lookup only runs when the previous one succeeded.
    $retriever_item = retriever_get_retriever_item($retriever_item_id);
    $retriever = $retriever_item
        ? get_retriever($retriever_item['contact-id'], $retriever_item['item-uid'])
        : null;
    $item = $retriever ? retriever_get_item($retriever_item) : null;
    if (!$item) {
        return;
    }

    retriever_apply_completed_resource_to_item($retriever, $item, $resource);
    q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
      intval($retriever_item['id']));
    retriever_check_item_completed($item);
}
/**
 * Process every retriever_item that refers to a completed resource.
 *
 * @param array $resource Completed retriever_resource row.
 */
function retriever_resource_completed($resource) {
    logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . $resource['url'], LOGGER_DEBUG);
    $waiting = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']);
    foreach ($waiting as $row) {
        $retriever_item_id = $row['id'];
        retriever_item_completed($retriever_item_id, $resource);
    }
}
/**
 * Re-run a rule against the most recently received items of its contact.
 *
 * Each selected item is hidden and queued for retrieval.
 *
 * @param array $retriever Rule row as returned by get_retriever().
 * @param int   $num       Number of most recent items to reprocess.
 */
function apply_retrospective($retriever, $num) {
    // retriever_on_item_insert() does nothing for a rule without an id or a
    // disabled rule.  Hiding items in that case would leave them invisible
    // with nothing queued to make them visible again.
    if (!$retriever || empty($retriever['id']) || empty($retriever['data']['enable'])) {
        return;
    }
    $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
           intval($retriever['contact-id']), intval($num));
    if (!is_array($r)) {
        return;
    }
    foreach ($r as $item) {
        q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', intval($item['id']));
        retriever_on_item_insert($retriever, $item);
    }
}
/**
 * Queue the page behind an item for retrieval according to a rule.
 *
 * When the rule has a URL pattern, the item's plink is rewritten with it
 * before being queued.
 *
 * @param array $retriever Rule row with decoded 'data' (see get_retriever()).
 * @param array $item      Item row; passed by reference.
 */
function retriever_on_item_insert($retriever, &$item) {
    if (!$retriever || empty($retriever['id'])) {
        logger('retriever_on_item_insert: No retriever supplied', LOGGER_NORMAL);
        return;
    }
    // Enabled means "any non-empty value".  This is what the former
    // `!$enable == "on"` test evaluated to, written without the precedence
    // trap.
    if (empty($retriever['data']['enable'])) {
        return;
    }

    $url = $item['plink'];
    if (!empty($retriever['data']['pattern'])) {
        $pattern = $retriever['data']['pattern'];
        $replace = isset($retriever['data']['replace']) ? $retriever['data']['replace'] : '';
        // Delimit with a control character rather than '/': URL patterns
        // routinely contain slashes, which used to end the regex early and
        // make preg_replace() fail.
        $rewritten = @preg_replace("\x01" . $pattern . "\x01", $replace, $item['plink']);
        if ($rewritten === null) {
            logger('retriever_on_item_insert: invalid URL pattern ' . $pattern .
                   ' in retriever ' . $retriever['id'] . ', using ' . $item['plink'], LOGGER_NORMAL);
        }
        else {
            $url = $rewritten;
            logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
        }
    }

    $resource = add_retriever_resource($url);
    add_retriever_item($item, $resource);
}
/**
 * Find or create the resource row for a URL.
 *
 * @param string $url    URL to download.
 * @param bool   $binary Stored in the row's `binary` flag.
 * @return array|null The retriever_resource row, or null when it could not
 *                    be created.
 */
function add_retriever_resource($url, $binary = false) {
    logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
    $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
    if (is_array($r) && count($r)) {
        logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
        return $r[0];
    }

    // `type` and `data` are NOT NULL without defaults.  Supply the empty
    // values explicitly so the insert does not depend on the SQL mode.
    q("INSERT INTO `retriever_resource` (`binary`, `url`, `type`, `data`) " .
      "VALUES (%d, '%s', '', '')", intval($binary ? 1 : 0), dbesc($url));
    $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
    if (!is_array($r) || !count($r)) {
        logger("add_retriever_resource: couldn't create resource for " . $url, LOGGER_NORMAL);
        return;
    }
    return $r[0];
}
/**
 * Record that an item is waiting for a resource.
 *
 * @param array $item     Item row ('uri', 'uid' and 'contact-id' are used).
 * @param array $resource retriever_resource row ('id' and 'url' are used).
 * @return int|null Id of the newest matching retriever_item row, or null
 *                  when none could be found after the insert.
 */
function add_retriever_item(&$item, $resource) {
    logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);

    // `parent` is NOT NULL without a default.  Write the 0 that a lenient
    // SQL mode would have filled in, so the insert also works in strict mode.
    q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`, `parent`) " .
      "VALUES ('%s', %d, %d, %d, 0)",
      dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
    $r = q("SELECT id FROM `retriever_item` WHERE " .
           "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
           dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
    if (!is_array($r) || !count($r)) {
        logger("add_retriever_item: couldn't create retriever item for " .
               $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
               LOGGER_NORMAL);
        return;
    }
    logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
    return $r[0]['id'];
}
/**
 * Extract the character set from a resource's content type.
 *
 * Only the charset token itself is returned: surrounding quotes and any
 * parameters following it (e.g. "; boundary=...") are left out, and the
 * parameter name is matched case-insensitively.
 *
 * @param array $resource Resource row; 'type' holds the content type.
 * @return string The charset, or 'utf-8' when none is given.
 */
function retriever_get_encoding($resource) {
    $type = isset($resource['type']) ? (string) $resource['type'] : '';
    $matches = array();
    if (preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $type, $matches)) {
        return $matches[1];
    }
    return 'utf-8';
}
/**
 * Build an XPath union expression from a list of rule clauses.
 *
 * A clause without an attribute selects the bare element.  The 'class'
 * attribute matches when any of the element's space-separated classes equals
 * the value.  Every other attribute must equal the value exactly.
 *
 * @param mixed $spec List of array('element', 'attribute', 'value') clauses.
 * @return string|null The clauses joined with '|', or null when $spec is not
 *                     an array.
 */
function retriever_construct_xpath($spec) {
    if (!is_array($spec)) {
        return null;
    }
    $parts = array();
    foreach ($spec as $clause) {
        if (!$clause['attribute']) {
            $expression = $clause['element'];
        }
        elseif ($clause['attribute'] === 'class') {
            $expression = $clause['element']
                . "[contains(concat(' ', normalize-space(@class), ' '), ' "
                . $clause['value']
                . " ')]";
        }
        else {
            $expression = $clause['element']
                . '[@' . $clause['attribute']
                . "='" . $clause['value'] . "']";
        }
        $parts[] = $expression;
    }
    // It would be better to do this in smarty3 in extract.tpl
    return implode('|', $parts);
}
/**
 * Replace an item's body with the parts of a downloaded page selected by the
 * rule's include/exclude clauses.
 *
 * The page is parsed, run through the extract.tpl stylesheet and converted
 * to BBCode.  A "Retrieved" footer linking to the permalink is appended and
 * the item row is updated.  Nothing is written when the rule has no include
 * clauses, when the page or stylesheet cannot be processed, or when the
 * result is empty.
 *
 * @param array $retriever Rule row with decoded 'data'.
 * @param array $item      Item row; 'body' is modified in place.
 * @param array $resource  Completed resource row ('type' and 'data' are used).
 */
function retriever_apply_dom_filter($retriever, &$item, $resource) {
    logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['plink'], LOGGER_DEBUG);
    require_once('include/html2bbcode.php');

    if (empty($retriever['data']['include'])) {
        return;
    }
    if (!$resource['data']) {
        logger('retriever_apply_dom_filter: no text to work with', LOGGER_NORMAL);
        return;
    }

    $encoding = retriever_get_encoding($resource);
    logger('@@@ item type ' . $resource['type'] . ' encoding ' . $encoding, LOGGER_DEBUG);

    $extracter_template = get_markup_template('extract.tpl', 'addon/retriever/');
    $doc = new DOMDocument('1.0', 'utf-8');
    // Real-world markup is rarely clean; parser warnings are suppressed and
    // the return value is checked instead.
    if (strpos($resource['type'], 'html') !== false) {
        $loaded = @$doc->loadHTML($resource['data']);
    }
    else {
        $loaded = @$doc->loadXML($resource['data']);
    }
    if (!$loaded) {
        logger('retriever_apply_dom_filter: unable to parse resource ' . $resource['url'], LOGGER_NORMAL);
        return;
    }
    logger('@@@ actual encoding of document is ' . $doc->encoding, LOGGER_DEBUG);

    // Base URLs used by the stylesheet to make relative image links absolute.
    $components = parse_url($item['plink']);
    if (!is_array($components)) {
        $components = array();
    }
    $rooturl = (isset($components['scheme']) ? $components['scheme'] : '') . "://" .
        (isset($components['host']) ? $components['host'] : '');
    $path = isset($components['path']) ? $components['path'] : '';
    // dirname() returns a lone separator for root-level pages; trim it so
    // the directory URL never ends in a double slash.
    $dirurl = $rooturl . rtrim(dirname($path), '/\\') . "/";

    $params = array('$include' => retriever_construct_xpath($retriever['data']['include']),
                    '$exclude' => retriever_construct_xpath(isset($retriever['data']['exclude']) ? $retriever['data']['exclude'] : null),
                    '$pageurl' => $item['plink'],
                    '$dirurl' => $dirurl,
                    '$rooturl' => $rooturl);
    $xslt = replace_macros($extracter_template, $params);

    $xmldoc = new DOMDocument();
    $xp = new XsltProcessor();
    // The stylesheet embeds user-supplied clause values, so it can end up
    // malformed; treat that as a failed extraction.
    if (!@$xmldoc->loadXML($xslt) || !@$xp->importStylesheet($xmldoc)) {
        logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ': unable to build stylesheet', LOGGER_NORMAL);
        return;
    }
    $transformed = $xp->transformToXML($doc);
    if ($transformed === false) {
        logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': transformation failed', LOGGER_NORMAL);
        return;
    }

    $item['body'] = html2bbcode($transformed);
    if (!strlen($item['body'])) {
        logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_NORMAL);
        return;
    }
    $item['body'] .= "\n\n" . t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
    $item['body'] .= $item['plink'];
    $item['body'] .= ']' . $item['plink'] . '[/url]';
    q("UPDATE `item` SET `body` = '%s', `edited` = '%s' WHERE `id` = %d",
      dbesc($item['body']), dbesc(datetime_convert('UTC', 'UTC')), intval($item['id']));
}
/**
 * Queue or apply local copies of every remote image in an item's body.
 *
 * Both the [img=WxH]url[/img] and the [img]url[/img] forms are scanned.
 * URLs containing this site's base URL are skipped.  An image whose resource
 * is already completed is applied to the body immediately; otherwise the
 * item is registered as waiting for it.
 *
 * @param array $item Item row; passed by reference.
 */
function retrieve_images(&$item) {
    $sized = array();
    preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $sized);
    $plain = array();
    preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $plain);
    $image_urls = array_merge($sized[3], $plain[1]);
    logger('retrieve_images: found ' . count($image_urls) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);

    foreach ($image_urls as $image_url) {
        if (strpos($image_url, get_app()->get_baseurl()) !== FALSE) {
            // Already served from this site.
            continue;
        }
        $resource = add_retriever_resource($image_url, true);
        if ($resource['completed']) {
            retriever_transform_images($item, $resource);
        }
        else {
            add_retriever_item($item, $resource);
        }
    }
}
/**
 * Recompute an item's visibility from its outstanding resources.
 *
 * The item is visible only when no unfinished retriever_item rows remain for
 * it.  When that changes the flag of a stored item, the row is updated and
 * the notifier is started with 'edit_post'.
 *
 * @param array $item Item row; 'visible' is updated in place.
 */
function retriever_check_item_completed(&$item)
{
    $rows = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
              dbesc($item['uri']), intval($item['uid']),
              intval($item['contact-id']));
    $waiting = $rows[0]['count(*)'];
    logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
           . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG);

    $was_visible = $item['visible'];
    if ($waiting) {
        $item['visible'] = 0;
    }
    else {
        $item['visible'] = 1;
    }

    // Nothing to persist for unsaved items or an unchanged flag.
    if (!($item['id'] > 0) || ($was_visible == $item['visible'])) {
        return;
    }
    logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
    q("UPDATE `item` SET `visible` = %d, `edited` = '%s' WHERE `id` = %d",
      intval($item['visible']),
      dbesc(datetime_convert('UTC', 'UTC')),
      intval($item['id']));
    proc_run('php', "include/notifier.php", 'edit_post', $item['id']);
}
/**
 * Apply a completed resource to an item according to its content type.
 *
 * Image resources are stored locally and linked from the body.  When a rule
 * is supplied, HTML/XML resources are run through the DOM filter, after
 * which the images in the new body are queued if the rule asks for them.
 *
 * @param array|null $retriever Rule row with decoded 'data', or a falsy value.
 * @param array      $item      Item row; passed by reference.
 * @param array      $resource  Completed resource row.
 */
function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) {
    $retriever_label = $retriever ? $retriever['id'] : 'none';
    logger('retriever_apply_completed_resource_to_item: retriever ' .
           $retriever_label .
           ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);

    $content_type = $resource['type'];
    if (strpos($content_type, 'image') !== false) {
        retriever_transform_images($item, $resource);
    }
    if (!$retriever) {
        return;
    }

    $is_markup = (strpos($content_type, 'html') !== false)
        || (strpos($content_type, 'xml') !== false);
    if (!$is_markup) {
        return;
    }
    retriever_apply_dom_filter($retriever, $item, $resource);
    if ($retriever["data"]['images'] ) {
        retrieve_images($item);
    }
}
/**
 * Store a downloaded image in the photo table.
 *
 * The dimensions are measured with Imagick when that class is available,
 * otherwise with GD, and written back into $resource.
 *
 * @param array $item     Row owning the photo.  Callers in this file pass an
 *                        item row (owner in 'uid'); a retriever_item row
 *                        (owner in 'item-uid') is accepted as well.
 * @param array $resource Resource row; 'width' and 'height' are set here.
 * @return string|false The new photo's resource-id hash, or false when the
 *                      image data could not be decoded.
 */
function retriever_store_photo($item, &$resource) {
    $hash = photo_new_resource();

    if (class_exists('Imagick')) {
        try {
            $image = new Imagick();
            $image->readImageBlob($resource['data']);
            $resource['width'] = $image->getImageWidth();
            $resource['height'] = $image->getImageHeight();
        }
        catch (Exception $e) {
            logger("ImageMagick couldn't process image " . $resource['id'] . " " . $resource['url'] . ' length ' . strlen($resource['data']) . ': ' . $e->getMessage(), LOGGER_DEBUG);
            return false;
        }
    }
    if (!array_key_exists('width', $resource)) {
        $image = @imagecreatefromstring($resource['data']);
        if ($image === false) {
            logger("Couldn't process image " . $resource['id'] . " " . $resource['url'], LOGGER_DEBUG);
            return false;
        }
        $resource['width'] = imagesx($image);
        $resource['height'] = imagesy($image);
        imagedestroy($image);
    }

    $url_components = parse_url($resource['url']);
    $filename = basename($url_components['path']);
    if (!strlen($filename)) {
        $filename = 'image';
    }

    // Item rows keep the owner in 'uid'.  Reading only 'item-uid' stored
    // every photo under uid 0.
    if (isset($item['uid'])) {
        $uid = $item['uid'];
    }
    elseif (isset($item['item-uid'])) {
        $uid = $item['item-uid'];
    }
    else {
        $uid = 0;
    }

    $r = q("INSERT INTO `photo`
( `uid`, `contact-id`, `guid`, `resource-id`, `created`, `edited`, `filename`, `type`, `album`, `height`, `width`, `datasize`, `data` )
VALUES ( %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, '%s' )",
           intval($uid),
           intval($item['contact-id']),
           dbesc(get_guid()),
           dbesc($hash),
           dbesc(datetime_convert()),
           dbesc(datetime_convert()),
           dbesc($filename),
           dbesc($resource['type']),
           dbesc('Retrieved Images'),
           intval($resource['height']),
           intval($resource['width']),
           intval(strlen($resource['data'])),
           dbesc($resource['data'])
    );
    return $hash;
}
/**
 * Point an item's references to a remote image at a locally stored copy.
 *
 * The image is saved as a photo and every occurrence of the resource URL in
 * the body is replaced with the local photo URL.  The item row is updated
 * only when the body actually changed.
 *
 * @param array $item     Item row; 'body' is modified in place.
 * @param array $resource Completed image resource row.
 */
function retriever_transform_images(&$item, $resource) {
    require_once('include/Photo.php');

    if (!$resource["data"]) {
        logger('retriever_transform_images: no data available for '
               . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL);
        return;
    }

    $photo_hash = retriever_store_photo($item, $resource);
    if ($photo_hash === false) {
        logger('retriever_transform_images: unable to store photo '
               . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL);
        return;
    }

    $local_url = get_app()->get_baseurl() . '/photo/' . $photo_hash;
    logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
           $local_url . ' in item ' . $item['plink'], LOGGER_DEBUG);

    $rewritten_body = str_replace($resource["url"], $local_url, $item['body']);
    if ($rewritten_body !== $item['body']) {
        $item['body'] = $rewritten_body;
        q("UPDATE `item` SET `edited` = '%s', `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
          dbesc(datetime_convert('UTC', 'UTC')),
          dbesc($item['body']),
          dbesc($item['plink']),
          intval($item['uid']),
          intval($item['contact-id']));
    }
}
/**
 * Render the addon's pages into $a->page['content'].
 *
 * The first path argument selects the page: 'help' shows the help text with
 * links to each feed contact's rule page; any other non-empty value is taken
 * as a contact id and shows that contact's rule form.  A posted form is
 * saved first and, if requested, applied to existing items.
 *
 * @param object $a Application object (argv, page and get_baseurl() are used).
 */
function retriever_content($a) {
    if (!local_user()) {
        $a->page['content'] .= "<p>Please log in</p>";
        return;
    }

    $arg = isset($a->argv[1]) ? $a->argv[1] : null;
    if ($arg === 'help') {
        $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
                   intval(local_user()));
        if (!is_array($feeds)) {
            $feeds = array();
        }
        foreach ($feeds as $k=>$v) {
            $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
        }
        $template = get_markup_template('/help.tpl', 'addon/retriever/');
        $a->page['content'] .= replace_macros($template, array(
            '$config' => $a->get_baseurl() . '/settings/addon',
            '$feeds' => $feeds));
        return;
    }
    if (!$arg) {
        return;
    }

    $retriever = get_retriever($arg, local_user(), false);
    if (x($_POST["id"])) {
        $retriever = get_retriever($arg, local_user(), true);
        // Rebuild the settings from scratch out of the posted form.
        $retriever["data"] = array();
        foreach (array('pattern', 'replace', 'enable', 'images') as $setting) {
            if (x($_POST['retriever_' . $setting])) {
                $retriever["data"][$setting] = $_POST['retriever_' . $setting];
            }
        }
        foreach ($_POST as $k=>$v) {
            if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
                $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
            }
        }
        // You've gotta have an element, even if it's just "*"
        foreach (array('include', 'exclude') as $section) {
            if (empty($retriever['data'][$section]) || !is_array($retriever['data'][$section])) {
                // No rows were posted for this section.
                continue;
            }
            foreach ($retriever['data'][$section] as $k=>$clause) {
                if (empty($clause['element'])) {
                    unset($retriever['data'][$section][$k]);
                }
            }
        }
        q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
          dbesc(json_encode($retriever["data"])), intval($retriever["id"]));
        $a->page['content'] .= "<p><b>Settings Updated";
        if (x($_POST["retriever_retrospective"])) {
            apply_retrospective($retriever, $_POST["retriever_retrospective"]);
            // Echo the number that was actually applied, as an integer so
            // no posted markup can reach the page.
            $a->page['content'] .= " and retrospectively applied to " . intval($_POST["retriever_retrospective"]) . " posts";
        }
        $a->page['content'] .= ".</b></p>";
    }

    // Settings to show in the form; every key defaults to null when there is
    // no rule yet or the setting is unset.
    $data = array(
        'enable' => null,
        'pattern' => null,
        'replace' => null,
        'images' => null,
        'include' => null,
        'exclude' => null);
    if (is_array($retriever) && isset($retriever['data']) && is_array($retriever['data'])) {
        $data = array_merge($data, $retriever['data']);
    }

    $template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
    $a->page['content'] .= replace_macros($template, array(
        '$enable' => array(
            'retriever_enable',
            t('Enabled'),
            $data['enable']),
        '$pattern' => array(
            'retriever_pattern',
            t('URL Pattern'),
            $data['pattern'],
            t('Regular expression matching part of the URL to replace')),
        '$replace' => array(
            'retriever_replace',
            t('URL Replace'),
            $data['replace'],
            t('Text to replace matching part of above regular expression')),
        '$images' => array(
            'retriever_images',
            t('Download Images'),
            $data['images']),
        '$retrospective' => array(
            'retriever_retrospective',
            t('Retrospectively Apply'),
            '0',
            t('Reapply the rules to this number of posts')),
        '$title' => t('Retrieve Feed Content'),
        '$help' => $a->get_baseurl() . '/retriever/help',
        '$submit' => t('Submit'),
        '$id' => (!empty($retriever["id"]) ? $retriever["id"] : "create"),
        '$tag_t' => t('Tag'),
        '$attribute_t' => t('Attribute'),
        '$value_t' => t('Value'),
        '$add_t' => t('Add'),
        '$remove_t' => t('Remove'),
        '$include_t' => t('Include'),
        '$include' => $data['include'],
        '$exclude_t' => t('Exclude'),
        '$exclude' => $data['exclude']));
    return;
}
/**
 * contact_photo_menu hook: add a "Retriever" entry, linking to the rule page,
 * to the menu of feed contacts.
 *
 * @param object $a    Application object (get_baseurl() is used).
 * @param array  $args Hook data with 'contact' and 'menu'; modified in place.
 */
function retriever_contact_photo_menu($a, &$args) {
    if ($args && ($args["contact"]["network"] == "feed")) {
        $rule_page = $a->get_baseurl() . '/retriever/' . $args["contact"]['id'];
        $args["menu"][ 'retriever' ] = array(t('Retriever'), $rule_page);
    }
}
/**
 * post_remote hook: queue retrieval for an incoming item.
 *
 * When the item's contact has a rule, the rule decides what is fetched.
 * Without a rule, images are still fetched if the owner enabled the
 * 'all_photos' setting.  The item's visibility is then recomputed.
 *
 * @param object $a    Application object (unused).
 * @param array  $item Incoming item; passed by reference.
 */
function retriever_post_remote_hook(&$a, &$item) {
    logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
    $retriever = get_retriever($item['contact-id'], $item["uid"], false);
    if ($retriever) {
        retriever_on_item_insert($retriever, $item);
    }
    elseif (get_pconfig($item["uid"], 'retriever', 'all_photos')) {
        // retrieve_images() declares a single parameter.
        retrieve_images($item);
    }
    retriever_check_item_completed($item);
}
/**
 * plugin_settings hook: append this addon's settings form to the page.
 *
 * @param object $a Application object (get_baseurl() is used).
 * @param string $s Settings markup collected so far; appended to in place.
 */
function retriever_plugin_settings(&$a,&$s) {
    // Attribute that pre-checks the "All Photos" box when the option is on.
    $checked_attr = '';
    if (get_pconfig(local_user(), 'retriever', 'all_photos') == 'on') {
        $checked_attr = ' checked="true"';
    }

    $macros = array(
        '$submit' => t('Submit'),
        '$title' => t('Retriever Settings'),
        '$help' => $a->get_baseurl() . '/retriever/help',
        '$all_photos' => $checked_attr,
        '$all_photos_t' => t('All Photos'));
    $s .= replace_macros(get_markup_template('/settings.tpl', 'addon/retriever/'), $macros);
}
/**
 * plugin_settings_post hook: store or clear the per-user 'all_photos' option.
 *
 * @param object $a    Application object (unused).
 * @param array  $post Posted data (unused; $_POST is read directly).
 */
function retriever_plugin_settings_post($a,$post) {
    if (!local_user()) {
        // No logged-in user to store the setting for.
        return;
    }
    // An unticked checkbox is simply absent from the form data.
    if (!empty($_POST['all_photos'])) {
        set_pconfig(local_user(), 'retriever', 'all_photos', $_POST['all_photos']);
    }
    else {
        del_pconfig(local_user(), 'retriever', 'all_photos');
    }
}

View File

@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Stylesheet template used by retriever_apply_dom_filter.  The include and
     exclude XPath expressions and the page, directory and root URLs are
     substituted into it before it is loaded.  Keep macro names and template
     directives out of comments, since substitution runs over the whole
     text. -->
<!-- Outside the included elements, text is discarded. -->
<xsl:template match="text()"/>
<!-- Elements selected by the include expression are copied, and their
     attributes and children are processed in "remove" mode. -->
<xsl:template match="$include">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>
<!-- "remove" mode is a recursive copy of everything... -->
<xsl:template match="node()|@*" mode="remove">
<xsl:copy>
<xsl:apply-templates select="node()|@*" mode="remove"/>
</xsl:copy>
</xsl:template>
<!-- ...except nodes selected by the exclude expression, which produce no
     output.  This template is only emitted when an exclude expression
     exists. -->
{{ if $exclude }}
<xsl:template match="$exclude" mode="remove"/>
{{ endif }}
<!-- attempt to replace relative URLs with absolute URLs -->
<!-- http://stackoverflow.com/questions/3824631/replace-href-value-in-anchor-tags-of-html-using-xslt -->
<!-- src values starting with "." are prefixed with the page's directory URL -->
<xsl:template match="*/@src[starts-with(.,'.')]" mode="remove">
<xsl:attribute name="src">
<xsl:value-of select="concat('$dirurl',.)"/>
</xsl:attribute>
</xsl:template>
<!-- src values starting with "/" are prefixed with the site's root URL -->
<xsl:template match="*/@src[starts-with(.,'/')]" mode="remove">
<xsl:attribute name="src">
<xsl:value-of select="concat('$rooturl',.)"/>
</xsl:attribute>
</xsl:template>
</xsl:stylesheet>

148
retriever/view/help.tpl Normal file
View File

@ -0,0 +1,148 @@
<h2>Retriever Plugin Help</h2>
<p>
This plugin replaces the short excerpts you normally get in RSS feeds
with the full content of the article from the source website. You
specify which part of the page you're interested in with a set of
rules. When each item arrives, the plugin downloads the full page
from the website, extracts content using the rules, and replaces the
original article.
</p>
<p>
There are a few reasons you may want to do this. The source website
might be slow or overloaded. The source website might be
untrustworthy, in which case using Friendica to scrub the HTML is a
good idea. You might be on a LAN that blacklists certain websites.
It also works neatly with the mailstream plugin, allowing you to read
a news stream comfortably without needing continuous Internet
connectivity.
</p>
<p>
However, setting up retriever can be quite tricky since it depends on
the internal design of the website. This was designed to make life
easy for the website's developers, not for you. You'll need to have
some familiarity with HTML, and be willing to adapt when the website
suddenly changes everything without notice.
</p>
<h3>Configuring Retriever for a feed</h3>
<p>
To set up retriever for an RSS feed, go to the "Contacts" page and
find your feed. Then click on the drop-down menu on the contact.
Select "Retriever" to get to the retriever configuration.
</p>
<p>
The "Include" configuration section specifies parts of the page to
include in the article. Each row has three components:
</p>
<ul>
<li>An HTML tag (e.g. "div", "span", "p")</li>
<li>An attribute (usually "class" or "id")</li>
<li>A value for the attribute</li>
</ul>
<p>
A simple case is when the article is wrapped in a "div" element:
</p>
<pre>
...
&lt;div class="main-content"&gt;
&lt;h2&gt;Man Bites Dog&lt;/h2&gt;
&lt;img src="mbd.jpg"&gt;
&lt;p&gt;
Residents of the sleepy community of Nowheresville were
shocked yesterday by the sight of creepy local weirdo Jim
McOddman assaulting innocent local dog Snufflekins with his
false teeth.
&lt;/p&gt;
...
&lt;/div&gt;
...
</pre>
<p>
You then specify the tag "div", attribute "class", and value
"main-content". Everything else in the page, such as navigation
panels and menus and footers and so on, will be discarded. If there
is more than one section of the page you want to include, specify each
one on a separate row. If the matching section contains some sections
you want to remove, specify those in the "Exclude" section in the same
way.
</p>
<p>
Once you've got a configuration that you think will work, you can try
it out on some existing articles. Type a number into the
"Retrospectively Apply" box and click "Submit". After a while
(exactly how long depends on your system's cron configuration) the new
articles should be available.
</p>
<h3>Techniques</h3>
<p>
You can leave the attribute and value blank to include all the
corresponding elements with the specified tag name. You can also use
a tag name of "*", which will match any element type with the
specified attribute regardless of the tag.
</p>
<p>
Note that the "class" attribute is a special case. Many web page
templates will put multiple different classes in the same element,
separated by spaces. If you specify an attribute of "class" it will
match an element if any of its classes matches the specified value.
For example:
</p>
<pre>
&lt;div class="article breaking-news"&gt;
</pre>
<p>
In this case you can specify a value of "article", or "breaking-news".
You can also specify "article breaking-news", but that won't match if
the website suddenly changes to "breaking-news article", so that's not
recommended.
</p>
<p>
One useful trick you can try is using the website's "print" pages.
Many news sites have print versions of all their articles. These are
usually drastically simplified compared to the live website page.
Sometimes this is a good way to get the whole article when it's
normally split across multiple pages.
</p>
<p>
Hopefully the URL for the print page is a predictable variant of the
normal article URL. For example, an article URL like:
</p>
<pre>
http://www.newssite.com/article-8636.html
</pre>
<p>
...might have a print version at:
</p>
<pre>
http://www.newssite.com/print/article-8636.html
</pre>
<p>
To change the URL used to retrieve the page, use the "URL Pattern" and
"URL Replace" fields. The pattern is a regular expression matching
part of the URL to replace. In this case, you might use a pattern of
"/article" and a replace string of "/print/article". A common pattern
is simply "$", used to add the replace string to the end of the URL.
</p>
<h3>Background Processing</h3>
<p>
Note that retrieving and processing the articles can take some time,
so it's done in the background. Incoming articles will be marked as
invisible while they're in the process of being downloaded. If a URL
fails, the plugin will keep trying at progressively longer intervals
for up to a month, in case the website is temporarily overloaded or
the network is down.
</p>
<h3>Retrieving Images</h3>
<p>
Retriever can also optionally download images and store them in the
local Friendica instance. Just check the "Download Images" box. You
can also download images in every item from your network, whether the
item comes from an RSS feed or not. Go to the "Settings" page and
click <a href="$config">"Plugin settings"</a>. Then check the "All
Photos" box in the "Retriever Settings" section and click "Submit".
</p>
<h2>Configure Feeds:</h2>
<div>
{{ for $feeds as $feed }}
{{ inc contact_template.tpl with $contact=$feed }}{{ endinc }}
{{ endfor }}
</div>

View File

@ -0,0 +1,111 @@
<div class="settings-block">
<script language="javascript">
function retriever_add_row(id)
{
    // Append a blank rule row (tag, attribute and value inputs plus a remove
    // button) to the table body whose element id is given by "id".
    var tbody = document.getElementById(id);
    // Row ids have the form "<id>-<n>". Continue numbering after the last row
    // that is still present. When every row has been removed there is no last
    // row, so restart at 0 instead of dereferencing undefined.
    var last = tbody.rows[tbody.rows.length - 1];
    var count = last ? (+last.id.replace(id + '-', '')) + 1 : 0;
    var row = document.createElement('tr');
    row.id = id + '-' + count;
    // One text input per rule component; the field name is the row id plus a
    // suffix, matching the names used by the server-rendered rows.
    var suffixes = ['-element', '-attribute', '-value'];
    for (var i = 0; i < suffixes.length; i++) {
        var cell = document.createElement('td');
        var field = document.createElement('input');
        field.name = row.id + suffixes[i];
        cell.appendChild(field);
        row.appendChild(cell);
    }
    // Fourth cell: a button that removes this row again.
    var cell4 = document.createElement('td');
    var butrem = document.createElement('input');
    butrem.id = row.id + '-rem';
    butrem.type = 'button';
    butrem.onclick = function(){retriever_remove_row(id, count)};
    // NOTE(review): the label is injected by the template into a quoted
    // string; a translation containing an apostrophe would break this script.
    // Confirm that the value is escaped before it reaches the template.
    butrem.value = '$remove_t';
    cell4.appendChild(butrem);
    row.appendChild(cell4);
    tbody.appendChild(row);
}
function retriever_remove_row(id, number)
{
    // Detach the row whose element id is "<id>-<number>" from the table body
    // whose element id is "<id>".
    var container = document.getElementById(id);
    var rowId = id + '-' + number;
    container.removeChild(document.getElementById(rowId));
}
</script>
<h2>$title</h2>
<p><a href="$help">Get Help</a></p>
<form method="post">
<input type="hidden" name="id" value="$id">
{{ inc field_checkbox.tpl with $field=$enable }}{{ endinc }}
{{ inc field_input.tpl with $field=$pattern }}{{ endinc }}
{{ inc field_input.tpl with $field=$replace }}{{ endinc }}
{{ inc field_checkbox.tpl with $field=$images }}{{ endinc }}
{{ inc field_input.tpl with $field=$retrospective }}{{ endinc }}
<h3>$include_t:</h3>
<div>
<table>
<thead>
<tr><th>$tag_t</th><th>$attribute_t</th><th>$value_t</th></tr>
</thead>
<tbody id="retriever-include">
{{ if $include }}
{{ for $include as $k=>$m }}
<tr id="retriever-include-$k">
<td><input name="retriever-include-$k-element" value="$m.element"></td>
<td><input name="retriever-include-$k-attribute" value="$m.attribute"></td>
<td><input name="retriever-include-$k-value" value="$m.value"></td>
<td><input id="retrieve-include-$k-rem" type="button" onclick="retriever_remove_row('retriever-include', $k)" value="$remove_t"></td>
</tr>
{{ endfor }}
{{ else }}
<tr id="retriever-include-0">
<td><input name="retriever-include-0-element"></td>
<td><input name="retriever-include-0-attribute"></td>
<td><input name="retriever-include-0-value"></td>
<td><input id="retrieve-include-0-rem" type="button" onclick="retriever_remove_row('retriever-include', 0)" value="$remove_t"></td>
</tr>
{{ endif }}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-include')" value="$add_t">
</div>
<h3>$exclude_t:</h3>
<div>
<table>
<thead>
<tr><th>$tag_t</th><th>$attribute_t</th><th>$value_t</th></tr>
</thead>
<tbody id="retriever-exclude">
{{ if $exclude }}
{{ for $exclude as $k=>$r }}
<tr id="retriever-exclude-$k">
<td><input name="retriever-exclude-$k-element" value="$r.element"></td>
<td><input name="retriever-exclude-$k-attribute" value="$r.attribute"></td>
<td><input name="retriever-exclude-$k-value" value="$r.value"></td>
<td><input id="retrieve-exclude-$k-rem" type="button" onclick="retriever_remove_row('retriever-exclude', $k)" value="$remove_t"></td>
</tr>
{{ endfor }}
{{ else }}
<tr id="retriever-exclude-0">
<td><input name="retriever-exclude-0-element"></td>
<td><input name="retriever-exclude-0-attribute"></td>
<td><input name="retriever-exclude-0-value"></td>
<td><input id="retrieve-exclude-0-rem" type="button" onclick="retriever_remove_row('retriever-exclude', 0)" value="$remove_t"></td>
</tr>
{{ endif }}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-exclude')" value="$add_t">
</div>
<input type="submit" size="70" value="$submit">
</form>
</div>

View File

@ -0,0 +1,17 @@
<div class="settings-block">
<h3>$title</h3>
<p>
<a href="$help">Get Help</a>
</p>
<table>
<tbody>
<tr>
<td>$all_photos_t:</td>
<td><input class="checkbox" type="checkbox" name="all_photos" $all_photos></td>
</tr>
<tr>
<td colspan="2"><input type="submit" value="$submit"></td>
</tr>
</tbody>
</table>
</div>

View File

@ -0,0 +1,41 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  <!-- Default mode: suppress all text, so only content reached through an
       include match below is written to the output. -->
  <xsl:template match="text()"/>
  <!-- A node matching the include pattern is copied; its attributes and
       children are then processed in "remove" mode. -->
  <xsl:template match="{{$include}}">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*" mode="remove"/>
    </xsl:copy>
  </xsl:template>
  <!-- "remove" mode is an identity copy of everything inside an included node,
       except where a more specific rule below takes over. -->
  <xsl:template match="node()|@*" mode="remove">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*" mode="remove"/>
    </xsl:copy>
  </xsl:template>
{{if $exclude}}
  <!-- Nodes matching the exclude pattern produce no output at all. -->
  <xsl:template match="{{$exclude}}" mode="remove"/>
{{/if}}
  <!-- attempt to replace relative URLs with absolute URLs -->
  <!-- http://stackoverflow.com/questions/3824631/replace-href-value-in-anchor-tags-of-html-using-xslt -->
  <!-- src values beginning with a dot are prefixed with the directory URL. -->
  <xsl:template match="*/@src[starts-with(.,'.')]" mode="remove">
    <xsl:attribute name="src">
      <xsl:value-of select="concat('{{$dirurl}}',.)"/>
    </xsl:attribute>
  </xsl:template>
  <!-- src values beginning with a single slash are prefixed with the root URL.
       Values beginning with a double slash are protocol-relative and already
       name a host, so they are excluded here and fall through to the identity
       copy unchanged; prefixing them would produce a broken URL. -->
  <xsl:template match="*/@src[starts-with(.,'/') and not(starts-with(.,'//'))]" mode="remove">
    <xsl:attribute name="src">
      <xsl:value-of select="concat('{{$rooturl}}',.)"/>
    </xsl:attribute>
  </xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,153 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<h2>Retriever Plugin Help</h2>
<p>
This plugin replaces the short excerpts you normally get in RSS feeds
with the full content of the article from the source website. You
specify which part of the page you're interested in with a set of
rules. When each item arrives, the plugin downloads the full page
from the website, extracts content using the rules, and replaces the
original article.
</p>
<p>
There are a few reasons you may want to do this. The source website
might be slow or overloaded. The source website might be
untrustworthy, in which case using Friendica to scrub the HTML is a
good idea. You might be on a LAN that blacklists certain websites.
It also works neatly with the mailstream plugin, allowing you to read
a news stream comfortably without needing continuous Internet
connectivity.
</p>
<p>
However, setting up retriever can be quite tricky since it depends on
the internal design of the website. This was designed to make life
easy for the website's developers, not for you. You'll need to have
some familiarity with HTML, and be willing to adapt when the website
suddenly changes everything without notice.
</p>
<h3>Configuring Retriever for a feed</h3>
<p>
To set up retriever for an RSS feed, go to the "Contacts" page and
find your feed. Then click on the drop-down menu on the contact.
Select "Retriever" to get to the retriever configuration.
</p>
<p>
The "Include" configuration section specifies parts of the page to
include in the article. Each row has three components:
</p>
<ul>
<li>An HTML tag (e.g. "div", "span", "p")</li>
<li>An attribute (usually "class" or "id")</li>
<li>A value for the attribute</li>
</ul>
<p>
A simple case is when the article is wrapped in a "div" element:
</p>
<pre>
...
&lt;div class="main-content"&gt;
&lt;h2&gt;Man Bites Dog&lt;/h2&gt;
&lt;img src="mbd.jpg"&gt;
&lt;p&gt;
Residents of the sleepy community of Nowheresville were
shocked yesterday by the sight of creepy local weirdo Jim
McOddman assaulting innocent local dog Snufflekins with his
false teeth.
&lt;/p&gt;
...
&lt;/div&gt;
...
</pre>
<p>
You then specify the tag "div", attribute "class", and value
"main-content". Everything else in the page, such as navigation
panels and menus and footers and so on, will be discarded. If there
is more than one section of the page you want to include, specify each
one on a separate row. If the matching section contains some sections
you want to remove, specify those in the "Exclude" section in the same
way.
</p>
<p>
Once you've got a configuration that you think will work, you can try
it out on some existing articles. Type a number into the
"Retrospectively Apply" box and click "Submit". After a while
(exactly how long depends on your system's cron configuration) the new
articles should be available.
</p>
<h3>Techniques</h3>
<p>
You can leave the attribute and value blank to include all the
corresponding elements with the specified tag name. You can also use
a tag name of "*", which will match any element type with the
specified attribute regardless of the tag.
</p>
<p>
Note that the "class" attribute is a special case. Many web page
templates will put multiple different classes in the same element,
separated by spaces. If you specify an attribute of "class" it will
match an element if any of its classes matches the specified value.
For example:
</p>
<pre>
&lt;div class="article breaking-news"&gt;
</pre>
<p>
In this case you can specify a value of "article", or "breaking-news".
You can also specify "article breaking-news", but that won't match if
the website suddenly changes to "breaking-news article", so that's not
recommended.
</p>
<p>
One useful trick you can try is using the website's "print" pages.
Many news sites have print versions of all their articles. These are
usually drastically simplified compared to the live website page.
Sometimes this is a good way to get the whole article when it's
normally split across multiple pages.
</p>
<p>
Hopefully the URL for the print page is a predictable variant of the
normal article URL. For example, an article URL like:
</p>
<pre>
http://www.newssite.com/article-8636.html
</pre>
<p>
...might have a print version at:
</p>
<pre>
http://www.newssite.com/print/article-8636.html
</pre>
<p>
To change the URL used to retrieve the page, use the "URL Pattern" and
"URL Replace" fields. The pattern is a regular expression matching
part of the URL to replace. In this case, you might use a pattern of
"/article" and a replace string of "/print/article". A common pattern
is simply "$", used to add the replace string to the end of the URL.
</p>
<h3>Background Processing</h3>
<p>
Note that retrieving and processing the articles can take some time,
so it's done in the background. Incoming articles will be marked as
invisible while they're in the process of being downloaded. If a URL
fails, the plugin will keep trying at progressively longer intervals
for up to a month, in case the website is temporarily overloaded or
the network is down.
</p>
<h3>Retrieving Images</h3>
<p>
Retriever can also optionally download images and store them in the
local Friendica instance. Just check the "Download Images" box. You
can also download images in every item from your network, whether the
item comes from an RSS feed or not. Go to the "Settings" page and
click <a href="{{$config}}">"Plugin settings"</a>. Then check the "All
Photos" box in the "Retriever Settings" section and click "Submit".
</p>
<h2>Configure Feeds:</h2>
<div>
{{foreach $feeds as $feed}}
{{include file="contact_template.tpl" contact=$feed}}
{{/foreach}}
</div>

View File

@ -0,0 +1,116 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<div class="settings-block">
<script language="javascript">
function retriever_add_row(id)
{
    // Append a blank rule row (tag, attribute and value inputs plus a remove
    // button) to the table body whose element id is given by "id".
    var tbody = document.getElementById(id);
    // Row ids have the form "<id>-<n>". Continue numbering after the last row
    // that is still present. When every row has been removed there is no last
    // row, so restart at 0 instead of dereferencing undefined.
    var last = tbody.rows[tbody.rows.length - 1];
    var count = last ? (+last.id.replace(id + '-', '')) + 1 : 0;
    var row = document.createElement('tr');
    row.id = id + '-' + count;
    // One text input per rule component; the field name is the row id plus a
    // suffix, matching the names used by the server-rendered rows.
    var suffixes = ['-element', '-attribute', '-value'];
    for (var i = 0; i < suffixes.length; i++) {
        var cell = document.createElement('td');
        var field = document.createElement('input');
        field.name = row.id + suffixes[i];
        cell.appendChild(field);
        row.appendChild(cell);
    }
    // Fourth cell: a button that removes this row again.
    var cell4 = document.createElement('td');
    var butrem = document.createElement('input');
    butrem.id = row.id + '-rem';
    butrem.type = 'button';
    butrem.onclick = function(){retriever_remove_row(id, count)};
    // NOTE(review): the label is injected by the template into a quoted
    // string; a translation containing an apostrophe would break this script.
    // Confirm that the value is escaped before it reaches the template.
    butrem.value = '{{$remove_t}}';
    cell4.appendChild(butrem);
    row.appendChild(cell4);
    tbody.appendChild(row);
}
function retriever_remove_row(id, number)
{
    // Detach the row whose element id is "<id>-<number>" from the table body
    // whose element id is "<id>".
    var container = document.getElementById(id);
    var rowId = id + '-' + number;
    container.removeChild(document.getElementById(rowId));
}
</script>
<h2>{{$title}}</h2>
<p><a href="{{$help}}">Get Help</a></p>
<form method="post">
<input type="hidden" name="id" value="{{$id}}">
{{include file="field_checkbox.tpl" field=$enable}}
{{include file="field_input.tpl" field=$pattern}}
{{include file="field_input.tpl" field=$replace}}
{{include file="field_checkbox.tpl" field=$images}}
{{include file="field_input.tpl" field=$retrospective}}
<h3>{{$include_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-include">
{{if $include}}
{{foreach $include as $k=>$m}}
<tr id="retriever-include-{{$k}}">
<td><input name="retriever-include-{{$k}}-element" value="{{$m.element}}"></td>
<td><input name="retriever-include-{{$k}}-attribute" value="{{$m.attribute}}"></td>
<td><input name="retriever-include-{{$k}}-value" value="{{$m.value}}"></td>
<td><input id="retrieve-include-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-include', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-include-0">
<td><input name="retriever-include-0-element"></td>
<td><input name="retriever-include-0-attribute"></td>
<td><input name="retriever-include-0-value"></td>
<td><input id="retrieve-include-0-rem" type="button" onclick="retriever_remove_row('retriever-include', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-include')" value="{{$add_t}}">
</div>
<h3>{{$exclude_t}}:</h3>
<div>
<table>
<thead>
<tr><th>{{$tag_t}}</th><th>{{$attribute_t}}</th><th>{{$value_t}}</th></tr>
</thead>
<tbody id="retriever-exclude">
{{if $exclude}}
{{foreach $exclude as $k=>$r}}
<tr id="retriever-exclude-{{$k}}">
<td><input name="retriever-exclude-{{$k}}-element" value="{{$r.element}}"></td>
<td><input name="retriever-exclude-{{$k}}-attribute" value="{{$r.attribute}}"></td>
<td><input name="retriever-exclude-{{$k}}-value" value="{{$r.value}}"></td>
<td><input id="retrieve-exclude-{{$k}}-rem" type="button" onclick="retriever_remove_row('retriever-exclude', {{$k}})" value="{{$remove_t}}"></td>
</tr>
{{/foreach}}
{{else}}
<tr id="retriever-exclude-0">
<td><input name="retriever-exclude-0-element"></td>
<td><input name="retriever-exclude-0-attribute"></td>
<td><input name="retriever-exclude-0-value"></td>
<td><input id="retrieve-exclude-0-rem" type="button" onclick="retriever_remove_row('retriever-exclude', 0)" value="{{$remove_t}}"></td>
</tr>
{{/if}}
</tbody>
</table>
<input type="button" onclick="retriever_add_row('retriever-exclude')" value="{{$add_t}}">
</div>
<input type="submit" size="70" value="{{$submit}}">
</form>
</div>

View File

@ -0,0 +1,22 @@
{{*
* AUTOMATICALLY GENERATED TEMPLATE
* DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN
*
*}}
<div class="settings-block">
<h3>{{$title}}</h3>
<p>
<a href="{{$help}}">Get Help</a>
</p>
<table>
<tbody>
<tr>
<td>{{$all_photos_t}}:</td>
<td><input class="checkbox" type="checkbox" name="all_photos" {{$all_photos}}></td>
</tr>
<tr>
<td colspan="2"><input type="submit" value="{{$submit}}"></td>
</tr>
</tbody>
</table>
</div>