fix/allow CDATA encoding (#428)

This commit is contained in:
Sean Molenaar 2019-03-17 08:23:37 +01:00 committed by Benjamin Brahmer
parent f5e25d2ede
commit 2698214c41
12 changed files with 202 additions and 286 deletions

View File

@ -11,11 +11,11 @@
* [raghunayyar](mailto:me@iraghu.com)
* [bastei](mailto:bastei@users.noreply.github.com)
* [Bernhard Posselt](mailto:bep@foryouandyourcustomers.com)
* [Sean Molenaar](mailto:sean@seanmolenaar.eu)
* [Thomas Müller](mailto:thomas.mueller@tmit.eu)
* [Hoàng Đức Hiếu](mailto:hdhoang@zahe.me)
* [Benjamin Brahmer](mailto:info@b-brahmer.de)
* [Daniel Opitz](mailto:git@copynpaste.de)
* [Sean Molenaar](mailto:sean@seanmolenaar.eu)
* [rakekniven](mailto:mark.ziegler@rakekniven.de)
* [David-Development](mailto:david-dev@live.de)
* [Koen Martens](mailto:gmc@sonologic.nl)
@ -95,6 +95,7 @@
* [Zach DeCook](mailto:zachdecook@gmail.com)
* [amittel](mailto:arnulf.mittelstaedt@gmail.com)
* [anoy](mailto:anoymouserver@users.noreply.github.com)
* [b_b](mailto:bruno@eliaz.fr)
* [b_b](mailto:brunobergot@gmail.com)
* [bjoerns1983](mailto:bjoern@sengotta.net)
* [blackcrack](mailto:blackcrack@blackysgate.de)
@ -102,6 +103,7 @@
* [joeplus](mailto:joerg@honululu.Speedport_W_723V_1_32_000)
* [kondou](mailto:kondou@ts.unde.re)
* [markusj](mailto:markusj@users.noreply.github.com)
* [nexus-uw](mailto:you@example.com)
* [repat](mailto:repat@repat.de)
* [ritchiewilson](mailto:rawilson52@gmail.com)
* [sonologic](mailto:gmc@sonologic.nl)

View File

@ -11,6 +11,7 @@
namespace OCA\News\AppInfo;
use OCA\News\Command\ShowFeed;
use OCA\News\Command\Updater\UpdateFeed;
use OCA\News\Command\Updater\AllFeeds;
use OCA\News\Command\Updater\BeforeUpdate;
@ -24,3 +25,4 @@ $application->add($container->query(UpdateFeed::class));
$application->add($container->query(BeforeUpdate::class));
$application->add($container->query(AfterUpdate::class));
$application->add($container->query(ExploreGenerator::class));
$application->add($container->query(ShowFeed::class));

View File

@ -1,64 +1,65 @@
{
"type": "project",
"license": "AGPL-3.0",
"description": "An RSS/Atom feed reader. Requires Nextcloud backgroundjobs or an updater script to be enabled to update your feeds. See the README.md in the apps top directory",
"homepage": "https://github.com/nextcloud/news",
"authors": [
{
"name": "Bernhard Posselt",
"email": "dev@bernhard-posselt.com",
"homepage": "https://bernhard-posselt.com",
"role": "Developer"
},
{
"name": "Sean Molenaar",
"email": "sean@seanmolenaar.eu",
"homepage": "https://seanmolenaar.eu",
"role": "Developer"
},
{
"name": "Alessandro Cosentino",
"homepage": "http://algorithmsforthekitchen.com/",
"email": "cosenal@gmail.com",
"role": "Developer"
},
{
"name": "Jan-Christoph Borchardt",
"email": "hey@jancborchardt.net",
"homepage": "http://jancborchardt.net/",
"role": "Designer"
}
],
"support": {
"irc": "irc://irc.freenode.org/nextcloud-news",
"issues": "https://github.com/nextcloud/news/issues",
"source": "https://github.com/nextcloud/news/"
"type": "project",
"license": "AGPL-3.0",
"description": "An RSS/Atom feed reader. Requires Nextcloud backgroundjobs or an updater script to be enabled to update your feeds. See the README.md in the apps top directory",
"homepage": "https://github.com/nextcloud/news",
"authors": [
{
"name": "Bernhard Posselt",
"email": "dev@bernhard-posselt.com",
"homepage": "https://bernhard-posselt.com",
"role": "Developer"
},
"require": {
"php": "^7.0",
"ezyang/htmlpurifier": "4.10.0",
"pear/net_url2": "2.2.2",
"riimu/kit-pathjoin": "1.2.0",
"debril/feed-io": "^3.1",
"arthurhoaro/favicon": "^1.2",
"ext-json": "*"
{
"name": "Sean Molenaar",
"email": "sean@seanmolenaar.eu",
"homepage": "https://seanmolenaar.eu",
"role": "Developer"
},
"require-dev": {
"phpunit/phpunit": "^6.5",
"squizlabs/php_codesniffer": "^3.3",
"guzzlehttp/guzzle": "~6.3"
{
"name": "Alessandro Cosentino",
"homepage": "http://algorithmsforthekitchen.com/",
"email": "cosenal@gmail.com",
"role": "Developer"
},
"replace": {
"guzzlehttp/guzzle": "*"
},
"autoload": {
"psr-4": {
"OCA\\News\\": "lib/"
}
},
"autoload-dev": {
"psr-4": {
"OCA\\News\\Tests\\": "tests/"
}
{
"name": "Jan-Christoph Borchardt",
"email": "hey@jancborchardt.net",
"homepage": "http://jancborchardt.net/",
"role": "Designer"
}
],
"support": {
"irc": "irc://irc.freenode.org/nextcloud-news",
"issues": "https://github.com/nextcloud/news/issues",
"source": "https://github.com/nextcloud/news/"
},
"require": {
"php": "^7.0",
"ezyang/htmlpurifier": "4.10.0",
"pear/net_url2": "2.2.2",
"riimu/kit-pathjoin": "1.2.0",
"debril/feed-io": "^3.1",
"arthurhoaro/favicon": "^1.2",
"ext-json": "*",
"guzzlehttp/guzzle": "^6.3",
"ext-simplexml": "*"
},
"require-dev": {
"phpunit/phpunit": "^6.5",
"squizlabs/php_codesniffer": "^3.3"
},
"replace": {
"guzzlehttp/guzzle": "*"
},
"autoload": {
"psr-4": {
"OCA\\News\\": "lib/"
}
},
"autoload-dev": {
"psr-4": {
"OCA\\News\\Tests\\": "tests/"
}
}
}

5
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "2247ef4a89a64d37158df942d780cdfe",
"content-hash": "fe812fa2682a6ee30eb5ab53805f4aeb",
"packages": [
{
"name": "arthurhoaro/favicon",
@ -1905,7 +1905,8 @@
"prefer-lowest": false,
"platform": {
"php": "^7.0",
"ext-json": "*"
"ext-json": "*",
"ext-simplexml": "*"
},
"platform-dev": []
}

View File

@ -159,13 +159,9 @@ class Application extends App
* Fetchers
*/
$container->registerService(FetcherConfig::class, function (IContainer $c): FetcherConfig {
// FIXME: move this into a separate class for testing?
$config = $c->query(Config::class);
$proxy = $c->query(ProxyConfigParser::class);
$fConfig = new FetcherConfig();
$fConfig->setClientTimeout($config->getFeedFetcherTimeout());
$fConfig->setProxy($proxy);
$fConfig->setConfig($c->query(Config::class))
->setProxy($c->query(IConfig::class));
return $fConfig;
});

70
lib/Command/ShowFeed.php Normal file
View File

@ -0,0 +1,70 @@
<?php
/**
* Nextcloud - News
*
* This file is licensed under the Affero General Public License version 3 or
* later. See the COPYING file.
*
* @author Sean Molenaar <sean@seanmolenaar.eu>
* @copyright Sean Molenaar 2019
*/
namespace OCA\News\Command;
use FeedIo\FeedIo;
use Favicon\Favicon;
use OCA\News\Fetcher\Fetcher;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
/**
* This is used for debugging feed data:
* ./occ news:show-feed www.feed.com
*/
class ShowFeed extends Command
{
/**
* Feed and favicon fetcher.
*/
protected $feedFetcher;
/**
* Set up class.
*
* @param Fetcher $feedFetcher Feed reader
*/
public function __construct(Fetcher $feedFetcher)
{
$this->feedFetcher = $feedFetcher;
parent::__construct();
}
protected function configure()
{
$this->setName('news:show-feed')
->setDescription('Prints a JSON string which represents the given feed as it would be in the DB.')
->addArgument('feed', InputArgument::REQUIRED, 'Feed to parse')
->addOption('user', 'u', InputOption::VALUE_OPTIONAL, 'Username for the feed')
->addOption('password', 'p', InputOption::VALUE_OPTIONAL, 'Password for the feed');
}
protected function execute(InputInterface $input, OutputInterface $output)
{
$url = $input->getArgument('feed');
$user = $input->getOption('user');
$password = $input->getOption('password');
try {
list($feed, $items) = $this->feedFetcher->fetch($url, true, null, $user, $password);
$output->writeln("Feed: " . json_encode($feed, JSON_PRETTY_PRINT));
$output->writeln("Items: " . json_encode($items, JSON_PRETTY_PRINT));
} catch (\Throwable $ex) {
$output->writeln('<error>Failed to fetch feed info:</error>');
$output->writeln($ex->getMessage());
return 1;
}
}
}

View File

@ -16,6 +16,7 @@ namespace OCA\News\Config;
use FeedIo\Adapter\ClientInterface;
use \GuzzleHttp\Client;
use \FeedIo\Adapter\Guzzle\Client as FeedIoClient;
use OCP\IConfig;
/**
* Class FetcherConfig
@ -26,6 +27,8 @@ class FetcherConfig
{
protected $client_timeout;
protected $proxy;
protected $redirects;
protected $max_size;
/**
* Configure a guzzle client
@ -36,14 +39,18 @@ class FetcherConfig
{
if (!class_exists('GuzzleHttp\Collection')) {
$config = [
'timeout' => $this->getClientTimeout(),
'timeout' => $this->client_timeout,
];
if (!empty($this->proxy)) {
$config['proxy'] = $this->proxy;
}
$guzzle = new Client();
if (!empty($this->redirects)) {
$config['redirect.max'] = $this->redirects;
}
$guzzle = new Client($config);
$client = new FeedIoClient($guzzle);
return $client;
@ -51,7 +58,7 @@ class FetcherConfig
$config = [
'request.options' => [
'timeout' => $this->getClientTimeout(),
'timeout' => $this->client_timeout,
],
];
@ -59,59 +66,54 @@ class FetcherConfig
$config['request.options']['proxy'] = $this->proxy;
}
if (!empty($this->redirects)) {
$config['request.options']['redirect.max'] = $this->redirects;
}
$guzzle = new Client($config);
return new LegacyGuzzleClient($guzzle);
}
/**
* Set a timeout for the client
* Set settings for config.
*
* @param int $timeout The timeout
* @param Config $config The shared configuration
*
* @return self
*/
public function setClientTimeout($timeout)
public function setConfig(Config $config)
{
$this->client_timeout = $timeout;
$this->client_timeout = $config->getFeedFetcherTimeout();
$this->redirects = $config->getMaxRedirects();
$this->max_size = $config->getMaxSize();
return $this;
}
/**
* Get the client timeout.
*
* @return mixed
*/
public function getClientTimeout()
{
return $this->client_timeout;
}
/**
* Set the proxy
*
* @param \OCA\News\Utility\ProxyConfigParser $proxy The proxy to set.
* @param IConfig $config Nextcloud config.
*
* @return self
*/
public function setProxy($proxy)
public function setProxy(IConfig $config)
{
// proxy settings
$proxySettings = $proxy->parse();
$host = $proxySettings['host'];
$port = $proxySettings['port'];
$user = $proxySettings['user'];
$password = $proxySettings['password'];
$proxy = $config->getSystemValue('proxy', null);
$creds = $config->getSystemValue('proxyuserpwd', null);
$proxy_string = 'https://';
if (!empty($user)) {
$proxy_string .= $user . ':' . $password . '@';
if (is_null($proxy)) {
return $this;
}
$proxy_string .= $host;
if (!empty($port)) {
$proxy_string .= ':' . $port;
$url = new \Net_URL2($proxy);
if ($creds) {
$auth = explode(':', $creds, 2);
$url->setUserinfo($auth[0], $auth[1]);
}
$this->proxy = $proxy_string;
$this->proxy = $url->getNormalizedURL();
return $this;
}

View File

@ -146,7 +146,7 @@ class Item extends Entity implements IAPI, \JsonSerializable
return $this->enclosureMime;
}
public function getFeedId(): int
public function getFeedId()
{
return $this->feedId;
}
@ -169,7 +169,7 @@ class Item extends Entity implements IAPI, \JsonSerializable
return $this->guidHash;
}
public function getId(): int
public function getId()
{
return $this->id;
}

View File

@ -19,12 +19,14 @@ use FeedIo\Feed\ItemInterface;
use FeedIo\FeedInterface;
use FeedIo\FeedIo;
use Net_URL2;
use OCA\News\Utility\PsrLogger;
use OCP\IL10N;
use OCA\News\Db\Item;
use OCA\News\Db\Feed;
use OCA\News\Utility\Time;
use SimpleXMLElement;
class FeedFetcher implements IFeedFetcher
{
@ -65,10 +67,11 @@ class FeedFetcher implements IFeedFetcher
*/
public function fetch(string $url, bool $favicon, $lastModified, $user, $password): array
{
$url2 = new Net_URL2($url);
if (!empty($user) && !empty(trim($user))) {
$url = explode('://', $url);
$url = $url[0] . '://' . urlencode($user) . ':' . urlencode($password) . '@' . $url[1];
$url2->setUserinfo(urlencode($user), urlencode($password));
}
$url = $url2->getNormalizedURL();
if (is_null($lastModified) || !is_string($lastModified)) {
$resource = $this->reader->read($url);
} else {
@ -204,6 +207,12 @@ class FeedFetcher implements IFeedFetcher
'HTML-ENTITIES',
mb_detect_encoding($body)
);
$data = simplexml_load_string(
'<?xml version="1.0" encoding="utf-8"?><item>' . $body . '</item>',
SimpleXMLElement::class,
LIBXML_NOCDATA
);
$body = ($data === false) ? $body : (string) $data;
$item->setBody($body);
if ($parsedItem->hasMedia()) {

View File

@ -1,66 +0,0 @@
<?php
/**
* Nextcloud - News
*
* This file is licensed under the Affero General Public License version 3 or
* later. See the COPYING file.
*
* @author Alessandro Cosentino <cosenal@gmail.com>
* @author Bernhard Posselt <dev@bernhard-posselt.com>
* @copyright 2012 Alessandro Cosentino
* @copyright 2012-2014 Bernhard Posselt
*/
namespace OCA\News\Utility;
use \OCP\IConfig;
class ProxyConfigParser
{
private $config;
public function __construct(IConfig $config)
{
$this->config = $config;
}
/**
* Parses the config and splits up the port + url
*
* @return array
*/
public function parse()
{
$proxy = $this->config->getSystemValue('proxy');
$userpasswd = $this->config->getSystemValue('proxyuserpwd');
$result = [
'host' => null,
'port' => null,
'user' => null,
'password' => null
];
// we need to filter out the port -.-
$url = new \Net_URL2($proxy);
$port = $url->getPort();
$url->setPort(false);
$host = $url->getUrl();
$result['host'] = $host;
$result['port'] = $port;
if ($userpasswd) {
$auth = explode(':', $userpasswd, 2);
$result['user'] = $auth[0];
$result['password'] = $auth[1];
}
return $result;
}
}

View File

@ -113,6 +113,7 @@ class FeedFetcherTest extends TestCase
private $pub;
private $updated;
private $body;
private $parsed_body;
/**
* @var Author
*/
@ -172,21 +173,22 @@ class FeedFetcherTest extends TestCase
$timeFactory,
$this->logger
);
$this->url = 'http://tests';
$this->url = 'http://tests/';
$this->permalink = 'http://permalink';
$this->title = 'my&amp;lt;&apos; title';
$this->guid = 'hey guid here';
$this->guid_hash = 'df9a5f84e44bfe38cf44f6070d5b0250';
$this->body = 'let the bodies hit the floor <a href="test">test</a>';
$this->pub = 23111;
$this->updated = 23444;
$this->author = new Author();
$this->permalink = 'http://permalink';
$this->title = 'my&amp;lt;&apos; title';
$this->guid = 'hey guid here';
$this->guid_hash = 'df9a5f84e44bfe38cf44f6070d5b0250';
$this->body = '<![CDATA[let the bodies hit the floor <a href="test">test</a>]]>';
$this->parsed_body = 'let the bodies hit the floor <a href="test">test</a>';
$this->pub = 23111;
$this->updated = 23444;
$this->author = new Author();
$this->author->setName('&lt;boogieman');
$this->enclosure = 'http://enclosure.you';
$this->enclosure = 'http://enclosure.you';
$this->feed_title = '&lt;a&gt;&amp;its a&lt;/a&gt; title';
$this->feed_link = 'http://tests';
$this->feed_link = 'http://tests/';
$this->feed_image = '/an/image';
$this->web_favicon = 'http://anon.google.com';
$this->modified = $this->getMockBuilder('\DateTime')->getMock();
@ -231,9 +233,9 @@ class FeedFetcherTest extends TestCase
public function testFetchAccount()
{
$this->__setUpReader('http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests');
$this->__setUpReader('http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests/');
$item = $this->_createItem();
$feed = $this->_createFeed('de-DE', false, 'http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests');
$feed = $this->_createFeed('de-DE', false, 'http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests/');
$this->_mockIterator($this->feed_mock, [$this->item_mock]);
$result = $this->fetcher->fetch($this->url, false, null, 'account@email.com', 'F9sEU*Rt%:KFK8HMHT&');
@ -477,7 +479,7 @@ class FeedFetcherTest extends TestCase
$item->setTitle('my<\' title');
$item->setGuid($this->guid);
$item->setGuidHash($this->guid_hash);
$item->setBody($this->body);
$item->setBody($this->parsed_body);
$item->setRtl(false);
$item->setLastModified(3);
$item->setPubDate(3);

View File

@ -1,103 +0,0 @@
<?php
/**
* Nextcloud - News
*
* This file is licensed under the Affero General Public License version 3 or
* later. See the COPYING file.
*
* @author Alessandro Cosentino <cosenal@gmail.com>
* @author Bernhard Posselt <dev@bernhard-posselt.com>
* @copyright 2012 Alessandro Cosentino
* @copyright 2012-2014 Bernhard Posselt
*/
namespace OCA\News\Tests\Unit\Utility;
use OCA\News\Utility\ProxyConfigParser;
use OCP\IConfig;
use PHPUnit\Framework\TestCase;
class ProxyConfigParserTest extends TestCase
{
private $config;
private $feedService;
private $itemService;
private $parser;
protected function setUp()
{
$this->config = $this->getMockBuilder(IConfig::class)
->disableOriginalConstructor()
->getMock();
$this->parser = new ProxyConfigParser($this->config);
}
private function setExpectedProxy($proxy=null, $userpasswd=null)
{
$this->config->expects($this->at(0))
->method('getSystemValue')
->with($this->equalTo('proxy'))
->will($this->returnValue($proxy));
$this->config->expects($this->at(1))
->method('getSystemValue')
->with($this->equalTo('proxyuserpwd'))
->will($this->returnValue($userpasswd));
}
public function testParsesNoProxy()
{
$expected = [
'host' => null,
'port' => null,
'user' => null,
'password' => null
];
$this->setExpectedProxy();
$result = $this->parser->parse();
$this->assertEquals($expected, $result);
}
public function testParsesHost()
{
$expected = [
'host' => 'http://google.com/mytest',
'port' => null,
'user' => null,
'password' => null
];
$this->setExpectedProxy('http://google.com/mytest');
$result = $this->parser->parse();
$this->assertEquals($expected, $result);
}
public function testParsesHostAndPort()
{
$expected = [
'host' => 'http://google.com/mytest',
'port' => 89,
'user' => null,
'password' => null
];
$this->setExpectedProxy('http://google.com:89/mytest');
$result = $this->parser->parse();
$this->assertEquals($expected, $result);
}
public function testParsesUser()
{
$expected = [
'host' => null,
'port' => null,
'user' => 'john',
'password' => 'doe:hey'
];
$this->setExpectedProxy(null, 'john:doe:hey');
$result = $this->parser->parse();
$this->assertEquals($expected, $result);
}
}