move af_readability out of master tree

This commit is contained in:
Andrew Dolgov 2022-12-13 20:08:43 +03:00
parent 313f12ae93
commit 8ea537123d
No known key found for this signature in database
GPG Key ID: 1A56B4FA25D4AF2A
654 changed files with 0 additions and 322114 deletions

View File

@ -1,20 +0,0 @@
{
"minimum-stability": "dev",
"prefer-stable": true,
"repositories": [
{
"name": "fivefilters/readability.php",
"type": "vcs",
"url": "https://dev.tt-rss.org/fox/readability-php.git"
},
{
"name": "masterminds/html5",
"type": "vcs",
"url": "https://dev.tt-rss.org/fox/html5-php.git"
}
],
"require": {
"fivefilters/readability.php": "dev-master",
"psr/http-factory": "1.0.1"
}
}

View File

@ -1,473 +0,0 @@
{
"_readme": [
"This file locks the dependencies of your project to a known state",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "183ed768c66eb8f183350edf06c06a63",
"packages": [
{
"name": "fivefilters/readability.php",
"version": "dev-master",
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/readability-php.git",
"reference": "8ac5abdd497b37d2be4833bcf18d6819bba4d9c9"
},
"require": {
"ext-dom": "*",
"ext-mbstring": "*",
"ext-xml": "*",
"league/uri": "^6.4",
"masterminds/html5": "2.7.x-dev@dev",
"php": ">=7.3.0",
"psr/log": "^1.0"
},
"require-dev": {
"monolog/monolog": "^2.3",
"phpunit/phpunit": "^9"
},
"suggest": {
"monolog/monolog": "Allow logging debug information"
},
"default-branch": true,
"type": "library",
"autoload": {
"psr-4": {
"fivefilters\\Readability\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"fivefilters\\Readability\\Test\\": "test"
}
},
"license": [
"Apache-2.0"
],
"authors": [
{
"name": "Andres Rey",
"email": "andreskrey@gmail.com",
"role": "Original Developer"
},
{
"name": "Keyvan Minoukadeh",
"email": "keyvan@fivefilters.org",
"homepage": "https://www.fivefilters.org",
"role": "Developer/Maintainer"
}
],
"description": "A PHP port of Readability.js",
"homepage": "https://github.com/fivefilters/readability.php",
"keywords": [
"html",
"readability"
],
"time": "2022-07-31T06:02:47+00:00"
},
{
"name": "league/uri",
"version": "6.7.1",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri.git",
"reference": "2d7c87a0860f3126a39f44a8a9bf2fed402dcfea"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri/zipball/2d7c87a0860f3126a39f44a8a9bf2fed402dcfea",
"reference": "2d7c87a0860f3126a39f44a8a9bf2fed402dcfea",
"shasum": ""
},
"require": {
"ext-json": "*",
"league/uri-interfaces": "^2.3",
"php": "^7.4 || ^8.0",
"psr/http-message": "^1.0"
},
"conflict": {
"league/uri-schemes": "^1.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^v3.3.2",
"nyholm/psr7": "^1.5",
"php-http/psr7-integration-tests": "^1.1",
"phpstan/phpstan": "^1.2.0",
"phpstan/phpstan-deprecation-rules": "^1.0",
"phpstan/phpstan-phpunit": "^1.0.0",
"phpstan/phpstan-strict-rules": "^1.1.0",
"phpunit/phpunit": "^9.5.10",
"psr/http-factory": "^1.0"
},
"suggest": {
"ext-fileinfo": "Needed to create Data URI from a filepath",
"ext-intl": "Needed to improve host validation",
"league/uri-components": "Needed to easily manipulate URI objects",
"psr/http-factory": "Needed to use the URI factory"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "6.x-dev"
}
},
"autoload": {
"psr-4": {
"League\\Uri\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "URI manipulation library",
"homepage": "https://uri.thephpleague.com",
"keywords": [
"data-uri",
"file-uri",
"ftp",
"hostname",
"http",
"https",
"middleware",
"parse_str",
"parse_url",
"psr-7",
"query-string",
"querystring",
"rfc3986",
"rfc3987",
"rfc6570",
"uri",
"uri-template",
"url",
"ws"
],
"support": {
"docs": "https://uri.thephpleague.com",
"forum": "https://thephpleague.slack.com",
"issues": "https://github.com/thephpleague/uri/issues",
"source": "https://github.com/thephpleague/uri/tree/6.7.1"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"time": "2022-06-29T09:48:18+00:00"
},
{
"name": "league/uri-interfaces",
"version": "2.3.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri-interfaces.git",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"shasum": ""
},
"require": {
"ext-json": "*",
"php": "^7.2 || ^8.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^2.19",
"phpstan/phpstan": "^0.12.90",
"phpstan/phpstan-phpunit": "^0.12.19",
"phpstan/phpstan-strict-rules": "^0.12.9",
"phpunit/phpunit": "^8.5.15 || ^9.5"
},
"suggest": {
"ext-intl": "to use the IDNA feature",
"symfony/intl": "to use the IDNA feature via Symfony Polyfill"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.x-dev"
}
},
"autoload": {
"psr-4": {
"League\\Uri\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "Common interface for URI representation",
"homepage": "http://github.com/thephpleague/uri-interfaces",
"keywords": [
"rfc3986",
"rfc3987",
"uri",
"url"
],
"support": {
"issues": "https://github.com/thephpleague/uri-interfaces/issues",
"source": "https://github.com/thephpleague/uri-interfaces/tree/2.3.0"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"time": "2021-06-28T04:27:21+00:00"
},
{
"name": "masterminds/html5",
"version": "dev-master",
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/html5-php.git",
"reference": "d2c79ada2a87bb7eaafe1a39e4e3bb37853099aa"
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7"
},
"default-branch": true,
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"Masterminds\\HTML5\\Tests\\": "test/HTML5"
}
},
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"dom",
"html",
"html5",
"parser",
"querypath",
"serializer",
"xml"
],
"time": "2022-12-11T19:41:09+00:00"
},
{
"name": "psr/http-factory",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-factory.git",
"reference": "12ac7fcd07e5b077433f5f2bee95b3a771bf61be"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-factory/zipball/12ac7fcd07e5b077433f5f2bee95b3a771bf61be",
"reference": "12ac7fcd07e5b077433f5f2bee95b3a771bf61be",
"shasum": ""
},
"require": {
"php": ">=7.0.0",
"psr/http-message": "^1.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interfaces for PSR-7 HTTP message factories",
"keywords": [
"factory",
"http",
"message",
"psr",
"psr-17",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-factory/tree/master"
},
"time": "2019-04-30T12:38:16+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interface for HTTP messages",
"homepage": "https://github.com/php-fig/http-message",
"keywords": [
"http",
"http-message",
"psr",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-message/tree/master"
},
"time": "2016-08-06T14:39:51+00:00"
},
{
"name": "psr/log",
"version": "1.1.4",
"source": {
"type": "git",
"url": "https://github.com/php-fig/log.git",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/log/zipball/d49695b909c3b7628b6289db5479a1c204601f11",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.1.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Log\\": "Psr/Log/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "https://www.php-fig.org/"
}
],
"description": "Common interface for logging libraries",
"homepage": "https://github.com/php-fig/log",
"keywords": [
"log",
"psr",
"psr-3"
],
"support": {
"source": "https://github.com/php-fig/log/tree/1.1.4"
},
"time": "2021-05-03T11:20:27+00:00"
}
],
"packages-dev": [],
"aliases": [],
"minimum-stability": "dev",
"stability-flags": {
"fivefilters/readability.php": 20
},
"prefer-stable": true,
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
"plugin-api-version": "2.3.0"
}

View File

@ -1,35 +0,0 @@
/* global xhr, App, Plugins, Article, Notify */
/**
 * Client-side part of the af_readability plugin: toggles an article between
 * its original markup and the full text fetched from the backend.
 */
Plugins.Af_Readability = {
    // Attribute under which the article's original markup is stashed while
    // the fetched full text is being displayed.
    orig_attr_name: 'data-readability-orig-content',
    // Kept only so the object's shape stays unchanged. `this` inside an object
    // literal is the enclosing scope's `this`, not the object being created,
    // so this property must not be used to reach the plugin.
    self: this,
    /**
     * Toggles the full-text view of one article.
     *
     * If the original markup is already stashed on the content element it is
     * restored; otherwise the full text is requested from the backend
     * ("af_readability" / "embed") and swapped in.
     *
     * @param {number} id article id, used to locate the content element and sent to the backend
     */
    embed: function(id) {
        // Address the plugin object explicitly: a bare `self` resolves to the
        // global `self`, which has no `orig_attr_name`, so the attribute name
        // used to evaluate to undefined.
        const attr_name = Plugins.Af_Readability.orig_attr_name;

        const content = App.find(App.isCombinedMode() ? `.cdm[data-article-id="${id}"] .content-inner` :
            `.post[data-article-id="${id}"] .content`);

        // Nothing to toggle when the article's content element is not on the page.
        if (!content) {
            Notify.error("Unable to fetch full text for this article");
            return;
        }

        if (content.hasAttribute(attr_name)) {
            content.innerHTML = content.getAttribute(attr_name);
            content.removeAttribute(attr_name);

            if (App.isCombinedMode()) Article.cdmMoveToId(id);

            return;
        }

        Notify.progress("Loading, please wait...");

        xhr.json("backend.php", App.getPhArgs("af_readability", "embed", {id: id}), (reply) => {
            if (reply && reply.content) {
                // Stash the original markup so the next call can restore it.
                content.setAttribute(attr_name, content.innerHTML);
                content.innerHTML = reply.content;
                Notify.close();

                if (App.isCombinedMode()) Article.cdmMoveToId(id);
            } else {
                Notify.error("Unable to fetch full text for this article");
            }
        });
    }
};

View File

@ -1,366 +0,0 @@
<?php
require_once __DIR__ . "/vendor/autoload.php";
use \fivefilters\Readability\Readability;
use \fivefilters\Readability\Configuration;
class Af_Readability extends Plugin {
/** @var PluginHost $host */
private $host;
/**
 * Describes the plugin.
 *
 * NOTE(review): element order is presumably (version, description, author) - confirm against Plugin::about().
 *
 * @return array<int, string|null>
 */
function about() {
    return [
        null,
        "Try to inline article content using Readability",
        "fox",
    ];
}
/**
 * Returns the plugin flags; this plugin sets only "needs_curl".
 *
 * @return array<string, bool>
 */
function flags() {
    return ["needs_curl" => true];
}
/**
 * Stores the posted "enable_share_anything" checkbox in plugin storage and
 * prints a confirmation message.
 *
 * @return void
 */
function save() {
    $this->host->set(
        $this,
        "enable_share_anything",
        checkbox_to_sql_bool($_POST["enable_share_anything"] ?? "")
    );

    echo __("Data saved.");
}
/**
 * Remembers the plugin host and registers this plugin's hooks and filter actions with it.
 *
 * @param PluginHost $host
 */
function init($host)
{
    $this->host = $host;

    // Note: HOOK_GET_FULL_TEXT has to be installed even if the feature is disabled because init()
    // is being run before plugin data has loaded, so storage-set options can't be checked here
    $hooks = [
        $host::HOOK_ARTICLE_FILTER,
        $host::HOOK_PREFS_TAB,
        $host::HOOK_PREFS_EDIT_FEED,
        $host::HOOK_PREFS_SAVE_FEED,
        $host::HOOK_ARTICLE_BUTTON,
        $host::HOOK_GET_FULL_TEXT,
    ];

    foreach ($hooks as $hook) {
        $host->add_hook($hook, $this);
    }

    $host->add_filter_action($this, "action_inline", __("Inline content"));
    $host->add_filter_action($this, "action_inline_append", __("Append content"));
}
/**
 * Returns the contents of init.js located next to this file.
 *
 * @return string|false result of file_get_contents(), i.e. false when the file cannot be read
 */
function get_js() {
return file_get_contents(__DIR__ . "/init.js");
}
/**
 * Builds the article toolbar icon that calls Plugins.Af_Readability.embed() for the article.
 *
 * @param array<string, mixed> $line article row; only $line["id"] is used
 * @return string HTML for the button
 */
function hook_article_button($line) {
    $article_id = $line["id"];
    $tooltip = __('Toggle full article text');

    // the line break between the two attributes is part of the emitted markup
    return "<i class='material-icons' onclick=\"Plugins.Af_Readability.embed(" . $article_id . ")\"\n" .
        "style='cursor : pointer' title=\"" . $tooltip . "\">description</i>";
}
/**
 * Renders the plugin's accordion pane on the "prefFeeds" preferences tab.
 *
 * The pane contains the form for the "enable_share_anything" option (submitted
 * to save()) and, when any exist, the list of feeds the plugin is enabled for.
 * Rendering also rewrites the stored "enabled_feeds" / "append_feeds" lists
 * after passing them through filter_unknown_feeds().
 *
 * NOTE(review): the value of Feeds::_get_title() and the feed id are echoed
 * into the markup without escaping - confirm both are safe for HTML output.
 *
 * @param string $args name of the preferences tab being rendered; anything but "prefFeeds" is ignored
 * @return void
 */
function hook_prefs_tab($args) {
if ($args != "prefFeeds") return;
$enable_share_anything = sql_bool_to_bool($this->host->get($this, "enable_share_anything"));
?>
<div dojoType='dijit.layout.AccordionPane'
title="<i class='material-icons'>extension</i> <?= __('Readability settings (af_readability)') ?>">
<?= format_notice("Enable for specific feeds in the feed editor.") ?>
<form dojoType='dijit.form.Form'>
<?= \Controls\pluginhandler_tags($this, "save") ?>
<script type="dojo/method" event="onSubmit" args="evt">
evt.preventDefault();
if (this.validate()) {
Notify.progress('Saving data...', true);
xhr.post("backend.php", this.getValues(), (reply) => {
Notify.info(reply);
})
}
</script>
<fieldset>
<label class='checkbox'>
<?= \Controls\checkbox_tag("enable_share_anything", $enable_share_anything) ?>
<?= __("Provide full-text services to core code (bookmarklets) and other plugins") ?>
</label>
</fieldset>
<hr/>
<?= \Controls\submit_tag(__("Save")) ?>
</form>
<?php
/* cleanup: keep only stored feed ids that filter_unknown_feeds() still finds, then persist the pruned lists */
$enabled_feeds = $this->filter_unknown_feeds(
$this->get_stored_array("enabled_feeds"));
$append_feeds = $this->filter_unknown_feeds(
$this->get_stored_array("append_feeds"));
$this->host->set($this, "enabled_feeds", $enabled_feeds);
$this->host->set($this, "append_feeds", $append_feeds);
?>
<?php if (count($enabled_feeds) > 0) { ?>
<hr/>
<h3><?= __("Currently enabled for (click to edit):") ?></h3>
<ul class='panel panel-scrollable list list-unstyled'>
<?php foreach ($enabled_feeds as $f) { ?>
<li>
<i class='material-icons'>rss_feed</i>
<a href='#' onclick="CommonDialogs.editFeed(<?= $f ?>)">
<?= Feeds::_get_title($f) . " " . (in_array($f, $append_feeds) ? __("(append)") : "") ?>
</a>
</li>
<?php } ?>
</ul>
<?php } ?>
</div>
<?php
}
/**
 * Renders the "Readability" section of the feed editor: one checkbox for
 * inlining article content and one for appending it to the summary instead of
 * replacing it. Each checkbox is pre-checked when $feed_id is present in the
 * corresponding stored list ("enabled_feeds" / "append_feeds").
 *
 * @param int $feed_id id of the feed being edited
 * @return void
 */
function hook_prefs_edit_feed($feed_id) {
$enabled_feeds = $this->get_stored_array("enabled_feeds");
$append_feeds = $this->get_stored_array("append_feeds");
?>
<header><?= __("Readability") ?></header>
<section>
<fieldset>
<label class='checkbox'>
<?= \Controls\checkbox_tag("af_readability_enabled", in_array($feed_id, $enabled_feeds)) ?>
<?= __('Inline article content') ?>
</label>
</fieldset>
<fieldset>
<label class='checkbox'>
<?= \Controls\checkbox_tag("af_readability_append", in_array($feed_id, $append_feeds)) ?>
<?= __('Append to summary, instead of replacing it') ?>
</label>
</fieldset>
</section>
<?php
}
/**
 * Applies the feed editor's Readability checkboxes to plugin storage.
 *
 * For each of the two stored lists the feed id is added when its checkbox is
 * set and not yet listed, and removed when the checkbox is clear and listed.
 *
 * @param int $feed_id id of the feed being saved
 * @return void
 */
function hook_prefs_save_feed($feed_id) {
    // storage key => name of the POSTed checkbox that controls it
    $targets = [
        "enabled_feeds" => "af_readability_enabled",
        "append_feeds" => "af_readability_append",
    ];

    $updated = [];

    foreach ($targets as $storage_key => $field) {
        $feeds = $this->get_stored_array($storage_key);
        $checked = checkbox_to_sql_bool($_POST[$field] ?? "");
        $position = array_search($feed_id, $feeds);

        if ($checked && $position === false) {
            $feeds[] = $feed_id;
        } elseif (!$checked && $position !== false) {
            unset($feeds[$position]);
        }

        $updated[$storage_key] = $feeds;
    }

    // write back only after both lists have been computed
    foreach ($updated as $storage_key => $feeds) {
        $this->host->set($this, $storage_key, $feeds);
    }
}
/**
 * Runs the filter action selected for an article.
 *
 * @param array<string, mixed> $article article being filtered
 * @param string $action name of the filter action, as registered through add_filter_action() in init()
 * @return array<string, mixed> the processed article, or $article unchanged for an unknown action
 */
function hook_article_filter_action($article, $action) {
    switch ($action) {
        case "action_inline":
            return $this->process_article($article, false);
        // init() registers the append action as "action_inline_append"; without this
        // case the "Append content" filter action fell through and did nothing.
        case "action_inline_append":
        // previously handled name, kept so existing callers keep working
        case "action_append":
            return $this->process_article($article, true);
    }

    return $article;
}
/**
 * Fetches $url and extracts the main article content from it with Readability.
 *
 * Link (href) and image (src, preferring data-src when present) URLs in the
 * extracted document are rewritten against UrlHelper::$fetch_effective_url.
 *
 * @param string $url
 * @return string|false extracted content, or false when the fetch returned nothing, the response is
 *                      1024 * 500 characters (per mb_strlen) or longer, the HTML could not be loaded,
 *                      or Readability failed
 */
public function extract_content(string $url) {
    $tmp = UrlHelper::fetch([
        "url" => $url,
        "http_accept" => "text/*",
        "type" => "text/html"]);

    if (!$tmp || mb_strlen($tmp) >= 1024 * 500)
        return false;

    // parsed only to validate the markup and to find out the document encoding
    $tmpdoc = new DOMDocument("1.0", "UTF-8");

    if (!@$tmpdoc->loadHTML($tmp))
        return false;

    // DOMDocument::$encoding may be null; passing null to strtolower() is deprecated since PHP 8.1
    $encoding = $tmpdoc->encoding ?? "";

    // this is the worst hack yet :(
    if (strtolower($encoding) != 'utf-8') {
        // drop charset <meta> tags, they would be wrong once the text has been converted
        $tmp = preg_replace("/<meta.*?charset.*?\/?>/i", "", $tmp);

        if (empty($encoding)) {
            $tmp = mb_convert_encoding($tmp, 'utf-8');
        } else {
            $tmp = mb_convert_encoding($tmp, 'utf-8', $encoding);
        }
    }

    try {
        $r = new Readability(new Configuration([
            'fixRelativeURLs' => true,
            'originalURL' => $url,
        ]));

        if (!$r->parse($tmp))
            return false;

        $tmpxpath = new DOMXPath($r->getDOMDocument());
        $entries = $tmpxpath->query('(//a[@href]|//img[@src])');

        foreach ($entries as $entry) {
            if ($entry->hasAttribute("href")) {
                $entry->setAttribute("href",
                    UrlHelper::rewrite_relative(UrlHelper::$fetch_effective_url, $entry->getAttribute("href")));
            }

            if ($entry->hasAttribute("src")) {
                // when data-src is present its value replaces src
                $src = $entry->hasAttribute("data-src") ?
                    $entry->getAttribute("data-src") : $entry->getAttribute("src");

                $entry->setAttribute("src",
                    UrlHelper::rewrite_relative(UrlHelper::$fetch_effective_url, $src));
            }
        }

        return $r->getContent();
    } catch (Exception $e) {
        // extraction is best-effort: report failure to the caller instead of raising
        return false;
    }
}
/**
 * Replaces or extends an article's content with the text extracted from its link.
 *
 * The article is returned unchanged when extraction fails or when the
 * extracted content contains no text once sanitized and stripped of tags.
 *
 * @param array<string, mixed> $article article to process; "link" is read, "content" may be rewritten
 * @param bool $append_mode true to append the extracted content after an <hr/>, false to replace the content
 * @return array<string,mixed>
 * @throws PDOException
 */
function process_article(array $article, bool $append_mode) : array {
    $extracted_content = $this->extract_content($article["link"]);

    // extract_content() returns false on failure; don't hand that to the sanitizer
    if (!$extracted_content)
        return $article;

    # let's see if there's anything of value in there
    $content_test = trim(strip_tags(Sanitizer::sanitize($extracted_content)));

    if ($content_test) {
        if ($append_mode)
            $article["content"] .= "<hr/>" . $extracted_content;
        else
            $article["content"] = $extracted_content;
    }

    return $article;
}
/**
 * Returns the array stored for this plugin under $name; a thin wrapper
 * around PluginHost::get_array().
 *
 * @param string $name
 * @return array<int|string, mixed>
 * @throws PDOException
 * @deprecated
 */
private function get_stored_array(string $name) : array {
return $this->host->get_array($this, $name);
}
/**
 * Processes an article when its feed is listed in the stored "enabled_feeds";
 * append mode is used when the feed is also listed in "append_feeds".
 *
 * @param array<string, mixed> $article article being filtered; the feed id is read from $article["feed"]["id"]
 * @return array<string, mixed>
 */
function hook_article_filter($article) {
    $enabled_feeds = $this->get_stored_array("enabled_feeds");
    $append_feeds = $this->get_stored_array("append_feeds");

    $feed_id = $article["feed"]["id"];

    if (in_array($feed_id, $enabled_feeds)) {
        $append_mode = in_array($feed_id, $append_feeds);

        return $this->process_article($article, $append_mode);
    }

    return $article;
}
/**
 * Provides extracted full text for $link when the "enable_share_anything" option is set.
 *
 * @param string $link URL to extract content from
 * @return string|false extracted content, or false when the option is off, extraction failed,
 *                      or the extracted content contains no text once sanitized and stripped of tags
 */
function hook_get_full_text($link) {
    if (!$this->host->get($this, "enable_share_anything"))
        return false;

    $extracted_content = $this->extract_content($link);

    // extract_content() returns false on failure; don't hand that to the sanitizer
    if (!$extracted_content)
        return false;

    # let's see if there's anything of value in there
    $content_test = trim(strip_tags(Sanitizer::sanitize($extracted_content)));

    return $content_test ? $extracted_content : false;
}
/**
 * Returns the plugin API version this plugin declares.
 *
 * @return int
 */
function api_version() {
return 2;
}
/**
 * Returns the given feed ids that still match a row in ttrss_feeds owned by
 * the current session user, in their original order.
 *
 * @param array<int> $enabled_feeds feed ids to check
 * @return array<int>
 * @throws PDOException
 */
private function filter_unknown_feeds(array $enabled_feeds) : array {
    if (count($enabled_feeds) == 0)
        return [];

    // the statement does not depend on the feed, so prepare it once and re-execute it
    $sth = $this->pdo->prepare("SELECT id FROM ttrss_feeds WHERE id = ? AND owner_uid = ?");

    $known_feeds = [];

    foreach ($enabled_feeds as $feed) {
        $sth->execute([$feed, $_SESSION['uid']]);

        if ($sth->fetch()) {
            $known_feeds[] = $feed;
        }

        // release the result set so the statement can be executed again on every driver
        $sth->closeCursor();
    }

    return $known_feeds;
}
/**
 * Backend handler for Plugins.Af_Readability.embed(): prints a JSON object
 * whose "content" key holds the sanitized full text of the requested article.
 * The key is left out when the article is not found or extraction fails.
 *
 * NOTE(review): the lookup is by entry id only and does not check that the
 * entry belongs to the current user - confirm whether that is intended.
 *
 * @return void
 */
function embed() : void {
    // a missing "id" parameter is treated as 0 instead of raising an undefined index warning
    $article_id = (int) ($_REQUEST["id"] ?? 0);

    $sth = $this->pdo->prepare("SELECT link FROM ttrss_entries WHERE id = ?");
    $sth->execute([$article_id]);

    $ret = [];

    if ($row = $sth->fetch()) {
        $extracted_content = $this->extract_content($row["link"]);

        // extract_content() returns false on failure; don't hand that to the sanitizer
        if ($extracted_content) {
            $ret["content"] = Sanitizer::sanitize($extracted_content);
        }
    }

    print json_encode($ret);
}
}

View File

@ -1,25 +0,0 @@
<?php
// autoload.php @generated by Composer
// Refuse to run on PHP versions older than 5.6.
if (PHP_VERSION_ID < 50600) {
    // Turn the response into an HTTP 500 while the headers can still be changed.
    if (!headers_sent()) {
        header('HTTP/1.1 500 Internal Server Error');
    }

    $err = 'Composer 2.3.0 dropped support for autoloading on PHP <5.6 and you are running '.PHP_VERSION.', please upgrade PHP or use Composer 2.2 LTS via "composer self-update --2.2". Aborting.'.PHP_EOL;

    // When display_errors is off the message is written out explicitly:
    // to STDERR on the command line, otherwise to the response body.
    if (!ini_get('display_errors')) {
        if (in_array(PHP_SAPI, array('cli', 'phpdbg'), true)) {
            fwrite(STDERR, $err);
        } elseif (!headers_sent()) {
            echo $err;
        }
    }

    trigger_error($err, E_USER_ERROR);
}

require_once __DIR__ . '/composer/autoload_real.php';

// Hand the loader instance back to whoever included this file.
return ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0::getLoader();

View File

@ -1,572 +0,0 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Autoload;
/**
* ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
*
* $loader = new \Composer\Autoload\ClassLoader();
*
* // register classes with namespaces
* $loader->add('Symfony\Component', __DIR__.'/component');
* $loader->add('Symfony', __DIR__.'/framework');
*
* // activate the autoloader
* $loader->register();
*
* // to enable searching the include path (eg. for PEAR packages)
* $loader->setUseIncludePath(true);
*
* In this example, if you try to use a class in the Symfony\Component
* namespace or one of its children (Symfony\Component\Console for instance),
* the autoloader will first look for the class under the component/
* directory, and it will then fallback to the framework/ directory if not
* found before giving up.
*
* This class is loosely based on the Symfony UniversalClassLoader.
*
* @author Fabien Potencier <fabien@symfony.com>
* @author Jordi Boggiano <j.boggiano@seld.be>
* @see https://www.php-fig.org/psr/psr-0/
* @see https://www.php-fig.org/psr/psr-4/
*/
class ClassLoader
{
/**
* Vendor directory given to the constructor; used as the key under which
* register() records this loader in self::$registeredLoaders.
*
* @var ?string
*/
private $vendorDir;
// PSR-4
/**
* Length of every registered PSR-4 prefix, indexed by the prefix's first
* character and then by the prefix itself.
*
* @var array[]
* @psalm-var array<string, array<string, int>>
*/
private $prefixLengthsPsr4 = array();
/**
* Base directories registered for each PSR-4 prefix.
*
* @var array[]
* @psalm-var array<string, array<int, string>>
*/
private $prefixDirsPsr4 = array();
/**
* PSR-4 directories registered without a prefix (root namespace).
*
* @var string[]
* @psalm-var list<string>
*/
private $fallbackDirsPsr4 = array();
// PSR-0
/**
* Directories registered for each PSR-0 prefix, indexed by the prefix's
* first character and then by the prefix itself.
*
* @var array[]
* @psalm-var array<string, array<string, string[]>>
*/
private $prefixesPsr0 = array();
/**
* PSR-0 directories registered without a prefix.
*
* @var string[]
* @psalm-var list<string>
*/
private $fallbackDirsPsr0 = array();
/**
* Whether findFileWithExtension() also searches the PHP include path.
*
* @var bool
*/
private $useIncludePath = false;
/**
* Class name => file path.
*
* @var string[]
* @psalm-var array<string, string>
*/
private $classMap = array();
/**
* When true, findFile() reports classes missing from the class map as not
* found without searching any directory.
*
* @var bool
*/
private $classMapAuthoritative = false;
/**
* Classes findFile() already failed to locate, as class name => true.
*
* @var bool[]
* @psalm-var array<string, bool>
*/
private $missingClasses = array();
/**
* Key prefix for APCu cache entries, or null when APCu caching is not used.
*
* @var ?string
*/
private $apcuPrefix;
/**
* Registered loaders, indexed by their vendor directory.
*
* @var self[]
*/
private static $registeredLoaders = array();
/**
* Creates a loader that is not yet registered as an autoloader.
*
* @param ?string $vendorDir vendor directory this loader belongs to; with null, register() does not
*                           record the loader in the list returned by getRegisteredLoaders()
*/
public function __construct($vendorDir = null)
{
$this->vendorDir = $vendorDir;
}
/**
 * Returns all registered PSR-0 prefixes with their directories, merged into
 * a single array across the per-first-character buckets.
 *
 * @return array[]
 * @psalm-return array<string, string[]>
 */
public function getPrefixes()
{
    if (empty($this->prefixesPsr0)) {
        return array();
    }

    return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
}
/**
* Returns the registered PSR-4 prefixes mapped to their base directories.
*
* @return array[]
* @psalm-return array<string, array<int, string>>
*/
public function getPrefixesPsr4()
{
return $this->prefixDirsPsr4;
}
/**
* Returns the PSR-0 directories registered without a prefix.
*
* @return string[]
* @psalm-return list<string>
*/
public function getFallbackDirs()
{
return $this->fallbackDirsPsr0;
}
/**
* Returns the PSR-4 directories registered without a prefix (root namespace).
*
* @return string[]
* @psalm-return list<string>
*/
public function getFallbackDirsPsr4()
{
return $this->fallbackDirsPsr4;
}
/**
* Returns the class map, which findFile() consults before any directory lookup.
*
* @return string[] Array of classname => path
* @psalm-return array<string, string>
*/
public function getClassMap()
{
return $this->classMap;
}
/**
 * Adds entries to the class map; on key collisions the new entries win.
 *
 * @param string[] $classMap Class to filename map
 * @psalm-param array<string, string> $classMap
 *
 * @return void
 */
public function addClassMap(array $classMap)
{
    // An empty map is simply replaced, which avoids a pointless merge.
    $this->classMap = $this->classMap
        ? array_merge($this->classMap, $classMap)
        : $classMap;
}
/**
 * Adds PSR-0 directories for a prefix, before or after the directories
 * already registered for it. An empty prefix targets the fallback directories.
 *
 * @param string          $prefix  The prefix
 * @param string[]|string $paths   The PSR-0 root directories
 * @param bool            $prepend Whether to prepend the directories
 *
 * @return void
 */
public function add($prefix, $paths, $prepend = false)
{
    $dirs = (array) $paths;

    if (!$prefix) {
        $this->fallbackDirsPsr0 = $prepend
            ? array_merge($dirs, $this->fallbackDirsPsr0)
            : array_merge($this->fallbackDirsPsr0, $dirs);

        return;
    }

    // Prefixes are bucketed by their first character.
    $first = $prefix[0];

    if (isset($this->prefixesPsr0[$first][$prefix])) {
        $known = $this->prefixesPsr0[$first][$prefix];

        $this->prefixesPsr0[$first][$prefix] = $prepend
            ? array_merge($dirs, $known)
            : array_merge($known, $dirs);
    } else {
        $this->prefixesPsr0[$first][$prefix] = $dirs;
    }
}
/**
 * Adds PSR-4 directories for a namespace, before or after the directories
 * already registered for it. An empty prefix targets the root namespace.
 *
 * @param string          $prefix  The prefix/namespace, with trailing '\\'
 * @param string[]|string $paths   The PSR-4 base directories
 * @param bool            $prepend Whether to prepend the directories
 *
 * @throws \InvalidArgumentException when a new non-empty prefix does not end with '\\'
 *
 * @return void
 */
public function addPsr4($prefix, $paths, $prepend = false)
{
    $dirs = (array) $paths;

    // Root namespace: extend the fallback directories.
    if (!$prefix) {
        $this->fallbackDirsPsr4 = $prepend
            ? array_merge($dirs, $this->fallbackDirsPsr4)
            : array_merge($this->fallbackDirsPsr4, $dirs);

        return;
    }

    // Namespace seen before: extend its directory list.
    if (isset($this->prefixDirsPsr4[$prefix])) {
        $known = $this->prefixDirsPsr4[$prefix];

        $this->prefixDirsPsr4[$prefix] = $prepend
            ? array_merge($dirs, $known)
            : array_merge($known, $dirs);

        return;
    }

    // New namespace: validate it, then record its length and directories.
    $length = strlen($prefix);
    if ('\\' !== $prefix[$length - 1]) {
        throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
    }

    $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
    $this->prefixDirsPsr4[$prefix] = $dirs;
}
/**
 * Sets the PSR-0 directories for a prefix, discarding the ones registered
 * for it before. An empty prefix replaces the fallback directories.
 *
 * @param string          $prefix The prefix
 * @param string[]|string $paths  The PSR-0 base directories
 *
 * @return void
 */
public function set($prefix, $paths)
{
    $dirs = (array) $paths;

    if ($prefix) {
        $this->prefixesPsr0[$prefix[0]][$prefix] = $dirs;

        return;
    }

    $this->fallbackDirsPsr0 = $dirs;
}
/**
 * Sets the PSR-4 directories for a namespace, discarding the ones registered
 * for it before. An empty prefix replaces the root namespace directories.
 *
 * @param string          $prefix The prefix/namespace, with trailing '\\'
 * @param string[]|string $paths  The PSR-4 base directories
 *
 * @throws \InvalidArgumentException when a non-empty prefix does not end with '\\'
 *
 * @return void
 */
public function setPsr4($prefix, $paths)
{
    if (!$prefix) {
        $this->fallbackDirsPsr4 = (array) $paths;

        return;
    }

    $length = strlen($prefix);
    if ('\\' !== $prefix[$length - 1]) {
        throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
    }

    $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
    $this->prefixDirsPsr4[$prefix] = (array) $paths;
}
/**
* Turns searching the include path for class files on or off.
*
* The flag is consulted at the end of the PSR-0 lookup in findFileWithExtension().
*
* @param bool $useIncludePath
*
* @return void
*/
public function setUseIncludePath($useIncludePath)
{
$this->useIncludePath = $useIncludePath;
}
/**
* Tells whether the autoloader also searches the include path for classes.
*
* @return bool
*/
public function getUseIncludePath()
{
return $this->useIncludePath;
}
/**
* Turns off searching the prefix and fallback directories for classes
* that have not been registered with the class map.
*
* When set, findFile() returns false for any class missing from the class map.
*
* @param bool $classMapAuthoritative
*
* @return void
*/
public function setClassMapAuthoritative($classMapAuthoritative)
{
$this->classMapAuthoritative = $classMapAuthoritative;
}
/**
* Tells whether class lookup fails for classes not found in the current class map.
*
* @return bool
*/
public function isClassMapAuthoritative()
{
return $this->classMapAuthoritative;
}
/**
 * Sets the APCu key prefix used to cache found/not-found classes.
 *
 * The prefix is only kept when apcu_fetch() exists and the apc.enabled ini
 * setting is truthy; otherwise it is reset to null.
 *
 * @param string|null $apcuPrefix
 *
 * @return void
 */
public function setApcuPrefix($apcuPrefix)
{
    $apcuUsable = function_exists('apcu_fetch')
        && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN);

    $this->apcuPrefix = $apcuUsable ? $apcuPrefix : null;
}
/**
* Returns the APCu prefix in use, or null if APCu caching is not enabled.
*
* @return string|null
*/
public function getApcuPrefix()
{
return $this->apcuPrefix;
}
/**
 * Registers this instance on the SPL autoload stack and, when it has a
 * vendor directory, records it in the list of registered loaders.
 *
 * @param bool $prepend Whether to prepend the autoloader or not
 *
 * @return void
 */
public function register($prepend = false)
{
    spl_autoload_register(array($this, 'loadClass'), true, $prepend);

    // Loaders without a vendor directory are not tracked.
    if (null !== $this->vendorDir) {
        if (!$prepend) {
            // Remove and re-add so this loader ends up last in the list.
            unset(self::$registeredLoaders[$this->vendorDir]);
            self::$registeredLoaders[$this->vendorDir] = $this;
        } else {
            // Array union keeps the left-hand entry, putting this loader first.
            self::$registeredLoaders = array($this->vendorDir => $this) + self::$registeredLoaders;
        }
    }
}
/**
 * Removes this instance from the SPL autoload stack and from the list of
 * registered loaders.
 *
 * @return void
 */
public function unregister()
{
    spl_autoload_unregister(array($this, 'loadClass'));

    // Loaders without a vendor directory were never tracked.
    if (null === $this->vendorDir) {
        return;
    }

    unset(self::$registeredLoaders[$this->vendorDir]);
}
/**
 * Autoload callback: includes the file defining the given class or
 * interface when findFile() can locate it.
 *
 * @param  string    $class The name of the class
 * @return true|null True if loaded, null otherwise
 */
public function loadClass($class)
{
    $file = $this->findFile($class);

    if (!$file) {
        return null;
    }

    includeFile($file);

    return true;
}
/**
* Finds the path to the file where the class is defined.
*
* Lookup order: class map, list of classes already known to be missing,
* APCu cache (when a prefix is set), then the PSR-4/PSR-0 directory search.
*
* @param string $class The name of the class
*
* @return string|false The path if found, false otherwise
*/
public function findFile($class)
{
// class map lookup
if (isset($this->classMap[$class])) {
return $this->classMap[$class];
}
// an authoritative class map or an earlier failed lookup ends the search here
if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
return false;
}
if (null !== $this->apcuPrefix) {
// $hit tells a cached "not found" (false) apart from a cache miss
$file = apcu_fetch($this->apcuPrefix.$class, $hit);
if ($hit) {
return $file;
}
}
$file = $this->findFileWithExtension($class, '.php');
// Search for Hack files if we are running on HHVM
if (false === $file && defined('HHVM_VERSION')) {
$file = $this->findFileWithExtension($class, '.hh');
}
if (null !== $this->apcuPrefix) {
// the result is cached whether or not a file was found
apcu_add($this->apcuPrefix.$class, $file);
}
if (false === $file) {
// Remember that this class does not exist.
$this->missingClasses[$class] = true;
}
return $file;
}
/**
* Returns the currently registered loaders indexed by their corresponding vendor directories.
*
* Only loaders constructed with a vendor directory appear here, see register().
*
* @return self[]
*/
public static function getRegisteredLoaders()
{
return self::$registeredLoaders;
}
/**
* Searches the registered directories for the file defining a class.
*
* Tried in order: PSR-4 prefixes (longest matching namespace first), PSR-4
* fallback directories, PSR-0 prefixes, PSR-0 fallback directories and, when
* enabled, the include path.
*
* @param string $class The name of the class
* @param string $ext   File extension to look for, including the leading dot
* @return string|false The path of the first existing file, false when none exists
*/
private function findFileWithExtension($class, $ext)
{
// PSR-4 lookup
$logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
$first = $class[0];
if (isset($this->prefixLengthsPsr4[$first])) {
$subPath = $class;
// strip one namespace segment per iteration, so longer prefixes are tried first
while (false !== $lastPos = strrpos($subPath, '\\')) {
$subPath = substr($subPath, 0, $lastPos);
$search = $subPath . '\\';
if (isset($this->prefixDirsPsr4[$search])) {
// part of the path that lies below the matched prefix
$pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
foreach ($this->prefixDirsPsr4[$search] as $dir) {
if (file_exists($file = $dir . $pathEnd)) {
return $file;
}
}
}
}
}
// PSR-4 fallback dirs
foreach ($this->fallbackDirsPsr4 as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
return $file;
}
}
// PSR-0 lookup
if (false !== $pos = strrpos($class, '\\')) {
// namespaced class name: only underscores in the class name part become directory separators
$logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
. strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
} else {
// PEAR-like class name
$logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
}
if (isset($this->prefixesPsr0[$first])) {
foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
if (0 === strpos($class, $prefix)) {
foreach ($dirs as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
return $file;
}
}
}
}
}
// PSR-0 fallback dirs
foreach ($this->fallbackDirsPsr0 as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
return $file;
}
}
// PSR-0 include paths.
if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
return $file;
}
return false;
}
}
/**
* Scope isolated include.
*
* Declared as a plain function outside the class body, so the included file
* executes in this function's scope and has no access to $this/self.
*
* @param string $file Path of the file to include
* @return void
* @private
*/
function includeFile($file)
{
include $file;
}

View File

@ -1,352 +0,0 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer;
use Composer\Autoload\ClassLoader;
use Composer\Semver\VersionParser;
/**
 * Runtime lookup of the packages Composer installed.
 *
 * This class is copied into every Composer-installed project and is available to all code at runtime.
 *
 * See also https://getcomposer.org/doc/07-runtime.md#installed-versions
 *
 * To require its presence, you can require `composer-runtime-api ^2.0`
 *
 * @final
 */
class InstalledVersions
{
    /**
     * Dataset of the installed.php next to this file (or the data passed to reload()); null until first loaded.
     *
     * @var mixed[]|null
     * @psalm-var array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>}|array{}|null
     */
    private static $installed;

    /**
     * Whether ClassLoader::getRegisteredLoaders() exists; null until first checked.
     *
     * @var bool|null
     */
    private static $canGetVendors;

    /**
     * installed.php datasets already loaded, keyed by vendor directory.
     *
     * @var array[]
     * @psalm-var array<string, array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>}>
     */
    private static $installedByVendor = array();

    /**
     * Returns a list of all package names which are present, either by being installed, replaced or provided
     *
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackages()
    {
        $packages = array();
        foreach (self::getInstalled() as $installed) {
            $packages[] = array_keys($installed['versions']);
        }

        // No dataset loaded at all: nothing is installed. This also avoids
        // calling array_merge() without arguments below.
        if (array() === $packages) {
            return array();
        }

        if (1 === \count($packages)) {
            return $packages[0];
        }

        // array_flip() followed by array_keys() drops names that appear in more than one dataset.
        return array_keys(array_flip(\call_user_func_array('array_merge', $packages)));
    }

    /**
     * Returns a list of all package names with a specific type e.g. 'library'
     *
     * @param  string   $type
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackagesByType($type)
    {
        $packagesByType = array();

        foreach (self::getInstalled() as $installed) {
            foreach ($installed['versions'] as $name => $package) {
                if (isset($package['type']) && $package['type'] === $type) {
                    $packagesByType[] = $name;
                }
            }
        }

        return $packagesByType;
    }

    /**
     * Checks whether the given package is installed
     *
     * This also returns true if the package name is provided or replaced by another package
     *
     * @param  string $packageName
     * @param  bool   $includeDevRequirements
     * @return bool
     */
    public static function isInstalled($packageName, $includeDevRequirements = true)
    {
        foreach (self::getInstalled() as $installed) {
            if (isset($installed['versions'][$packageName])) {
                return $includeDevRequirements || empty($installed['versions'][$packageName]['dev_requirement']);
            }
        }

        return false;
    }

    /**
     * Checks whether the given package satisfies a version constraint
     *
     * e.g. If you want to know whether version 2.3+ of package foo/bar is installed, you would call:
     *
     *   Composer\InstalledVersions::satisfies(new VersionParser, 'foo/bar', '^2.3')
     *
     * @param  VersionParser $parser      Install composer/semver to have access to this class and functionality
     * @param  string        $packageName
     * @param  string|null   $constraint  A version constraint to check for, if you pass one you have to make sure composer/semver is required by your package
     * @return bool
     *
     * @throws \OutOfBoundsException When the package is not installed (raised by getVersionRanges())
     */
    public static function satisfies(VersionParser $parser, $packageName, $constraint)
    {
        // $constraint is documented as string|null; cast it so the parser always receives a string.
        $constraint = $parser->parseConstraints((string) $constraint);
        $provided = $parser->parseConstraints(self::getVersionRanges($packageName));

        return $provided->matches($constraint);
    }

    /**
     * Returns a version constraint representing all the range(s) which are installed for a given package
     *
     * It is easier to use this via satisfies() if you need to check whether a given version
     * of a package is installed, and not just whether it exists
     *
     * @param  string $packageName
     * @return string Version constraint usable with composer/semver
     *
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getVersionRanges($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            // Collect the pretty version plus every alias / replaced / provided entry of the first match.
            $ranges = array();
            if (isset($installed['versions'][$packageName]['pretty_version'])) {
                $ranges[] = $installed['versions'][$packageName]['pretty_version'];
            }
            if (array_key_exists('aliases', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['aliases']);
            }
            if (array_key_exists('replaced', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['replaced']);
            }
            if (array_key_exists('provided', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['provided']);
            }

            return implode(' || ', $ranges);
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     *
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     *
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getPrettyVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['pretty_version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['pretty_version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as reference
     *
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getReference($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['reference'])) {
                return null;
            }

            return $installed['versions'][$packageName]['reference'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as install path. Packages of type metapackages also have a null install path.
     *
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getInstallPath($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            return isset($installed['versions'][$packageName]['install_path']) ? $installed['versions'][$packageName]['install_path'] : null;
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * Returns the 'root' entry of the first loaded dataset.
     *
     * @return array
     * @psalm-return array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}
     */
    public static function getRootPackage()
    {
        $installed = self::getInstalled();

        return $installed[0]['root'];
    }

    /**
     * Returns the raw installed.php data for custom implementations
     *
     * @deprecated Use getAllRawData() instead which returns all datasets for all autoloaders present in the process. getRawData only returns the first dataset loaded, which may not be what you expect.
     * @return array[]
     * @psalm-return array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>}
     */
    public static function getRawData()
    {
        @trigger_error('getRawData only returns the first dataset loaded, which may not be what you expect. Use getAllRawData() instead which returns all datasets for all autoloaders present in the process.', E_USER_DEPRECATED);

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = include __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        return self::$installed;
    }

    /**
     * Returns the raw data of all installed.php which are currently loaded for custom implementations
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>}>
     */
    public static function getAllRawData()
    {
        return self::getInstalled();
    }

    /**
     * Lets you reload the static array from another file
     *
     * This is only useful for complex integrations in which a project needs to use
     * this class but then also needs to execute another project's autoloader in process,
     * and wants to ensure both projects have access to their version of installed.php.
     *
     * A typical case would be PHPUnit, where it would need to make sure it reads all
     * the data it needs from this class, then call reload() with
     * `require $CWD/vendor/composer/installed.php` (or similar) as input to make sure
     * the project in which it runs can then also use this class safely, without
     * interference between PHPUnit's dependencies and the project's dependencies.
     *
     * @param  array[] $data A vendor/composer/installed.php data set
     * @return void
     *
     * @psalm-param array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>} $data
     */
    public static function reload($data)
    {
        self::$installed = $data;
        // Drop the per-vendor cache as well so it is rebuilt on the next lookup.
        self::$installedByVendor = array();
    }

    /**
     * Collects every available installed.php dataset: one per registered loader
     * vendor directory, followed by the dataset held in self::$installed.
     *
     * An empty self::$installed (source-location case or reload(array())) is not
     * appended, so every returned element carries the 'root' and 'versions' keys
     * the public methods read.
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>}>
     */
    private static function getInstalled()
    {
        if (null === self::$canGetVendors) {
            self::$canGetVendors = method_exists('Composer\Autoload\ClassLoader', 'getRegisteredLoaders');
        }

        $installed = array();

        if (self::$canGetVendors) {
            foreach (ClassLoader::getRegisteredLoaders() as $vendorDir => $loader) {
                if (isset(self::$installedByVendor[$vendorDir])) {
                    $installed[] = self::$installedByVendor[$vendorDir];
                } elseif (is_file($vendorDir.'/composer/installed.php')) {
                    $installed[] = self::$installedByVendor[$vendorDir] = require $vendorDir.'/composer/installed.php';
                    // If that vendor dir is the one this file lives in, reuse its data as the local dataset.
                    if (null === self::$installed && strtr($vendorDir.'/composer', '\\', '/') === strtr(__DIR__, '\\', '/')) {
                        self::$installed = $installed[count($installed) - 1];
                    }
                }
            }
        }

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = require __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        // Skip an empty dataset: it has no 'versions' key and would break the callers that iterate the list.
        if (array() !== self::$installed) {
            $installed[] = self::$installed;
        }

        return $installed;
    }
}

View File

@ -1,21 +0,0 @@
Copyright (c) Nils Adermann, Jordi Boggiano
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -1,10 +0,0 @@
<?php
// autoload_classmap.php @generated by Composer
// $vendorDir is the parent of this file's directory; $baseDir is the parent of $vendorDir.
$vendorDir = dirname(__DIR__);
// $baseDir is computed but not referenced by the map returned below.
$baseDir = dirname($vendorDir);
// Map of fully-qualified class name => file that defines it.
return array(
'Composer\\InstalledVersions' => $vendorDir . '/composer/InstalledVersions.php',
);

View File

@ -1,9 +0,0 @@
<?php
// autoload_namespaces.php @generated by Composer
// $vendorDir is the parent of this file's directory; $baseDir is the parent of $vendorDir.
$vendorDir = dirname(__DIR__);
$baseDir = dirname($vendorDir);
// The returned map is empty in this install.
return array(
);

View File

@ -1,14 +0,0 @@
<?php
// autoload_psr4.php @generated by Composer
// $vendorDir is the parent of this file's directory; $baseDir is the parent of $vendorDir.
$vendorDir = dirname(__DIR__);
// $baseDir is computed but not referenced by the map returned below.
$baseDir = dirname($vendorDir);
// Map of namespace prefix => list of directories under $vendorDir that hold its classes.
// A prefix may map to several directories (see Psr\Http\Message\ and League\Uri\).
return array(
'fivefilters\\Readability\\' => array($vendorDir . '/fivefilters/readability.php/src'),
'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'),
'Psr\\Http\\Message\\' => array($vendorDir . '/psr/http-factory/src', $vendorDir . '/psr/http-message/src'),
'Masterminds\\' => array($vendorDir . '/masterminds/html5/src'),
'League\\Uri\\' => array($vendorDir . '/league/uri/src', $vendorDir . '/league/uri-interfaces/src'),
);

View File

@ -1,38 +0,0 @@
<?php
// autoload_real.php @generated by Composer
/**
* Bootstrap class generated by Composer: getLoader() builds, configures and
* registers the class loader once and returns the same instance afterwards.
*/
class ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0
{
// Loader instance created by getLoader(); reused on later calls.
private static $loader;
/**
* Temporary autoload callback that only knows how to load Composer's own ClassLoader.
*
* @param string $class Name of the class being autoloaded
* @return void
*/
public static function loadClassLoader($class)
{
if ('Composer\Autoload\ClassLoader' === $class) {
require __DIR__ . '/ClassLoader.php';
}
}
/**
* Returns the configured loader, creating and registering it on the first call.
*
* @return \Composer\Autoload\ClassLoader
*/
public static function getLoader()
{
// Already initialised: hand back the same instance.
if (null !== self::$loader) {
return self::$loader;
}
// Run the platform requirement checks before the loader is created.
require __DIR__ . '/platform_check.php';
// loadClassLoader is registered only around the instantiation of ClassLoader and removed right after.
spl_autoload_register(array('ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__));
spl_autoload_unregister(array('ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0', 'loadClassLoader'));
// Apply the static tables from autoload_static.php to the new loader.
require __DIR__ . '/autoload_static.php';
call_user_func(\Composer\Autoload\ComposerStaticInitb44cc79a0eaef9cd9c2f2ac697cbe9c0::getInitializer($loader));
// NOTE(review): the `true` argument is presumably the "prepend" flag - confirm against ClassLoader::register().
$loader->register(true);
return $loader;
}
}

View File

@ -1,67 +0,0 @@
<?php
// autoload_static.php @generated by Composer
namespace Composer\Autoload;
/**
* Static autoload tables generated by Composer, plus an initializer that
* copies them onto a ClassLoader instance.
*/
class ComposerStaticInitb44cc79a0eaef9cd9c2f2ac697cbe9c0
{
// PSR-4 prefixes grouped by their first character; each value is the prefix's length in characters.
public static $prefixLengthsPsr4 = array (
'f' =>
array (
'fivefilters\\Readability\\' => 24,
),
'P' =>
array (
'Psr\\Log\\' => 8,
'Psr\\Http\\Message\\' => 17,
),
'M' =>
array (
'Masterminds\\' => 12,
),
'L' =>
array (
'League\\Uri\\' => 11,
),
);
// PSR-4 prefix => list of directories (relative to this file's parent directory) holding its classes.
public static $prefixDirsPsr4 = array (
'fivefilters\\Readability\\' =>
array (
0 => __DIR__ . '/..' . '/fivefilters/readability.php/src',
),
'Psr\\Log\\' =>
array (
0 => __DIR__ . '/..' . '/psr/log/Psr/Log',
),
'Psr\\Http\\Message\\' =>
array (
0 => __DIR__ . '/..' . '/psr/http-factory/src',
1 => __DIR__ . '/..' . '/psr/http-message/src',
),
'Masterminds\\' =>
array (
0 => __DIR__ . '/..' . '/masterminds/html5/src',
),
'League\\Uri\\' =>
array (
0 => __DIR__ . '/..' . '/league/uri/src',
1 => __DIR__ . '/..' . '/league/uri-interfaces/src',
),
);
// Fully-qualified class name => file that defines it.
public static $classMap = array (
'Composer\\InstalledVersions' => __DIR__ . '/..' . '/composer/InstalledVersions.php',
);
/**
* Builds a closure that copies the three static tables above onto $loader.
*
* The closure is bound to the ClassLoader class scope via \Closure::bind().
* NOTE(review): presumably because the target properties are not public - confirm in ClassLoader.
*
* @param ClassLoader $loader Loader that receives the tables when the closure is invoked
* @return \Closure
*/
public static function getInitializer(ClassLoader $loader)
{
return \Closure::bind(function () use ($loader) {
$loader->prefixLengthsPsr4 = ComposerStaticInitb44cc79a0eaef9cd9c2f2ac697cbe9c0::$prefixLengthsPsr4;
$loader->prefixDirsPsr4 = ComposerStaticInitb44cc79a0eaef9cd9c2f2ac697cbe9c0::$prefixDirsPsr4;
$loader->classMap = ComposerStaticInitb44cc79a0eaef9cd9c2f2ac697cbe9c0::$classMap;
}, null, ClassLoader::class);
}
}

View File

@ -1,479 +0,0 @@
{
"packages": [
{
"name": "fivefilters/readability.php",
"version": "dev-master",
"version_normalized": "dev-master",
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/readability-php.git",
"reference": "8ac5abdd497b37d2be4833bcf18d6819bba4d9c9"
},
"require": {
"ext-dom": "*",
"ext-mbstring": "*",
"ext-xml": "*",
"league/uri": "^6.4",
"masterminds/html5": "2.7.x-dev@dev",
"php": ">=7.3.0",
"psr/log": "^1.0"
},
"require-dev": {
"monolog/monolog": "^2.3",
"phpunit/phpunit": "^9"
},
"suggest": {
"monolog/monolog": "Allow logging debug information"
},
"time": "2022-07-31T06:02:47+00:00",
"default-branch": true,
"type": "library",
"installation-source": "source",
"autoload": {
"psr-4": {
"fivefilters\\Readability\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"fivefilters\\Readability\\Test\\": "test"
}
},
"license": [
"Apache-2.0"
],
"authors": [
{
"name": "Andres Rey",
"email": "andreskrey@gmail.com",
"role": "Original Developer"
},
{
"name": "Keyvan Minoukadeh",
"email": "keyvan@fivefilters.org",
"homepage": "https://www.fivefilters.org",
"role": "Developer/Maintainer"
}
],
"description": "A PHP port of Readability.js",
"homepage": "https://github.com/fivefilters/readability.php",
"keywords": [
"html",
"readability"
],
"install-path": "../fivefilters/readability.php"
},
{
"name": "league/uri",
"version": "6.7.1",
"version_normalized": "6.7.1.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri.git",
"reference": "2d7c87a0860f3126a39f44a8a9bf2fed402dcfea"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri/zipball/2d7c87a0860f3126a39f44a8a9bf2fed402dcfea",
"reference": "2d7c87a0860f3126a39f44a8a9bf2fed402dcfea",
"shasum": ""
},
"require": {
"ext-json": "*",
"league/uri-interfaces": "^2.3",
"php": "^7.4 || ^8.0",
"psr/http-message": "^1.0"
},
"conflict": {
"league/uri-schemes": "^1.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^v3.3.2",
"nyholm/psr7": "^1.5",
"php-http/psr7-integration-tests": "^1.1",
"phpstan/phpstan": "^1.2.0",
"phpstan/phpstan-deprecation-rules": "^1.0",
"phpstan/phpstan-phpunit": "^1.0.0",
"phpstan/phpstan-strict-rules": "^1.1.0",
"phpunit/phpunit": "^9.5.10",
"psr/http-factory": "^1.0"
},
"suggest": {
"ext-fileinfo": "Needed to create Data URI from a filepath",
"ext-intl": "Needed to improve host validation",
"league/uri-components": "Needed to easily manipulate URI objects",
"psr/http-factory": "Needed to use the URI factory"
},
"time": "2022-06-29T09:48:18+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "6.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"League\\Uri\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "URI manipulation library",
"homepage": "https://uri.thephpleague.com",
"keywords": [
"data-uri",
"file-uri",
"ftp",
"hostname",
"http",
"https",
"middleware",
"parse_str",
"parse_url",
"psr-7",
"query-string",
"querystring",
"rfc3986",
"rfc3987",
"rfc6570",
"uri",
"uri-template",
"url",
"ws"
],
"support": {
"docs": "https://uri.thephpleague.com",
"forum": "https://thephpleague.slack.com",
"issues": "https://github.com/thephpleague/uri/issues",
"source": "https://github.com/thephpleague/uri/tree/6.7.1"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"install-path": "../league/uri"
},
{
"name": "league/uri-interfaces",
"version": "2.3.0",
"version_normalized": "2.3.0.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri-interfaces.git",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"shasum": ""
},
"require": {
"ext-json": "*",
"php": "^7.2 || ^8.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^2.19",
"phpstan/phpstan": "^0.12.90",
"phpstan/phpstan-phpunit": "^0.12.19",
"phpstan/phpstan-strict-rules": "^0.12.9",
"phpunit/phpunit": "^8.5.15 || ^9.5"
},
"suggest": {
"ext-intl": "to use the IDNA feature",
"symfony/intl": "to use the IDNA feature via Symfony Polyfill"
},
"time": "2021-06-28T04:27:21+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"League\\Uri\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "Common interface for URI representation",
"homepage": "http://github.com/thephpleague/uri-interfaces",
"keywords": [
"rfc3986",
"rfc3987",
"uri",
"url"
],
"support": {
"issues": "https://github.com/thephpleague/uri-interfaces/issues",
"source": "https://github.com/thephpleague/uri-interfaces/tree/2.3.0"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"install-path": "../league/uri-interfaces"
},
{
"name": "masterminds/html5",
"version": "dev-master",
"version_normalized": "dev-master",
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/html5-php.git",
"reference": "d2c79ada2a87bb7eaafe1a39e4e3bb37853099aa"
},
"require": {
"ext-ctype": "*",
"ext-dom": "*",
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7"
},
"time": "2022-12-11T19:41:09+00:00",
"default-branch": true,
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"installation-source": "source",
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"Masterminds\\HTML5\\Tests\\": "test/HTML5"
}
},
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"dom",
"html",
"html5",
"parser",
"querypath",
"serializer",
"xml"
],
"install-path": "../masterminds/html5"
},
{
"name": "psr/http-factory",
"version": "1.0.1",
"version_normalized": "1.0.1.0",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-factory.git",
"reference": "12ac7fcd07e5b077433f5f2bee95b3a771bf61be"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-factory/zipball/12ac7fcd07e5b077433f5f2bee95b3a771bf61be",
"reference": "12ac7fcd07e5b077433f5f2bee95b3a771bf61be",
"shasum": ""
},
"require": {
"php": ">=7.0.0",
"psr/http-message": "^1.0"
},
"time": "2019-04-30T12:38:16+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interfaces for PSR-7 HTTP message factories",
"keywords": [
"factory",
"http",
"message",
"psr",
"psr-17",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-factory/tree/master"
},
"install-path": "../psr/http-factory"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"version_normalized": "1.0.1.0",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2016-08-06T14:39:51+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interface for HTTP messages",
"homepage": "https://github.com/php-fig/http-message",
"keywords": [
"http",
"http-message",
"psr",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-message/tree/master"
},
"install-path": "../psr/http-message"
},
{
"name": "psr/log",
"version": "1.1.4",
"version_normalized": "1.1.4.0",
"source": {
"type": "git",
"url": "https://github.com/php-fig/log.git",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/log/zipball/d49695b909c3b7628b6289db5479a1c204601f11",
"reference": "d49695b909c3b7628b6289db5479a1c204601f11",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2021-05-03T11:20:27+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.1.x-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"Psr\\Log\\": "Psr/Log/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "https://www.php-fig.org/"
}
],
"description": "Common interface for logging libraries",
"homepage": "https://github.com/php-fig/log",
"keywords": [
"log",
"psr",
"psr-3"
],
"support": {
"source": "https://github.com/php-fig/log/tree/1.1.4"
},
"install-path": "../psr/log"
}
],
"dev": true,
"dev-package-names": []
}

View File

@ -1,90 +0,0 @@
<?php return array(
'root' => array(
'name' => '__root__',
'pretty_version' => '1.0.0+no-version-set',
'version' => '1.0.0.0',
'reference' => NULL,
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'dev' => true,
),
'versions' => array(
'__root__' => array(
'pretty_version' => '1.0.0+no-version-set',
'version' => '1.0.0.0',
'reference' => NULL,
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'dev_requirement' => false,
),
'fivefilters/readability.php' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'reference' => '8ac5abdd497b37d2be4833bcf18d6819bba4d9c9',
'type' => 'library',
'install_path' => __DIR__ . '/../fivefilters/readability.php',
'aliases' => array(
0 => '9999999-dev',
),
'dev_requirement' => false,
),
'league/uri' => array(
'pretty_version' => '6.7.1',
'version' => '6.7.1.0',
'reference' => '2d7c87a0860f3126a39f44a8a9bf2fed402dcfea',
'type' => 'library',
'install_path' => __DIR__ . '/../league/uri',
'aliases' => array(),
'dev_requirement' => false,
),
'league/uri-interfaces' => array(
'pretty_version' => '2.3.0',
'version' => '2.3.0.0',
'reference' => '00e7e2943f76d8cb50c7dfdc2f6dee356e15e383',
'type' => 'library',
'install_path' => __DIR__ . '/../league/uri-interfaces',
'aliases' => array(),
'dev_requirement' => false,
),
'masterminds/html5' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'reference' => 'd2c79ada2a87bb7eaafe1a39e4e3bb37853099aa',
'type' => 'library',
'install_path' => __DIR__ . '/../masterminds/html5',
'aliases' => array(
0 => '2.7.x-dev',
),
'dev_requirement' => false,
),
'psr/http-factory' => array(
'pretty_version' => '1.0.1',
'version' => '1.0.1.0',
'reference' => '12ac7fcd07e5b077433f5f2bee95b3a771bf61be',
'type' => 'library',
'install_path' => __DIR__ . '/../psr/http-factory',
'aliases' => array(),
'dev_requirement' => false,
),
'psr/http-message' => array(
'pretty_version' => '1.0.1',
'version' => '1.0.1.0',
'reference' => 'f6561bf28d520154e4b0ec72be95418abe6d9363',
'type' => 'library',
'install_path' => __DIR__ . '/../psr/http-message',
'aliases' => array(),
'dev_requirement' => false,
),
'psr/log' => array(
'pretty_version' => '1.1.4',
'version' => '1.1.4.0',
'reference' => 'd49695b909c3b7628b6289db5479a1c204601f11',
'type' => 'library',
'install_path' => __DIR__ . '/../psr/log',
'aliases' => array(),
'dev_requirement' => false,
),
),
);

View File

@ -1,26 +0,0 @@
<?php
// platform_check.php @generated by Composer
$issues = array();

// The installed dependency set needs PHP 7.4.0 or newer.
if (PHP_VERSION_ID < 70400) {
    $issues[] = 'Your Composer dependencies require a PHP version ">= 7.4.0". You are running ' . PHP_VERSION . '.';
}

if ($issues) {
    if (!headers_sent()) {
        header('HTTP/1.1 500 Internal Server Error');
    }

    // Print the report ourselves only when display_errors is disabled.
    if (!ini_get('display_errors')) {
        $banner = 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL;
        $runsOnCli = PHP_SAPI === 'cli' || PHP_SAPI === 'phpdbg';

        if ($runsOnCli) {
            fwrite(STDERR, $banner . implode(PHP_EOL, $issues) . PHP_EOL.PHP_EOL);
        } elseif (!headers_sent()) {
            // The web output omits the "You are running <version>." sentence.
            echo $banner . str_replace('You are running '.PHP_VERSION.'.', '', implode(PHP_EOL, $issues)) . PHP_EOL.PHP_EOL;
        }
    }

    trigger_error('Composer detected issues in your platform: ' . implode(' ', $issues), E_USER_ERROR);
}

View File

@ -1,2 +0,0 @@
test/* linguist-language=PHP
* text=auto eol=lf

View File

@ -1,42 +0,0 @@
# This is a basic workflow to help you get started with Actions
name: CI
# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the master branch
push:
branches: [ master ]
pull_request:
branches: [ master ]
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
# This workflow contains a single job called "build"
build:
# The type of runner that the job will run on
runs-on: ubuntu-latest
strategy:
matrix:
php: ['7.3', '7.4', '8']
libxml: ['2.9.4', '2.9.5', '2.9.10', '2.9.12']
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
# Runs a single command using the runner's shell
#- name: Run a one-line script
# run: echo Hello, world!
# Runs a set of commands using the runner's shell
- name: Run a multi-line script
run: |
composer install
docker build --build-arg PHP_VERSION=${{matrix.php}} --build-arg LIBXML_VERSION=${{matrix.libxml}} -t gh-action - < ./docker/php/Dockerfile
docker run --volume $PWD:/app --workdir="/app" --env XDEBUG_MODE=coverage gh-action php ./vendor/bin/phpunit --coverage-clover /app/test/clover.xml

View File

@ -1,5 +0,0 @@
.idea/
vendor
composer.lock
/test.*
/test/changed/

View File

@ -1,14 +0,0 @@
# Authors
Readability.php developed by **Andres Rey**.
Based on Arc90's readability.js (1.7.1) script available at: http://code.google.com/p/arc90labs-readability.
Copyright (c) 2010 Arc90 Inc
The AUTHORS/Contributors are (and/or have been):
* Andres Rey
* Sergiy Lavryk
* Pedro Amorim
* Malu Decks
* Keyvan Minoukadeh

View File

@ -1,145 +0,0 @@
# Change Log
All notable changes to this project will be documented in this file.
## Unreleased
## [v2.1.0](https://github.com/andreskrey/readability.php/releases/tag/v2.1.0)
- Avoid overwriting extracted metadata with similarly named keys (like `og:image` and `og:image:width`)
- Imported new `getSiteName()` feature from JS version as of [21 Dec 2018](https://github.com/mozilla/readability/pull/504)
- Added getFirstElementChild function to NodeTrait + test case (Issue #83)
- Reworked the test suite to use TestPage objects and give more hints about what failed
- Removed getWordThreshold and setWordThreshold configuration functions
- Added NodeUtility::filterTextNodes and deprecated NodeTrait getChildren()
- Added new DOMNodeList fake class that mimics the original DOMNodeList class but allows adding new nodes to the list
- Added new Dockerfiles that pull different versions of PHP and libxml. Now we are supporting 4 versions of PHP and 6 versions of libxml!
## [v2.0.1](https://github.com/andreskrey/readability.php/releases/tag/v2.0.1)
- Fixed small issue that prevented the main image from showing up in the results
## [v2.0.0](https://github.com/andreskrey/readability.php/releases/tag/v2.0.0)
- [BREAKING CHANGE] Bumped the minimum supported version of PHP to 7.0
- Clean `<aside>` tags during `prepArticle()`.
- Merged PR #58: Fix notice non-object on $parentOfTopCandidate for tumblr.com
- Fixed issue #63: Division by zero
- Housekeeping:
- Removed $parseSuccessful flag that wasn't needed anymore
- Rename wordThreshold to charThreshold and throw deprecation notices. WordThreshold will be removed in version 3.0.
- Added "-ad-" as unlikely candidate
- Added Docker containers with PHP 7.0, 7.1, and 7.2 and makefile to trigger the tests.
- Imported new code from the JS version as of [19 Nov 2018](https://github.com/mozilla/readability/commit/876c81f710711ba2afb36dd83889d4c5b4fc2743), which includes the following changes:
- Move phrasing contents [into paragraphs](https://github.com/mozilla/readability/commit/9f2c5cb42ee9635f091178271d66888cbb47e5dc)
- Improved the title detection
- Remove [single cell tables](https://github.com/mozilla/readability/commit/ea4165721f9105d8f1e53cfecdcfdafceaf3e4bf)
- Improved the detection of video related elements
- New test cases
- Various minor fixes
## [v1.2.0](https://github.com/andreskrey/readability.php/releases/tag/v1.2.0)
- Merged PR#49 (Missing object when calling `->getContent()`)
- Imported all changes from Readability.js as of 2 March 2018 ([8525c6a](https://github.com/mozilla/readability/commit/8525c6af36d3badbe27c4672a6f2dd99ddb4097f)):
- Check for `<base>` elements before converting URLs to absolute.
- Clean `<link>` tags on `prepArticle()`
- Attempt to return at least some text if all the algorithm runs fail (Check PR [#423](https://github.com/mozilla/readability/pull/423) on JS version)
- Add new test cases for the previous changes
- And all other changes reflected [in this diff](https://github.com/mozilla/readability/compare/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5...8525c6af36d3badbe27c4672a6f2dd99ddb4097f)
## [v1.1.1](https://github.com/andreskrey/readability.php/releases/tag/v1.1.1)
- Switched from assertEquals to assertSame on unit testing to avoid weak comparisons.
- Added a safe check to avoid sending the DOMDocument as a node when scanning for node ancestors.
- Fix issue #45: Small mistake in documentation
- Fix issue #46: Added `data-src` as a image source path
- Fixed bug when extracting all the image of the article (Was extracting images from the original DOM instead of the parsed one)
- Added the `->getDOMDocument()` getter to retrieve the fully parsed DOMDocument
- Merged PR #48 that allows passing an array as configuration (@topotru)
## [v1.1.0](https://github.com/andreskrey/readability.php/releases/tag/v1.1.0)
- Added 'data-orig' as an URL source for images
- Removed 'modal' as a negative property from classes
- Added option to inject a logger
- Removed all references to the `data-readability` tags that don't apply anymore to the new structure
- Merged PR #38 (Missing DOMEntityReference)
## [v1.0.0](https://github.com/andreskrey/readability.php/releases/tag/v1.0.0)
- Node encapsulation is gone. Pre v1 all nodes were encapsulated in a Readability class, which created lots of trouble with dependencies, responsibilities, and properties. Now all the encapsulation is gone: all the DOMNodes inside the Readability class are extensions of the original DOM classes, which allows the system to take advantage of the functions and properties of DOMDocument.
- HTMLParser is gone, Readability is the new main class. Switched things a bit for this release. Pre v1 you had to create an HTMLParser class to parse the HTML. Now you have to create a Readability class, feed it the text, and check the result.
- No more dumb arrays as a result. If you want to get the title, content, images, or anything else you'll have to use the getters of the Readability class.
- Environment class is gone. Now you have to create a configuration class and use setters to set your configuration options.
- Exceptions. Make sure you wrap your Readability class in a try catch block, because if it fails to parse your HTML, it will throw a `ParseException`.
- Minimum PHP version bumped to 5.6.
## [v0.3.1](https://github.com/andreskrey/readability.php/releases/tag/v0.3.1)
- Trim titles when detecting hierarchical separators to avoid false negatives on strings with spaces.
- Fix issue when converting divs to p nodes and never rating them (issue #29)
- Fix "Unsupported operand types" (PR #31)
- Fix division by zero when no title was found (issue #32)
- New function to retrieve all images at once (PR #30)
- Get the title from the `<title>` tag before searching on the `<meta>` tags
## [v0.3.0](https://github.com/andreskrey/readability.php/releases/tag/v0.3.0)
- Merged PR #24. Fixes notice when trying to extract `og:image`
- Up to date with commit [c3ff1a2](https://github.com/mozilla/readability/commit/c3ff1a2d2c94c1db257b2c9aa88a4b8fbeb221c5) (2017-10-16), which includes the following changes:
- New tags added to the unlikelyCandidates regex
- Detection and removal of hierarchical separators in titles
- Added more tags to clean after parsing the article (`button`, `textarea`, `select`, etc.)
- New way to detect empty nodes (including an edge case where a node with a `&nbsp;` was detected as a node with content)
- Better approach to find a top candidate (especially when a top candidate is the only child of a parent node, which allows a more accurate joining of sibling elements)
- Detect text direction (`ltr` or `rtl`)
- Detect and mark data tables to avoid removing them during final clean up
- Major fixes when scanning and deleting nodes (no need to traverse backwards anymore)
- Node cleaning via regex matches
- Clean table attributes during final clean up.
- Added license
Next release after this one will be v1 and will be a major refactor around Readability and HTMLParser methods and responsibilities.
## [v0.2.2](https://github.com/andreskrey/readability.php/releases/tag/v0.2.2)
- Added a safecheck for really nasty HTML
- Added summonCthulhu option, to remove all script tags via regex
## [v0.2.1](https://github.com/andreskrey/readability.php/releases/tag/v0.2.1)
- Added `normalizeEntities` flag to convert UTF-8 characters to their HTML entity equivalents. Fixes bugs on HTML documents with mixed encoding.
- Added more information to the readme.md file
- New way to create a backup DOM: not creating a backup. In the previous version, the system cloned the $this->dom object to keep it as a backup in order to restart the algorithm with other flags, if needed. This seemed to work until I realized that *sometimes* the backup changes even if we are not touching it. Seems that the `dom` and `backupdom` objects are linked and *some* changes on the dom object reach the backupdom object. The new approach consists in deleting the backupdom object and recreating from scratch the dom object. Of course this has a performance impact, but seems to be quite low.
## [v0.2.0](https://github.com/andreskrey/readability.php/releases/tag/v0.2.0)
100% complete port of Readability.js!
- Every test unit passes
- Readability.php produces the same exact output as Readability.js
- I'm happy :)
### Fixed
- Lots of bugs
- Merged PR by DavidFricker to avoid exceptions while grabbing the document content
### Added
- substituteEntities flag, to avoid replacing special characters with HTML entities. There's nothing we can do about `&nbsp;`, that entity is replaced by libxml and there's no way to disable it.
- Named data sets so it's easier to detect which test case is failing.
### Removed
- Couple of test cases that involved broken JS. There's nothing we can do about JS spilling onto the text.
## [0.0.3-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.3v-alpha)
We are getting closer to being a 100% complete port of Readability.js!
- Added prepArticle to remove junk after selecting the top candidates.
- Added a function to restore score after selecting top candidates. This basically works by scanning the data-readability tag and restoring the score to the contentScore variable. This is a horrible hack and should be removed once we ditch the Element interface of html-to-markdown and start extending the DOMDocument object.
- Switched all strlen functions to mb_strlen
- Fixed lots of bugs and pretty sure that introduced a bunch of new ones.
## [0.0.2-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.2-alpha)
- Last version I'm using master as the main development branch. All unreleased changes and main development will happen in the develop branch.
## [0.0.1-alpha](https://github.com/andreskrey/readability.php/releases/tag/v0.0.1-alpha)
- Initial release

View File

@ -1,31 +0,0 @@
# Contributing
Contributions are **welcome** and will be fully **credited**.
When it comes to the core article-extraction functionality, please contribute to [Mozilla's Readability](https://github.com/mozilla/readability/) repository, as we're trying to mirror that here.
For anything else, we accept contributions via Pull Requests on [Github](https://github.com/fivefilters/readability.php/).
## Pull Requests
- **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date.
- **Add tests!** - Your patch won't be accepted if it doesn't have tests.
- **Create feature branches** - Don't ask us to pull from your master branch.
- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests.
- **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting.
- **Don't forget to add yourself to AUTHORS.md** - If you want to be credited, make sure you add your information (whatever you want to include) in `AUTHORS.md`.
## Running Tests
``` bash
$ make test-all #requires docker and docker-compose
```
**Happy coding**!

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,27 +0,0 @@
# Test targets: each one runs PHPUnit inside a docker-compose service named
# php-<php version>-libxml-<libxml version>.
# NOTE(review): only test-all is declared phony below; the other targets do not
# appear to produce files either — confirm and consider listing them as well.
.PHONY: test-all
# Start the containers, test PHP 7.3, 7.4 and 8 (each against libxml 2.9.10), then stop them
test-all: start test-7.3 test-7.4 test-8 stop
# Run the suite in the PHP 7.3 container (docker-compose exec needs it to be running already)
test-7.3:
docker-compose exec php-7.3-libxml-2.9.10 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
# Run the suite in the PHP 7.4 container
test-7.4:
docker-compose exec php-7.4-libxml-2.9.10 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
# Run the suite in the PHP 8 container
test-8:
docker-compose exec php-8-libxml-2.9.10 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
# Bring up the three libxml 2.9.10 containers in the background
start:
docker-compose up -d php-7.3-libxml-2.9.10 php-7.4-libxml-2.9.10 php-8-libxml-2.9.10
# Stop every container of the compose project
stop:
docker-compose stop
# Bring up and test every PHP x libxml combination in turn (3 x 4 = 12 services),
# then stop all containers. `$$` escapes `$` for make so the shell sees the loop variables.
test-all-versions:
for php_version in 7.3 7.4 8; do \
for libxml_version in 2.9.4 2.9.5 2.9.10 2.9.12; do \
docker-compose up -d php-$$php_version-libxml-$$libxml_version; \
docker-compose exec php-$$php_version-libxml-$$libxml_version php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml; \
done \
done
docker-compose stop

View File

@ -1,250 +0,0 @@
# Readability.php
## News (August 2021)
Andres Rey, the [original developer](https://github.com/andreskrey/readability.php) of Readability.php has kindly let us take over maintenance and development of the project.
Please bear with us while we catch up with [Readability.js](https://github.com/mozilla/readability) changes. There'll be a new release (3.0.0) when we're ready.
For the changes we've made so far in this repository, please see our [blog post](https://www.fivefilters.org/2021/readability/).
## About
[![Latest Stable Version](https://poser.pugx.org/fivefilters/readability.php/v/stable)](https://packagist.org/packages/fivefilters/readability.php) [![Tests](https://github.com/fivefilters/readability.php/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/fivefilters/readability.php/actions/workflows/main.yml)
PHP port of *Mozilla's* **[Readability.js](https://github.com/mozilla/readability)**. Parses html text (usually news and other articles) and returns **title**, **author**, **main image** and **text content** without nav bars, ads, footers, or anything that isn't the main body of the text. Analyzes each node, gives them a score, and determines what's relevant and what can be discarded.
![Screenshot](https://raw.githubusercontent.com/fivefilters/readability.php/assets/screenshot.png)
The project's aim is to be a 1 to 1 port of Mozilla's version and to follow closely all changes introduced there, but there are some major differences in the structure. Most of the code is a 1:1 copy (even the comments were imported), but some functions and structures were adapted to better suit the PHP language.
**Original Developer**: Andres Rey
**Developer/Maintainer**: FiveFilters.org
## Code porting
Master branch - Up to date on 26 August 2021, with the exception of a [piece of code](https://github.com/fivefilters/readability.php/commit/1c662465bded2ab3acf3b975a1315c8c45f0bf73#diff-b9b31807b1a39caec18ddc293e9c52931ba8b55191c61e6b77a623d699a599ffR1899) which doesn't produce the same results in PHP for us compared to the JS version. Perhaps there's an error, or some difference in the underlying code that affects this. If you know what's wrong, please feel free to drop us a note or submit a pull request. :)
Version 2.1.0 - Up to date with Readability.js up to [19 Nov 2018](https://github.com/mozilla/readability/commit/876c81f710711ba2afb36dd83889d4c5b4fc2743).
## Requirements
PHP 7.3+, ext-dom, ext-xml, and ext-mbstring. To install these dependencies (in the rare case your system does not have them already), you could try something like this in *nix like environments:
`$ sudo apt-get install php7.4-xml php7.4-mbstring`
## How to use it
First you have to require the library using composer:
`composer require fivefilters/readability.php`
Then, create a Readability class and pass a Configuration class, feed the `parse()` function with your HTML and echo the variable:
```php
use fivefilters\Readability\Readability;
use fivefilters\Readability\Configuration;
use fivefilters\Readability\ParseException;
$readability = new Readability(new Configuration());
$html = file_get_contents('http://your.favorite.newspaper/article.html');
try {
$readability->parse($html);
echo $readability;
} catch (ParseException $e) {
echo sprintf('Error processing text: %s', $e->getMessage());
}
```
Your script will output the parsed text or inform about any errors. You should always wrap the `->parse` call in a try/catch block because if the HTML cannot be parsed correctly, a `ParseException` will be thrown.
If you want to have a finer control on the output, just call the properties one by one, wrapping it with your own HTML.
```php
<h1><?= $readability->getTitle(); ?></h1>
<h2>By <?= $readability->getAuthor(); ?></h2>
<div class="content"><?= $readability->getContent(); ?></div>
```
Here's a list of the available properties:
- Article title: `->getTitle();`
- Article content: `->getContent();`
- Excerpt: `->getExcerpt();`
- Main image: `->getImage();`
- All images: `->getImages();`
- Author: `->getAuthor();`
- Text direction (ltr or rtl): `->getDirection();`
If you need to tweak the final HTML you can get the DOMDocument of the result by calling `->getDOMDocument()`.
## Options
You can change the behaviour of Readability via the Configuration object. For example, if you want to fix relative URLs and declare the original URL, you could set up the configuration like this:
```php
$configuration = new Configuration();
$configuration
->setFixRelativeURLs(true)
->setOriginalURL('http://my.newspaper.url/article/something-interesting-to-read.html');
```
Also you can pass an array of configuration parameters to the constructor:
```php
$configuration = new Configuration([
'fixRelativeURLs' => true,
'originalURL' => 'http://my.newspaper.url/article/something-interesting-to-read.html',
// other parameters ... listing below
]);
```
Then you pass this Configuration object to Readability. The following options are available. Remember to prepend `set` when calling them using native setters.
- **MaxTopCandidates**: default value `5`, max amount of top level candidates.
- **CharThreshold**: default value `500`, minimum amount of characters to consider that the article was parsed successfully.
- **ArticleByLine**: default value `false`, search for the article byline and remove it from the text. It will be moved to the article metadata.
- **StripUnlikelyCandidates**: default value `true`, remove nodes that are unlikely to have relevant information. Useful for debugging or parsing complex or non-standard articles.
- **CleanConditionally**: default value `true`, remove certain nodes after parsing to return a cleaner result.
- **WeightClasses**: default value `true`, weight classes during the rating phase.
- **FixRelativeURLs**: default value `false`, convert relative URLs to absolute. Like `/test` to `http://host/test`.
- **SubstituteEntities**: default value `false`, disables the `substituteEntities` flag of libxml. Will avoid substituting HTML entities. Like `&aacute;` to á.
- **NormalizeEntities**: default value `false`, converts UTF-8 characters to their HTML entity equivalents. Useful to parse HTML with mixed encoding.
- **OriginalURL**: default value `http://fakehost`, original URL from the article used to fix relative URLs.
- **KeepClasses**: default value `false`, which removes all `class="..."` attribute values from HTML elements.
- **Parser**: default value `html5`, which uses HTML5-PHP for parsing. Set to `libxml` to use that instead (not recommended for modern HTML documents).
- **SummonCthulhu**: default value `false`, remove all `<script>` nodes via regex. This is not ideal as it might break things, but if you've set the parser to libxml (see above), it might be the only solution to [libxml problems with unescaped javascript](https://github.com/fivefilters/readability.php#known-libxml-parsing-issues).
### Debug log
Logging is optional and you will have to inject your own logger to save all the debugging messages. To do so, use a logger that implements the [PSR-3 logging interface](https://github.com/php-fig/log) and pass it to the configuration object. For example:
```php
// Using monolog
$log = new Logger('Readability');
$log->pushHandler(new StreamHandler('path/to/my/log.txt'));
$configuration->setLogger($log);
```
In the log you will find information about the parsed nodes, why they were removed, and why they were considered relevant to the final article.
## Limitations
Of course the main limitation is PHP. Websites that load the content through lazy loading, AJAX, or any type of javascript fueled call will be ignored (actually, *not run*) and the resulting text will be incorrect, compared to the readability.js results. All the articles you want to parse with readability.php need to be complete and all the content should be in the HTML already.
## Known libxml parsing issues
Readability.php as of version 3.0.0 uses a HTML5 parser. Earlier versions used libxml. The issues below apply to libxml parsing, so if you're using an earlier version of Readability.php (pre 3.0.0), or if you've set the parser to libxml in the configuration, read on...
### Javascript spilling into the text body
DOMDocument has some issues while parsing javascript with unescaped HTML on strings. Consider the following code:
```html
<div> <!-- Offending div without closing tag -->
<script type="text/javascript">
var test = '</div>';
// I should not appear on the result
</script>
```
If you would like to remove the scripts of the HTML (like readability does), you would expect to end up with just one div and one comment on the final HTML. The problem is that libxml takes that closing div tag inside the javascript string as a HTML tag, effectively closing the unclosed tag and leaving the rest of the javascript as a string within a P tag. If you save that node, the final HTML will end up like this:
```html
<div> <!-- Offending div without closing tag -->
<p>';
// I should not appear on the result
</p></div>
```
This is a libxml issue and not a Readability.php bug.
There's a workaround for this: using the `summonCthulhu` option. This will remove all script tags **via regex**, which is not ideal because you may end up summoning [the lord of darkness](https://stackoverflow.com/a/1732454).
### `&nbsp;` entities disappearing
`&nbsp;` entities are converted to spaces automatically by libxml and there's no way to disable it.
### Self closing tags rendering as fully expanded tags
Self closing tags like `<br />` get automatically expanded to `<br></br>`. There is no way to disable this in libxml.
## Dependencies
Readability.php uses
* [HTML5-PHP](https://github.com/Masterminds/html5-php) to parse and serialise HTML.
* [PSR Log](https://github.com/php-fig/log) interface to define the allowed type of loggers.
* [Monolog](https://github.com/Seldaek/monolog) is only required on development installations. (`--dev` option during `composer install`).
## To-do
- Keep up with Readability.js changes
- Add a small template engine for the __toString() method, instead of using a hardcoded one.
- Replace all the `iterator_to_array` calls with a custom PHP generator that keeps track of the removed or altered nodes.
## How it works
Readability parses all the text with DOMDocument, scans the text nodes and gives them a score, based on the amount of words, links and type of element. Then it selects the highest scoring element and creates a new DOMDocument with all its siblings. Each sibling is scored to discard useless elements, like nav bars, empty nodes, etc.
## Security
If you're going to use Readability with untrusted input (whether in HTML or DOM form), we **strongly** recommend you use a sanitizer library like [HTML Purifier](https://github.com/ezyang/htmlpurifier) to avoid script injection when you use
the output of Readability. We would also recommend using [CSP](https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP) to add further defense-in-depth
restrictions to what you allow the resulting content to do. The Firefox integration of
reader mode uses both of these techniques itself. Sanitizing unsafe content out of the input is explicitly not something we aim to do as part of Readability itself - there are other good sanitizer libraries out there, use them!
## Testing
Any version of PHP from 7.3 and above installed locally should be enough to develop new features and add new test cases. If you want to be 100% sure that your change doesn't create any issues with other versions of PHP, you can use the provided Docker containers, which currently cover PHP 7.3, 7.4, and 8.0.
You'll need Docker and Docker Compose for this. To run all the tests in the three PHP versions above, just type the following command:
```bash
make test-all
```
This will start all the containers and run all the tests on every supported version of PHP. If you want to test against a specific version, you can use `make test-7.3`, `make test-7.4`, or `make test-8`.
### Different versions of libxml
If you want to test against supported versions of PHP *AND* multiple versions of libxml, run `test-all-versions`. This will test against PHP versions 7.3 to 8 and libxml versions 2.9.4, 2.9.5, 2.9.10, and 2.9.12. Normally you won't need to do this unless you think you've found a bug in a specific version of libxml.
### Updating the expected tests
If you've made an improvement to the code, you'll probably want to examine the Readability.php output for the test cases here. To do that, run the following command first from the root of the project folder:
docker-compose up -d php-7.4-libxml-2.9.10
You should now have a docker image running with the project root folder mapped to /app/ on your Docker instance (see `docker-compose.yml`). Any changes to these files will be accessible from the Docker instance from now on.
Next, create a folder in tests/ called /changed, then run the following command to run the test suite:
docker-compose exec -e output-changes=1 -e output-diff=1 php-7.4-libxml-2.9.10 php /app/vendor/phpunit/phpunit/phpunit --configuration /app/phpunit.xml
The two environment variables (`output-changes=1` and `output-diff=1`) will result in new output for any failing test (along with a diff of changes) being written to the changed/ folder.
If you're happy the changes are okay, set `output-diff=0` and the diff files will no longer be written, making it easier to copy the new expected output files over to their corresponding locations in test-pages\.
## License
Based on Arc90's readability.js (1.7.1) script available at: http://code.google.com/p/arc90labs-readability
Copyright (c) 2010 Arc90 Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,52 +0,0 @@
{
"name": "fivefilters/readability.php",
"type": "library",
"description": "A PHP port of Readability.js",
"keywords": ["readability", "html"],
"homepage": "https://github.com/fivefilters/readability.php",
"license": "Apache-2.0",
"repositories": [
{
"name": "masterminds/html5",
"type": "vcs",
"url": "https://dev.tt-rss.org/fox/html5-php.git"
}
],
"authors": [
{
"name": "Andres Rey",
"email": "andreskrey@gmail.com",
"role": "Original Developer"
},
{
"name": "Keyvan Minoukadeh",
"email": "keyvan@fivefilters.org",
"homepage": "https://www.fivefilters.org",
"role": "Developer/Maintainer"
}
],
"autoload": {
"psr-4": {
"fivefilters\\Readability\\": "src/"
}
},
"autoload-dev": {
"psr-4": {"fivefilters\\Readability\\Test\\": "test"}
},
"require": {
"php": ">=7.3.0",
"ext-dom": "*",
"ext-xml": "*",
"ext-mbstring": "*",
"psr/log": "^1.0",
"masterminds/html5": "2.7.x-dev@dev",
"league/uri": "^6.4"
},
"require-dev": {
"phpunit/phpunit": "^9",
"monolog/monolog": "^2.3"
},
"suggest": {
"monolog/monolog": "Allow logging debug information"
}
}

View File

@ -1,100 +0,0 @@
version: '3'
services:
php-7.3-libxml-2.9.4: &template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.3
volumes:
- ./:/app
tty: true
php-7.3-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.3
php-7.3-libxml-2.9.10:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.10
PHP_VERSION: 7.3
php-7.3-libxml-2.9.12:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.12
PHP_VERSION: 7.3
php-7.4-libxml-2.9.4:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 7.4
php-7.4-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 7.4
php-7.4-libxml-2.9.10:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.10
PHP_VERSION: 7.4
php-7.4-libxml-2.9.12:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.12
PHP_VERSION: 7.4
php-8-libxml-2.9.4:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.4
PHP_VERSION: 8
php-8-libxml-2.9.5:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.5
PHP_VERSION: 8
php-8-libxml-2.9.10:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.10
PHP_VERSION: 8
php-8-libxml-2.9.12:
<<: *template
build:
context: ./docker/php
args:
LIBXML_VERSION: 2.9.12
PHP_VERSION: 8

View File

@ -1,16 +0,0 @@
# Load pre-built image of PHP (php-cli) and libxml.
# See https://hub.docker.com/r/fivefilters/php-libxml for supported versions
# Use build.Dockerfile to compile new versions of PHP/libxml
# For reference, default package versions for Ubuntu are:
# Ubuntu 18.04 - php 7.2, libxml 2.9.4
# Ubuntu 20.04 - php 7.4, libxml 2.9.10
# Ubuntu 20.10 - php 7.4, libxml 2.9.10
# Ubuntu 21.04 - php 7.4, libxml 2.9.10
# Ubuntu 21.10 - php 8.0, libxml 2.9.10
ARG PHP_VERSION
ARG LIBXML_VERSION
FROM fivefilters/php-libxml:php-${PHP_VERSION}-libxml-${LIBXML_VERSION}
RUN apt-get update

View File

@ -1,56 +0,0 @@
# Use this file to build a Docker image using the versions of PHP and Libxml specified.
# We have pre-built images at https://hub.docker.com/r/fivefilters/php-libxml which are faster to load than building from this file.
# To build using this file, use the following command from the root project folder (replace version of PHP/Libxml with the ones you want to use):
# docker build --build-arg PHP_VERSION=7.4 --build-arg LIBXML_VERSION=2.9.12 -t php-libxml -f ./docker/php/Dockerfile .
# To upload the image to Docker Hub, the tag (-t) value should be something like org/repo:tag, e.g. for us, fivefilters/php-libxml:php-8-libxml-2.9.12
# The tag can be applied afterwards too, e.g. docker tag php-libxml org/repo:tag
ARG PHP_VERSION=8
FROM php:${PHP_VERSION}-cli
# Install sqlite and libonig-dev (required for building PHP 7.4)
RUN apt-get update && apt-get install -y libsqlite3-dev libonig-dev
# Install libsodium (package doesn't work for some reason)
RUN curl https://download.libsodium.org/libsodium/releases/LATEST.tar.gz -o /tmp/libsodium.tar.gz && \
cd /tmp && \
tar -xzf libsodium.tar.gz && \
cd libsodium-stable/ && \
./configure && \
make && make check && \
make install
# Install custom version of libxml2
RUN apt-get install -y automake libtool unzip libssl-dev
# Remove current version
RUN apt-get remove -y libxml2
# Download new version, configure and compile
ARG LIBXML_VERSION=2.9.12
RUN curl https://gitlab.gnome.org/GNOME/libxml2/-/archive/v$LIBXML_VERSION/libxml2-v$LIBXML_VERSION.zip -o /tmp/libxml.zip && \
cd /tmp && \
unzip libxml.zip && \
cd libxml2-v$LIBXML_VERSION && \
./autogen.sh --libdir=/usr/lib/x86_64-linux-gnu && \
make && \
make install
# Recompile PHP with the new libxml2 library
RUN docker-php-source extract && \
cd /usr/src/php && \
./configure \
--with-libxml \
--enable-mbstring \
--with-openssl \
--with-config-file-path=/usr/local/etc/php \
--with-config-file-scan-dir=/usr/local/etc/php/conf.d && \
make && make install && \
docker-php-source delete
RUN apt-get update
#RUN pecl install libsodium
# Check if there's a pinned version of Xdebug for compatibility reasons
ARG XDEBUG_VERSION
RUN pecl install xdebug$(if [ ! ${XDEBUG_VERSION} = '' ]; then echo -${XDEBUG_VERSION} ; fi) && docker-php-ext-enable xdebug
# Required by coveralls
RUN apt-get install git -y

View File

@ -1,16 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit bootstrap="vendor/autoload.php"
colors="true"
stopOnFailure="false"
stopOnError="false">
<testsuites>
<testsuite name="Readability.php Test Suite">
<directory>./test/</directory>
</testsuite>
</testsuites>
<filter>
<whitelist>
<directory suffix=".php">src/</directory>
</whitelist>
</filter>
</phpunit>

View File

@ -1,423 +0,0 @@
<?php
namespace fivefilters\Readability;
use Psr\Log\LoggerAwareTrait;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
/**
 * Class Configuration.
 *
 * Option container for Readability. Every option is a protected property
 * with a matching getter and a fluent setter (setters return $this).
 * Options can be set in bulk through the constructor array.
 */
class Configuration
{
    use LoggerAwareTrait;

    /**
     * @var int
     */
    protected $maxTopCandidates = 5;

    /**
     * @var int
     */
    protected $charThreshold = 500;

    /**
     * @var bool
     */
    protected $articleByLine = false;

    /**
     * @var bool
     */
    protected $stripUnlikelyCandidates = true;

    /**
     * @var bool
     */
    protected $cleanConditionally = true;

    /**
     * @var bool
     */
    protected $weightClasses = true;

    /**
     * @var bool
     */
    protected $fixRelativeURLs = false;

    /**
     * @var bool
     */
    protected $substituteEntities = false;

    /**
     * @var bool
     */
    protected $normalizeEntities = false;

    /**
     * @var bool
     */
    protected $summonCthulhu = false;

    /**
     * @var string
     */
    protected $originalURL = 'http://fakehost';

    /**
     * @var string
     */
    protected $parser = 'html5';

    /**
     * @var bool
     */
    protected $keepClasses = false;

    /**
     * @var bool
     */
    protected $disableJSONLD = false;

    /**
     * Configuration constructor.
     *
     * Each key of $params is mapped to a "set<Key>" method; keys without a
     * matching setter are silently ignored.
     *
     * @param array $params Option name => value pairs
     */
    public function __construct(array $params = [])
    {
        foreach ($params as $key => $value) {
            $setter = sprintf('set%s', $key);
            if (method_exists($this, $setter)) {
                $this->$setter($value);
            }
        }
    }

    /**
     * Returns an array-representation of configuration.
     *
     * Only properties that have a matching getter are exported, and object
     * values are never exported.
     *
     * @return array
     */
    public function toArray()
    {
        $out = [];
        foreach ($this as $key => $value) {
            $getter = sprintf('get%s', $key);
            if (is_object($value) || !method_exists($this, $getter)) {
                continue;
            }

            $exported = $this->$getter();

            // getLogger() replaces an unset (null) logger with a NullLogger
            // instance, so the raw-property check above is not enough: also
            // skip getters that hand back an object. Without this, a "logger"
            // entry would appear only when no logger had been configured.
            if (is_object($exported)) {
                continue;
            }

            $out[$key] = $exported;
        }

        return $out;
    }

    /**
     * Returns the configured logger, or a NullLogger when none has been set.
     *
     * @return LoggerInterface
     */
    public function getLogger()
    {
        // If no logger has been set, just return a null logger
        if ($this->logger === null) {
            return new NullLogger();
        }

        return $this->logger;
    }

    /**
     * @param LoggerInterface $logger
     *
     * @return Configuration
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;

        return $this;
    }

    /**
     * @return int
     */
    public function getMaxTopCandidates()
    {
        return $this->maxTopCandidates;
    }

    /**
     * @param int $maxTopCandidates
     *
     * @return $this
     */
    public function setMaxTopCandidates($maxTopCandidates)
    {
        $this->maxTopCandidates = $maxTopCandidates;

        return $this;
    }

    /**
     * @return int
     */
    public function getCharThreshold()
    {
        return $this->charThreshold;
    }

    /**
     * @param int $charThreshold
     *
     * @return $this
     */
    public function setCharThreshold($charThreshold)
    {
        $this->charThreshold = $charThreshold;

        return $this;
    }

    /**
     * @return bool
     */
    public function getArticleByLine()
    {
        return $this->articleByLine;
    }

    /**
     * @param bool $articleByLine
     *
     * @return $this
     */
    public function setArticleByLine($articleByLine)
    {
        $this->articleByLine = $articleByLine;

        return $this;
    }

    /**
     * @return bool
     */
    public function getStripUnlikelyCandidates()
    {
        return $this->stripUnlikelyCandidates;
    }

    /**
     * @param bool $stripUnlikelyCandidates
     *
     * @return $this
     */
    public function setStripUnlikelyCandidates($stripUnlikelyCandidates)
    {
        $this->stripUnlikelyCandidates = $stripUnlikelyCandidates;

        return $this;
    }

    /**
     * @return bool
     */
    public function getCleanConditionally()
    {
        return $this->cleanConditionally;
    }

    /**
     * @param bool $cleanConditionally
     *
     * @return $this
     */
    public function setCleanConditionally($cleanConditionally)
    {
        $this->cleanConditionally = $cleanConditionally;

        return $this;
    }

    /**
     * @return bool
     */
    public function getWeightClasses()
    {
        return $this->weightClasses;
    }

    /**
     * @param bool $weightClasses
     *
     * @return $this
     */
    public function setWeightClasses($weightClasses)
    {
        $this->weightClasses = $weightClasses;

        return $this;
    }

    /**
     * @return bool
     */
    public function getFixRelativeURLs()
    {
        return $this->fixRelativeURLs;
    }

    /**
     * @param bool $fixRelativeURLs
     *
     * @return $this
     */
    public function setFixRelativeURLs($fixRelativeURLs)
    {
        $this->fixRelativeURLs = $fixRelativeURLs;

        return $this;
    }

    /**
     * @return bool
     */
    public function getSubstituteEntities()
    {
        return $this->substituteEntities;
    }

    /**
     * @param bool $substituteEntities
     *
     * @return $this
     */
    public function setSubstituteEntities($substituteEntities)
    {
        $this->substituteEntities = $substituteEntities;

        return $this;
    }

    /**
     * @return bool
     */
    public function getNormalizeEntities()
    {
        return $this->normalizeEntities;
    }

    /**
     * @param bool $normalizeEntities
     *
     * @return $this
     */
    public function setNormalizeEntities($normalizeEntities)
    {
        $this->normalizeEntities = $normalizeEntities;

        return $this;
    }

    /**
     * @return string
     */
    public function getOriginalURL()
    {
        return $this->originalURL;
    }

    /**
     * @param string $originalURL
     *
     * @return $this
     */
    public function setOriginalURL($originalURL)
    {
        $this->originalURL = $originalURL;

        return $this;
    }

    /**
     * @return string
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * @param string $parser
     *
     * @return $this
     */
    public function setParser($parser)
    {
        $this->parser = $parser;

        return $this;
    }

    /**
     * @return bool
     */
    public function getKeepClasses()
    {
        return $this->keepClasses;
    }

    /**
     * @param bool $keepClasses
     *
     * @return $this
     */
    public function setKeepClasses($keepClasses)
    {
        $this->keepClasses = $keepClasses;

        return $this;
    }

    /**
     * @return bool
     */
    public function getDisableJSONLD()
    {
        return $this->disableJSONLD;
    }

    /**
     * @param bool $disableJSONLD
     *
     * @return $this
     */
    public function setDisableJSONLD($disableJSONLD)
    {
        $this->disableJSONLD = $disableJSONLD;

        return $this;
    }

    /**
     * @return bool
     */
    public function getSummonCthulhu()
    {
        return $this->summonCthulhu;
    }

    /**
     * @param bool $summonCthulhu
     *
     * @return $this
     */
    public function setSummonCthulhu($summonCthulhu)
    {
        $this->summonCthulhu = $summonCthulhu;

        return $this;
    }
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMAttr that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMAttr' node class.
 */
class DOMAttr extends \DOMAttr
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMCdataSection that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMCdataSection' node class.
 */
class DOMCdataSection extends \DOMCdataSection
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMCharacterData that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMCharacterData' node class.
 */
class DOMCharacterData extends \DOMCharacterData
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMComment that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMComment' node class.
 */
class DOMComment extends \DOMComment
{
use NodeTrait;
}

View File

@ -1,30 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMDocument that mixes in NodeTrait and makes
 * the document hand out this namespace's node classes instead of the
 * native ones.
 */
class DOMDocument extends \DOMDocument
{
    use NodeTrait;

    /**
     * Creates the document and registers a replacement for each native DOM
     * node class.
     *
     * @param string $version  Passed through to \DOMDocument::__construct()
     * @param string $encoding Passed through to \DOMDocument::__construct()
     */
    public function __construct($version, $encoding)
    {
        parent::__construct($version, $encoding);

        // Native base class => extended class. Iteration follows insertion
        // order, so registration happens in the order listed here.
        $replacements = [
            'DOMAttr'                  => DOMAttr::class,
            'DOMCdataSection'          => DOMCdataSection::class,
            'DOMCharacterData'         => DOMCharacterData::class,
            'DOMComment'               => DOMComment::class,
            'DOMDocument'              => self::class,
            'DOMDocumentFragment'      => DOMDocumentFragment::class,
            'DOMDocumentType'          => DOMDocumentType::class,
            'DOMElement'               => DOMElement::class,
            'DOMEntity'                => DOMEntity::class,
            'DOMEntityReference'       => DOMEntityReference::class,
            'DOMNode'                  => DOMNode::class,
            'DOMNotation'              => DOMNotation::class,
            'DOMProcessingInstruction' => DOMProcessingInstruction::class,
            'DOMText'                  => DOMText::class,
        ];

        foreach ($replacements as $nativeClass => $extendedClass) {
            $this->registerNodeClass($nativeClass, $extendedClass);
        }
    }
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMDocumentFragment that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMDocumentFragment' node class.
 */
class DOMDocumentFragment extends \DOMDocumentFragment
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMDocumentType that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMDocumentType' node class.
 */
class DOMDocumentType extends \DOMDocumentType
{
use NodeTrait;
}

View File

@ -1,46 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMElement that mixes in NodeTrait and adds
 * element-only traversal helpers.
 */
class DOMElement extends \DOMElement
{
    use NodeTrait;

    /**
     * Collects the direct children of this element that are themselves elements.
     *
     * Text, comment and other non-element nodes are left out; use childNodes
     * when those are needed as well.
     *
     * @return DOMNodeList
     */
    public function children()
    {
        $elements = new DOMNodeList();

        foreach ($this->childNodes as $candidate) {
            if ($candidate->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            $elements->add($candidate);
        }

        return $elements;
    }

    /**
     * Walks backwards through the preceding siblings and returns the first
     * one that is an element, or null when there is none.
     *
     * @see https://wiki.php.net/rfc/dom_living_standard_api
     * @return DOMElement|null
     */
    public function previousElementSibling()
    {
        for ($sibling = $this->previousSibling; $sibling; $sibling = $sibling->previousSibling) {
            if ($sibling->nodeType === XML_ELEMENT_NODE) {
                return $sibling;
            }
        }

        return null;
    }
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMEntity that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMEntity' node class.
 */
class DOMEntity extends \DOMEntity
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMEntityReference that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMEntityReference' node class.
 */
class DOMEntityReference extends \DOMEntityReference
{
use NodeTrait;
}

View File

@ -1,14 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMNode that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMNode' node class. The @method tags below advertise the
 * attribute accessors that NodeTrait defines.
 *
 * @method getAttribute($attribute)
 * @method hasAttribute($attribute)
 */
class DOMNode extends \DOMNode
{
use NodeTrait;
}

View File

@ -1,82 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
/**
 * Class DOMNodeList.
 *
 * This is a fake DOMNodeList class that allows adding items to the list. The original class is static and the nodes
 * are defined automagically when instantiating it. This fake version mirrors its read API (length, item(), count(),
 * iteration) and adds the function add() that allows to insert new DOMNodes into the DOMNodeList.
 *
 * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
 * from the user and cannot be extended, changed, or tweaked.
 */
class DOMNodeList implements \Countable, \IteratorAggregate
{
    /**
     * Nodes in insertion order.
     *
     * @var array
     */
    protected $items = [];

    /**
     * Number of nodes stored in $items; incremented by add().
     *
     * @var int
     */
    protected $length = 0;

    /**
     * To allow access to length in the same way that DOMNodeList allows.
     *
     * Any other property name raises an "Undefined property" notice and
     * yields null.
     *
     * @param string $name
     *
     * @return int|null
     */
    public function __get($name)
    {
        switch ($name) {
            case 'length':
                return $this->length;
            default:
                trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));

                return null;
        }
    }

    /**
     * Appends a node to the end of the list.
     *
     * @param DOMNode|DOMElement|DOMComment $node
     *
     * @return DOMNodeList
     */
    public function add($node)
    {
        $this->items[] = $node;
        $this->length++;

        return $this;
    }

    /**
     * Returns the node at the given position.
     *
     * Like the native DOMNodeList::item(), an index that is out of range
     * yields null instead of raising an undefined-offset warning.
     *
     * @param int $offset
     *
     * @return DOMNode|DOMElement|DOMComment|null
     */
    public function item(int $offset)
    {
        return $this->items[$offset] ?? null;
    }

    /**
     * @return int
     */
    public function count(): int
    {
        return $this->length;
    }

    /**
     * To make it compatible with iterator_to_array() function.
     *
     * @return \ArrayIterator
     */
    public function getIterator(): \ArrayIterator
    {
        return new \ArrayIterator($this->items);
    }
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMNotation that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMNotation' node class.
 */
class DOMNotation extends \DOMNotation
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMProcessingInstruction that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMProcessingInstruction' node class.
 */
class DOMProcessingInstruction extends \DOMProcessingInstruction
{
use NodeTrait;
}

View File

@ -1,10 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes\DOM;
use fivefilters\Readability\Nodes\NodeTrait;
/**
 * Extension of the native \DOMText that mixes in NodeTrait.
 *
 * DOMDocument::__construct() registers this class as the replacement for
 * the native 'DOMText' node class.
 */
class DOMText extends \DOMText
{
use NodeTrait;
}

View File

@ -1,566 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes;
use fivefilters\Readability\Nodes\DOM\DOMDocument;
use fivefilters\Readability\Nodes\DOM\DOMElement;
use fivefilters\Readability\Nodes\DOM\DOMNode;
use fivefilters\Readability\Nodes\DOM\DOMText;
use DOMNodeList;
/**
* @method \DOMNode removeAttribute($name)
*/
trait NodeTrait
{
/**
 * Content score of the node. Used to determine the value of the content.
 * Assigned by initializeNode() from the tag name and, optionally, the
 * class/id weight.
 *
 * @var int
 */
public $contentScore = 0;
/**
 * Flag for initialized status. Set to true by initializeNode() so the
 * score is only computed once per node object.
 *
 * @var bool
 */
private $initialized = false;
/**
 * Flag data tables.
 *
 * Not read or written by the accessors in this trait any more: they store
 * the flag in a 'readabilityDataTable' DOM attribute instead (see the
 * explanation inside isReadabilityDataTable()).
 *
 * @var bool
 */
private $readabilityDataTable = false;
/**
 * Tag names that hasSingleChildBlockElement() treats as block elements.
 *
 * @var array
 */
private $divToPElements = [
'blockquote',
'dl',
'div',
'img',
'ol',
'p',
'pre',
'table',
'ul'
];
/**
 * Tag names that isPhrasingContent() accepts as phrasing content.
 *
 * The commented out elements qualify as phrasing content but tend to be
 * removed by readability when put into paragraphs, so we ignore them here.
 *
 * @var array
 */
private $phrasing_elems = [
// 'CANVAS', 'IFRAME', 'SVG', 'VIDEO',
'abbr', 'audio', 'b', 'bdo', 'br', 'button', 'cite', 'code', 'data',
'datalist', 'dfn', 'em', 'embed', 'i', 'img', 'input', 'kbd', 'label',
'mark', 'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q',
'ruby', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub',
'sup', 'textarea', 'time', 'var', 'wbr'
];
/**
 * Tells whether initializeNode() has already computed the score of this node.
 *
 * @return bool
 */
public function isInitialized()
{
return $this->initialized;
}
/**
 * Tells whether this node has been flagged as a data table.
 *
 * @return bool True only when the 'readabilityDataTable' attribute exists and equals '1'
 */
public function isReadabilityDataTable()
{
    /*
     * Workaround that should go away at some point.
     * Custom properties added to the extended DOM classes (such as
     * 'readabilityDataTable') are lost when elements are fetched again with
     * getElementsByTagName: the objects come back with every custom property
     * reset to its default. A table marked in an earlier step would therefore
     * look unmarked later on, so the flag is persisted as a DOM attribute
     * rather than as an object property.
     *
     * @see https://stackoverflow.com/questions/35654709/php-registernodeclass-and-reusing-variable-names
     */
    if (!$this->hasAttribute('readabilityDataTable')) {
        return false;
    }

    return $this->getAttribute('readabilityDataTable') === '1';
}
/**
 * Flags (or unflags) this node as a data table by writing the
 * 'readabilityDataTable' attribute.
 *
 * @param bool $param
 */
public function setReadabilityDataTable($param)
{
    // Stored as '1'/'0': a literal "true" can't be used because DOMDocument casts it to "1"
    $flag = '0';
    if ($param) {
        $flag = '1';
    }

    $this->setAttribute('readabilityDataTable', $flag);
}
/**
 * Computes the initial content score of the node (once) and returns the node.
 *
 * The base score depends on the tag name; when $weightClasses is truthy the
 * result of getClassWeight() is added on top. Calling this again on an
 * already initialized node changes nothing.
 *
 * @ TODO: I don't like the weightClasses param. How can we get the config here?
 *
 * @param $weightClasses bool Weight classes?
 *
 * @return static
 */
public function initializeNode($weightClasses)
{
    if ($this->isInitialized()) {
        return $this;
    }

    // Base score per tag name; any tag not listed starts at 0.
    $baseScores = [
        'div'        => 5,
        'pre'        => 3,
        'td'         => 3,
        'blockquote' => 3,
        'address'    => -3,
        'ol'         => -3,
        'ul'         => -3,
        'dl'         => -3,
        'dd'         => -3,
        'dt'         => -3,
        'li'         => -3,
        'form'       => -3,
        'h1'         => -5,
        'h2'         => -5,
        'h3'         => -5,
        'h4'         => -5,
        'h5'         => -5,
        'h6'         => -5,
        'th'         => -5,
    ];

    $score = $baseScores[$this->nodeName] ?? 0;
    if ($weightClasses) {
        $score += $this->getClassWeight();
    }

    $this->contentScore = $score;
    $this->initialized = true;

    return $this;
}
/**
 * Replacement for the native getAttribute method that is safe to call on
 * every node type: nodes whose attributes property is null yield an empty
 * string instead of being forwarded to the parent implementation.
 *
 * @param $attributeName string Attribute to retrieve
 *
 * @return string
 */
#[\ReturnTypeWillChange]
public function getAttribute($attributeName)
{
    if (is_null($this->attributes)) {
        return '';
    }

    return parent::getAttribute($attributeName);
}
/**
 * Replacement for the native hasAttribute method that is safe to call on
 * every node type: nodes whose attributes property is null report false.
 *
 * @param $attributeName
 *
 * @return bool
 *
 * @see getAttribute
 */
#[\ReturnTypeWillChange]
public function hasAttribute($attributeName)
{
    if (is_null($this->attributes)) {
        return false;
    }

    return parent::hasAttribute($attributeName);
}
/**
 * Collects the ancestors of the current node, nearest first, stopping
 * before the owning DOMDocument.
 *
 * @param int|bool $maxLevel Max amount of ancestors to get. False for all of them
 *
 * @return array
 */
public function getNodeAncestors($maxLevel = 3)
{
    $chain = [];

    for ($current = $this->parentNode; $current && !($current instanceof DOMDocument); $current = $current->parentNode) {
        $chain[] = $current;

        // Strict comparison: a non-integer limit such as false never matches,
        // so the walk continues up to the document.
        if (count($chain) === $maxLevel) {
            break;
        }
    }

    return $chain;
}
/**
 * Returns every 'a' element found below the current element as a plain array.
 *
 * @return array
 */
public function getAllLinks()
{
    $anchors = $this->getElementsByTagName('a');

    return iterator_to_array($anchors);
}
/**
 * Get the density of links as a fraction of the content.
 * This is the amount of text that is inside links divided by the total
 * text in the node. Links whose href matches the 'hashUrl' pattern only
 * count for 30% of their text length.
 *
 * @return int|float 0 when the node has no text, otherwise the ratio
 */
public function getLinkDensity()
{
    $totalLength = mb_strlen($this->getTextContent(true));
    if ($totalLength === 0) {
        return 0;
    }

    $linkedLength = 0;

    /** @var DOMElement $anchor */
    foreach ($this->getAllLinks() as $anchor) {
        $target = $anchor->getAttribute('href');

        if ($target && preg_match(NodeUtility::$regexps['hashUrl'], $target)) {
            $factor = 0.3;
        } else {
            $factor = 1;
        }

        $linkedLength += mb_strlen($anchor->getTextContent(true)) * $factor;
    }

    return $linkedLength / $totalLength;
}
/**
 * Calculates the weight of the class/id of the current element.
 *
 * Each of the two attributes contributes -25 when it matches the
 * 'negative' pattern and +25 when it matches the 'positive' pattern.
 *
 * @return int
 */
public function getClassWeight()
{
    $class = $this->getAttribute('class');
    $id = $this->getAttribute('id');

    // Attribute values worth inspecting, class first, then id.
    $inspected = [];
    if (trim($class)) {
        $inspected[] = $class;
    }
    if (trim($id) !== '') {
        $inspected[] = $id;
    }

    $weight = 0;
    foreach ($inspected as $text) {
        if (preg_match(NodeUtility::$regexps['negative'], $text)) {
            $weight -= 25;
        }
        if (preg_match(NodeUtility::$regexps['positive'], $text)) {
            $weight += 25;
        }
    }

    return $weight;
}
/**
 * Returns the trimmed text of the node, optionally passing it through the
 * 'normalize' pattern (every match is replaced by a single space).
 *
 * @param bool $normalize Normalize white space?
 *
 * @return string
 */
public function getTextContent($normalize = true)
{
    $text = trim($this->textContent);

    return $normalize
        ? preg_replace(NodeUtility::$regexps['normalize'], ' ', $text)
        : $text;
}
/**
 * Return an array indicating how many rows and columns this table has.
 *
 * Every 'tr' adds its rowspan to the row count, and every 'td' adds its
 * colspan to the column count of its row. A missing, non-numeric or
 * non-positive span counts as 1. The reported column count is the widest
 * row found.
 *
 * @return array ['rows' => int, 'columns' => int]
 */
public function getRowAndColumnCount()
{
    $rows = 0;
    $columns = 0;

    foreach ($this->getElementsByTagName('tr') as $tr) {
        /** @var \DOMElement $tr */
        // In PHP "$span || 1" is a boolean (always true, i.e. 1), unlike the
        // JavaScript idiom it was ported from, so the span has to be
        // converted to an integer explicitly.
        $rows += max(1, (int) $tr->getAttribute('rowspan'));

        // Now look for column-related info
        $columnsInThisRow = 0;
        foreach ($tr->getElementsByTagName('td') as $cell) {
            /** @var \DOMElement $cell */
            $columnsInThisRow += max(1, (int) $cell->getAttribute('colspan'));
        }

        $columns = max($columns, $columnsInThisRow);
    }

    return ['rows' => $rows, 'columns' => $columns];
}
/**
 * Creates a new element, owned by the original node's document, whose only
 * content is the (trimmed, non-normalized) text of the original node.
 *
 * @param $originalNode DOMNode
 * @param $tagName string
 *
 * @return DOMElement
 */
public function createNode($originalNode, $tagName)
{
    $document = $originalNode->ownerDocument;
    $text = $originalNode->getTextContent(false);

    $newNode = $document->createElement($tagName);

    // The value argument of createElement() is not escaped, so text holding
    // a bare "&" (e.g. "AT&T") triggers a warning and gets truncated. A text
    // node takes the string literally.
    if ($text !== '') {
        $newNode->appendChild($document->createTextNode($text));
    }

    return $newNode;
}
/**
 * Check if a given node has one of its ancestor tag name matching the
 * provided one.
 *
 * @param string        $tagName  Node name to look for among the ancestors
 * @param int           $maxDepth Give up once more than this many levels have been climbed;
 *                                0 or a negative value disables the limit
 * @param callable|null $filterFn Optional predicate; a matching ancestor only counts
 *                                when the predicate returns a truthy value for it
 *
 * @return bool
 */
public function hasAncestorTag($tagName, $maxDepth = 3, ?callable $filterFn = null)
{
    // The nullable type is spelled out because implicitly nullable
    // parameters ("callable $x = null") are deprecated as of PHP 8.4.
    $depth = 0;

    for ($node = $this; $node->parentNode; $node = $node->parentNode, $depth++) {
        if ($maxDepth > 0 && $depth > $maxDepth) {
            return false;
        }

        $parent = $node->parentNode;
        if ($parent->nodeName === $tagName && (!$filterFn || $filterFn($parent))) {
            return true;
        }
    }

    return false;
}
/**
 * Check whether the filtered child list of this node consists of exactly
 * one node whose name is the given tag.
 *
 * Returns false when NodeUtility::filterTextNodes() leaves zero or more
 * than one child, or when the single remaining child has a different name.
 *
 * @param $tag string Name of tag
 *
 * @return bool
 */
public function hasSingleTagInsideElement($tag)
{
// There should be exactly 1 element child with given tag
if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) {
return false;
}
// And there should be no text nodes with real content
// NOTE(review): the reduction runs over the already filtered $children
// (exactly one node at this point), not over $this->childNodes, and has
// no initial value, so $carry is null on its only invocation and the
// early "return false" branch is not taken. The result is therefore
// decided by that single child alone. Presumably filterTextNodes() drops
// text nodes - confirm whether the text-content check can ever fail.
return array_reduce(iterator_to_array($children), function ($carry, $child) {
if (!$carry === false) {
return false;
}
/* @var DOMNode $child */
return !($child->nodeType === XML_TEXT_NODE && preg_match(NodeUtility::$regexps['hasContent'], $child->textContent));
});
}
/**
 * Check whether a block element exists anywhere below the current node.
 * Block elements are the ones defined in the divToPElements array; the
 * search descends recursively into children that are not block elements.
 *
 * @return bool
 */
public function hasSingleChildBlockElement()
{
    if (!$this->hasChildNodes()) {
        return false;
    }

    foreach ($this->childNodes as $child) {
        // A block-level child settles the question right away.
        if (in_array($child->nodeName, $this->divToPElements)) {
            return true;
        }

        // Otherwise the answer may still come from deeper in that child.
        /** @var $child DOMElement */
        if ($child->hasSingleChildBlockElement()) {
            return true;
        }
    }

    return false;
}
/**
* Determines if a node has no content or it is just a bunch of dividing lines and/or whitespace.
*
* @return bool
*/
public function isElementWithoutContent()
{
    // Only elements can qualify.
    if (!($this instanceof DOMElement)) {
        return false;
    }

    // Any text left after stripping whitespace (including &nbsp;) means real content.
    $remainingText = preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $this->textContent);
    if (mb_strlen($remainingText) !== 0) {
        return false;
    }

    $childTotal = $this->childNodes->length;
    if ($childTotal === 0) {
        return true;
    }

    /*
     * Special PHP DOMDocument case: text nodes are children too (unlike the element-only
     * child list used by the JS version). For "<p> <br/></p>" the paragraph has two child
     * nodes, the whitespace DOMText and the BR, so counting BRs and HRs alone would never
     * add up. The DOMText children are therefore counted as well; at this point they are
     * known to be empty or whitespace-only because of the length check above.
     */
    $textChildren = 0;
    foreach ($this->childNodes as $child) {
        if ($child instanceof DOMText) {
            $textChildren++;
        }
    }

    $lineBreaks = $this->getElementsByTagName('br')->length;
    $rules = $this->getElementsByTagName('hr')->length;

    // The node is "empty" when its children are nothing but dividers and blank text.
    return $childTotal === $lineBreaks + $rules + $textChildren;
}
/**
* Determine if a node qualifies as phrasing content.
* https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content.
*
* @return bool
*/
public function isPhrasingContent()
{
    // Text nodes are always phrasing content.
    if ($this->nodeType === XML_TEXT_NODE) {
        return true;
    }

    // So is every tag listed in phrasing_elems.
    if (in_array($this->nodeName, $this->phrasing_elems)) {
        return true;
    }

    // <a>, <del> and <ins> qualify only when all of their children do.
    if (is_null($this->childNodes)) {
        return false;
    }
    if ($this->nodeName !== 'a' && $this->nodeName !== 'del' && $this->nodeName !== 'ins') {
        return false;
    }
    foreach ($this->childNodes as $child) {
        if (!$child->isPhrasingContent()) {
            return false;
        }
    }

    return true;
}
/**
* In the original JS project they check if the node has the style display=none, which unfortunately
* in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none".
*
* @return bool
*/
public function isProbablyVisible()
{
    // Inline style hides the node.
    if (preg_match('/display:( )?none/i', $this->getAttribute('style'))) {
        return false;
    }

    // The "hidden" attribute hides the node.
    if ($this->hasAttribute('hidden')) {
        return false;
    }

    // Without aria-hidden="true" there is nothing else that hides it.
    if (!$this->hasAttribute('aria-hidden') || $this->getAttribute('aria-hidden') !== 'true') {
        return true;
    }

    // aria-hidden="true": still treat "fallback-image" nodes as visible so that
    // wikimedia math images are displayed.
    return $this->hasAttribute('class') && strpos($this->getAttribute('class'), 'fallback-image') !== false;
}
/**
* @return bool
*/
public function isWhitespace()
{
    // A text node is whitespace when nothing is left after trimming it.
    if ($this->nodeType === XML_TEXT_NODE) {
        return mb_strlen(trim($this->textContent)) === 0;
    }

    // Among elements, only <br> counts as whitespace.
    if ($this->nodeType === XML_ELEMENT_NODE) {
        return $this->nodeName === 'br';
    }

    return false;
}
/**
* This is a hack that overcomes the issue of node shifting when scanning and removing nodes.
*
* In the JS version of getElementsByTagName, if you remove a node it will not appear during the
* foreach. This does not happen in PHP DOMDocument, because if you remove a node, it will still appear but as an
* orphan node and will give an exception if you try to do anything with it.
*
* Shifting also occurs when converting parent nodes (like a P to a DIV), which in that case the found nodes are
* removed from the foreach "pool" but the internal index of the foreach is not aware and skips over nodes that
* never looped over. (index is at position 5, 2 nodes are removed, next one should be node 3, but the foreach tries
* to access node 6)
*
* This function solves this by searching for the nodes on every loop and keeping track of the count differences.
* Because on every loop we call getElementsByTagName again, this could cause a performance impact and should be
* used only when the results of the search are going to be used to remove the nodes.
*
* @param string $tag
*
* @return \Generator
*/
public function shiftingAwareGetElementsByTagName($tag)
{
// Generator: yields one matching node at a time and re-queries the document after
// every yield so that the index can be corrected for whatever the consumer changed.
/** @var $nodes DOMNodeList */
$nodes = $this->getElementsByTagName($tag);
$count = $nodes->length;
// The increment clause advances $i by one and clamps it at 0, so the adjustment made
// in the body below can never leave the loop with a negative index.
for ($i = 0; $i < $count; $i = max(++$i, 0)) {
yield $nodes->item($i);
// Search for all the nodes again
$nodes = $this->getElementsByTagName($tag);
// Subtract the amount of nodes removed from the current index
// ($count still holds the size from before the yield at this point).
$i -= $count - $nodes->length;
// Subtract the amount of nodes removed from the current count
// (this leaves $count equal to the current list length).
$count -= ($count - $nodes->length);
}
}
/**
* Mimics JS's firstElementChild property. PHP only has firstChild, which could be any type of DOMNode. Use this
* function to get the first child that is a DOMElement node.
*
* @return \DOMElement|null
*/
public function getFirstElementChild()
{
    $children = $this->childNodes;

    // No iterable child list means there is no element child either.
    if (!($children instanceof \Traversable)) {
        return null;
    }

    // Children are visited in document order; the first element wins.
    foreach ($children as $candidate) {
        if ($candidate instanceof \DOMElement) {
            return $candidate;
        }
    }

    return null;
}
}

View File

@ -1,192 +0,0 @@
<?php
namespace fivefilters\Readability\Nodes;
use fivefilters\Readability\Nodes\DOM\DOMDocument;
use fivefilters\Readability\Nodes\DOM\DOMElement;
use fivefilters\Readability\Nodes\DOM\DOMNode;
use fivefilters\Readability\Nodes\DOM\DOMNodeList;
/**
* Class NodeUtility.
*/
class NodeUtility
{
/**
* Collection of regexps to check the node usability.
*
* @var array
*/
public static $regexps = [
'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i',
'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
'byline' => '/byline|author|dateline|writtenby|p-author/i',
'replaceFonts' => '/<(\/?)font[^>]*>/i',
// Two or more consecutive whitespace characters.
'normalize' => '/\s{2,}/',
'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i',
'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
'prevLink' => '/(prev|earl|old|new|<|«)/i',
'tokenize' => '/\W+/',
// Empty or whitespace-only subject.
'whitespace' => '/^\s*$/',
// Subject ends with a non-whitespace character (PCRE "$" also matches just before a final newline).
'hasContent' => '/\S$/',
'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
// \x{00A0} is the unicode version of &nbsp;
'onlyWhitespace' => '/\x{00A0}|\s+/u',
'hashUrl' => '/^#.+/',
'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/',
'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i',
// See: https://schema.org/Article
// NOTE(review): "^" applies only to the first alternative and "$" only to the last one,
// so every alternative in between matches anywhere in the subject - confirm this is intended.
'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/'
];
/**
* Finds the next node, starting from the given node, and ignoring
* whitespace in between. If the given node is an element, the same node is
* returned.
*
* Imported from the Element class on league\html-to-markdown.
*
* @param $node
*
* @return DOMNode
*/
public static function nextNode($node)
{
    // Step through the sibling chain, starting with the given node itself.
    for ($cursor = $node; $cursor; $cursor = $cursor->nextSibling) {
        // Stop at the first element, or at the first node that is not whitespace.
        if ($cursor->nodeType === XML_ELEMENT_NODE || !$cursor->isWhitespace()) {
            break;
        }
    }

    // Either the node we stopped at, or the falsy value that ended the chain.
    return $cursor;
}
/**
* Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
* element with the new tag name and importing it to the main DOMDocument.
*
* @param DOMNode $node
* @param string $value
* @param bool $importAttributes
*
* @return DOMNode
*/
public static function setNodeTag($node, $value, $importAttributes = true)
{
    // Build the replacement element inside a scratch document.
    $scratch = new DOMDocument('1.0', 'utf-8');
    $scratch->appendChild($scratch->createElement($value));

    // Deep-copy every child of the original node into the new element.
    foreach ($node->childNodes as $child) {
        $scratch->firstChild->appendChild($scratch->importNode($child, true));
    }

    // Carry the original attributes over unless the caller opted out.
    if ($importAttributes) {
        foreach ($node->attributes as $attribute) {
            $scratch->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
        }
    }

    // Import the element (firstChild), not the scratch document itself, into the
    // document that owns the original node, then swap it in place.
    $replacement = $node->ownerDocument->importNode($scratch->firstChild, true);
    $node->parentNode->replaceChild($replacement, $node);

    return $replacement;
}
/**
* Removes the current node and returns the next node to be parsed (child, sibling or parent).
*
* @param DOMNode $node
*
* @return DOMNode
*/
public static function removeAndGetNext($node)
{
    // The successor has to be resolved while the node is still attached to the tree;
    // the node's own children are skipped because they go away together with it.
    $successor = self::getNextNode($node, true);

    $owner = $node->parentNode;
    $owner->removeChild($node);

    return $successor;
}
/**
* Remove the selected node.
*
* @param $node DOMElement
*
* @return void
**/
public static function removeNode($node)
{
    // A node without a parent is already detached, so there is nothing to do.
    if ($node->parentNode) {
        $node->parentNode->removeChild($node);
    }
}
/**
* Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
* for parents.
*
* @param DOMNode $originalNode
* @param bool $ignoreSelfAndKids
*
* @return DOMNode
*/
public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
{
    /*
     * One step of a depth-first walk over the DOM. Passing true as the second
     * argument means the node (and its kids) are going away, so the walk must
     * continue with whatever comes after the node's subtree.
     */

    // Descend first, unless the subtree is being skipped.
    if (!$ignoreSelfAndKids && $originalNode->firstChild) {
        return $originalNode->firstChild;
    }

    // Otherwise take the nearest following sibling, looking at the node itself and
    // then at each ancestor in turn. The ancestors themselves were already visited
    // on the way down, so only their siblings are of interest.
    for ($cursor = $originalNode; $cursor; $cursor = $cursor->parentNode) {
        if ($cursor->nextSibling) {
            return $cursor->nextSibling;
        }
    }

    // Ran off the top of the tree: $cursor is the empty parent that ended the climb.
    return $cursor;
}
/**
* Remove all empty DOMNodes from DOMNodeLists.
*
* @param \DOMNodeList $list
*
* @return DOMNodeList
*/
public static function filterTextNodes(\DOMNodeList $list)
{
    $kept = new DOMNodeList();

    foreach ($list as $item) {
        // Only text nodes that are empty after trimming are dropped;
        // every other node is carried over unchanged.
        $isBlankText = $item->nodeType === XML_TEXT_NODE && !mb_strlen(trim($item->nodeValue));
        if ($isBlankText) {
            continue;
        }
        $kept->add($item);
    }

    return $kept;
}
}

View File

@ -1,7 +0,0 @@
<?php
namespace fivefilters\Readability;
/**
 * Exception type of the Readability package.
 *
 * Adds nothing to \Exception; it exists so callers can catch parsing failures by type.
 */
class ParseException extends \Exception
{
}

View File

@ -1,92 +0,0 @@
<?php
namespace fivefilters\Readability\Test;
use fivefilters\Readability\Configuration;
use Monolog\Handler\NullHandler;
use Monolog\Logger;
/**
* Class ConfigurationTest.
*/
class ConfigurationTest extends \PHPUnit\Framework\TestCase
{
/**
* @dataProvider getParams
*
* @param array $params
*/
public function testConfigurationConstructorSetsParameters(array $params)
{
    // Build the configuration straight from the provider's options and verify that
    // each of them can be read back.
    $this->doEqualsAsserts(new Configuration($params), $params);
}
/**
* @dataProvider getParams
*
* @param array $params
*/
public function testInvalidParameterIsNotInConfig(array $params)
{
    // The provider includes an 'invalidParameter' key; it must not show up in the export.
    $exported = (new Configuration($params))->toArray();

    $this->assertArrayNotHasKey('invalidParameter', $exported, 'Invalid param key is not present in config');
}
/**
* @param Configuration $config
* @param array $options
*/
private function doEqualsAsserts(Configuration $config, array $options)
{
    // Every option handed to the constructor must be readable back through its own getter.
    $this->assertEquals($options['maxTopCandidates'], $config->getMaxTopCandidates());
    $this->assertEquals($options['charThreshold'], $config->getCharThreshold());
    $this->assertEquals($options['articleByLine'], $config->getArticleByLine());
    $this->assertEquals($options['stripUnlikelyCandidates'], $config->getStripUnlikelyCandidates());
    $this->assertEquals($options['cleanConditionally'], $config->getCleanConditionally());
    $this->assertEquals($options['weightClasses'], $config->getWeightClasses());
    $this->assertEquals($options['fixRelativeURLs'], $config->getFixRelativeURLs());
    $this->assertEquals($options['substituteEntities'], $config->getSubstituteEntities());
    $this->assertEquals($options['normalizeEntities'], $config->getNormalizeEntities());
    $this->assertEquals($options['originalURL'], $config->getOriginalURL());
    // Check summonCthulhu through its own getter. This assertion used to read
    // getOriginalURL() and passed only because the fixture assigns the same value
    // to both options, so the summonCthulhu option was never actually verified.
    $this->assertEquals($options['summonCthulhu'], $config->getSummonCthulhu());
}
/**
* @return array
*/
public function getParams()
{
    // Option set handed to the Configuration constructor by the tests above.
    // 'invalidParameter' is included on purpose: it must be ignored by Configuration.
    $allParameters = [
        'maxTopCandidates' => 3,
        'wordThreshold' => 500,
        'charThreshold' => 500,
        'articleByLine' => true,
        'stripUnlikelyCandidates' => false,
        'cleanConditionally' => false,
        'weightClasses' => false,
        'fixRelativeURLs' => true,
        'substituteEntities' => true,
        'normalizeEntities' => true,
        'originalURL' => 'my.original.url',
        'summonCthulhu' => 'my.original.url',
        'invalidParameter' => 'invalidParameterValue'
    ];

    // A single data set whose only argument is the option array.
    $dataSet = ['All current parameters' => $allParameters];

    return [$dataSet];
}
/**
* Test if a logger interface can be injected and retrieved from the Configuration object.
*/
public function testLoggerCanBeInjected()
{
    // A logger that discards everything is enough to test the injection round-trip.
    $logger = new Logger('Readability');
    $logger->pushHandler(new NullHandler());

    $config = new Configuration();
    $config->setLogger($logger);

    // The very same instance must come back out.
    $this->assertSame($logger, $config->getLogger());
}
}

View File

@ -1,203 +0,0 @@
<?php
namespace fivefilters\Readability\Test;
use fivefilters\Readability\Configuration;
use fivefilters\Readability\ParseException;
use fivefilters\Readability\Readability;
/**
* Class ReadabilityTest.
*/
class ReadabilityTest extends \PHPUnit\Framework\TestCase
{
/**
* Test that Readability parses the HTML correctly and matches the expected result.
*
* @dataProvider getSamplePages
*
* @param TestPage $testPage
*
* @throws ParseException
*/
public function testReadabilityParsesHTML(TestPage $testPage)
{
    // Fixed options are merged last so they override the test page's own config.json values.
    $options = ['OriginalURL' => 'http://fakehost/test/test.html',
        'FixRelativeURLs' => true,
        'SubstituteEntities' => true,
        'ArticleByLine' => true
    ];
    $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
    $readability = new Readability($configuration);
    $readability->parse($testPage->getSourceHTML());

    // Let's (crudely) remove whitespace between tags here to simplify comparison.
    // This isn't used for output.
    $from = ['/\>[^\S ]+/s', '/[^\S ]+\</s', '/(\s)+/s', '/> </s'];
    $to = ['>', '<', '\\1', '><'];
    $expected_no_whitespace = preg_replace($from, $to, $testPage->getExpectedHTML());
    $readability_no_whitespace = preg_replace($from, $to, $readability->getContent());

    // Optional debugging aid: with the "output-changes" environment variable set, the
    // new output is written to changed/<slug>/ whenever it differs from the expectation.
    if (getenv('output-changes') && $expected_no_whitespace !== $readability_no_whitespace) {
        $changed_dir = __DIR__.'/changed/'.$testPage->getSlug();
        // Create the directory recursively: the "changed" parent may not exist yet, and a
        // silenced non-recursive mkdir would leave the writes below failing without a hint.
        if (!is_dir($changed_dir)) {
            mkdir($changed_dir, 0777, true);
        }
        $new_expected = $changed_dir.'/expected.html';
        $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected.html';
        //file_put_contents($changed_dir.'/readability.html', $readability_no_whitespace);
        //file_put_contents($changed_dir.'/expected-current.html', $expected_no_whitespace);
        file_put_contents($new_expected, $readability->getContent());
        if (getenv('output-diff')) {
            // Quote both paths: a checkout path or slug containing spaces or shell
            // metacharacters would otherwise break (or alter) the diff command.
            $diff_command = sprintf('diff -u -d %s %s', escapeshellarg($old_expected), escapeshellarg($new_expected));
            file_put_contents($changed_dir.'/diff-expected.txt', shell_exec($diff_command));
        }
    }

    $this->assertSame($expected_no_whitespace, $readability_no_whitespace, 'Parsed text does not match the expected one.');
    //$this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
    //$this->assertXmlStringEqualsXmlString($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
}
/**
* Test that Readability extracts the expected metadata (author, direction, excerpt, image and title) from the test page.
*
* @dataProvider getSamplePages
*
* @param TestPage $testPage
*
* @throws ParseException
*/
public function testReadabilityParsesMetadata(TestPage $testPage)
{
    // Fixed options are merged last so they override the test page's own config.json values.
    $options = ['OriginalURL' => 'http://fakehost/test/test.html',
        'FixRelativeURLs' => true,
        'SubstituteEntities' => true,
        'ArticleByLine' => true
    ];
    $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
    $readability = new Readability($configuration);
    $readability->parse($testPage->getSourceHTML());

    // Same keys as expected-metadata.json, so the two can be compared and re-exported.
    $metadata = [
        'Author' => $readability->getAuthor(),
        'Direction' => $readability->getDirection(),
        'Excerpt' => $readability->getExcerpt(),
        'Image' => $readability->getImage(),
        'Title' => $readability->getTitle(),
        'SiteName' => $readability->getSiteName()
    ];

    // Optional debugging aid: with the "output-changes" environment variable set, the
    // new metadata is written to changed/<slug>/ whenever it differs from the expectation.
    if (getenv('output-changes') && (array)$testPage->getExpectedMetadata() !== $metadata) {
        $changed_dir = __DIR__.'/changed/'.$testPage->getSlug();
        // Create the directory recursively: the "changed" parent may not exist yet, and a
        // silenced non-recursive mkdir would leave the writes below failing without a hint.
        if (!is_dir($changed_dir)) {
            mkdir($changed_dir, 0777, true);
        }
        $new_expected = $changed_dir.'/expected-metadata.json';
        $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected-metadata.json';
        //file_put_contents($changed_dir.'/expected-metadata-current.json', json_encode($testPage->getExpectedMetadata(), JSON_PRETTY_PRINT));
        file_put_contents($new_expected, json_encode((object)$metadata, JSON_PRETTY_PRINT));
        if (getenv('output-diff')) {
            // Quote both paths: a checkout path or slug containing spaces or shell
            // metacharacters would otherwise break (or alter) the diff command.
            $diff_command = sprintf('diff -u -d %s %s', escapeshellarg($old_expected), escapeshellarg($new_expected));
            file_put_contents($changed_dir.'/diff-expected-metadata.txt', shell_exec($diff_command));
        }
    }

    $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.');
    $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.');
    $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.');
    $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.');
    $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.');
}
/**
* Test that Readability returns all the expected images from the test page.
*
* @param TestPage $testPage
* @dataProvider getSamplePages
*
* @throws ParseException
*/
public function testHTMLParserParsesImages(TestPage $testPage)
{
    // Fixed options are merged last so they override the test page's own config.json values.
    $options = ['OriginalURL' => 'http://fakehost/test/test.html',
        'fixRelativeURLs' => true,
        'substituteEntities' => true,
    ];
    $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
    $readability = new Readability($configuration);
    $readability->parse($testPage->getSourceHTML());

    // Re-index so the comparison does not depend on the keys getImages() happens to use.
    $images = array_values($readability->getImages());

    // Optional debugging aid: with the "output-changes" environment variable set, the
    // new image list is written to changed/<slug>/ whenever it differs from the expectation.
    if (getenv('output-changes') && $testPage->getExpectedImages() !== $images) {
        $changed_dir = __DIR__.'/changed/'.$testPage->getSlug();
        // Create the directory recursively: the "changed" parent may not exist yet, and a
        // silenced non-recursive mkdir would leave the writes below failing without a hint.
        if (!is_dir($changed_dir)) {
            mkdir($changed_dir, 0777, true);
        }
        $new_expected = $changed_dir.'/expected-images.json';
        $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected-images.json';
        //file_put_contents($changed_dir.'/expected-images-current.json', json_encode($testPage->getExpectedImages(), JSON_PRETTY_PRINT));
        file_put_contents($new_expected, json_encode($images, JSON_PRETTY_PRINT));
        if (getenv('output-diff')) {
            // Quote both paths: a checkout path or slug containing spaces or shell
            // metacharacters would otherwise break (or alter) the diff command.
            $diff_command = sprintf('diff -u -d %s %s', escapeshellarg($old_expected), escapeshellarg($new_expected));
            file_put_contents($changed_dir.'/diff-expected-images.txt', shell_exec($diff_command));
        }
    }

    $this->assertSame($testPage->getExpectedImages(), $images);
}
/**
* Main data provider.
*
* @return \Generator
*/
public function getSamplePages()
{
    $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages';

    // scandir() returns false when the directory cannot be read; treat that as "no pages".
    $entries = scandir($path) ?: [];

    foreach ($entries as $testPage) {
        $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR;

        // Skip the "." and ".." entries by name instead of assuming they are the first
        // two results (names starting with e.g. "!" or "-" sort before "."), and skip
        // stray files, which cannot hold a test case.
        if ($testPage === '.' || $testPage === '..' || !is_dir($testCasePath)) {
            continue;
        }

        // The directory name doubles as the test case slug and as the data set name.
        $slug = $testPage;
        $source = file_get_contents($testCasePath . 'source.html');
        // Everything except source.html is optional and falls back to an empty value.
        $expectedHTML = file_exists($testCasePath . 'expected.html') ? file_get_contents($testCasePath . 'expected.html') : '';
        $expectedImages = file_exists($testCasePath . 'expected-images.json') ? json_decode(file_get_contents($testCasePath . 'expected-images.json'), true) : [];
        $expectedMetadata = file_exists($testCasePath . 'expected-metadata.json') ? json_decode(file_get_contents($testCasePath . 'expected-metadata.json')) : (object)[];
        $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : [];

        yield $testPage => [new TestPage($slug, $configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)];
    }
}
/**
* Test that Readability throws an exception with malformed HTML.
*
* @throws ParseException
*/
public function testReadabilityThrowsExceptionWithMalformedHTML()
{
    $readability = new Readability(new Configuration());

    // A lone opening tag must be rejected with this exact failure.
    $this->expectExceptionMessage('Invalid or incomplete HTML.');
    $this->expectException(ParseException::class);

    $readability->parse('<html>');
}
/**
* Test that Readability throws an exception with incomplete or short HTML.
*
* @throws ParseException
*/
public function testReadabilityThrowsExceptionWithUnparseableHTML()
{
    $readability = new Readability(new Configuration());

    // A well-formed document with an empty paragraph must be rejected with this exact failure.
    $this->expectExceptionMessage('Could not parse text.');
    $this->expectException(ParseException::class);

    $readability->parse('<html><body><p></p></body></html>');
}
/**
* Test that the Readability object has no content as soon as it is instantiated.
*/
public function testReadabilityCallGetContentWithNoContent()
{
    // Nothing has been parsed yet, so there must be no content to hand out.
    $freshInstance = new Readability(new Configuration());

    $this->assertNull($freshInstance->getContent());
}
}

View File

@ -1,71 +0,0 @@
<?php
namespace fivefilters\Readability\Test;
/**
 * Immutable value object bundling everything that belongs to one sample page of the test suite.
 *
 * All values are stored exactly as passed to the constructor and exposed through plain getters.
 */
class TestPage
{
/** @var string Identifier of the test case (ReadabilityTest uses it as a directory name). */
private $slug;
/** @var array Options that ReadabilityTest merges into the Configuration for this page. */
private $configuration;
/** @var string HTML that is handed to the parser. */
private $sourceHTML;
/** @var string HTML that the parser is expected to produce. */
private $expectedHTML;
/** @var mixed Images that the parser is expected to find. */
private $expectedImages;
/** @var \stdClass Metadata that the parser is expected to extract. */
private $expectedMetadata;
/**
 * Stores the given values without validating or transforming them.
 *
 * @param string $slug
 * @param array $configuration
 * @param string $sourceHTML
 * @param string $expectedHTML
 * @param mixed $expectedImages
 * @param \stdClass $expectedMetadata
 */
public function __construct($slug, $configuration, $sourceHTML, $expectedHTML, $expectedImages, $expectedMetadata)
{
$this->slug = $slug;
$this->configuration = $configuration;
$this->sourceHTML = $sourceHTML;
$this->expectedHTML = $expectedHTML;
$this->expectedImages = $expectedImages;
$this->expectedMetadata = $expectedMetadata;
}
/**
* @return string
*/
public function getSlug()
{
return $this->slug;
}
/**
* @return array
*/
public function getConfiguration()
{
return $this->configuration;
}
/**
* @return string
*/
public function getSourceHTML()
{
return $this->sourceHTML;
}
/**
* @return string
*/
public function getExpectedHTML()
{
return $this->expectedHTML;
}
/**
* @return mixed
*/
public function getExpectedImages()
{
return $this->expectedImages;
}
/**
* @return \stdClass
*/
public function getExpectedMetadata()
{
return $this->expectedMetadata;
}
}

View File

@ -1,3 +0,0 @@
{
"ArticleByLine": true
}

View File

@ -1,3 +0,0 @@
[
"http:\/\/fakehost\/static\/code\/2013\/blanket-coverage.png"
]

View File

@ -1,8 +0,0 @@
{
"Author": "Nicolas Perriault —",
"Direction": null,
"Excerpt": "Nicolas Perriault's homepage.",
"Image": null,
"Title": "Get your Frontend JavaScript Code Covered | Code",
"SiteName": null
}

View File

@ -1,132 +0,0 @@
<section>
<p><strong>So finally you're <a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
write tests, the more confident you are with your code… but how much precisely?
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
help.</strong>
</p>
<p>The idea behind code coverage is to record which parts of your code (functions,
statements, conditionals and so on) have been executed by your test suite,
to compute metrics out of these data and usually to provide tools for navigating
and inspecting them.</p>
<p>Not a lot of frontend developers I know actually test their frontend code,
and I can barely imagine how many of them have ever setup code coverage…
Mostly because there are not many frontend-oriented tools in this area
I guess.</p>
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
actually works…</p>
<blockquote>
<p>Drinking game for web devs:
<br>(1) Think of a noun
<br>(2) Google "&lt;noun&gt;.js"
<br>(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
<a href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
</blockquote>
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
and easy to use JavaScript code coverage library that works both in-browser and
with nodejs.</em>
</p>
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
is just matter of adding this simple line to your HTML test file:</p>
<pre><code>&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
</code></pre>
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
<a href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
</p>
<p>As an example, let's reuse the silly <code>Cow</code> example we used
<a href="http://fakehost/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
<pre><code>// cow.js
(function(exports) {
"use strict";
function Cow(name) {
this.name = name || "Anon cow";
}
exports.Cow = Cow;
Cow.prototype = {
greets: function(target) {
if (!target)
throw new Error("missing target");
return this.name + " greets " + target;
}
};
})(this);
</code></pre>
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
<pre><code>var expect = chai.expect;
describe("Cow", function() {
describe("constructor", function() {
it("should have a default name", function() {
var cow = new Cow();
expect(cow.name).to.equal("Anon cow");
});
it("should set cow's name if provided", function() {
var cow = new Cow("Kate");
expect(cow.name).to.equal("Kate");
});
});
describe("#greets", function() {
it("should greet passed target", function() {
var greetings = (new Cow("Kate")).greets("Baby");
expect(greetings).to.equal("Kate greets Baby");
});
});
});
</code></pre>
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
for Mocha:</p>
<pre><code>&lt;!DOCTYPE html&gt;
&lt;html&gt;
&lt;head&gt;
&lt;meta charset="utf-8"&gt;
&lt;title&gt;Test&lt;/title&gt;
&lt;link rel="stylesheet" media="all" href="vendor/mocha.css"&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;div id="mocha"&gt;&lt;/div&gt;
&lt;div id="messages"&gt;&lt;/div&gt;
&lt;div id="fixtures"&gt;&lt;/div&gt;
&lt;script src="vendor/mocha.js"&gt;&lt;/script&gt;
&lt;script src="vendor/chai.js"&gt;&lt;/script&gt;
&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.setup('bdd');&lt;/script&gt;
&lt;script src="cow.js" data-cover&gt;&lt;/script&gt;
&lt;script src="cow_test.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.run();&lt;/script&gt;
&lt;/body&gt;
&lt;/html&gt;
</code></pre>
<p><strong>Notes</strong>:</p>
<ul>
<li>Notice the <code>data-cover</code> attribute we added to the script tag
loading the source of our library;</li>
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
be loaded.</li>
</ul>
<p>Running the tests now gives us something like this:</p>
<p>
<img alt="screenshot" src="http://fakehost/static/code/2013/blanket-coverage.png">
</p>
<p>As you can see, the report at the bottom highlights that we haven't actually
tested the case where an error is raised in case a target name is missing.
We've been informed of that, nothing more, nothing less. We simply know
we're missing a test here. Isn't this cool? I think so!</p>
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
raw information, not actual proofs that the whole of your <em>code logic</em> has
been actually covered. If you ask me, the best inputs you can get about
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
sessions
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a>
but that's another story.</p>
<p><strong>So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
</p>
</section>

View File

@ -1,233 +0,0 @@
<!DOCTYPE html>
<html class="no-js" lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<title>Get your Frontend JavaScript Code Covered | Code | Nicolas Perriault</title>
<meta
name="description" content="Nicolas Perriault's homepage."/>
<meta name="viewport" content="width=device-width"/>
<link href="//fonts.googleapis.com/css?family=Asap:400,400italic,700,700italic&amp;subset=latin,latin-ext"
rel="stylesheet" type="text/css"/>
<link rel="stylesheet" type="text/css" href="/static/packed.css?1412806084"/>
<link rel="alternate" type="application/rss+xml" href="/code/feed/" title="Code (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/photography/feed/"
title="Photography (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/talks/feed/" title="Talks (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/carnet/feed/"
title="Carnet (RSS)"/>
<link rel="alternate" type="application/rss+xml" href="/feed/" title="Everything (RSS)"/>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
</head>
<body class="code " onload="prettyPrint()">
<!--[if lt IE 7]>
<p class="chromeframe">Your browser is <em>ancient!</em> Please <a href="http://www.quirksmode.org/upgrade.html">upgrade</a>.</p>
<![endif]-->
<div class="container">
<header class="main-title">
<h1><a href="/">Hi, I'm <strong>Nicolas.</strong></a></h1>
<small>I code stuff. I take photos. I write rants.</small>
</header>
<main class="contents" role="main">
<article lang="en" class="code" itemscope="" itemtype="http://schema.org/BlogPosting">
<link itemprop="url" href="/code/2013/get-your-frontend-javascript-code-covered/"/>
<header>
<h2><a itemprop="name" href="/code/2013/get-your-frontend-javascript-code-covered/">Get your Frontend JavaScript Code Covered</a></h2>
</header>
<section>
<p><strong>So finally you're <a href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">testing your frontend JavaScript code</a>? Great! The more you
write tests, the more confident you are with your code… but how much precisely?
That's where <a href="http://en.wikipedia.org/wiki/Code_coverage">code coverage</a> might
help.</strong>
</p>
<p>The idea behind code coverage is to record which parts of your code (functions,
statements, conditionals and so on) have been executed by your test suite,
to compute metrics out of these data and usually to provide tools for navigating
and inspecting them.</p>
<p>Not a lot of frontend developers I know actually test their frontend code,
and I can barely imagine how many of them have ever setup code coverage…
Mostly because there are not many frontend-oriented tools in this area
I guess.</p>
<p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/">Mocha</a> and
actually works…</p>
<blockquote class="twitter-tweet tw-align-center">
<p>Drinking game for web devs:
<br />(1) Think of a noun
<br />(2) Google "&lt;noun&gt;.js"
<br />(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
<a
href="https://twitter.com/ironshay/statuses/370525864523743232">August 22, 2013</a>
</blockquote>
<p><strong><a href="http://blanketjs.org/">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
and easy to use JavaScript code coverage library that works both in-browser and
with nodejs.</em>
</p>
<p>Its use is dead easy, adding Blanket support to your Mocha test suite
is just matter of adding this simple line to your HTML test file:</p>
<pre><code>&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
</code></pre>
<p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js">blanket.js</a>,
<a
href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js">mocha-blanket.js</a>
</p>
<p>As an example, let's reuse the silly <code>Cow</code> example we used
<a
href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">in a previous episode</a>:</p>
<pre><code>// cow.js
(function(exports) {
"use strict";
function Cow(name) {
this.name = name || "Anon cow";
}
exports.Cow = Cow;
Cow.prototype = {
greets: function(target) {
if (!target)
throw new Error("missing target");
return this.name + " greets " + target;
}
};
})(this);
</code></pre>
<p>And its test suite, powered by Mocha and <a href="http://chaijs.com/">Chai</a>:</p>
<pre><code>var expect = chai.expect;
describe("Cow", function() {
describe("constructor", function() {
it("should have a default name", function() {
var cow = new Cow();
expect(cow.name).to.equal("Anon cow");
});
it("should set cow's name if provided", function() {
var cow = new Cow("Kate");
expect(cow.name).to.equal("Kate");
});
});
describe("#greets", function() {
it("should greet passed target", function() {
var greetings = (new Cow("Kate")).greets("Baby");
expect(greetings).to.equal("Kate greets Baby");
});
});
});
</code></pre>
<p>Let's create the HTML test file for it, featuring Blanket and its adapter
for Mocha:</p>
<pre><code>&lt;!DOCTYPE html&gt;
&lt;html&gt;
&lt;head&gt;
&lt;meta charset="utf-8"&gt;
&lt;title&gt;Test&lt;/title&gt;
&lt;link rel="stylesheet" media="all" href="vendor/mocha.css"&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;div id="mocha"&gt;&lt;/div&gt;
&lt;div id="messages"&gt;&lt;/div&gt;
&lt;div id="fixtures"&gt;&lt;/div&gt;
&lt;script src="vendor/mocha.js"&gt;&lt;/script&gt;
&lt;script src="vendor/chai.js"&gt;&lt;/script&gt;
&lt;script src="vendor/blanket.js"
data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.setup('bdd');&lt;/script&gt;
&lt;script src="cow.js" data-cover&gt;&lt;/script&gt;
&lt;script src="cow_test.js"&gt;&lt;/script&gt;
&lt;script&gt;mocha.run();&lt;/script&gt;
&lt;/body&gt;
&lt;/html&gt;
</code></pre>
<p><strong>Notes</strong>:</p>
<ul>
<li>Notice the <code>data-cover</code> attribute we added to the script tag
loading the source of our library;</li>
<li>The HTML test file <em>must</em> be served over HTTP for the adapter to
be loaded.</li>
</ul>
<p>Running the tests now gives us something like this:</p>
<p>
<img alt="screenshot" src="/static/code/2013/blanket-coverage.png"/>
</p>
<p>As you can see, the report at the bottom highlights that we haven't actually
tested the case where an error is raised in case a target name is missing.
We've been informed of that, nothing more, nothing less. We simply know
we're missing a test here. Isn't this cool? I think so!</p>
<p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/">bring you numbers</a> and
raw information, not actual proofs that the whole of your <em>code logic</em> has
been actually covered. If you ask me, the best inputs you can get about
your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html">pair programming</a>
sessions
and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/">code reviews</a>
but that's another story.</p>
<p><strong>So is code coverage a silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
</p>
</section>
<aside>
<p> <span class="article-author" itemprop="author" itemscope="" itemtype="http://schema.org/Person">
<span itemprop="name">Nicolas Perriault</span></span>
<time
datetime="2013-09-29" itemprop="datePublished">2013-09-29</time>— in <a href="/code/" itemprop="genre">Code</a>
<a href="/code/2013/get-your-frontend-javascript-code-covered/">Permalink</a>
<a
rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">License</a><a href="http://flattr.com/submit/auto?url=https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/&amp;title=Get your Frontend JavaScript Code Covered&amp;user_id=n1k0&amp;category=software&amp;language=en">flattr this</a>
</p>
</aside>
<hr/>
<nav> <a class="prev" href="/code/2013/functional-javascript-for-crawling-the-web/">Functional JavaScript for crawling the Web</a>
|
<a
class="next" href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/">Testing your frontend JavaScript code using mocha, chai, and sinon</a>
</nav>
</article>
</main>
<nav class="sidebar">
<ul>
<li class="home"><a href="/" hreflang="en">Home</a>
</li>
<li class="code"><a href="/code/" hreflang="en">Code</a>
</li>
<li class="photography"><a href="/photography/" hreflang="en">Photography</a>
</li>
<li class="talks"><a href="/talks/" hreflang="en">Talks</a>
</li>
<li class="carnet"><a href="/carnet/" hreflang="fr">Carnet <span>fr</span></a>
</li>
<li class="contact"><a href="/contact/" hreflang="en">Contact</a>
</li>
</ul>
</nav>
<footer class="site-footer">
<p>© 2012 Nicolas Perriault — <a href="https://twitter.com/n1k0">Tweet at me</a>
<a
href="https://github.com/n1k0">Get my code</a><a href="http://500px.com/n1k0">Enjoy my pics</a>
<a href="/contact/">Contact me</a>
</p>
</footer>
</div>
<!-- /container -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
<script>
window.jQuery || document.write('&lt;script src="js/libs/jquery-1.7.1.min.js">&lt;\/script>')
</script>
<script type="text/javascript" src="/static/js/libs/prettify/prettify.js"></script>
<script type="text/javascript" src="/static/js/app.js"></script>
<script src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
</body>
</html>

View File

@ -1,8 +0,0 @@
{
"Author": "Nikhil Marathe",
"Direction": null,
"Excerpt": "For more than a decade the Web has used XMLHttpRequest (XHR) to achieve asynchronous requests in JavaScript. While very useful, XHR is not a very ...",
"Image": null,
"Title": "This API is so Fetching!",
"SiteName": "Mozilla Hacks the Web developer blog"
}

View File

@ -1,418 +0,0 @@
<div id="content-main">
<article role="article">
<p>For more than a decade the Web has used XMLHttpRequest (XHR) to achieve
asynchronous requests in JavaScript. While very useful, XHR is not a very
nice API. It suffers from lack of separation of concerns. The input, output
and state are all managed by interacting with one object, and state is
tracked using events. Also, the event-based model doesn’t play well with
JavaScript’s recent focus on Promise- and generator-based asynchronous
programming.</p>
<p>The <a href="https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API">Fetch API</a> intends
to fix most of these problems. It does this by introducing the same primitives
to JS that are used in the HTTP protocol. In addition, it introduces a
utility function <code>fetch()</code> that succinctly captures the intention
of retrieving a resource from the network.</p>
<p>The <a href="https://fetch.spec.whatwg.org">Fetch specification</a>, which
defines the API, nails down the semantics of a user agent fetching a resource.
This, combined with ServiceWorkers, is an attempt to:</p>
<ol>
<li>Improve the offline experience.</li>
<li>Expose the building blocks of the Web to the platform as part of the
<a href="https://extensiblewebmanifesto.org/">extensible web movement</a>.</li>
</ol>
<p>As of this writing, the Fetch API is available in Firefox 39 (currently
Nightly) and Chrome 42 (currently dev). Github has a <a href="https://github.com/github/fetch">Fetch polyfill</a>.</p>
<h2>Feature detection</h2>
<p>Fetch API support can be detected by checking for <code>Headers</code>, <code>Request</code>, <code>Response</code> or <code>fetch</code> on
the <code>window</code> or <code>worker</code> scope.</p>
<h2>Simple fetching</h2>
<p>The most useful, high-level part of the Fetch API is the <code>fetch()</code> function.
In its simplest form it takes a URL and returns a promise that resolves
to the response. The response is captured as a <code>Response</code> object.</p>
<div><pre>fetch<span>(</span><span>"/data.json"</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
<span>// res instanceof Response == true.</span>
<span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
res.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>data<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>data.<span>entries</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span> <span>else</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Looks like the response wasn't perfect, got status"</span><span>,</span> res.<span>status</span><span>)</span><span>;</span>
<span>}</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Fetch failed!"</span><span>,</span> e<span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>Submitting some parameters, it would look like this:</p>
<div><pre>fetch<span>(</span><span>"http://www.example.org/submit.php"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
headers<span>:</span> <span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"application/x-www-form-urlencoded"</span>
<span>}</span><span>,</span>
body<span>:</span> <span>"firstName=Nikhil&amp;favColor=blue&amp;password=easytoguess"</span>
<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
<span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
alert<span>(</span><span>"Perfect! Your settings are saved."</span><span>)</span><span>;</span>
<span>}</span> <span>else</span> <span>if</span> <span>(</span>res.<span>status</span> <span>==</span> <span>401</span><span>)</span> <span>{</span>
alert<span>(</span><span>"Oops! You are not authorized."</span><span>)</span><span>;</span>
<span>}</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
alert<span>(</span><span>"Error submitting form!"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>The <code>fetch()</code> function’s arguments are the same as those passed
to the
<br>
<code>Request()</code> constructor, so you may directly pass arbitrarily
complex requests to <code>fetch()</code> as discussed below.</p>
<h2>Headers</h2>
<p>Fetch introduces 3 interfaces. These are <code>Headers</code>, <code>Request</code> and
<br>
<code>Response</code>. They map directly to the underlying HTTP concepts,
but have
<br>certain visibility filters in place for privacy and security reasons,
such as
<br>supporting CORS rules and ensuring cookies aren’t readable by third parties.</p>
<p>The <a href="https://fetch.spec.whatwg.org/#headers-class">Headers interface</a> is
a simple multi-map of names to values:</p>
<div><pre><span>var</span> content <span>=</span> <span>"Hello World"</span><span>;</span>
<span>var</span> reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/plain"</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"Content-Length"</span><span>,</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"ProcessThisImmediately"</span><span>)</span><span>;</span></pre>
</div>
<p>The same can be achieved by passing an array of arrays or a JS object
literal
<br>to the constructor:</p>
<div><pre>reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"text/plain"</span><span>,</span>
<span>"Content-Length"</span><span>:</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>,</span>
<span>"X-Custom-Header"</span><span>:</span> <span>"ProcessThisImmediately"</span><span>,</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>The contents can be queried and retrieved:</p>
<div><pre>console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Content-Type"</span><span>)</span><span>)</span><span>;</span> <span>// true</span>
console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Set-Cookie"</span><span>)</span><span>)</span><span>;</span> <span>// false</span>
reqHeaders.<span>set</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/html"</span><span>)</span><span>;</span>
reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"AnotherValue"</span><span>)</span><span>;</span>
&nbsp;
console.<span>log</span><span>(</span>reqHeaders.<span>get</span><span>(</span><span>"Content-Length"</span><span>)</span><span>)</span><span>;</span> <span>// 11</span>
console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// ["ProcessThisImmediately", "AnotherValue"]</span>
&nbsp;
reqHeaders.<span>delete</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// []</span></pre>
</div>
<p>Some of these operations are only useful in ServiceWorkers, but they provide
<br>a much nicer API to Headers.</p>
<p>Since Headers can be sent in requests, or received in responses, and have
various limitations about what information can and should be mutable, <code>Headers</code> objects
have a <strong>guard</strong> property. This is not exposed to the Web, but
it affects which mutation operations are allowed on the Headers object.
<br>Possible values are:</p>
<ul>
<li>“none”: default.</li>
<li>“request”: guard for a Headers object obtained from a Request (<code>Request.headers</code>).</li>
<li>“request-no-cors”: guard for a Headers object obtained from a Request
created
<br>with mode “no-cors”.</li>
<li>“response”: naturally, for Headers obtained from Response (<code>Response.headers</code>).</li>
<li>“immutable”: Mostly used for ServiceWorkers, renders a Headers object
<br>read-only.</li>
</ul>
<p>The details of how each guard affects the behaviors of the Headers object
are
<br>in the <a href="https://fetch.spec.whatwg.org">specification</a>. For example,
you may not append or set a “request” guarded Headers “Content-Length”
header. Similarly, inserting “Set-Cookie” into a Response header is not
allowed so that ServiceWorkers may not set cookies via synthesized Responses.</p>
<p>All of the Headers methods throw TypeError if <code>name</code> is not a
<a href="https://fetch.spec.whatwg.org/#concept-header-name">valid HTTP Header name</a>. The mutation operations will throw TypeError
if there is an immutable guard. Otherwise they fail silently. For example:</p>
<div><pre><span>var</span> res <span>=</span> Response.<span>error</span><span>(</span><span>)</span><span>;</span>
<span>try</span> <span>{</span>
res.<span>headers</span>.<span>set</span><span>(</span><span>"Origin"</span><span>,</span> <span>"http://mybank.com"</span><span>)</span><span>;</span>
<span>}</span> <span>catch</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Cannot pretend to be a bank!"</span><span>)</span><span>;</span>
<span>}</span></pre>
</div>
<h2>Request</h2>
<p>The Request interface defines a request to fetch a resource over HTTP.
URL, method and headers are expected, but the Request also allows specifying
a body, a request mode, credentials and cache hints.</p>
<p>The simplest Request is of course, just a URL, as you may do to GET a
resource.</p>
<div><pre><span>var</span> req <span>=</span> <span>new</span> Request<span>(</span><span>"/index.html"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>req.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
console.<span>log</span><span>(</span>req.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
</div>
<p>You may also pass a Request to the <code>Request()</code> constructor to
create a copy.
<br>(This is not the same as calling the <code>clone()</code> method, which
is covered in
<br>the “Reading bodies” section.)</p>
<div><pre><span>var</span> copy <span>=</span> <span>new</span> Request<span>(</span>req<span>)</span><span>;</span>
console.<span>log</span><span>(</span>copy.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
console.<span>log</span><span>(</span>copy.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
</div>
<p>Again, this form is probably only useful in ServiceWorkers.</p>
<p>The non-URL attributes of the <code>Request</code> can only be set by passing
initial
<br>values as a second argument to the constructor. This argument is a dictionary.</p>
<div><pre><span>var</span> uploadReq <span>=</span> <span>new</span> Request<span>(</span><span>"/uploadImage"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
headers<span>:</span> <span>{</span>
<span>"Content-Type"</span><span>:</span> <span>"image/png"</span><span>,</span>
<span>}</span><span>,</span>
body<span>:</span> <span>"image data"</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>The Request’s mode is used to determine if cross-origin requests lead
to valid responses, and which properties on the response are readable.
Legal mode values are <code>"same-origin"</code>, <code>"no-cors"</code> (default)
and <code>"cors"</code>.</p>
<p>The <code>"same-origin"</code> mode is simple, if a request is made to another
origin with this mode set, the result is simply an error. You could use
this to ensure that
<br>a request is always being made to your origin.</p>
<div><pre><span>var</span> arbitraryUrl <span>=</span> document.<span>getElementById</span><span>(</span><span>"url-input"</span><span>)</span>.<span>value</span><span>;</span>
fetch<span>(</span>arbitraryUrl<span>,</span> <span>{</span> mode<span>:</span> <span>"same-origin"</span> <span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Response succeeded?"</span><span>,</span> res.<span>ok</span><span>)</span><span>;</span>
<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Please enter a same-origin URL!"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>The <code>"no-cors"</code> mode captures what the web platform does by default
for scripts you import from CDNs, images hosted on other domains, and so
on. First, it prevents the method from being anything other than “HEAD”,
“GET” or “POST”. Second, if any ServiceWorkers intercept these requests,
they may not add or override any headers except for <a href="https://fetch.spec.whatwg.org/#simple-header">these</a>.
Third, JavaScript may not access any properties of the resulting Response.
This ensures that ServiceWorkers do not affect the semantics of the Web
and prevents security and privacy issues that could arise from leaking
data across domains.</p>
<p><code>"cors"</code> mode is what you’ll usually use to make known cross-origin
requests to access various APIs offered by other vendors. These are expected
to adhere to
<br>the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS">CORS protocol</a>.
Only a <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors">limited set</a> of
headers is exposed in the Response, but the body is readable. For example,
you could get a list of Flickr’s <a href="https://www.flickr.com/services/api/flickr.interestingness.getList.html">most interesting</a> photos
today like this:</p>
<div><pre><span>var</span> u <span>=</span> <span>new</span> URLSearchParams<span>(</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'method'</span><span>,</span> <span>'flickr.interestingness.getList'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'api_key'</span><span>,</span> <span>'&lt;insert api key here&gt;'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'format'</span><span>,</span> <span>'json'</span><span>)</span><span>;</span>
u.<span>append</span><span>(</span><span>'nojsoncallback'</span><span>,</span> <span>'1'</span><span>)</span><span>;</span>
&nbsp;
<span>var</span> apiCall <span>=</span> fetch<span>(</span><span>'https://api.flickr.com/services/rest?'</span> <span>+</span> u<span>)</span><span>;</span>
&nbsp;
apiCall.<span>then</span><span>(</span><span>function</span><span>(</span>response<span>)</span> <span>{</span>
<span>return</span> response.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>json<span>)</span> <span>{</span>
<span>// photo is a list of photos.</span>
<span>return</span> json.<span>photos</span>.<span>photo</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>photos<span>)</span> <span>{</span>
photos.<span>forEach</span><span>(</span><span>function</span><span>(</span>photo<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>photo.<span>title</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>You may not read out the “Date” header since Flickr does not allow it
via
<br>
<code>Access-Control-Expose-Headers</code>.</p>
<div><pre>response.<span>headers</span>.<span>get</span><span>(</span><span>"Date"</span><span>)</span><span>;</span> <span>// null</span></pre>
</div>
<p>The <code>credentials</code> enumeration determines if cookies for the other
domain are
<br>sent to cross-origin requests. This is similar to XHR’s <code>withCredentials</code>
<br>flag, but tri-valued as <code>"omit"</code> (default), <code>"same-origin"</code> and <code>"include"</code>.</p>
<p>The Request object will also give the ability to offer caching hints to
the user-agent. This is currently undergoing some <a href="https://github.com/slightlyoff/ServiceWorker/issues/585">security review</a>.
Firefox exposes the attribute, but it has no effect.</p>
<p>Requests have two read-only attributes that are relevant to ServiceWorkers
<br>intercepting them. There is the string <code>referrer</code>, which is
set by the UA to be
<br>the referrer of the Request. This may be an empty string. The other is
<br>
<code>context</code> which is a rather <a href="https://fetch.spec.whatwg.org/#requestcredentials">large enumeration</a> defining
what sort of resource is being fetched. This could be “image” if the request
is from an
&lt;img&gt; tag in the controlled document, “worker” if it is an attempt to load a
worker script, and so on. When used with the <code>fetch()</code> function,
it is “fetch”.</p>
<h2>Response</h2>
<p><code>Response</code> instances are returned by calls to <code>fetch()</code>.
They can also be created by JS, but this is only useful in ServiceWorkers.</p>
<p>We have already seen some attributes of Response when we looked at <code>fetch()</code>.
The most obvious candidates are <code>status</code>, an integer (default
value 200) and <code>statusText</code> (default value “OK”), which correspond
to the HTTP status code and reason. The <code>ok</code> attribute is just
a shorthand for checking that <code>status</code> is in the range 200-299
inclusive.</p>
<p><code>headers</code> is the Response’s Headers object, with guard “response”.
The <code>url</code> attribute reflects the URL of the corresponding request.</p>
<p>Response also has a <code>type</code>, which is “basic”, “cors”, “default”,
“error” or
<br>“opaque”.</p>
<ul>
<li><code>"basic"</code>: normal, same origin response, with all headers exposed
except
<br>“Set-Cookie” and “Set-Cookie2”.</li>
<li><code>"cors"</code>: response was received from a valid cross-origin request.
<a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors">Certain headers and the body</a> may be accessed.</li>
<li><code>"error"</code>: network error. No useful information describing
the error is available. The Response’s status is 0, headers are empty and
immutable. This is the type for a Response obtained from <code>Response.error()</code>.</li>
<li><code>"opaque"</code>: response for “no-cors” request to cross-origin
resource. <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-opaque">Severely<br>
restricted</a>
</li>
</ul>
<p>The “error” type results in the <code>fetch()</code> Promise rejecting with
TypeError.</p>
<p>There are certain attributes that are useful only in a ServiceWorker scope.
The
<br>idiomatic way to return a Response to an intercepted request in ServiceWorkers
is:</p>
<div><pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>event<span>)</span> <span>{</span>
event.<span>respondWith</span><span>(</span><span>new</span> Response<span>(</span><span>"Response body"</span><span>,</span> <span>{</span>
headers<span>:</span> <span>{</span> <span>"Content-Type"</span> <span>:</span> <span>"text/plain"</span> <span>}</span>
<span>}</span><span>)</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>As you can see, Response has a two argument constructor, where both arguments
are optional. The first argument is a body initializer, and the second
is a dictionary to set the <code>status</code>, <code>statusText</code> and <code>headers</code>.</p>
<p>The static method <code>Response.error()</code> simply returns an error
response. Similarly, <code>Response.redirect(url, status)</code> returns
a Response resulting in
<br>a redirect to <code>url</code>.</p>
<h2>Dealing with bodies</h2>
<p>Both Requests and Responses may contain body data. We’ve been glossing
over it because of the various data types body may contain, but we will
cover it in detail now.</p>
<p>A body is an instance of any of the following types.</p>
<ul>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer">ArrayBuffer</a>
</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/ArrayBufferView">ArrayBufferView</a> (Uint8Array
and friends)</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/Blob">Blob</a>/
<a href="https://developer.mozilla.org/en-US/docs/Web/API/File">File</a>
</li>
<li>string</li>
<li><a href="https://url.spec.whatwg.org/#interface-urlsearchparams">URLSearchParams</a>
</li>
<li><a href="https://developer.mozilla.org/en-US/docs/Web/API/FormData">FormData</a>
currently not supported by either Gecko or Blink. Firefox expects to ship
this in version 39 along with the rest of Fetch.</li>
</ul>
<p>In addition, Request and Response both offer the following methods to
extract their body. These all return a Promise that is eventually resolved
with the actual content.</p>
<ul>
<li><code>arrayBuffer()</code>
</li>
<li><code>blob()</code>
</li>
<li><code>json()</code>
</li>
<li><code>text()</code>
</li>
<li><code>formData()</code>
</li>
</ul>
<p>This is a significant improvement over XHR in terms of ease of use of
non-text data!</p>
<p>Request bodies can be set by passing <code>body</code> parameters:</p>
<div><pre><span>var</span> form <span>=</span> <span>new</span> FormData<span>(</span>document.<span>getElementById</span><span>(</span><span>'login-form'</span><span>)</span><span>)</span><span>;</span>
fetch<span>(</span><span>"/login"</span><span>,</span> <span>{</span>
method<span>:</span> <span>"POST"</span><span>,</span>
body<span>:</span> form
<span>}</span><span>)</span></pre>
</div>
<p>Responses take the first argument as the body.</p>
<div><pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>new</span> File<span>(</span><span>[</span><span>"chunk"</span><span>,</span> <span>"chunk"</span><span>]</span><span>,</span> <span>"archive.zip"</span><span>,</span>
<span>{</span> type<span>:</span> <span>"application/zip"</span> <span>}</span><span>)</span><span>)</span><span>;</span></pre>
</div>
<p>Both Request and Response (and by extension the <code>fetch()</code> function),
will try to intelligently <a href="https://fetch.spec.whatwg.org/#concept-bodyinit-extract">determine the content type</a>.
Request will also automatically set a “Content-Type” header if none is
set in the dictionary.</p>
<h3>Streams and cloning</h3>
<p>It is important to realise that Request and Response bodies can only be
read once! Both interfaces have a boolean attribute <code>bodyUsed</code> to
determine if it is safe to read or not.</p>
<div><pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>"one time use"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
res.<span>text</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>v<span>)</span> <span>{</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
<span>}</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
&nbsp;
res.<span>text</span><span>(</span><span>)</span>.<span>catch</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
console.<span>log</span><span>(</span><span>"Tried to read already consumed Response"</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<p>This decision allows easing the transition to an eventual <a href="https://streams.spec.whatwg.org/">stream-based</a> Fetch
API. The intention is to let applications consume data as it arrives, allowing
for JavaScript to deal with larger files like videos, and perform things
like compression and editing on the fly.</p>
<p>Often, you’ll want access to the body multiple times. For example, you
can use the upcoming <a href="http://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#cache-objects">Cache API</a> to
store Requests and Responses for offline use, and Cache requires bodies
to be available for reading.</p>
<p>So how do you read out the body multiple times within such constraints?
The API provides a <code>clone()</code> method on the two interfaces. This
will return a clone of the object, with a new body. <code>clone()</code> MUST
be called before the body of the corresponding object has been used. That
is, <code>clone()</code> first, read later.</p>
<div><pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>evt<span>)</span> <span>{</span>
<span>var</span> sheep <span>=</span> <span>new</span> Response<span>(</span><span>"Dolly"</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
<span>var</span> clone <span>=</span> sheep.<span>clone</span><span>(</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
&nbsp;
clone.<span>text</span><span>(</span><span>)</span><span>;</span>
console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
&nbsp;
evt.<span>respondWith</span><span>(</span>cache.<span>add</span><span>(</span>sheep.<span>clone</span><span>(</span><span>)</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
<span>return</span> sheep<span>;</span>
<span>}</span><span>)</span><span>)</span><span>;</span>
<span>}</span><span>)</span><span>;</span></pre>
</div>
<h2>Future improvements</h2>
<p>Along with the transition to streams, Fetch will eventually have the ability
to abort running <code>fetch()</code>es and some way to report the progress
of a fetch. These are provided by XHR, but are a little tricky to fit in
the Promise-based nature of the Fetch API.</p>
<p>You can contribute to the evolution of this API by participating in discussions
on the <a href="https://whatwg.org/mailing-list">WHATWG mailing list</a> and
in the issues in the <a href="https://www.w3.org/Bugs/Public/buglist.cgi?product=WHATWG&amp;component=Fetch&amp;resolution=---">Fetch</a> and
<a href="https://github.com/slightlyoff/ServiceWorker/issues">ServiceWorker</a> specifications.</p>
<p>For a better web!</p>
<p><em>The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben<br>
Kelly for helping with the specification and implementation.</em>
</p>
</article>
</div>

View File

@ -1,8 +0,0 @@
{
"Author": "Dublin Core property author",
"Direction": null,
"Excerpt": "Dublin Core property description",
"Image": null,
"Title": "Dublin Core property title",
"SiteName": null
}

View File

@ -1,19 +0,0 @@
<article>
<h2>Test document title</h2>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

View File

@ -1,45 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Title Element</title>
<meta name="title" content="Meta name title"/>
<meta name="og:title" content="Open Graph name title"/>
<meta name="twitter:title" content="Twitter name title"/>
<meta name="DC.title" content="Dublin Core name title"/>
<meta property="dc:title" content="Dublin Core property title"/>
<meta property="twitter:title" content="Twitter property title"/>
<meta property="og:title" content="Open Graph property title"/>
<meta name="author" content="Meta name author"/>
<meta name="DC.creator" content="Dublin Core name author"/>
<meta property="dc:creator" content="Dublin Core property author"/>
<meta name="description" content="Meta name description"/>
<meta name="og:description" content="Open Graph name description"/>
<meta name="twitter:description" content="Twitter name description"/>
<meta name="DC.description" content="Dublin Core name description"/>
<meta property="dc:description" content="Dublin Core property description"/>
<meta property="twitter:description" content="Twitter property description"/>
<meta property="og:description" content="Open Graph property description"/>
</head>
<body>
<article>
<h1>Test document title</h1>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>
</body>
</html>

View File

@ -1,8 +0,0 @@
{
"Author": "Creator Name",
"Direction": null,
"Excerpt": "Preferred description",
"Image": "http:\/\/fakehost.com\/image.jpg",
"Title": "Preferred title",
"SiteName": null
}

View File

@ -1,19 +0,0 @@
<article>
<h2>Test document title</h2>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

View File

@ -1,36 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Title Element</title>
<meta property="x:title dc:title" content="Preferred title"/>
<meta property="og:title twitter:title" content="A title"/>
<meta property="dc:creator twitter:site_name" content="Creator Name"/>
<meta name="author" content="FAIL"/>
<meta property="og:description x:description twitter:description" content="A description"/>
<meta property="dc:description og:description" content="Preferred description"/>
<meta property="twitter:image og:image" content="http://fakehost.com/image.jpg"/>
<meta name="description" content="FAIL"/>
</head>
<body>
<article>
<h1>Test document title</h1>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>
</body>
</html>

View File

@ -1,3 +0,0 @@
{
"ArticleByLine": true
}

View File

@ -1,3 +0,0 @@
[
"https:\/\/www.aclu.org\/sites\/default\/files\/styles\/metatag_og_image_1200x630\/public\/field_share_image\/web18-facebook-socialshare-1200x628-v02.png?itok=p77cQjOm"
]

View File

@ -1,8 +0,0 @@
{
"Author": "Daniel Kahn Gillmor",
"Direction": null,
"Excerpt": "Facebook collects data about people who have never even opted in. But there are ways these non-users can protect themselves.",
"Image": "https:\/\/www.aclu.org\/sites\/default\/files\/styles\/metatag_og_image_1200x630\/public\/field_share_image\/web18-facebook-socialshare-1200x628-v02.png?itok=p77cQjOm",
"Title": "Facebook Is Tracking Me Even Though I\u2019m Not on Facebook",
"SiteName": "American Civil Liberties Union"
}

View File

@ -1,124 +0,0 @@
<div>
<p>
I don't use Facebook. I'm not technophobic — I'm a geek. I've been using email since the early 1990s, I have accounts on hundreds of services around the net, and I do software development and internet protocol design both for work and for fun. I believe that a globe-spanning communications network like the internet can be a positive social force, and I publish much of my own work on the open web.
</p>
<p>
But Facebook and other massive web companies represent a strong push toward unaccountable centralized social control, which I think makes our society more unequal and more unjust. The Cambridge Analytica scandal is one instance of this long-running problem with what I call the "surveillance economy." I don't want to submit to these power structures, and I don’t want my presence on such platforms to serve as bait that lures other people into the digital panopticon.
</p>
<p>
But while I've never "opted in" to Facebook or any of the other big social networks, Facebook still has a detailed profile that can be used to target me. I've never consented to having Facebook collect my data, which can be used to draw very detailed inferences about my life, my habits, and my relationships. As we aim to take Facebook to task for its breach of user trust, we need to think about what its capabilities imply for society overall. After all, if you do #deleteFacebook, you'll find yourself in my shoes: non-consenting, but still subject to Facebook’s globe-spanning surveillance and targeting network.
</p>
<p>
There are at least two major categories of information available to Facebook about non-participants like me: information from other Facebook users, and information from sites on the open web.
</p>
<h3>
<strong>Information from other Facebook users</strong>
</h3>
<p>
When you sign up for Facebook, it encourages you to upload your list of contacts so that the site can "find your friends." Facebook uses this contact information to learn about people, even if those people don't agree to participate. It also links people together based on who they know, even if the shared contact hasn't agreed to this use.
</p>
<p>
For example, I received an email from Facebook that lists the people who have all invited me to join Facebook: my aunt, an old co-worker, a friend from elementary school, etc. This email includes names and email addresses — including my own name — and at least one <a href="https://en.wikipedia.org/wiki/Web_bug">web bug</a> designed to identify me to Facebook’s web servers when I open the email. Facebook records this group of people as my contacts, even though I've never agreed to this kind of data collection.
</p>
<p>
Similarly, I'm sure that I'm in some photographs that someone has uploaded to Facebook — and I'm probably tagged in some of them. I've never agreed to this, but Facebook could still be keeping track.
</p>
<p>
So even if you decide you need to join Facebook, remember that you might be giving the company information about someone else who didn't agree to be part of its surveillance platform.
</p>
<h3>
<strong>Information from sites on the open Web</strong>
</h3>
<p>
Nearly every website that you visit that has a "Like" button is actually encouraging your browser to tell Facebook about your browsing habits. Even if you don't click on the "Like" button, displaying it requires your browser to send a request to Facebook's servers for the "Like" button itself. That request includes <a href="https://en.wikipedia.org/wiki/HTTP_referer">information</a> mentioning the name of the page you are visiting and any Facebook-specific <a href="https://en.wikipedia.org/wiki/HTTP_cookie">cookies</a> your browser might have collected. (See <a href="https://www.facebook.com/help/186325668085084">Facebook's own description of this process</a>.) This is called a "third-party request."
</p>
<p>
This makes it possible for Facebook to create a detailed picture of your browsing history — even if you've never even visited Facebook directly, let alone signed up for a Facebook account.
</p>
<p>
Think about most of the web pages you've visited — how many of them <em>don't</em> have a "Like" button? If you administer a website and you include a "Like" button on every page, you're helping Facebook to build profiles of your visitors, even those who have opted out of the social network. Facebook’s <a href="https://developers.facebook.com/docs/plugins/">“Share” buttons</a> on other sites — along with <a href="https://www.facebook.com/business/learn/facebook-ads-pixel">other tools</a> — work a bit differently from the “Like” button, but do effectively the same thing.
</p>
<p>
The profiles that Facebook builds on non-users don't necessarily include so-called "personally identifiable information" (PII) like names or email addresses. But they do include fairly unique patterns. Using <a href="https://dev.chromium.org/for-testers/providing-network-details">Chromium's NetLog dumping</a>, I performed a simple five-minute browsing test last week that included visits to various sites — but not Facebook. In that test, the PII-free data that was sent to Facebook included information about which news articles I was reading, my dietary preferences, and my hobbies.
</p>
<p>
Given the precision of this kind of mapping and targeting, "PII" isn’t necessary to reveal my identity. How many vegans examine specifications for computer hardware from the ACLU's offices while reading about Cambridge Analytica? Anyway, if Facebook combined that information with the "web bug" from the email mentioned above — which <em>is</em> clearly linked to my name and e-mail address — no guesswork would be required.
</p>
<p>
I'd be shocked if Facebook were not connecting those dots given the goals <a href="https://www.facebook.com/about/privacy/cookies">they claim for data collection</a>:
</p>
<blockquote>
<p>
We use the information we have to improve our advertising and measurement systems so we can show you relevant ads on and off our Services and measure the effectiveness and reach of ads and services.
</p>
</blockquote>
<p>
This is, in essence, exactly what Cambridge Analytica did.
</p>
<h3>
<strong>Consent</strong>
</h3>
<p>
Facebook and other tech companies often deflect accusations against excessive data collection by arguing "consent" — that they harvest and use data with the consent of the users involved.
</p>
<p>
But even if we accept that clicking through a "Terms of Service" that <a href="https://tosdr.org/">no one reads</a> can actually constitute true consent, even if we ignore the fact that these terms are overwhelmingly one-sided and non-negotiable, and even if we accept that it's meaningful for people to give consent when sharing data about other people who may have also opted in — what is the recourse for someone who has not opted into these systems at all?
</p>
<p>
Are those of us who have explicitly avoided agreeing to the Facebook terms of service simply fair game for an industry-wide surveillance and targeting network?
</p>
<h3>
<strong>Privilege</strong>
</h3>
<p>
I don’t mean to critique people who have created a Facebook profile or suggest they deserve whatever they get.
</p>
<p>
My ability to avoid Facebook comes from privilege — I have existing social contacts with whom I know how to stay in touch without using Facebook's network. My job does not require that I use Facebook. I can afford the time and expense to communicate with my electoral representatives and political allies via other channels.
</p>
<p>
Many people do not have these privileges and are compelled to "opt in" on Facebook's non-negotiable terms.
</p>
<p>
Many journalists, organizers, schools, politicians, and others who have good reasons to oppose Facebook's centralized social control feel compelled by Facebook's reach and scale to participate in their practices, even those we know to be harmful. That includes the ACLU.
</p>
<p>
Privacy should not be a luxury good, and while I'm happy to encourage people to opt out of these subtle and socially fraught arrangements, I do not argue that anyone who has signed up has somehow relinquished concerns about their privacy. We need to evaluate privacy concerns in their full social contexts. These are not problems that can be resolved on an individual level, because of the interpersonal nature of much of this data and the complexities of the tradeoffs involved.
</p>
<h3>
<strong>Technical countermeasures</strong>
</h3>
<p>
While they may not solve the problem, there are some technical steps people can take to limit the scope of these surveillance practices. For example, some web browsers do not send "third-party cookies" by default, or <a href="https://wiki.mozilla.org/Thirdparty">they scope cookies</a> so that centralized surveillance doesn't get a single view of one user. The most privacy-preserving modern browser is <a href="https://www.torproject.org/">the Tor Browser</a>, which everyone should have installed and available, even if it's not the browser they choose to use every day. It limits the surveillance ability of systems that you have not signed up for to track you as you move around the web.
</p>
<p>
You can also modify some browsers — for example, with plug-ins for <a href="https://requestpolicycontinued.github.io/">Firefox</a> and <a href="https://chrome.google.com/webstore/detail/umatrix/ogfcmafjalglgifnmanfmnieipoejdcf">Chrome</a> — so that they <a href="https://addons.mozilla.org/en-US/firefox/addon/umatrix/">do not send third-party</a> <a href="https://requestpolicycontinued.github.io/">requests at all</a>. Firefox is also exploring even more <a href="https://addons.mozilla.org/en-US/firefox/addon/multi-account-containers/">privacy-preserving techniques</a><a href="https://addons.mozilla.org/en-US/firefox/addon/multi-account-containers/">.</a>
</p>
<p>
It can’t be denied, though, that these tools are harder to use than the web browsers most people are accustomed to, and they create barriers to some online activities. (For example, logging in to <a href="https://offcampushousing.uconn.edu/login">some sites</a> and accessing some <a href="https://filestore.community.support.microsoft.com/api/images/0253d8fb-b050-401a-834d-9d80a99c0b12">web applications</a> is impossible without third-party cookies.)
</p>
<p>
Some website operators take their visitors' privacy more seriously than others, by reducing the amount of third-party requests. For example, it's possible to display "share on Facebook" or "Like" buttons without sending user requests to Facebook in the first place. The ACLU's own website does this because we believe that the right to read with privacy is a fundamental protection for civic discourse.
</p>
<p>
If you are responsible for running a website, try browsing it with a third-party-blocking extension turned on. Think about how much information you're requiring your users to send to third parties as a condition for using your site. If you care about being a good steward of your visitors' data, you can re-design your website to reduce this kind of leakage.
</p>
<h3>
<strong>Opting out?</strong>
</h3>
<p>
Some advertisers claim that you can "opt out" of their targeted advertising, and even offer <a href="http://optout.aboutads.info/">a centralized place meant to help you do so</a>.&nbsp;However, my experience with these tools isn't a positive one. They don't appear to work all of the time. (In a recent experiment I conducted, two advertisers’ opt-out mechanisms failed to take effect.) And while advertisers claim to allow the user to opt out of "interest-based ads," it's not clear that the opt-outs govern data collection itself, rather than just the use of the collected data for displaying ads. Moreover, opting out on their terms requires the use of third-party cookies, thereby enabling another mechanism that other advertisers can then exploit.
</p>
<p>
It's also not clear how they function over time: How frequently do I need to take these steps? Do they expire? How often should I check back to make sure I’m still opted out? I'd much prefer an approach requiring me to opt <em>in</em> to surveillance and targeting.
</p>
<h3>
<strong>Fix the surveillance economy, not just Facebook</strong>
</h3>
<p>
These are just a few of the mechanisms that enable online tracking. Facebook is just one culprit in this online "surveillance economy," albeit a massive one — the company owns <a href="https://www.instagram.com/">Instagram</a>, <a href="https://atlassolutions.com/">Atlas</a>, <a href="https://www.whatsapp.com/">WhatsApp</a>, and dozens of other internet and technology companies and services. But it’s not the only player in this space. Google’s business model also relies on this kind of surveillance, and there are dozens of smaller players as well.
</p>
<p>
As we work to address the fallout from the current storm around Facebook and Cambridge Analytica, we can't afford to lose sight of these larger mechanisms at play. Cambridge Analytica's failures and mistakes are inherent to Facebook's business model. We need to seriously challenge the social structures that encourage people to opt in to this kind of surveillance. At the same time, we also need to protect those of us who manage to opt out.
</p>
</div>

File diff suppressed because one or more lines are too long

View File

@ -1,8 +0,0 @@
{
"Author": "Organization for Transformative Works",
"Direction": null,
"Excerpt": "An Archive of Our Own, a project of the Organization for Transformative Works",
"Image": null,
"Title": "Conversations with a Cryptid - Chapter 1 - AMournfulHowlInTheNight - \u50d5\u306e\u30d2\u30fc\u30ed\u30fc\u30a2\u30ab\u30c7\u30df\u30a2 | Boku no Hero Academia",
"SiteName": null
}

View File

@ -1,317 +0,0 @@
<div role="article" id="chapters">
<h3 id="work">
Chapter Text
</h3>
<p>
Izuku was struggling to understand how he had even managed to get here, seated before the archvillain of Japan with only a sense of dread to keep him company. All Might sat concealed in an observation room, of the firm opinion that he could only aggravate the prisoner and he sent Izuku off with a strained smile. A vague haze hovered over Izuku’s memory. It started with a simple conversation gone astray on a long drive home.
</p>
<p>
“So, who is All For One? Do we know anything about him beyond what you told me before? He’s been imprisoned for months now.” Izuku remembered asking All Might from the backseat of the car as Detective Tsukauchi leisurely drove along a sprawling highway.
</p>
<p>
Playing on the car radio was an aftermath report of a villain attack in downtown Tokyo. Izuku caught the phrase “liquid body” from the female reporter before Detective Tsukauchi changed the channel.
</p>
<p>
“Nope. Still nothing. No one really wants to speak to him,” All Might had replied brightly. “He gives off polite airs, but he’s a piece of work.” All Might’s mostly obstructed shoulders in the front seat shrugged. “Not much you can do with someone like him. Everything that comes out is a threat or taunt.” All Might carefully waved his hand in a circular motion towards the side of his head.
</p>
<p>
“No one’s even made it through a full interview with him, from what I’ve heard,” Detective Tsukauchi added from behind the wheel. “He plays mind games with them. The prison also has a “no recent events” policy on any discussions with him as well. Just in case he ends up with ideas or has some means of communicating. Given that people only want to ask him about current events, it doesn’t leave much to talk about.”
</p>
<p>
“Wait, they still don’t know what Quirks he has?” Izuku asked exasperatedly. “They can’t if there’s still an information block on visits.”
</p>
<p>
“Nope. We have no idea what he can do. They can run DNA tests, but it’s not like anyone apart from him even knows how his Quirk works. They could get matches with any number of people, but if they’re not in a database then we can’t cross-reference them anyway. Even if they run an analysis, the data doesn’t mean anything without the ability to interpret it,” All Might gestured with a skeletal finger. “It’s a waste of time after the initial tests were conducted. They weren’t game to MRI him either, given he’s definitely got a Quirk that creates metal components.”
</p>
<p>
“No one’s bothered to ask him anything about… anything?” Izuku asked, dumbfounded. “He must be around two-hundred years old and people can’t think of a single non-current affairs thing to ask him?”
</p>
<p>
In some ways it was unfathomable that they’d let a potential resource go to waste. On the other hand, said potential resource had blown up a city, murdered numerous people and terrorised Japan for over a century. At the very least.
</p>
<p>
“Well, I tried to ask him about Shigaraki, but he didn’t say much of anything really. Some garbage about you being too dependent on me and him letting Shigaraki run wild and how he just wanted to be the ultimate evil,” All Might shrugged again. “He spends too much time talking about nothing.”
</p>
<p>
Izuku shifted his head onto his arm. “But, that’s not really nothing, is it?”
</p>
<p>
“What do you mean?” Izuku had the feeling that All Might would have been looking at him with the <i>you’re about to do something stupid aren’t you</i> expression that was thankfully becoming less common.
</p>
<p>
“Well, he clearly doesn’t know anything about us, All Might, if he thinks that you’re just going to let go of me after not even two years of being taught. Maybe Shigaraki was dependent on adult figures, but I don’t even remember my dad and mum’s been busy working and keeping the house together. I’ve never had a lot of adult supervision before,” Izuku laughed nervously. “I had to find ways to keep myself entertained. If anything, I’m on the disobedient side of the scale.” All Might outright giggled.
</p>
<p>
“I’ll say, especially after what happened with Overhaul. I’m surprised your mother let you leave the dorms again after that.”
</p>
<p>
“I’m surprised she didn’t withdraw and ground me until I was thirty.”
</p>
<p>
“Oh? That strict?” Tsukauchi asked.
</p>
<p>
“She has her moments,” Izuku smiled fondly. “Do you think she’d agree to me asking the archvillain of Japan about his Quirk?” Izuku asked, only partially joking. There was an itch at the back of his head, a feeling of something missing that poked and prodded at his senses.
</p>
<p>
All Might coughed and sprayed the dash with a fine red mist. “Absolutely not! I forbid it!”
</p>
<p>
“That’s exactly why I’m asking her and not you,” Izuku grinned from the backseat.
</p>
<p>
“He’s evil!”
</p>
<p>
“He’s ancient. You honestly don’t wonder about the sort of things someone with that life experience and Quirk would have run across to end up the way he did?”
</p>
<p>
“Nope, he made it perfectly clear that he always wanted to be the supreme evil,” All Might snipped through folded arms.
</p>
<p>
“Yeah, and I’ll just take his word for that, won’t I?” Izuku grinned. “If he does nothing but lie, then that’s probably one too, but there’s a grain of truth in there somewhere.”
</p>
<p>
“What would you even do? Harass him into telling you his life story?” All Might sighed.
</p>
<p>
“Not when I can kill him with kindness. Who knows, it might even be poisonous for him.”
</p>
<p>
“You’re explaining this to your mother. Teacher or not, I’m not being on the receiving end of this one.”
</p>
<p>
Izuku blinked for a moment. “You’ll let me?”
</p>
<p>
“I’m not entirely for it, but any prospective information on what influenced Shigaraki can only be a good thing. If anything goes south we can pull you out pretty easily. Just be aware of who and what you’re dealing with.” Struggling, All Might turned a serious look to Izuku around the side of the seat. “<i>Only</i> if your mother gives the okay.”
</p>
<p>
The conversation turned to school for the rest of the way.
</p>
<p>
It might have been curiosity or it might have been the nagging sensation that chewed at his brain for the three weeks that he researched the subject of the conversation. All For One was a cryptid. Mystical in more ways than one, he was only a rumour on a network that was two-hundred years old. There were whispers of a shadowy figure who once ruled Japan, intermingled with a string of conspiracies and fragmented events.
</p>
<p>
Izuku had even braved the dark web, poking and prodding at some of the seedier elements of the world wide web. The internet had rumours, but the dark web had stories.<br>
</p>
<p>
An implied yakuza wrote about his grandfather who lost a fire manipulation Quirk and his sanity without any reason. His grandfather had been institutionalised, crying and repeating “he took it, he took it” until his dying days. No one could console him.
</p>
<p>
Another user spoke of a nursing home where a room full of dementia residents inexplicably became docile and no longer used their Quirks on the increasingly disturbed staff. The nursing home erupted into flames just before a court case against them commenced.
</p>
<p>
A user with neon pink text spoke of how their great-great-great-great grandmother with a longevity Quirk had simply aged rapidly one day and passed away in her sleep, her face a mask of terror. No cause had ever been found.
</p>
<p>
A hacker provided a grainy CCTV recording of a heist and a scanned collection of documents from over a century ago, where there was a flash of light and an entire bank vault had been emptied. What separated it from the usual robbery was that it contained a list containing confidential information on the Quirks of the First Generation. Izuku had greedily snavelled up and saved the video and documents to an external hard drive.
</p>
<p>
Paging through, Izuku saw someone recount how their Quirkless uncle had developed a warp Quirk and gone from rags to riches under a mysterious benefactor. A decade ago, the uncle had simply disappeared.
</p>
<p>
Numerous and terrifying, the stories were scattered nuggets of gold hidden across the web. They’d never last long, vanishing within hours of posting. Izuku bounced from proxy to proxy, fleeing from a series of deletions that seemed to follow Izuku’s aliased postings across snitch.ru, rabbit.az, aconspiracy.xfiles and their compatriots.
</p>
<p>
After thirty-two identity changes (all carefully logged in a separate notebook), a large amount of feigning communal interest in a lucky tabloid article on All For One which had been released at the start of the first of the three weeks, Izuku hung up his tinfoil hat and called it a month. He haphazardly tossed a bulging notebook into his bookshelf and lodged his hard drive in a gap containing seven others and went to dinner.
</p>
<p>
It took another week to present his research to All Might and Tsukauchi, whose jaws reached the proverbial floor.
</p>
<p>
“We never found any of this,” the Detective Tsukauchi exclaimed. “How did you find all of it?”
</p>
<p>
“I asked the right people. Turns out criminals have very long and very unforgiving memories,” Izuku explained through sunken eyes. “There’s more than this that could be linked to him, but these ones seem to be the most obvious.”
</p>
<p>
“They would do, you can’t be head of the underworld without making an army of enemies,” All Might agreed. “You know, if you can get any more information about these events, I think you’ll give people a lot of peace of mind.”
</p>
<p>
“Provided mum agrees to it.”
</p>
<p>
“Only if she agrees to it.”
</p>
<p>
It took another month to convince his mother, who eventually gave in once All Might provided an extremely comprehensive schedule of how the visitations and any resulting research would be carefully balanced against Izuku’s schoolwork and internship.
</p>
<p>
The day of the visit finally arrived, four months after the initial conversation, much to Izuku’s dismay.
</p>
<p>
Izuku remembered how he had arrived, with the Detective and All Might escorting him through its sterile, white innards. A list of rules rattled off at the gate, “no current affairs” was chief among them and an assertion that he’d be dragged from the room if need be if Izuku was to breach any of them. No smuggling of communication devices, no weapons, no Quirks, nothing that could compromise the prisoner’s secure status.
</p>
<p>
Heavily armoured and drilled guards leading him underground into the deepest bowels of the Tartarus complex.
</p>
<p>
Izuku understood the rules, dressed casually in a cotton t-shirt with “Shirt” printed across it in haphazard English and clutching at a carefully screened and utterly blank notebook.
</p>
<p>
Across from him, behind reinforced glass, the archvillain of Japan was bound and unmoving.
</p>
<p>
“Hello,” Izuku initiated uncertainly. His skin had been crawling the moment he crossed the threshold, a memory of the encounter and escape at the Kamino Ward months ago.
</p>
<p>
“Ah, All Might’s disciple,” drawled All For One, “is he too cowardly to come himself? Yet I don’t hear the garments of a hero.” With hardly a word out, All For One had already lunged for the figurative jugular.
</p>
<p>
A stray thought of <i>how does he know who I am if he’s blind and isn’t familiar with me?</i> whispered its way through Izuku’s head.
</p>
<p>
“Oh, no,” Izuku corrected hastily, almost relieved at the lack of any pretence, “I asked if I could talk to you. This isnt exactly hero related.”
</p>
<p>
“Im surprised he said yes.” While there was little by way of expression, Izuku could just about sense the contempt dripping from the prisoners tone. It wasnt anything he wasnt expecting. Kacchan had already said worse to him in earlier years. Water off a ducks back.
</p>
<p>
“Well, hes not my legal guardian, so I think you should be more surprised that mum said yes. Shes stricter with these things than All Might,” Izuku corrected again. “Mum gave the okay, but that was a stressful discussion.” And there it was, a miniscule twitch from the man opposite. A spasm more than anything else. <i>Interesting.</i> Pinned down as he was, the prisoner oozed irritation.
</p>
<p>
“At least your mother is a wise person. I wonder why the student doesnt heed all of the advice of the teacher.” All For Ones tone didnt indicate a question, so much as an implicit statement that All Might wasnt worth listening to in any capacity. Kacchan would have hated the comparison, but the hostility had an almost comfortable familiarity. “He no doubt warned you off speaking to me, overprotective as he is, but here you are.”
</p>
<p>
Izuku found himself smiling at the thought of Kacchan’s outrage if he ever found out about the mental comparison as he replied. “I don’t think it’s normal for anyone my age to listen completely to their teachers. We pick and choose and run with what works best for us. He warned me, but I’m still here. Mum warned me as well, but I think she cared more about the time management aspect of it.”
</p>
<p>
“Is that a recent development?” All For One probed.
</p>
<p>
“Not really. My old homeroom teacher told me not to bother applying to U.A.” His mothers beaming face had carried Izuku through the cheerful and resolute signing of that application form.
</p>
<p>
“I see you followed their advice to the letter,” came the snide, dismissive reply.
</p>
<p>
Izuku hoisted up his legs and sat cross-legged in his seat. Leaning slightly forward as he did so as to better prop up his notebook.
</p>
<p>
“Youre a walking contrarian, arent you? All Might told me about his run ins with you. What someone does or doesnt do really doesnt matter to you, youll just find a way to rationalise it as a negative and go on the attack anyway. What youre currently doing is drawing attention away from yourself and focusing it on me so you can withhold information.” Izuku flipped open his notebook and put pen to paper. “Youve got something fairly big to hide and you diverting attention exposes that motivation as existing anyway. The only real questions here are what and why?” Izuku paused in mortification as the man opposites lips parted. “I just said that aloud, didnt I?”
</p>
<p>
Of the responses Izuku had expected, it wasnt laughter. Unrestrained, Izuku would have expected a violent outburst. In this situation, he would have expected another scathing comment. Instead, All For One laughed breathily, leaning into his bonds. Wheezingly he spoke, “Ill have to change tactics, if that ones too transparent for you. How refreshing.”
</p>
<p>
Doing his best not to glow a blinding red and simultaneously pale at the interest, Izuku carried on. “I add it to the list when you do. Im not emotionally involved enough to really be impacted by what youre saying. I know about you in theory, but thats it. Maybe All Might has a history with you, but I dont really know enough about you personally to…”
</p>
<p>
“Care,” All For One supplied, somewhat subdued as he struggled to breathe. “Youre only here to satisfy your curiosity as to whether or not the stories were true.”
</p>
<p>
Izuku nodded, scratching at his notebook with his left hand. “Yes and no, Im actually here to ask you about how your Quirk works.” <i>For now.</i>
</p>
<p>
Another chortle, more restrained than the last.
</p>
<p>
"What makes you think others havent already asked?” Had All For One been unrestrained, Izuku could imagine the stereotypical scene of the villain confidently leaning back in some overblown chair in a secret lair, drink of choice in hand, if the tone of voice was any indication. Deflections aside, the man easily rose to each comment.
</p>
<p>
“Whether or not they asked its irrelevant if they cant read the answers.” Answers didnt matter if the people involved were too attached to read into the answers. If none of the interviewers had managed a full interview, then it seemed unlikely that any sort of effort was put into understanding the villain.
</p>
<p>
“And you think you can? What expertise do you hold above theirs?” Doubt and reprimand weighted the words. Oddly enough, had Izuku been any younger he could have mistaken the man for a disapproving parent rebuking an overly ambitious child. Albeit an extremely evil one.
</p>
<p>
Izuku inhaled shortly and went for it. “If there’s something I know, it’s Quirks and how they work. Maybe I don’t know you, but I don’t really need to. Quirks fall under broad categories of function. You can take and give, consent doesn’t seem to be a factor. You either can’t “see” certain types of Quirks or you need to have prior knowledge of it before you take it with what I know about your brother. Despite your <i>nom de guerre</i>, because we both know it’s not your real name, you have a history of giving multiple Quirks and causing brain damage to the receiver. You clearly aren’t impacted by those same restrictions, so it must either alter your brain mapping or adjust functions to allow for simultaneous use and storage. It also must isolate or categorise the Quirks you stock, because from the few people who do remember you, you creating certain Quirks is always in the context of giving them to someone else meaning there’s probably an inherent immunity to stop it from tainting your own Quirk with a mutation,” Izuku mumbled, almost to himself. “The only thing really in question about your Quirk is the finer details and whether or not you need to maintain those features or if they’re inherent and your hard limit for holding Quirks.”
</p>
<p>
There was silence, for only a moment. “If only my hands were free, I would clap for such a thoughtful assessment. Clearly youre not all brawn,” All For One positively purred. “Speculate away.” A wide and slightly unhinged smile was directed at Izuku.
</p>
<p>
It was all Izuku could do not to wince at the eagerness. An image of a nervous All Might, hidden in the observation room above with the grim-faced prison staff, came to mind.
</p>
<p>
“I note that you said thoughtful and not correct,” and Izuku breathed and unsteadily jotted it down in his notebook. “You dont seem bothered by the guess.”
</p>
<p>
“Few people live long enough to question my Quirk, let alone have the talent to guess so thoughtfully at its functions. It seems we share a hobby.” There was something terribly keen in that voice that hadnt been there before, twisting itself through the compliment.
</p>
<p>
“I suppose it helps that youre playing along out of boredom,” Izuku verbally dodged, unease uncoiling itself from the back of his mind.
</p>
<p>
“I <i>was</i> playing along out of boredom,” All For One corrected smoothly. “Now, Im curious. Admittedly, my prior assumptions of you werent generous, but Ive been too hasty in my assessments before.”
</p>
<p>
“Ill pack up and leave now if thats the case,” Izuku replied with only half an ear on the conversation as the words on his page began to drastically expand to distract himself from the building anxiety.
</p>
<p>
“Sarcasm, so you do have characteristics of a normal teenager. Your willingness to maim yourself has often left me wondering…”
</p>
<p>
“Youre deflecting again,” Izuku observed. “Im not sure if thats a nervous habit for you or if youre doing it because Im close to being right about your Quirk. That being said, I dont think you know what a normal teenager is if Shigaraki is any indication. Hes about seven years too late for his rebellious phase.”
</p>
<p>
“Im hurt and offended,” came the amused reply.
</p>
<p>
“By how Shigaraki ended up or your parenting? You only have yourself to blame for both of them.”
</p>
<p>
“How harsh. Shigaraki is a product of society that birthed him. I cant take credit for all of the hard work,” All For One laid out invitingly. Perhaps someone else would have risen to the bait, but Izuku was already packing his mental bags and heading for the door.
</p>
<p>
Clearly the prisoners anticipation had registered poorly with someone in the observation room, because a voice rang through the air. “Times up Midoriya-kun.”
</p>
<p>
“Okay!” Izuku called back and etched out his last thoughtful of words, untangled his legs and rose to his feet.
</p>
<p>
“What a shame, my visitations are always so short,” All For One spoke mournfully.
</p>
<p>
“Well, you did blow up half a city. They could have just let you suffocate instead. Same time next week, then?” Izuku offered brightly, notebook stuffed into a pocket and was followed out the door by wheezing laughter.
</p>
<p>
It was only after he had made it safely back to the communal room where All Might waited did he allow the spring to fade from his step and discard his nervous smile. Shuddering, he turned to All Might whose face was set in a grimace.
</p>
<p>
“I wont say I told you so,” All Might offered, perched on the edge of his couch like a misshapen vulture.
</p>
<p>
“Hes… not really what I was expecting. I was expecting someone, more openly evil.” Izuku allowed himself to collapse into the leather of the seat. He shakily reached for the warm tea that had been clearly been prepared the moment Izuku left the cell. “I suppose he does it to lull people into a false sense of security. I didnt understand how someone with only half a set of expressions could have “villain” written all over them until I met him.”
</p>
<p>
“Hes always been like that. He feigns concern and sympathy to lure in societys outcasts. Theyre easy targets,” All Might said through a mouthful of biscuit.
</p>
<p>
“Has he ever tried it on any of the One For All successors?”
</p>
<p>
“Not really, but you might have accidentally given him the incentive for it. He never had access to any of the One For All wielders while they were young.” All Might snorted, “not that itll make a difference with you”.
</p>
<p>
“I think he was trying to gauge me for a world view before the wardens ended it. I need more time to work out his response to the stuff on his Quirk.”
</p>
<p>
“Hes conversation starved since its solitary confinement. If what the people monitoring his brain activity said was true, youre the most exciting thing to have happened to him in months. He replied after you left, said he was looking forward to it.”
</p>
<p>
“Thats pretty sad."
</p>
<p>
“Its even sadder that were the only two members of the public who have had anything to do with him. Stain gets a pile of mail from his “fans”, but All For One has nothing,” All Might waved a tea spoon. “Thats what he gets.”
</p>
<p>
“Lets get out of here and tell Detective Tsukauchi how it went.” Izuku gulped down his tea and headed for the exit, with him and All Might reaching it at roughly the same amount of time.
</p>
<p>
“At least your mum’s making katsudon for us tonight,” was All Might’s only optimistic comment.
</p>
<p>
Anxiety was still ebbing over Izuku after Tsukauchi had been debriefed in the car.
</p>
<p>
<i>“It seems we share a hobby.”</i> Haunted Izuku on the drive home. As if ripping someones Quirk from them and leaving them lying traumatised on the ground was just a fun pastime and not an act of grievous bodily harm.
</p>
<p>
And hed be dealing with him again in another week.
</p>
</div>

File diff suppressed because one or more lines are too long

View File

@ -1,3 +0,0 @@
{
"ArticleByLine": true
}

View File

@ -1,4 +0,0 @@
[
"https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x215.jpg",
"https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg"
]

View File

@ -1,8 +0,0 @@
{
"Author": "Dan Goodin - Apr 16, 2015 8:02 pm UTC",
"Direction": null,
"Excerpt": "Two-year-old bug exposes thousands of servers to crippling attack.",
"Image": "https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x215.jpg",
"Title": "Just-released Minecraft exploit makes it easy to crash game servers",
"SiteName": "Ars Technica"
}

View File

@ -1,91 +0,0 @@
<div>
<header>
<h4>
Biz &amp; IT —
</h4>
<h2 itemprop="description">
Two-year-old bug exposes thousands of servers to crippling attack.
</h2>
</header>
<div itemprop="articleBody">
<figure>
<img src="https://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg" alt="Just-released Minecraft exploit makes it easy to crash game servers">
<figcaption>
</figcaption>
</figure>
<p>
A flaw in the wildly popular online game <em>Minecraft</em> makes it easy for just about anyone to crash the server hosting the game, according to a computer programmer who has released proof-of-concept code that exploits the vulnerability.
</p>
<p>
"I thought a lot before writing this post," Pakistan-based developer Ammar Askar wrote in a <a href="http://blog.ammaraskar.com/minecraft-vulnerability-advisory">blog post published Thursday</a>, 21 months, he said, after privately reporting the bug to <em>Minecraft</em> developer Mojang. "On the one hand I don't want to expose thousands of servers to a major vulnerability, yet on the other hand Mojang has failed to act on it."
</p>
<p>
The bug resides in the <a href="https://github.com/ammaraskar/pyCraft">networking internals of the <em>Minecraft</em> protocol</a>. It allows the contents of inventory slots to be exchanged, so that, among other things, items in players' hotbars are displayed automatically after logging in. <em>Minecraft</em> items can also store arbitrary metadata in a file format known as <a href="http://wiki.vg/NBT">Named Binary Tag (NBT)</a>, which allows complex data structures to be kept in hierarchical nests. Askar has released <a href="https://github.com/ammaraskar/pyCraft/tree/nbt_exploit">proof-of-concept attack code</a> he said exploits the vulnerability to crash any server hosting the game. Here's how it works.
</p>
<blockquote>
<p>
The vulnerability stems from the fact that the client is allowed to send the server information about certain slots. This, coupled with the NBT formats nesting allows us to <em>craft</em> a packet that is incredibly complex for the server to deserialize but trivial for us to generate.
</p>
<p>
In my case, I chose to create lists within lists, down to five levels. This is a json representation of what it looks like.
</p>
<div>
<pre><code data-lang="javascript"><span>rekt</span><span>:</span> <span>{</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>list</span><span>:</span> <span>[</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>]</span>
<span>...</span>
<span>}</span></code></pre>
</div>
<p>
The root of the object, <code>rekt</code>, contains 300 lists. Each list has a list with 10 sublists, and each of those sublists has 10 of their own, up until 5 levels of recursion. Thats a total of <code>10^5 * 300 = 30,000,000</code> lists.
</p>
<p>
And this isnt even the theoretical maximum for this attack. Just the nbt data for this payload is 26.6 megabytes. But luckily Minecraft implements a way to compress large packets, lucky us! zlib shrinks down our evil data to a mere 39 kilobytes.
</p>
<p>
Note: in previous versions of Minecraft, there was no protocol wide compression for big packets. Previously, NBT was sent compressed with gzip and prefixed with a signed short of its length, which reduced our maximum payload size to <code>2^15 - 1</code>. Now that the length is a varint capable of storing integers up to <code>2^28</code>, our potential for attack has increased significantly.
</p>
<p>
When the server will decompress our data, itll have 27 megs in a buffer somewhere in memory, but that isnt the bit thatll kill it. When it attempts to parse it into NBT, itll create java representations of the objects meaning suddenly, the sever is having to create several million java objects including ArrayLists. This runs the server out of memory and causes tremendous CPU load.
</p>
<p>
This vulnerability exists on almost all previous and current Minecraft versions as of 1.8.3, the packets used as attack vectors are the <a href="http://wiki.vg/Protocol#Player_Block_Placement">0x08: Block Placement Packet</a> and <a href="http://wiki.vg/Protocol#Creative_Inventory_Action">0x10: Creative Inventory Action</a>.
</p>
<p>
The fix for this vulnerability isnt exactly that hard, the client should never really send a data structure as complex as NBT of arbitrary size and if it must, some form of recursion and size limits should be implemented.
</p>
<p>
These were the fixes that I recommended to Mojang 2 years ago.
</p>
</blockquote>
<p>
Ars is asking Mojang for comment and will update this post if company officials respond.
</p>
</div>
</div>

View File

@ -1,634 +0,0 @@
<!DOCTYPE html>
<html lang="en-us" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-us">
<head>
<title>
Just-released Minecraft exploit makes it easy to crash game servers | Ars Technica
</title>
<script type="text/javascript">
//<![CDATA[
ars = {"ASSETS":"https:\/\/cdn.arstechnica.net\/wp-content\/themes\/ars\/assets","HOME_URL":"https:\/\/arstechnica.com","LOGIN_URL":"https:\/\/arstechnica.com\/services\/login-desktop.html?v=1","CIVIS":"\/civis","THEME":"light","VIEW":"grid","MOBILE":false,"SUBSCRIBER":false,"PLUS_PLUS":false,"LOGGED":false,"USER_ID":null,"ENV":"production","AD":{"tags":["denial-of-service-attack","exploits","minecraft","vulnerabilities"],"channel":"information-technology","slug":"just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers","template_type":"article","queue":[],"server":"production"},"TOTAL":97063,"UNREAD":0,"RECENT":[1698939,1698645,1698804,1698783,1698769,1698663,1698682,1698690,1698667,1698588,1698619,1697597,1698183,1698597,1698540,1698542,1698370,1698442,1698274,1698421,1698346,1698367,1698356,1698294,1698335],"LOGINS":true,"CROSS":false,"PARSELY":"arstechnica.com","COMMENTS":false,"HOMEPAGE":false,"SITE":1,"READY":[],"SHOW_ADS":true,"IMG_PROXY":"https:\/\/cdn.arstechnica.net\/i\/","CATEGORY":"information-technology","PAGETITLE":"","ZEN_MODE":false};
//]]>
</script>
<link rel="stylesheet" type="text/css" media="all" href="https://cdn.arstechnica.net/wp-content/themes/ars/assets/css/main-130fcfcce0.css" />
<link rel="alternate" type="application/rss+xml" href="http://feeds.arstechnica.com/arstechnica/index/" />
<link rel="shortcut icon" href="https://cdn.arstechnica.net/favicon.ico" />
<link rel="icon" type="image/x-icon" href="https://cdn.arstechnica.net/favicon.ico" />
<link rel="apple-touch-icon" sizes="180x180" href="https://cdn.arstechnica.net/wp-content/themes/ars/assets/img/ars-ios-icon-d9a45f558c.png" />
<link rel="mask-icon" href="https://cdn.arstechnica.net/wp-content/themes/ars/assets/img/ars-macos-safari-8997f76b21.svg" color="#ff4e00" />
<link rel="icon" sizes="192x192" href="https://cdn.arstechnica.net/wp-content/themes/ars/assets/img/material-ars-db41652381.png" />
<meta name="application-name" content="Ars Technica" />
<meta name="msapplication-starturl" content="http://arstechnica.com/" />
<meta name="msapplication-tooltip" content="Ars Technica: Serving the technologist for 1.2 decades" />
<meta name="msapplication-task" content="name=News;action-uri=http://arstechnica.com/;icon-uri=https://cdn.arstechnica.net/favicon.ico" />
<meta name="msapplication-task" content="name=Features;action-uri=http://arstechnica.com/features/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-features.ico" />
<meta name="msapplication-task" content="name=OpenForum;action-uri=http://arstechnica.com/civis/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-forum.ico" />
<meta name="msapplication-task" content="name=Subscribe;action-uri=http://arstechnica.com/subscriptions/;icon-uri=https://cdn.arstechnica.net/ie-jump-menu/jump-subscribe.ico" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="advertising" content="ask" />
<meta property="fb:admins" content="592156917" />
<meta property="fb:admins" content="108943" />
<meta property="fb:pages" content="19374573752" />
<meta name="format-detection" content="telephone=no" />
<meta name="theme-color" content="#000000" />
<meta name="viewport" content="width=device-width,initial-scale=1" /><!-- cache miss 581:single/meta:5a5daf59fa5245a64fe8615caa0b1d1b -->
<meta name="parsely-page" content="{&quot;title&quot;:&quot;Just-released Minecraft exploit makes it easy to crash game servers&quot;,&quot;link&quot;:&quot;https:\/\/arstechnica.com\/information-technology\/2015\/04\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/&quot;,&quot;type&quot;:&quot;post&quot;,&quot;author&quot;:&quot;Dan Goodin&quot;,&quot;post_id&quot;:648287,&quot;pub_date&quot;:&quot;2015-04-16T20:02:01Z&quot;,&quot;section&quot;:&quot;Biz &amp; IT&quot;,&quot;tags&quot;:[&quot;denial-of-service-attack&quot;,&quot;exploits&quot;,&quot;minecraft&quot;,&quot;vulnerabilities&quot;,&quot;type: report&quot;],&quot;image_url&quot;:&quot;https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-150x150.jpg&quot;}" />
<meta name="parsely-metadata" content="{&quot;type&quot;:&quot;report&quot;,&quot;title&quot;:&quot;Just-released Minecraft exploit makes it easy to crash game servers&quot;,&quot;post_id&quot;:648287,&quot;lower_deck&quot;:&quot;Two-year-old bug exposes thousands of servers to crippling attack.&quot;,&quot;image_url&quot;:&quot;https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-150x150.jpg&quot;,&quot;listing_image_url&quot;:&quot;https:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-300x150.jpg&quot;}" />
<link rel="canonical" href="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/" />
<link rel="amphtml" href="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?amp=1" />
<link rel="shorturl" href="https://arstechnica.com/?p=648287" />
<meta name="description" content="Two-year-old bug exposes thousands of servers to crippling attack." />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:url" content="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/" />
<meta name="twitter:title" content="Just-released Minecraft exploit makes it easy to crash game servers" />
<meta name="twitter:description" content="Two-year-old bug exposes thousands of servers to crippling attack." />
<meta name="twitter:site" content="@arstechnica" />
<meta name="twitter:domain" content="arstechnica.com" />
<meta property="og:site_name" content="Ars Technica" />
<meta name="twitter:image:src" content="https://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x215.jpg" />
<meta name="twitter:image:width" content="640" />
<meta name="twitter:image:height" content="215" />
<meta name="twitter:creator" content="@dangoodin001" />
<meta property="og:url" content="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/" />
<meta property="og:title" content="Just-released Minecraft exploit makes it easy to crash game servers" />
<meta property="og:image" content="https://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x215.jpg" />
<meta property="og:description" content="Two-year-old bug exposes thousands of servers to crippling attack." />
<meta property="og:type" content="article" /><!-- cache hit 581:single/header:5a5daf59fa5245a64fe8615caa0b1d1b -->
<!-- Google Tag Manager DataLayer -->
<script>
<![CDATA[
window.dataLayer = window.dataLayer || [];
window.dataLayer.push({"event":"data-layer-loaded","user":{"ars_userId":undefined,"amg_userId":undefined,"uID":undefined,"sID":undefined,"loginStatus":false,"subscriberStatus":"none","infinityId":undefined,"registrationSource":undefined,"mdw_cnd_id":undefined,"monthlyVisits":undefined,"accessPaywall":undefined,"view":"grid","theme":"light","show_comments":false},"content":{"pageTemplate":"single","pageType":"article|report","contentCategory":"information-technology","section":"information technology","subsection":undefined,"contributor":"Dan Goodin","contentID":648287,"contentLength":835,"display":"Just-released Minecraft exploit makes it easy to crash game servers","contentSource":"web","pageAssets":undefined,"uniqueContentCount":undefined,"monthlyContentCount":undefined,"publishDate":"2015-04-16T20:02:01+00:00","modifiedDate":"2015-04-16T20:11:02+00:00","keywords":"denial of service attack|exploits|minecraft|vulnerabilities","dataSource":undefined},"marketing":{"campaignName":undefined,"circCampaignId":undefined,"internalCampaignId":undefined,"brand":"Ars Technica","certified_mrc_data":undefined,"condeNastId":undefined},"page":{"pID":undefined,"syndicatorUrl":undefined,"pageURL":"https:\/\/arstechnica.com\/?p=648287","canonical":"https:\/\/arstechnica.com\/information-technology\/2015\/04\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/","canonicalPathName":"\/information-technology\/2015\/04\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/"},"search":{"facets":undefined,"searchTerms":undefined},"site":{"appVersion":"1.0.0"}});
]]>
</script><!-- End Google Tag Manager DataLayer -->
<!-- Google Tag Manager -->
<script>
<![CDATA[
(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
})(window,document,'script','dataLayer','GTM-NLXNPCQ');
]]>
</script><!-- End Google Tag Manager -->
<!-- OneTrust Cookies Consent Notice start -->
<script src="https://cdn.cookielaw.org/scripttemplates/otSDKStub.js" type="text/javascript" charset="UTF-8" data-domain-script="b10882a1-8446-4e7d-bfb2-ce2c770ad910"></script>
<script type="text/javascript">
//<![CDATA[
function OptanonWrapper(){};
//]]>
</script>
<script src="https://cdn.cookielaw.org/opt-out/otCCPAiab.js" type="text/javascript" charset="UTF-8" ccpa-opt-out-ids="C0002,C0003,C0004,C0005" ccpa-opt-out-geo="ca" ccpa-opt-out-lspa="true"></script><!-- OneTrust Cookies Consent Notice end -->
<script src="https://www.googletagservices.com/tag/js/gpt.js" id="gpt-script" async="async"></script>
<script>
<![CDATA[
window.googletag=window.googletag||{};window.googletag.cmd=window.googletag.cmd||[];window.cns=window.cns||{};window.cns.queue=[];window.cns.async=function(s,c){cns.queue.push({service:s,callback:c})};window.sparrowQueue=window.sparrowQueue||[];
]]>
</script>
<link rel="dns-prefetch" href="//aax.amazon-adsystem.com" />
<link rel="preconnect" href="//aax.amazon-adsystem.com" crossorigin="" />
<link rel="preconnect" href="https://mb.moatads.com" crossorigin="" />
<script src="https://c.amazon-adsystem.com/aax2/apstag.js" async="async"></script>
<script src="https://cdn.arstechnica.net/cns/prebid.min.js?v=1597375105"></script>
<script src="https://js-sec.indexww.com/ht/p/183973-93942139695505.js" async="async"></script>
<script src="https://z.moatads.com/condenastprebidheader987326845656/moatheader.js" async="async"></script>
<script>
<![CDATA[
window.cns.pageContext = {"contentType":"article","templateType":"article","channel":"information-technology","subChannel":"","slug":"just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers","server":"production","keywords":{"tags":["denial-of-service-attack","exploits","minecraft","vulnerabilities"],"cm":[],"platform":["wordpress"],"copilotid":""}};
]]>
</script>
<script src="https://cdn.arstechnica.net/cns/ars-technica.min.js?v=1597375105"></script>
<script type="text/javascript" src="https://cdn.arstechnica.net/wp-content/themes/ars/assets/js/ars-32ecec341f.ads.us.js"></script>
</head>
<body class="post-template-default single single-post postid-648287 single-format-standard grid-view light blog-us">
<!-- Google Tag Manager (noscript) -->
<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-NLXNPCQ" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript> <!-- End Google Tag Manager (noscript) -->
<aside class="ad ad_crown" aria-label="Top of page advertisement"></aside>
<div class="site-wrapper">
<a class="screen-reader-text skip-link" href="#main" aria-label="Skip to main content">Skip to main content</a>
<header class="site-header">
<div class="header-left">
<a href="https://arstechnica.com" id="header-logo" title="Ars Technica Homepage"></a>
</div>
<div class="header-right">
<nav id="header-nav-primary">
<ul>
<li>
<a class="nav-link section-information-technology active" href="/information-technology/">Biz &amp; IT</a>
</li>
<li>
<a class="nav-link section-gadgets" href="/gadgets/">Tech</a>
</li>
<li>
<a class="nav-link section-science" href="/science/">Science</a>
</li>
<li>
<a class="nav-link section-tech-policy" href="/tech-policy/">Policy</a>
</li>
<li>
<a class="nav-link section-cars" href="/cars/">Cars</a>
</li>
<li>
<a class="nav-link section-gaming" href="/gaming/">Gaming &amp; Culture</a>
</li>
<li>
<a class="nav-link store" href="/store/">Store</a>
</li>
<li>
<a class="nav-link forums" href="/civis/">Forums</a>
</li>
</ul>
</nav><a href="/store/product/subscriptions/" class="header-highlight-link">Subscribe</a>
<div class="dropdown" id="header-search">
<a href="/search/" class="dropdown-toggle search-toggle" aria-label="Search" aria-expanded="false"></a>
<div class="dropdown-content">
<form action="/search/" method="get" id="search_form" name="search_form">
<input type="hidden" name="ie" value="UTF-8" /> <input type="text" name="q" id="hdr_search_input" value="" aria-label="Search..." placeholder="Search..." />
</form><a class="nav-search-close">Close</a>
</div>
</div>
<div class="dropdown dropdown-mega" id="header-burger">
<a href="#site-menu" class="dropdown-toggle" aria-label="Menu" aria-expanded="false"></a>
<div id="site-menu" class="dropdown-content">
<section class="burger-navigate">
<h3>
Navigate
</h3>
<ul>
<li>
<a class="nav-link store" href="/store/">Store</a>
</li>
<li>
<a class="nav-link subscribe" href="/store/product/subscriptions/">Subscribe</a>
</li>
<li>
<a class="nav-link videos" href="http://video.arstechnica.com/">Videos</a>
</li>
<li>
<a class="nav-link section-features" href="/features/">Features</a>
</li>
<li>
<a class="nav-link section-reviews" href="/reviews/">Reviews</a>
</li>
</ul>
<ul>
<li>
<a class="nav-link page-rss-feeds" href="/rss-feeds/">RSS Feeds</a>
</li>
<li>
<a class="nav-link mobile" href="/?view=mobile">Mobile Site</a>
</li>
</ul>
<ul>
<li>
<a class="nav-link page-about-us" href="/about-us/">About Ars</a>
</li>
<li>
<a class="nav-link page-staff-directory" href="/staff-directory/">Staff Directory</a>
</li>
<li>
<a class="nav-link page-contact-us" href="/contact-us/">Contact Us</a>
</li>
</ul>
<ul>
<li>
<a class="nav-link page-advertise-with-us" href="/advertise-with-us/">Advertise with Ars</a>
</li>
<li>
<a class="nav-link page-reprints" href="/reprints/">Reprints</a>
</li>
</ul>
</section>
<section class="burger-filter">
<h3>
Filter by topic
</h3>
<ul id="burger-nav-primary">
<li>
<a class="nav-link section-information-technology active" href="/information-technology/">Biz &amp; IT</a>
</li>
<li>
<a class="nav-link section-gadgets" href="/gadgets/">Tech</a>
</li>
<li>
<a class="nav-link section-science" href="/science/">Science</a>
</li>
<li>
<a class="nav-link section-tech-policy" href="/tech-policy/">Policy</a>
</li>
<li>
<a class="nav-link section-cars" href="/cars/">Cars</a>
</li>
<li>
<a class="nav-link section-gaming" href="/gaming/">Gaming &amp; Culture</a>
</li>
<li>
<a class="nav-link store" href="/store/">Store</a>
</li>
<li>
<a class="nav-link forums" href="/civis/">Forums</a>
</li>
</ul>
</section>
<section class="burger-settings">
<h3>
Settings
</h3>
<div>
<div class="burger-layout">
<p>
Front page layout
</p>
<div class="burger-layout-grid">
<a rel="nofollow" href="http://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=grid" class=""><br />
Grid
<div class="faux-radio active"></div></a>
</div>
<div class="burger-layout-list">
<a rel="nofollow" href="http://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=archive" class=""><br />
List
<div class="faux-radio"></div></a>
</div>
</div>
<div class="burger-theme">
<p>
Site theme
</p>
<div class="burger-theme-light">
<a rel="nofollow" href="http://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?theme=light" class=""><span><span>Black on white</span></span>
<div class="faux-radio active"></div></a>
</div>
<div class="burger-theme-dark">
<a rel="nofollow" href="http://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?theme=dark" class=""><span><span>White on black</span></span>
<div class="faux-radio"></div></a>
</div>
</div>
</div>
</section>
</div>
</div>
<div class="dropdown dropdown-mega" id="header-account">
<a href="https://arstechnica.com/civis/ucp.php?mode=login&amp;return_to=%2Finformation-technology%2F2015%2F04%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" class="dropdown-toggle" aria-expanded="false">Sign in</a>
<div class="dropdown-content">
<section class="profile-activity">
<h3>
Comment activity
</h3>
<p>
Sign up or login to join the discussions!
</p>
</section>
<section class="profile-settings">
<form id="login-form" action="https://arstechnica.com/civis/ucp.php?mode=login" method="post" name="login-form">
<input type="text" name="username" id="username" placeholder="Username or Email" aria-label="Username or Email" /> <input type="password" name="password" id="password" placeholder="Password" aria-label="Password" /> <input type="submit" value="Submit" class="button button-orange button-wide" name="login" /> <label id="remember-label"><input type="checkbox" name="autologin" id="autologin" /> Stay logged in</label> <span>|</span> <a href="/civis/ucp.php?mode=sendpassword">Having trouble?</a> <input type="hidden" name="redirect" value="./ucp.php?mode=login&amp;autoredirect=1&amp;return_to=%2Finformation-technology%2F2015%2F04%2Fjust-released-minecraft-exploit-makes-it-easy-to-crash-game-servers%2F" /> <input type="hidden" name="return_to" value="/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/" />
</form>
<div class="register-account">
<span>Sign up to comment and more</span> <a href="https://arstechnica.com/civis/ucp.php?mode=register" class="signup-btn button button-wide">Sign up</a>
</div>
</section>
</div>
</div>
</div>
</header>
<main id="main" class="content-wrapper">
<script type="text/javascript">
//<![CDATA[
ars.ARTICLE = {"url":"https:\/\/arstechnica.com\/information-technology\/2015\/04\/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers\/","short_url":"https:\/\/arstechnica.com\/?p=648287","title":"Just-released Minecraft exploit makes it easy to crash game servers","author":329388,"id":648287,"topic":1280621,"pages":1,"current_page":1,"superscroll":false,"promoted":[],"single_page":false,"comments":75,"fullwidth":false,"slug":"just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers","arsStaff":{"104481":{"name":"Aaron Zimmerman","title":"Copyeditor","staff":true},"1002":{"name":"Aurich Lawson","title":"Creative Director","staff":true},"509873":{"name":"Beth Mole","title":"Health Reporter","staff":true},"453791":{"name":"Cathleen O'Grady","title":"Contributing science reporter","staff":true},"102179":{"name":"Chris Lee","title":"Associate writer","staff":true},"821742":{"name":"Corey Gaskin","title":"Senior Commerce Writer","staff":true},"329388":{"name":"Dan Goodin","title":"Security Editor","staff":true},"254631":{"name":"Diana Gitig","title":"Associate Writer","staff":false},"25862":{"name":"Eric Bangeman","title":"Managing Editor","staff":true},"512413":{"name":"Eric Berger","title":"Senior Space Editor","staff":true},"46707":{"name":"Iljitsch van Beijnum","title":"Associate Writer","staff":false},"316010":{"name":"Jason Marlin","title":"Technical Director","staff":true},"746799":{"name":"Jennifer Ouellette","title":"Senior Writer","staff":true},"15365":{"name":"Jeremy Reimer","title":"Senior Niche Technology Historian","staff":false},"4086":{"name":"Jim Salter","title":"Technology Reporter","staff":true},"52979":{"name":"John Timmer","title":"Senior Science Editor","staff":true},"312082":{"name":"Jon Brodkin","title":"Senior IT Reporter","staff":true},"14317":{"name":"Jonathan M. 
Gitlin","title":"Automotive Editor","staff":true},"786739":{"name":"Kate Cox","title":"Tech Policy Reporter","staff":true},"998":{"name":"Ken Fisher","title":"Editor in Chief","staff":true},"440179":{"name":"Kerry Staurseth","title":"Associate Copyeditor","staff":true},"328283":{"name":"Kyle Orland","title":"Senior Gaming Editor","staff":true},"10243":{"name":"Lee Hutchinson","title":"Senior Technology Editor","staff":true},"173191":{"name":"Matthew Lasar","title":"Associate writer","staff":true},"182268":{"name":"Nate Anderson","title":"Deputy Editor","staff":true},"330533":{"name":"Nathan Mattise","title":"Features Editor","staff":true},"1991":{"name":"Ohrmazd","title":"","staff":false},"391727":{"name":"Ron Amadeo","title":"Reviews Editor","staff":true},"348927":{"name":"Sam Machkovech","title":"Tech Culture Editor","staff":true},"588289":{"name":"Samuel Axon","title":"Senior Reviews Editor","staff":true},"294205":{"name":"Scott K. Johnson","title":"Associate Writer","staff":true},"671621":{"name":"Steven Klein","title":"Developer","staff":false},"173910":{"name":"Timothy B. 
Lee","title":"Senior tech policy reporter","staff":true}},"tags":["denial-of-service-attack","exploits","minecraft","vulnerabilities"],"zen_mode":false,"vote_sentiments":[{"sentiment_id":"1","sentiment":"agree","direction":"positive","icon":null,"label":"Agree"},{"sentiment_id":"3","sentiment":"interesting","direction":"positive","icon":null,"label":"Interesting"},{"sentiment_id":"5","sentiment":"funny","direction":"positive","icon":null,"label":"Funny"},{"sentiment_id":"6","sentiment":"addsto","direction":"positive","icon":null,"label":"Adds to Story"},{"sentiment_id":"7","sentiment":"disagree","direction":"negative","icon":null,"label":"Disagree"},{"sentiment_id":"8","sentiment":"inaccurate","direction":"negative","icon":null,"label":"Inaccurate"},{"sentiment_id":"11","sentiment":"pointless","direction":"negative","icon":null,"label":"Doesn't Contribute"},{"sentiment_id":"12","sentiment":"abusive","direction":"negative","icon":null,"label":"Abusive"}]};
//]]>
</script>
<article itemscope="itemscope" itemtype="http://schema.org/NewsArticle" class="article-single standalone intro-default" id="">
<div class="column-wrapper">
<div class="left-column">
<header class="article-header">
<h4 class="post-upperdek">
Biz &amp; IT —
</h4>
<h1 itemprop="headline">
Just-released <i>Minecraft</i> exploit makes it easy to crash game servers
</h1>
<h2 itemprop="description">
Two-year-old bug exposes thousands of servers to crippling attack.
</h2>
<section class="post-meta">
<p class="byline" itemprop="author creator" itemscope="itemscope" itemtype="http://schema.org/Person">
<a itemprop="url" href="https://arstechnica.com/author/dan-goodin/" rel="author"><span itemprop="name">Dan Goodin</span></a> - <time class="date" data-time="1429214521" datetime="2015-04-16T20:02:01+00:00">Apr 16, 2015 8:02 pm UTC</time>
</p>
</section>
</header>
<section class="article-guts">
<div itemprop="articleBody" class="article-content post-page">
<figure class="intro-image intro-left">
<img src="https://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg" alt="Just-released Minecraft exploit makes it easy to crash game servers" />
<figcaption class="caption">
<div class="caption-credit">
<a rel="nofollow" class="caption-link" href="https://en.wikipedia.org/wiki/Kernel_panic#/media/File:Kernel-panic.jpg">Kevin</a>
</div>
</figcaption>
</figure>
<aside id="social-left" class="social-left" aria-label="Read the comments or share this article">
<a title="51 posters participating" class="comment-count icon-comment-bubble-down" href="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?comments=1">
<h4 class="comment-count-before">
reader comments
</h4><span class="comment-count-number">75</span> <span class="visually-hidden">with 51 posters participating</span></a>
<div class="share-links">
<h4>
Share this story
</h4>
<ul>
<li>
<a href="https://www.facebook.com/sharer.php?u=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287" target="_blank" class="social-icon share-facebook" title="Share on Facebook"><span class="visually-hidden">Share on Facebook</span></a>
</li>
<li>
<a href="https://twitter.com/share?text=Just-released+%3Ci%3EMinecraft%3C%2Fi%3E+exploit+makes+it+easy+to+crash+game+servers&amp;url=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287" target="_blank" class="social-icon share-twitter" title="Share on Twitter"><span class="visually-hidden">Share on Twitter</span></a>
</li>
<li>
<a href="https://www.reddit.com/submit?url=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287&amp;title=Just-released+%3Ci%3EMinecraft%3C%2Fi%3E+exploit+makes+it+easy+to+crash+game+servers" target="_blank" class="social-icon share-reddit" title="Share on Reddit"><span class="visually-hidden">Share on Reddit</span></a>
</li>
</ul>
</div>
</aside><!-- cache miss 581:single/related:5a5daf59fa5245a64fe8615caa0b1d1b --><!-- empty -->
<p>
A flaw in the wildly popular online game <em>Minecraft</em> makes it easy for just about anyone to crash the server hosting the game, according to a computer programmer who has released proof-of-concept code that exploits the vulnerability.
</p>
<p>
"I thought a lot before writing this post," Pakistan-based developer Ammar Askar wrote in a <a href="http://blog.ammaraskar.com/minecraft-vulnerability-advisory">blog post published Thursday</a>, 21 months, he said, after privately reporting the bug to <em>Minecraft</em> developer Mojang. "On the one hand I don't want to expose thousands of servers to a major vulnerability, yet on the other hand Mojang has failed to act on it."
</p>
<p>
The bug resides in the <a href="https://github.com/ammaraskar/pyCraft">networking internals of the <em>Minecraft</em> protocol</a>. It allows the contents of inventory slots to be exchanged, so that, among other things, items in players' hotbars are displayed automatically after logging in. <em>Minecraft</em> items can also store arbitrary metadata in a file format known as <a href="http://wiki.vg/NBT">Named Binary Tag (NBT)</a>, which allows complex data structures to be kept in hierarchical nests. Askar has released <a href="https://github.com/ammaraskar/pyCraft/tree/nbt_exploit">proof-of-concept attack code</a> he said exploits the vulnerability to crash any server hosting the game. Here's how it works.
</p>
<blockquote>
<p>
The vulnerability stems from the fact that the client is allowed to send the server information about certain slots. This, coupled with the NBT formats nesting allows us to <em>craft</em> a packet that is incredibly complex for the server to deserialize but trivial for us to generate.
</p>
<p>
In my case, I chose to create lists within lists, down to five levels. This is a json representation of what it looks like.
</p>
<div class="highlight">
<pre><code class="language-javascript" data-lang="javascript"><span class="nx">rekt</span><span class="o">:</span> <span class="p">{</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="nx">list</span><span class="o">:</span> <span class="p">[</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">]</span>
<span class="p">...</span>
<span class="p">}</span></code></pre>
</div>
<p>
The root of the object, <code>rekt</code>, contains 300 lists. Each list has a list with 10 sublists, and each of those sublists has 10 of their own, up until 5 levels of recursion. Thats a total of <code>10^5 * 300 = 30,000,000</code> lists.
</p>
<p>
And this isnt even the theoretical maximum for this attack. Just the nbt data for this payload is 26.6 megabytes. But luckily Minecraft implements a way to compress large packets, lucky us! zlib shrinks down our evil data to a mere 39 kilobytes.
</p>
<p>
Note: in previous versions of Minecraft, there was no protocol wide compression for big packets. Previously, NBT was sent compressed with gzip and prefixed with a signed short of its length, which reduced our maximum payload size to <code>2^15 - 1</code>. Now that the length is a varint capable of storing integers up to <code>2^28</code>, our potential for attack has increased significantly.
</p>
<p>
When the server will decompress our data, itll have 27 megs in a buffer somewhere in memory, but that isnt the bit thatll kill it. When it attempts to parse it into NBT, itll create java representations of the objects meaning suddenly, the sever is having to create several million java objects including ArrayLists. This runs the server out of memory and causes tremendous CPU load.
</p>
<p>
This vulnerability exists on almost all previous and current Minecraft versions as of 1.8.3, the packets used as attack vectors are the <a href="http://wiki.vg/Protocol#Player_Block_Placement">0x08: Block Placement Packet</a> and <a href="http://wiki.vg/Protocol#Creative_Inventory_Action">0x10: Creative Inventory Action</a>.
</p>
<p>
The fix for this vulnerability isnt exactly that hard, the client should never really send a data structure as complex as NBT of arbitrary size and if it must, some form of recursion and size limits should be implemented.
</p>
<p>
These were the fixes that I recommended to Mojang 2 years ago.
</p>
</blockquote>
<p>
Ars is asking Mojang for comment and will update this post if company officials respond.
</p>
<div id="action_button_container"></div>
</div>
</section>
</div>
<div class="xrail">
<div class="xrail-content">
<aside class="ad ad_xrail ad_xrail_top" aria-label="Top sidebar advertisement"></aside>
<aside class="ad_native ad_native_xrail" aria-label="Sidebar native advertisement"></aside>
</div>
</div>
</div>
<div class="column-wrapper">
<div class="left-column">
<div id="social-footer">
<a title="51 posters participating" class="comment-count icon-comment-bubble-down" href="https://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?comments=1">
<h4 class="comment-count-before">
reader comments
</h4><span class="comment-count-number">75</span> <span class="visually-hidden">with 51 posters participating</span></a>
<div class="share-links">
<h4>
Share this story
</h4>
<ul>
<li>
<a href="https://www.facebook.com/sharer.php?u=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287" target="_blank" class="social-icon share-facebook" title="Share on Facebook"><span class="visually-hidden">Share on Facebook</span></a>
</li>
<li>
<a href="https://twitter.com/share?text=Just-released+%3Ci%3EMinecraft%3C%2Fi%3E+exploit+makes+it+easy+to+crash+game+servers&amp;url=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287" target="_blank" class="social-icon share-twitter" title="Share on Twitter"><span class="visually-hidden">Share on Twitter</span></a>
</li>
<li>
<a href="https://www.reddit.com/submit?url=https%3A%2F%2Farstechnica.com%2F%3Fpost_type%3Dpost%26p%3D648287&amp;title=Just-released+%3Ci%3EMinecraft%3C%2Fi%3E+exploit+makes+it+easy+to+crash+game+servers" target="_blank" class="social-icon share-reddit" title="Share on Reddit"><span class="visually-hidden">Share on Reddit</span></a>
</li>
</ul>
</div>
</div><!-- cache hit 581:single/author:2814756d09510ff24ad530ca37a5a9a9 -->
<section class="article-author">
<a style="background-image:url('https://cdn.arstechnica.net/wp-content/uploads/2018/10/Dang.jpg');" class="author-photo" href="/author/dan-goodin" tabindex="-1" role="presentation" aria-hidden="true"></a>
<div class="author-bio">
<section class="author-bio-top">
<a href="/author/dan-goodin" class="author-name">Dan Goodin</a> Dan is the Security Editor at Ars Technica, which he joined in 2012 after working for The Register, the Associated Press, Bloomberg News, and other publications.
</section>
<section class="author-social">
<strong>Email</strong> <a href="mailto:dan.goodin@arstechnica.com">dan.goodin@arstechnica.com</a> <span class="slashes">//</span> <strong>Twitter</strong> <a href="https://www.twitter.com/dangoodin001" target="_blank">@dangoodin001</a>
</section>
</div>
</section>
</div>
<div class="xrail"></div>
</div>
<div id="article-footer-wrap">
<aside class="ad ad_fullwidth fullwidth" aria-label="Full width advertisement"></aside>
<section id="comments-area" class="comments-area column-wrapper">
<div class="row comments-row left-column">
<a name="comments-bar" id="comments-bar"></a>
<div id="comments-container"></div>
<div id="comments-posting-container" class="thick-divide-bottom">
<p id="reply">
You must <a href="https://arstechnica.com/civis/ucp.php?mode=login&amp;return_to/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/" class="vote_login">login or create an account</a> to comment.
</p>
</div>
</div>
<div class="xrail xrail-comments">
<div class="xrail-content xrail-content-comments">
<aside class="ad ad_xrail ad_xrail_comments" aria-label="Comments sidebar advertisement"></aside>
</div>
</div>
</section>
<section class="inline-playlist">
<div class="ars-video-playlist">
<h3 class="ars-video-playlist-module-header">
Channel <span>Ars Technica</span>
</h3>
<div class="ars-video-playlist-module" data-playlist-id="arstechnica-channel-ars-information-technology" data-video-options="[]"></div>
</div>
</section>
<div class="prev-next-links">
<a href="https://arstechnica.com/tech-policy/2015/04/dozens-of-us-government-online-whistleblower-sites-not-secured-by-https/" rel="prev"><span class="arrow"></span> Previous story</a> <a href="https://arstechnica.com/gaming/2015/04/hidden-files-suggest-street-fighters-ryu-may-come-to-smash-bros/" rel="next">Next story <span class="arrow"></span></a>
</div>
<footer id="article-footer">
<div id="recommendations-footer">
<div id="story-recommendations">
<div class="heading-column">
<h3>
Related Stories
</h3>
</div>
<ul id="story-recs" class="rec-wrap"></ul>
</div>
<div id="sponsored-recommendations">
<div class="heading-column">
<h3>
Sponsored Stories
</h3><a href="http://www.outbrain.com/what-is/default/en" target="_blank">Powered by </a>
</div>
<ul id="outbrain-recs"></ul>
</div>
<div id="latest-stories">
<div class="heading-column">
<h3>
Today on Ars
</h3>
</div>
<ul id="latest-recs" class="rec-wrap"></ul>
</div>
</div>
</footer>
</div>
</article>
</main>
<footer class="site-footer">
<nav class="nav-footer">
<section>
<ul>
<li>
<a href="/store/">Store</a>
</li>
<li>
<a href="/store/product/subscriptions/">Subscribe</a>
</li>
<li>
<a href="/about-us/">About Us</a>
</li>
<li>
<a href="/rss-feeds/">RSS Feeds</a>
</li>
<li>
<a rel="nofollow" href="http://arstechnica.com/information-technology/2015/04/just-released-minecraft-exploit-makes-it-easy-to-crash-game-servers/?view=mobile">View Mobile Site</a>
</li>
</ul>
</section>
<section>
<ul>
<li>
<a href="/contact-us/">Contact Us</a>
</li>
<li>
<a href="/staff-directory/">Staff</a>
</li>
<li>
<a href="/advertise-with-us/">Advertise with us</a>
</li>
<li>
<a href="/reprints/">Reprints</a>
</li>
</ul>
</section>
<section class="footer-newsletter">
<div class="newsletter-wrapper">
<h3>
<a href="/newsletters/">Newsletter Signup</a>
</h3>
<p>
Join the Ars Orbital Transmission mailing list to get weekly updates delivered to your inbox.
</p><a class="button" href="/newsletters/">Sign me up →</a>
</div>
</section>
</nav>
<section class="footer-terms-logo">
<div class="cn-logo">
<a href="http://condenast.com/" class="icon icon-logo-cn-us" title="Visit Condé Nast"></a>
</div>
<p id="copyright-terms">
CNMN Collection<br />
WIRED Media Group<br />
© 2020 Condé Nast. All rights reserved. Use of and/or registration on any portion of this site constitutes acceptance of our <a href="https://www.condenast.com/user-agreement/">User Agreement</a> (updated 1/1/20) and <a href="https://www.condenast.com/privacy-policy/">Privacy Policy and Cookie Statement</a> (updated 1/1/20) and <a href="/amendment-to-conde-nast-user-agreement-privacy-policy/">Ars Technica Addendum</a> (effective 8/21/2018). Ars may earn compensation on sales from links on this site. <a href="/affiliate-link-policy/">Read our affiliate link policy</a>.<br />
<a href="https://www.condenast.com/privacy-policy/#california">Your California Privacy Rights</a> | <a id="ot-sdk-btn" class="ot-sdk-show-settings">Do Not Sell My Personal Information</a><br />
The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast.<br />
<a href="https://www.condenast.com/online-behavioral-advertising-oba-and-how-to-opt-out-of-oba/#clickheretoreadmoreaboutonlinebehavioraladvertising(oba)">Ad Choices</a>
</p>
</section>
</footer>
</div>
<script type="text/javascript" src="https://cdn.arstechnica.net/wp-content/themes/ars/assets/js/main-fafdd8b108.js"></script> <!-- cache hit 581:single/javascript-footer:5a5daf59fa5245a64fe8615caa0b1d1b -->
<script async="async" type="application/javascript" src="https://embed.actionbutton.co/widget/widget.min.js"></script> <!-- Parse.ly start -->
<script id="parsely-cfg" src="//fpa-cdn.arstechnica.com/keys/arstechnica.com/p.js"></script> <!-- Parse.ly end -->
<script src="https://player.cnevids.com/interlude/arstechnica.js" async="async"></script>
<script id="conde-polar" src="https://cdn.mediavoice.com/nativeads/script/condenastcorporate/conde-asa-polar-master.js" async="async"></script>
<script>
<![CDATA[
(function () {
function DQ() {
var queue = window.sparrowQueue;
this.push = fn => fn();
window.sparrowQueue = this;
while (queue.length) {
queue.shift()();
}
}
function e(t, e) {
var n, a, o;
a = !1, n = document.createElement("script"), n.type = "text/javascript", n.src = t, n.onload = n.onreadystatechange = function () {
a || this.readyState && "complete" != this.readyState || (a = !0, e ? e() : !0)
}, o = document.getElementsByTagName("script")[0], o.parentNode.insertBefore(n, o)
}
if (location.search.indexOf('no_sparrow') < 0) {
e("https://pixel.condenastdigital.com/config/v2/production/ars-technica.config.js", function () {
e("https://pixel.condenastdigital.com/sparrow.min.js", function () {
if (window.SparrowConfigV2) {
window.sparrow = new window.Sparrow(window.SparrowConfigV2);
new DQ();
}
})
})
}
})();
]]>
</script>
<script type="text/javascript" src="//s.skimresources.com/js/100098X1555750.skimlinks.js"></script>
</body>
</html>

View File

@ -1,6 +0,0 @@
[
"http:\/\/fakehost\/test\/base\/foo\/bar\/baz.png",
"http:\/\/fakehost\/foo\/bar\/baz.png",
"http:\/\/test\/foo\/bar\/baz.png",
"https:\/\/test\/foo\/bar\/baz.png"
]

View File

@ -1,8 +0,0 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL with base relative test",
"SiteName": null
}

View File

@ -1,34 +0,0 @@
<article>
<h2>Lorem</h2>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/test/base/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/test/base/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></p>
<p><img src="http://fakehost/test/base/foo/bar/baz.png"></p>
<p><img src="http://fakehost/foo/bar/baz.png"></p>
<p><img src="http://test/foo/bar/baz.png"></p>
<p><img src="https://test/foo/bar/baz.png"></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

View File

@ -1,44 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<base href="base/"/>
<title>Base URL with base relative test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -1,5 +0,0 @@
[
"http:\/\/fakehost\/foo\/bar\/baz.png",
"http:\/\/test\/foo\/bar\/baz.png",
"https:\/\/test\/foo\/bar\/baz.png"
]

View File

@ -1,8 +0,0 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL with base test",
"SiteName": null
}

View File

@ -1,34 +0,0 @@
<article>
<h2>Lorem</h2>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/foo/bar/baz.png"></p>
<p><img src="http://fakehost/foo/bar/baz.png"></p>
<p><img src="http://fakehost/foo/bar/baz.png"></p>
<p><img src="http://test/foo/bar/baz.png"></p>
<p><img src="https://test/foo/bar/baz.png"></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

View File

@ -1,44 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<base href="/"/>
<title>Base URL with base test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -1,6 +0,0 @@
[
"http:\/\/fakehost\/test\/foo\/bar\/baz.png",
"http:\/\/fakehost\/foo\/bar\/baz.png",
"http:\/\/test\/foo\/bar\/baz.png",
"https:\/\/test\/foo\/bar\/baz.png"
]

View File

@ -1,8 +0,0 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse\n cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non\n proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
"Image": null,
"Title": "Base URL test",
"SiteName": null
}

View File

@ -1,34 +0,0 @@
<article>
<h2>Lorem</h2>
<p>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
<p>Links</p>
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/test/foo/bar/baz.html">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="http://fakehost/test/baz.html#foo">link</a></p>
<p><a href="http://fakehost/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="http://fakehost/test/foo/bar/baz.png"></p>
<p><img src="http://fakehost/test/foo/bar/baz.png"></p>
<p><img src="http://fakehost/foo/bar/baz.png"></p>
<p><img src="http://test/foo/bar/baz.png"></p>
<p><img src="https://test/foo/bar/baz.png"></p>
<h2>Foo</h2>
<p>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</p>
</article>

View File

@ -1,43 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Base URL test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
<p>Links</p>
<p><a href="foo/bar/baz.html">link</a></p>
<p><a href="./foo/bar/baz.html">link</a></p>
<p><a href="/foo/bar/baz.html">link</a></p>
<p><a href="#foo">link</a></p>
<p><a href="baz.html#foo">link</a></p>
<p><a href="/foo/bar/baz.html#foo">link</a></p>
<p><a href="http://test/foo/bar/baz.html">link</a></p>
<p><a href="https://test/foo/bar/baz.html">link</a></p>
<p>Images</p>
<p><img src="foo/bar/baz.png"/></p>
<p><img src="./foo/bar/baz.png"/></p>
<p><img src="/foo/bar/baz.png"/></p>
<p><img src="http://test/foo/bar/baz.png"/></p>
<p><img src="https://test/foo/bar/baz.png"/></p>
<h2>Foo</h2>
<div>
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
</div>
</article>
</body>
</html>

View File

@ -1,8 +0,0 @@
{
"Author": null,
"Direction": null,
"Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua.",
"Image": null,
"Title": "Basic tag cleaning test",
"SiteName": null
}

View File

@ -1,18 +0,0 @@
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div><div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>

View File

@ -1,36 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Basic tag cleaning test</title>
</head>
<body>
<article>
<h1>Lorem</h1>
<div>
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
tempor incididunt ut labore et dolore magna aliqua.</p>
<p>Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<iframe src="about:blank">Iframe fallback test</iframe>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
<h2>Foo</h2>
<div>
<p>Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat.</p>
<object data="foo.swf" type="application/x-shockwave-flash" width="88" height="31">
<param movie="foo.swf" />
</object>
<embed src="foo.swf"/>
<p>Duis aute irure dolor in reprehenderit in voluptate velit esse
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
</div>
</article>
</body>
</html>

Some files were not shown because too many files have changed in this diff Show More