Revert "remove htmlpurifier"

This reverts commit c21a462d52.
This commit is contained in:
Andrew Dolgov 2012-06-05 21:52:21 +04:00
parent 705b97b7fc
commit 010efc9b81
356 changed files with 26185 additions and 4 deletions

0
cache/htmlpurifier/.empty vendored Normal file
View File

View File

@ -21,6 +21,10 @@ case "$1" in
install -d -m775 -gwww-data /var/lock/tt-rss
fi
if [ ! -d /var/cache/tt-rss/htmlpurifier ]; then
install -d -m775 -gwww-data /var/cache/tt-rss/htmlpurifier
fi
if [ ! -d /var/cache/tt-rss/magpie ]; then
install -d -m775 -gwww-data /var/cache/tt-rss/magpie
fi

View File

@ -100,6 +100,8 @@
require_once 'lib/pubsubhubbub/publisher.php';
$purifier = false;
$tz_offset = -1;
$utc_tz = new DateTimeZone('UTC');
$schema_version = false;
@ -2632,17 +2634,36 @@
}
function sanitize($link, $str, $force_strip_tags = false, $owner = false, $site_url = false) {
global $purifier;
if (!$owner) $owner = $_SESSION["uid"];
$res = trim($str); if (!$res) return '';
// TODO implement better HTML tag stripping and XSS protection
// create global Purifier object if needed
if (!$purifier) {
require_once 'lib/htmlpurifier/library/HTMLPurifier.auto.php';
if (function_exists('filter_var')) {
$res = filter_var($res, FILTER_SANITIZE_STRING);
$config = HTMLPurifier_Config::createDefault();
$allowed = "p,a[href],i,em,b,strong,code,pre,blockquote,br,img[src|alt|title|align|hspace],ul,ol,li,h1,h2,h3,h4,s,object[classid|type|id|name|width|height|codebase],param[name|value],table,tr,td,span[class]";
$config->set('HTML.SafeObject', true);
@$config->set('HTML', 'Allowed', $allowed);
$config->set('Output.FlashCompat', true);
$config->set('Attr.EnableID', true);
if (!defined('MOBILE_VERSION')) {
@$config->set('Cache', 'SerializerPath', CACHE_DIR . "/htmlpurifier");
} else {
@$config->set('Cache', 'SerializerPath', "../" . CACHE_DIR . "/htmlpurifier");
}
$config->set('Filter.YouTube', true);
$purifier = new HTMLPurifier($config);
}
$res = strip_tags($str, "<p><a><i><em><b><strong><code><pre><blockquote><br><img><ul><ol><li><h1><h2><h3><h4><s><object><param><table><tr><td><span>");
$res = $purifier->purify($res);
if (get_pref($link, "STRIP_IMAGES", $owner)) {
$res = preg_replace('/<img[^>]+>/is', '', $res);

View File

@ -23,6 +23,12 @@
$array_push($errors, "Configuration file (config.php) has incorrect version. Update it with new options from config.php-dist and set CONFIG_VERSION to the correct value.");
}
$purifier_cache_dir = CACHE_DIR . "/htmlpurifier";
if (!is_writable($purifier_cache_dir)) {
array_push($errors, "HTMLPurifier cache directory should be writable by anyone (chmod -R 777 $purifier_cache_dir)");
}
if (!is_writable(CACHE_DIR . "/images")) {
array_push($errors, "Image cache is not writable (chmod -R 777 ".CACHE_DIR."/images)");
}

9
lib/htmlpurifier/CREDITS Normal file
View File

@ -0,0 +1,9 @@
CREDITS
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
for letting me package his fantastic XSS cheatsheet for a smoketest.
vim: et sw=4 sts=4

504
lib/htmlpurifier/LICENSE Normal file
View File

@ -0,0 +1,504 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!
vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
<?php
/**
* This is a stub include that automatically configures the include path.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier/Bootstrap.php';
require_once 'HTMLPurifier.autoload.php';
// vim: et sw=4 sts=4

View File

@ -0,0 +1,26 @@
<?php
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
* It also does some sanity checks.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
// We need unregister for our pre-registering functionality
HTMLPurifier_Bootstrap::registerAutoload();
if (function_exists('__autoload')) {
// Be polite and ensure that userland autoload gets retained
spl_autoload_register('__autoload');
}
} elseif (!function_exists('__autoload')) {
function __autoload($class) {
return HTMLPurifier_Bootstrap::autoload($class);
}
}
if (ini_get('zend.ze1_compatibility_mode')) {
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,23 @@
<?php
/**
* @file
* Defines a function wrapper for HTML Purifier for quick use.
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
*/
/**
* Purify HTML.
* @param $html String HTML to purify
* @param $config Configuration to use, can be any value accepted by
* HTMLPurifier_Config::create()
*/
function HTMLPurifier($html, $config = null) {
static $purifier = false;
if (!$purifier) {
$purifier = new HTMLPurifier();
}
return $purifier->purify($html, $config);
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,221 @@
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. Use this if performance is a
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.4.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
* because 'require' not 'require_once' is used.
*
* @warning
* This file requires that the include path contains the HTML Purifier
* library directory; this is not auto-set.
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
require 'HTMLPurifier/AttrTypes.php';
require 'HTMLPurifier/AttrValidator.php';
require 'HTMLPurifier/Bootstrap.php';
require 'HTMLPurifier/Definition.php';
require 'HTMLPurifier/CSSDefinition.php';
require 'HTMLPurifier/ChildDef.php';
require 'HTMLPurifier/Config.php';
require 'HTMLPurifier/ConfigSchema.php';
require 'HTMLPurifier/ContentSets.php';
require 'HTMLPurifier/Context.php';
require 'HTMLPurifier/DefinitionCache.php';
require 'HTMLPurifier/DefinitionCacheFactory.php';
require 'HTMLPurifier/Doctype.php';
require 'HTMLPurifier/DoctypeRegistry.php';
require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';
require 'HTMLPurifier/EntityParser.php';
require 'HTMLPurifier/ErrorCollector.php';
require 'HTMLPurifier/ErrorStruct.php';
require 'HTMLPurifier/Exception.php';
require 'HTMLPurifier/Filter.php';
require 'HTMLPurifier/Generator.php';
require 'HTMLPurifier/HTMLDefinition.php';
require 'HTMLPurifier/HTMLModule.php';
require 'HTMLPurifier/HTMLModuleManager.php';
require 'HTMLPurifier/IDAccumulator.php';
require 'HTMLPurifier/Injector.php';
require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';
require 'HTMLPurifier/TagTransform.php';
require 'HTMLPurifier/Token.php';
require 'HTMLPurifier/TokenFactory.php';
require 'HTMLPurifier/URI.php';
require 'HTMLPurifier/URIDefinition.php';
require 'HTMLPurifier/URIFilter.php';
require 'HTMLPurifier/URIParser.php';
require 'HTMLPurifier/URIScheme.php';
require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
require 'HTMLPurifier/AttrDef/Switch.php';
require 'HTMLPurifier/AttrDef/Text.php';
require 'HTMLPurifier/AttrDef/URI.php';
require 'HTMLPurifier/AttrDef/CSS/Number.php';
require 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
require 'HTMLPurifier/AttrDef/CSS/Background.php';
require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require 'HTMLPurifier/AttrDef/CSS/Border.php';
require 'HTMLPurifier/AttrDef/CSS/Color.php';
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/HTML/Class.php';
require 'HTMLPurifier/AttrDef/HTML/Color.php';
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require 'HTMLPurifier/AttrDef/HTML/ID.php';
require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
require 'HTMLPurifier/AttrDef/HTML/Length.php';
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require 'HTMLPurifier/AttrDef/URI/Email.php';
require 'HTMLPurifier/AttrDef/URI/Host.php';
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
require 'HTMLPurifier/AttrTransform/Background.php';
require 'HTMLPurifier/AttrTransform/BdoDir.php';
require 'HTMLPurifier/AttrTransform/BgColor.php';
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
require 'HTMLPurifier/AttrTransform/Border.php';
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
require 'HTMLPurifier/AttrTransform/Input.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/NameSync.php';
require 'HTMLPurifier/AttrTransform/Nofollow.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require 'HTMLPurifier/ChildDef/Table.php';
require 'HTMLPurifier/DefinitionCache/Decorator.php';
require 'HTMLPurifier/DefinitionCache/Null.php';
require 'HTMLPurifier/DefinitionCache/Serializer.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require 'HTMLPurifier/HTMLModule/Bdo.php';
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php';
require 'HTMLPurifier/HTMLModule/Nofollow.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require 'HTMLPurifier/Injector/AutoParagraph.php';
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php';
require 'HTMLPurifier/Strategy/MakeWellFormed.php';
require 'HTMLPurifier/Strategy/RemoveForeignElements.php';
require 'HTMLPurifier/Strategy/ValidateAttributes.php';
require 'HTMLPurifier/TagTransform/Font.php';
require 'HTMLPurifier/TagTransform/Simple.php';
require 'HTMLPurifier/Token/Comment.php';
require 'HTMLPurifier/Token/Tag.php';
require 'HTMLPurifier/Token/Empty.php';
require 'HTMLPurifier/Token/End.php';
require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';

View File

@ -0,0 +1,30 @@
<?php
/**
* @file
* Emulation layer for code that used kses(), substituting in HTML Purifier.
*/
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
function kses($string, $allowed_html, $allowed_protocols = null) {
$config = HTMLPurifier_Config::createDefault();
$allowed_elements = array();
$allowed_attributes = array();
foreach ($allowed_html as $element => $attributes) {
$allowed_elements[$element] = true;
foreach ($attributes as $attribute => $x) {
$allowed_attributes["$element.$attribute"] = true;
}
}
$config->set('HTML.AllowedElements', $allowed_elements);
$config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) {
$config->set('URI.AllowedSchemes', $allowed_protocols);
}
$purifier = new HTMLPurifier($config);
return $purifier->purify($string);
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
<?php
/**
* @file
* Convenience stub file that adds HTML Purifier's library file to the path
* without any other side-effects.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
// vim: et sw=4 sts=4

View File

@ -0,0 +1,237 @@
<?php
/*! @mainpage
*
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
* -# Lexing (parsing into tokens) the document,
* -# Executing various strategies on the tokens:
* -# Removing all elements not in the whitelist,
* -# Making the tokens well-formed,
* -# Fixing the nesting of the nodes, and
* -# Validating attributes of the nodes; and
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
* and HTMLPurifier_Config.
*/
/*
HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* @note There are several points in which configuration can be specified
* for HTML Purifier. The precedence of these (from lowest to
* highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged (this behavior may change in the future).
*
* @todo We need an easier way to inject strategies using the configuration
* object.
*/
class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.4.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.4.0';
/** Global configuration object */
public $config;
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
private $filters = array();
/** Single instance of HTML Purifier */
private static $instance;
protected $strategy, $generator;
/**
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
*/
public $context;
/**
* Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of
* the purifier, if omitted, a default configuration is
* supplied (which can be overridden on a per-use basis).
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
public function __construct($config = null) {
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
}
/**
* Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object
*/
public function addFilter($filter) {
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
$this->filters[] = $filter;
}
/**
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
*
* @param $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted,
* defaults to the config object specified during this
* object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*/
public function purify($html, $config = null) {
// :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependant, partially
// configuration dependant
$lexer = HTMLPurifier_Lexer::create($config);
$context = new HTMLPurifier_Context();
// setup HTML generator
$this->generator = new HTMLPurifier_Generator($config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
if ($config->get('Core.CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context);
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector($context);
$context->register('ErrorCollector', $error_collector);
}
// setup id_accumulator context, necessary due to the fact that
// AttrValidator can be called from many places
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
// setup filters
$filter_flags = $config->getBatch('Filter');
$custom_filters = $filter_flags['Custom'];
unset($filter_flags['Custom']);
$filters = array();
foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue;
if (strpos($filter, '.') !== false) continue;
$class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class;
}
foreach ($custom_filters as $filter) {
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
$filters[] = $filter;
}
$filters = array_merge($filters, $this->filters);
// maybe prepare(), but later
for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
$html = $filters[$i]->preFilter($html, $config, $context);
}
// purified HTML
$html =
$this->generator->generateFromTokens(
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
$config, $context
)
);
for ($i = $filter_size - 1; $i >= 0; $i--) {
$html = $filters[$i]->postFilter($html, $config, $context);
}
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
$this->context =& $context;
return $html;
}
/**
* Filters an array of HTML snippets
* @param $config Optional HTMLPurifier_Config object for this operation.
* See HTMLPurifier::purify() for more details.
* @return Array of purified HTML
*/
public function purifyArray($array_of_html, $config = null) {
$context_array = array();
foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config);
$context_array[$key] = $this->context;
}
$this->context = $context_array;
return $array_of_html;
}
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with, or HTMLPurifier_Config
* instance to configure the generated version with.
*/
public static function instance($prototype = null) {
if (!self::$instance || $prototype) {
if ($prototype instanceof HTMLPurifier) {
self::$instance = $prototype;
} elseif ($prototype) {
self::$instance = new HTMLPurifier($prototype);
} else {
self::$instance = new HTMLPurifier();
}
}
return self::$instance;
}
/**
* @note Backwards compatibility, see instance()
*/
public static function getInstance($prototype = null) {
return HTMLPurifier::instance($prototype);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,215 @@
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. This is a convenience stub that
* includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT
* EDIT THIS FILE, changes will be overwritten the next time the script is run.
*
* Changes to include_path are not necessary.
*/
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
require_once $__dir . '/HTMLPurifier/AttrTypes.php';
require_once $__dir . '/HTMLPurifier/AttrValidator.php';
require_once $__dir . '/HTMLPurifier/Bootstrap.php';
require_once $__dir . '/HTMLPurifier/Definition.php';
require_once $__dir . '/HTMLPurifier/CSSDefinition.php';
require_once $__dir . '/HTMLPurifier/ChildDef.php';
require_once $__dir . '/HTMLPurifier/Config.php';
require_once $__dir . '/HTMLPurifier/ConfigSchema.php';
require_once $__dir . '/HTMLPurifier/ContentSets.php';
require_once $__dir . '/HTMLPurifier/Context.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
require_once $__dir . '/HTMLPurifier/Doctype.php';
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
require_once $__dir . '/HTMLPurifier/ElementDef.php';
require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
require_once $__dir . '/HTMLPurifier/EntityParser.php';
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
require_once $__dir . '/HTMLPurifier/Exception.php';
require_once $__dir . '/HTMLPurifier/Filter.php';
require_once $__dir . '/HTMLPurifier/Generator.php';
require_once $__dir . '/HTMLPurifier/HTMLDefinition.php';
require_once $__dir . '/HTMLPurifier/HTMLModule.php';
require_once $__dir . '/HTMLPurifier/HTMLModuleManager.php';
require_once $__dir . '/HTMLPurifier/IDAccumulator.php';
require_once $__dir . '/HTMLPurifier/Injector.php';
require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php';
require_once $__dir . '/HTMLPurifier/TagTransform.php';
require_once $__dir . '/HTMLPurifier/Token.php';
require_once $__dir . '/HTMLPurifier/TokenFactory.php';
require_once $__dir . '/HTMLPurifier/URI.php';
require_once $__dir . '/HTMLPurifier/URIDefinition.php';
require_once $__dir . '/HTMLPurifier/URIFilter.php';
require_once $__dir . '/HTMLPurifier/URIParser.php';
require_once $__dir . '/HTMLPurifier/URIScheme.php';
require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Switch.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/AlphaValue.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Background.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
require_once $__dir . '/HTMLPurifier/Strategy/MakeWellFormed.php';
require_once $__dir . '/HTMLPurifier/Strategy/RemoveForeignElements.php';
require_once $__dir . '/HTMLPurifier/Strategy/ValidateAttributes.php';
require_once $__dir . '/HTMLPurifier/TagTransform/Font.php';
require_once $__dir . '/HTMLPurifier/TagTransform/Simple.php';
require_once $__dir . '/HTMLPurifier/Token/Comment.php';
require_once $__dir . '/HTMLPurifier/Token/Tag.php';
require_once $__dir . '/HTMLPurifier/Token/Empty.php';
require_once $__dir . '/HTMLPurifier/Token/End.php';
require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';

View File

@ -0,0 +1,128 @@
<?php
/**
* Defines common attribute collections that modules reference
*/
class HTMLPurifier_AttrCollections
{
/**
* Associative array of attribute collections, indexed by name
*/
public $info = array();
/**
* Performs all expansions on internal data for use by other inclusions
* It also collects all attribute collection extensions from
* modules
* @param $attr_types HTMLPurifier_AttrTypes instance
* @param $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules) {
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
if (!isset($this->info[$coll_i])) {
$this->info[$coll_i] = array();
}
foreach ($coll as $attr_i => $attr) {
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes
$this->info[$coll_i][$attr_i] = array_merge(
$this->info[$coll_i][$attr_i], $attr);
continue;
}
$this->info[$coll_i][$attr_i] = $attr;
}
}
}
// perform internal expansions and inclusions
foreach ($this->info as $name => $attr) {
// merge attribute collections that include others
$this->performInclusions($this->info[$name]);
// replace string identifiers with actual attribute objects
$this->expandIdentifiers($this->info[$name], $attr_types);
}
}
/**
* Takes a reference to an attribute associative array and performs
* all inclusions specified by the zero index.
* @param &$attr Reference to attribute array
*/
public function performInclusions(&$attr) {
if (!isset($attr[0])) return;
$merge = $attr[0];
$seen = array(); // recursion guard
// loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) {
if (isset($seen[$merge[$i]])) continue;
$seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over
if (!isset($this->info[$merge[$i]])) continue;
foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions
$attr[$key] = $value;
}
if (isset($this->info[$merge[$i]][0])) {
// recursion
$merge = array_merge($merge, $this->info[$merge[$i]][0]);
}
}
unset($attr[0]);
}
/**
* Expands all string identifiers in an attribute array by replacing
* them with the appropriate values inside HTMLPurifier_AttrTypes
* @param &$attr Reference to attribute array
* @param $attr_types HTMLPurifier_AttrTypes instance
*/
public function expandIdentifiers(&$attr, $attr_types) {
// because foreach will process new elements we add, make sure we
// skip duplicates
$processed = array();
foreach ($attr as $def_i => $def) {
// skip inclusions
if ($def_i === 0) continue;
if (isset($processed[$def_i])) continue;
// determine whether or not attribute is required
if ($required = (strpos($def_i, '*') !== false)) {
// rename the definition
unset($attr[$def_i]);
$def_i = trim($def_i, '*');
$attr[$def_i] = $def;
}
$processed[$def_i] = true;
// if we've already got a literal object, move on
if (is_object($def)) {
// preserve previous required
$attr[$def_i]->required = ($required || $attr[$def_i]->required);
continue;
}
if ($def === false) {
unset($attr[$def_i]);
continue;
}
if ($t = $attr_types->get($def)) {
$attr[$def_i] = $t;
$attr[$def_i]->required = $required;
} else {
unset($attr[$def_i]);
}
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,123 @@
<?php
/**
* Base class for all validating attribute definitions.
*
* This family of classes forms the core for not only HTML attribute validation,
* but also any sort of string that needs to be validated or cleaned (which
* means CSS properties and composite definitions are defined here too).
* Besides defining (through code) what precisely makes the string valid,
* subclasses are also responsible for cleaning the code if possible.
*/
abstract class HTMLPurifier_AttrDef
{
/**
* Tells us whether or not an HTML attribute is minimized. Has no
* meaning in other contexts.
*/
public $minimized = false;
/**
* Tells us whether or not an HTML attribute is required. Has no
* meaning in other contexts
*/
public $required = false;
/**
* Validates and cleans passed string according to a definition.
*
* @param $string String to be validated and cleaned.
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_AttrContext object.
*/
abstract public function validate($string, $config, $context);
/**
* Convenience method that parses a string as if it were CDATA.
*
* This method process a string in the manner specified at
* <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
* leading and trailing whitespace, ignoring line feeds, and replacing
* carriage returns and tabs with spaces. While most useful for HTML
* attributes specified as CDATA, it can also be applied to most CSS
* values.
*
* @note This method is not entirely standards compliant, as trim() removes
* more types of whitespace than specified in the spec. In practice,
* this is rarely a problem, as those extra characters usually have
* already been removed by HTMLPurifier_Encoder.
*
* @warning This processing is inconsistent with XML's whitespace handling
* as specified by section 3.3.3 and referenced XHTML 1.0 section
* 4.7. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized.
*/
public function parseCDATA($string) {
$string = trim($string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string;
}
/**
* Factory method for creating this class from a string.
* @param $string String construction info
* @return Created AttrDef object corresponding to $string
*/
public function make($string) {
// default implementation, return a flyweight of this object.
// If $string has an effect on the returned object (i.e. you
// need to overload this method), it is best
// to clone or instantiate new copies. (Instantiation is safer.)
return $this;
}
/**
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
* properly. THIS IS A HACK!
*/
protected function mungeRgb($string) {
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
$i++;
if ($i >= $c) {
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
if ($string[$i] === "\n") continue;
}
$ret .= $string[$i];
}
return $ret;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,87 @@
<?php
/**
* Validates the HTML attribute style, otherwise known as CSS.
* @note We don't implement the whole CSS specification, so it might be
* difficult to reuse this component in the context of validating
* actual stylesheet declarations.
* @note If we were really serious about validating the CSS, we would
* tokenize the styles and then parse the tokens. Obviously, we
* are not doing that. Doing that could seriously harm performance,
* but would make these components a lot more viable for a CSS
* filtering solution.
*/
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{
public function validate($css, $config, $context) {
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
// we're going to break the spec and explode by semicolons.
// This is because semicolon rarely appears in escaped form
// Doing this is generally flaky but fast
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
// for details
$declarations = explode(';', $css);
$propvalues = array();
/**
* Name of the current CSS property being validated.
*/
$property = false;
$context->register('CurrentCSSProperty', $property);
foreach ($declarations as $declaration) {
if (!$declaration) continue;
if (!strpos($declaration, ':')) continue;
list($property, $value) = explode(':', $declaration, 2);
$property = trim($property);
$value = trim($value);
$ok = false;
do {
if (isset($definition->info[$property])) {
$ok = true;
break;
}
if (ctype_lower($property)) break;
$property = strtolower($property);
if (isset($definition->info[$property])) {
$ok = true;
break;
}
} while(0);
if (!$ok) continue;
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)
$result = $definition->info[$property]->validate(
$value, $config, $context );
} else {
$result = 'inherit';
}
if ($result === false) continue;
$propvalues[$property] = $result;
}
$context->destroy('CurrentCSSProperty');
// procedure does not write the new CSS simultaneously, so it's
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
$new_declarations = '';
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
return $new_declarations ? $new_declarations : false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,21 @@
<?php
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{
public function __construct() {
parent::__construct(false); // opacity is non-negative, but we will clamp it
}
public function validate($number, $config, $context) {
$result = parent::validate($number, $config, $context);
if ($result === false) return $result;
$float = (float) $result;
if ($float < 0.0) $result = '0';
if ($float > 1.0) $result = '1';
return $result;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,87 @@
<?php
/**
* Validates shorthand CSS property background.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
*/
protected $info;
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['background-color'] = $def->info['background-color'];
$this->info['background-image'] = $def->info['background-image'];
$this->info['background-repeat'] = $def->info['background-repeat'];
$this->info['background-attachment'] = $def->info['background-attachment'];
$this->info['background-position'] = $def->info['background-position'];
}
public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// munge rgb() decl if necessary
$string = $this->mungeRgb($string);
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$caught = array();
$caught['color'] = false;
$caught['image'] = false;
$caught['repeat'] = false;
$caught['attachment'] = false;
$caught['position'] = false;
$i = 0; // number of catches
$none = false;
foreach ($bits as $bit) {
if ($bit === '') continue;
foreach ($caught as $key => $status) {
if ($key != 'position') {
if ($status !== false) continue;
$r = $this->info['background-' . $key]->validate($bit, $config, $context);
} else {
$r = $bit;
}
if ($r === false) continue;
if ($key == 'position') {
if ($caught[$key] === false) $caught[$key] = '';
$caught[$key] .= $r . ' ';
} else {
$caught[$key] = $r;
}
$i++;
break;
}
}
if (!$i) return false;
if ($caught['position'] !== false) {
$caught['position'] = $this->info['background-position']->
validate($caught['position'], $config, $context);
}
$ret = array();
foreach ($caught as $value) {
if ($value === false) continue;
$ret[] = $value;
}
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,133 @@
<?php
/* W3C says:
[ // adjective and number must be in correct order, even if
// you could switch them without introducing ambiguity.
// some browsers support that syntax
[
<percentage> | <length> | left | center | right
]
[
<percentage> | <length> | top | center | bottom
]?
] |
[ // this signifies that the vertical and horizontal adjectives
// can be arbitrarily ordered, however, there can only be two,
// one of each, or none at all
[
left | center | right
] ||
[
top | center | bottom
]
]
top, left = 0%
center, (none) = 50%
bottom, right = 100%
*/
/* QuirksMode says:
keyword + length/percentage must be ordered correctly, as per W3C
Internet Explorer and Opera, however, support arbitrary ordering. We
should fix it up.
Minor issue though, not strictly necessary.
*/
// control freaks may appreciate the ability to convert these to
// percentages or something, but it's not necessary
/**
* Validates the value of background-position.
*/
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{
protected $length;
protected $percentage;
public function __construct() {
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$bits = explode(' ', $string);
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
$keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
$lookup = array(
'top' => 'v',
'bottom' => 'v',
'left' => 'h',
'right' => 'h',
'center' => 'c'
);
foreach ($bits as $bit) {
if ($bit === '') continue;
// test for keyword
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit;
$i++;
}
// test for length
$r = $this->length->validate($bit, $config, $context);
if ($r !== false) {
$measures[] = $r;
$i++;
}
// test for percentage
$r = $this->percentage->validate($bit, $config, $context);
if ($r !== false) {
$measures[] = $r;
$i++;
}
}
if (!$i) return false; // no valid values were caught
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
elseif ($keywords['ch']) {
$ret[] = $keywords['ch'];
$keywords['cv'] = false; // prevent re-use: center = center center
}
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,43 @@
<?php
/**
* Validates the border property as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{
/**
* Local copy of properties this property is shorthand for.
*/
protected $info = array();
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['border-width'] = $def->info['border-width'];
$this->info['border-style'] = $def->info['border-style'];
$this->info['border-top-color'] = $def->info['border-top-color'];
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$string = $this->mungeRgb($string);
$bits = explode(' ', $string);
$done = array(); // segments we've finished
$ret = ''; // return value
foreach ($bits as $bit) {
foreach ($this->info as $propname => $validator) {
if (isset($done[$propname])) continue;
$r = $validator->validate($bit, $config, $context);
if ($r !== false) {
$ret .= $r . ' ';
$done[$propname] = true;
break;
}
}
}
return rtrim($ret);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,78 @@
<?php
/**
* Validates Color as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
public function validate($color, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$color = trim($color);
if ($color === '') return false;
$lower = strtolower($color);
if (isset($colors[$lower])) return $colors[$lower];
if (strpos($color, 'rgb(') !== false) {
// rgb literal handling
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) return false;
$triad = substr($color, 4, $length - 4 - 1);
$parts = explode(',', $triad);
if (count($parts) !== 3) return false;
$type = false; // to ensure that they're all the same type
$new_parts = array();
foreach ($parts as $part) {
$part = trim($part);
if ($part === '') return false;
$length = strlen($part);
if ($part[$length - 1] === '%') {
// handle percents
if (!$type) {
$type = 'percentage';
} elseif ($type !== 'percentage') {
return false;
}
$num = (float) substr($part, 0, $length - 1);
if ($num < 0) $num = 0;
if ($num > 100) $num = 100;
$new_parts[] = "$num%";
} else {
// handle integers
if (!$type) {
$type = 'integer';
} elseif ($type !== 'integer') {
return false;
}
$num = (int) $part;
if ($num < 0) $num = 0;
if ($num > 255) $num = 255;
$new_parts[] = (string) $num;
}
}
$new_triad = implode(',', $new_parts);
$color = "rgb($new_triad)";
} else {
// hexadecimal handling
if ($color[0] === '#') {
$hex = substr($color, 1);
} else {
$hex = $color;
$color = '#' . $color;
}
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
}
return $color;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,38 @@
<?php
/**
* Allows multiple validators to attempt to validate attribute.
*
* Composite is just what it sounds like: a composite of many validators.
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
* at the string. If one of them passes, that's what is returned. This is
* especially useful for CSS values, which often are a choice between
* an enumerated set of predefined values or a flexible data type.
*/
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{
/**
* List of HTMLPurifier_AttrDef objects that may process strings
* @todo Make protected
*/
public $defs;
/**
* @param $defs List of HTMLPurifier_AttrDef objects
*/
public function __construct($defs) {
$this->defs = $defs;
}
public function validate($string, $config, $context) {
foreach ($this->defs as $i => $def) {
$result = $this->defs[$i]->validate($string, $config, $context);
if ($result !== false) return $result;
}
return false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,28 @@
<?php
/**
* Decorator which enables CSS properties to be disabled for specific elements.
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
public $def, $element;
/**
* @param $def Definition to wrap
* @param $element Element to deny
*/
public function __construct($def, $element) {
$this->def = $def;
$this->element = $element;
}
/**
* Checks if CurrentToken is set and equal to $this->element
*/
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if ($token && $token->name == $this->element) return false;
return $this->def->validate($string, $config, $context);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,54 @@
<?php
/**
* Microsoft's proprietary filter: CSS property
* @note Currently supports the alpha filter. In the future, this will
* probably need an extensible framework
*/
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{
protected $intValidator;
public function __construct() {
$this->intValidator = new HTMLPurifier_AttrDef_Integer();
}
public function validate($value, $config, $context) {
$value = $this->parseCDATA($value);
if ($value === 'none') return $value;
// if we looped this we could support multiple filters
$function_length = strcspn($value, '(');
$function = trim(substr($value, 0, $function_length));
if ($function !== 'alpha' &&
$function !== 'Alpha' &&
$function !== 'progid:DXImageTransform.Microsoft.Alpha'
) return false;
$cursor = $function_length + 1;
$parameters_length = strcspn($value, ')', $cursor);
$parameters = substr($value, $cursor, $parameters_length);
$params = explode(',', $parameters);
$ret_params = array();
$lookup = array();
foreach ($params as $param) {
list($key, $value) = explode('=', $param);
$key = trim($key);
$value = trim($value);
if (isset($lookup[$key])) continue;
if ($key !== 'opacity') continue;
$value = $this->intValidator->validate($value, $config, $context);
if ($value === false) continue;
$int = (int) $value;
if ($int > 100) $value = '100';
if ($int < 0) $value = '0';
$ret_params[] = "$key=$value";
$lookup[$key] = true;
}
$ret_parameters = implode(',', $ret_params);
$ret_function = "$function($ret_parameters)";
return $ret_function;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,149 @@
<?php
/**
* Validates shorthand CSS property font.
*/
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
*
* @note If we moved specific CSS property definitions to their own
* classes instead of having them be assembled at run time by
* CSSDefinition, this wouldn't be necessary. We'd instantiate
* our own copies.
*/
protected $info = array();
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
$this->info['font-variant'] = $def->info['font-variant'];
$this->info['font-weight'] = $def->info['font-weight'];
$this->info['font-size'] = $def->info['font-size'];
$this->info['line-height'] = $def->info['line-height'];
$this->info['font-family'] = $def->info['font-family'];
}
public function validate($string, $config, $context) {
static $system_fonts = array(
'caption' => true,
'icon' => true,
'menu' => true,
'message-box' => true,
'small-caption' => true,
'status-bar' => true
);
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// check if it's one of the keywords
$lowercase_string = strtolower($string);
if (isset($system_fonts[$lowercase_string])) {
return $lowercase_string;
}
$bits = explode(' ', $string); // bits to process
$stage = 0; // this indicates what we're looking for
$caught = array(); // which stage 0 properties have we caught?
$stage_1 = array('font-style', 'font-variant', 'font-weight');
$final = ''; // output
for ($i = 0, $size = count($bits); $i < $size; $i++) {
if ($bits[$i] === '') continue;
switch ($stage) {
// attempting to catch font-style, font-variant or font-weight
case 0:
foreach ($stage_1 as $validator_name) {
if (isset($caught[$validator_name])) continue;
$r = $this->info[$validator_name]->validate(
$bits[$i], $config, $context);
if ($r !== false) {
$final .= $r . ' ';
$caught[$validator_name] = true;
break;
}
}
// all three caught, continue on
if (count($caught) >= 3) $stage = 1;
if ($r !== false) break;
// attempting to catch font-size and perhaps line-height
case 1:
$found_slash = false;
if (strpos($bits[$i], '/') !== false) {
list($font_size, $line_height) =
explode('/', $bits[$i]);
if ($line_height === '') {
// ooh, there's a space after the slash!
$line_height = false;
$found_slash = true;
}
} else {
$font_size = $bits[$i];
$line_height = false;
}
$r = $this->info['font-size']->validate(
$font_size, $config, $context);
if ($r !== false) {
$final .= $r;
// attempt to catch line-height
if ($line_height === false) {
// we need to scroll forward
for ($j = $i + 1; $j < $size; $j++) {
if ($bits[$j] === '') continue;
if ($bits[$j] === '/') {
if ($found_slash) {
return false;
} else {
$found_slash = true;
continue;
}
}
$line_height = $bits[$j];
break;
}
} else {
// slash already found
$found_slash = true;
$j = $i;
}
if ($found_slash) {
$i = $j;
$r = $this->info['line-height']->validate(
$line_height, $config, $context);
if ($r !== false) {
$final .= '/' . $r;
}
}
$final .= ' ';
$stage = 2;
break;
}
return false;
// attempting to catch font-family
case 2:
$font_family =
implode(' ', array_slice($bits, $i, $size - $i));
$r = $this->info['font-family']->validate(
$font_family, $config, $context);
if ($r !== false) {
$final .= $r . ' ';
// processing completed successfully
return rtrim($final);
}
return false;
}
}
return false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,197 @@
<?php
/**
* Validates a font family list according to CSS spec
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
protected $mask = null;
public function __construct() {
$this->mask = '- ';
for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because the our restriction of well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
/*
PHP's internal strcspn implementation is
O(length of string * length of mask), making it inefficient
for large masks. However, it's still faster than
preg_match 8)
for (p = s1;;) {
spanp = s2;
do {
if (*spanp == c || p == s1_end) {
return p - s1;
}
} while (spanp++ < (s2_end - 1));
c = *++p;
}
*/
// possible optimization: invert the mask.
}
public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
'sans-serif' => true,
'monospace' => true,
'fantasy' => true,
'cursive' => true
);
$allowed_fonts = $config->get('CSS.AllowedFonts');
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$final = '';
foreach($fonts as $font) {
$font = trim($font);
if ($font === '') continue;
// match a generic name
if (isset($generic_names[$font])) {
if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
$final .= $font . ', ';
}
continue;
}
// match a quoted name
if ($font[0] === '"' || $font[0] === "'") {
$length = strlen($font);
if ($length <= 2) continue;
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
}
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
continue;
}
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
// bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// Here, there are various classes of characters which need
// to be treated differently:
// - Alphanumeric characters are essentially safe. We
// handled these above.
// - Spaces require quoting, though most parsers will do
// the right thing if there aren't any characters that
// can be misinterpreted
// - Dashes rarely occur, but they fairly unproblematic
// for parsing/rendering purposes.
// The above characters cover the majority of Western font
// names.
// - Arbitrary Unicode characters not in ASCII. Because
// most parsers give little thought to Unicode, treatment
// of these codepoints is basically uniform, even for
// punctuation-like codepoints. These characters can
// show up in non-Western pages and are supported by most
// major browsers, for example: " 明朝" is a
// legitimate font-name
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
// the CSS3 spec for more examples:
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
// You can see live samples of these on the Internet:
// <http://www.google.co.jp/search?q=font-family++明朝|ゴシック>
// However, most of these fonts have ASCII equivalents:
// for example, 'MS Mincho', and it's considered
// professional to use ASCII font names instead of
// Unicode font names. Thanks Takeshi Terada for
// providing this information.
// The following characters, to my knowledge, have not been
// used to name font names.
// - Single quote. While theoretically you might find a
// font name that has a single quote in its name (serving
// as an apostrophe, e.g. Dave's Scribble), I haven't
// been able to find any actual examples of this.
// Internet Explorer's cssText translation (which I
// believe is invoked by innerHTML) normalizes any
// quoting to single quotes, and fails to escape single
// quotes. (Note that this is not IE's behavior for all
// CSS properties, just some sort of special casing for
// font-family). So a single quote *cannot* be used
// safely in the font-family context if there will be an
// innerHTML/cssText translation. Note that Firefox 3.x
// does this too.
// - Double quote. In IE, these get normalized to
// single-quotes, no matter what the encoding. (Fun
// fact, in IE8, the 'content' CSS property gained
// support, where they special cased to preserve encoded
// double quotes, but still translate unadorned double
// quotes into single quotes.) So, because their
// fixpoint behavior is identical to single quotes, they
// cannot be allowed either. Firefox 3.x displays
// single-quote style behavior.
// - Backslashes are reduced by one (so \\ -> \) every
// iteration, so they cannot be used safely. This shows
// up in IE7, IE8 and FF3
// - Semicolons, commas and backticks are handled properly.
// - The rest of the ASCII punctuation is handled properly.
// We haven't checked what browsers do to unadorned
// versions, but this is not important as long as the
// browser doesn't /remove/ surrounding quotes (as IE does
// for HTML).
//
// With these results in hand, we conclude that there are
// various levels of safety:
// - Paranoid: alphanumeric, spaces and dashes(?)
// - International: Paranoid + non-ASCII Unicode
// - Edgy: Everything except quotes, backslashes
// - NoJS: Standards compliance, e.g. sod IE. Note that
// with some judicious character escaping (since certain
// types of escaping doesn't work) this is theoretically
// OK as long as innerHTML/cssText is not called.
// We believe that international is a reasonable default
// (that we will implement now), and once we do more
// extensive research, we may feel comfortable with dropping
// it down to edgy.
// Edgy: alphanumeric, spaces, dashes and Unicode. Use of
// str(c)spn assumes that the string was already well formed
// Unicode (which of course it is).
if (strspn($font, $this->mask) !== strlen($font)) {
continue;
}
// Historical:
// In the absence of innerHTML/cssText, these ugly
// transforms don't pose a security risk (as \\ and \"
// might--these escapes are not supported by most browsers).
// We could try to be clever and use single-quote wrapping
// when there is a double quote present, but I have choosen
// not to implement that. (NOTE: you can reduce the amount
// of escapes by one depending on what quoting style you use)
// $font = str_replace('\\', '\\5C ', $font);
// $font = str_replace('"', '\\22 ', $font);
// $font = str_replace("'", '\\27 ', $font);
// font possibly with spaces, requires quoting
$final .= "'$font', ";
}
$final = rtrim($final, ', ');
if ($final === '') return false;
return $final;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,24 @@
<?php
/**
* Validates based on {ident} CSS grammar production
*/
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
if (!preg_match($pattern, $string)) return false;
return $string;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,40 @@
<?php
/**
* Decorator which enables !important to be used in CSS values.
*/
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
public $def, $allow;
/**
* @param $def Definition to wrap
* @param $allow Whether or not to allow !important
*/
public function __construct($def, $allow = false) {
$this->def = $def;
$this->allow = $allow;
}
/**
* Intercepts and removes !important if necessary
*/
public function validate($string, $config, $context) {
// test for ! and important tokens
$string = trim($string);
$is_important = false;
// :TODO: optimization: test directly for !important and ! important
if (strlen($string) >= 9 && substr($string, -9) === 'important') {
$temp = rtrim(substr($string, 0, -9));
// use a temp, because we might want to restore important
if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
$string = rtrim(substr($temp, 0, -1));
$is_important = true;
}
}
$string = $this->def->validate($string, $config, $context);
if ($this->allow && $is_important) $string .= ' !important';
return $string;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,47 @@
<?php
/**
* Represents a Length as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{
protected $min, $max;
/**
* @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable.
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
*/
public function __construct($min = null, $max = null) {
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
// Optimizations
if ($string === '') return false;
if ($string === '0') return '0';
if (strlen($string) === 1) return false;
$length = HTMLPurifier_Length::make($string);
if (!$length->isValid()) return false;
if ($this->min) {
$c = $length->compareTo($this->min);
if ($c === false) return false;
if ($c < 0) return false;
}
if ($this->max) {
$c = $length->compareTo($this->max);
if ($c === false) return false;
if ($c > 0) return false;
}
return $length->toString();
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,78 @@
<?php
/**
* Validates shorthand CSS property list-style.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
*/
protected $info;
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['list-style-type'] = $def->info['list-style-type'];
$this->info['list-style-position'] = $def->info['list-style-position'];
$this->info['list-style-image'] = $def->info['list-style-image'];
}
public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$caught = array();
$caught['type'] = false;
$caught['position'] = false;
$caught['image'] = false;
$i = 0; // number of catches
$none = false;
foreach ($bits as $bit) {
if ($i >= 3) return; // optimization bit
if ($bit === '') continue;
foreach ($caught as $key => $status) {
if ($status !== false) continue;
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
if ($r === false) continue;
if ($r === 'none') {
if ($none) continue;
else $none = true;
if ($key == 'image') continue;
}
$caught[$key] = $r;
$i++;
break;
}
}
if (!$i) return false;
$ret = array();
// construct type
if ($caught['type']) $ret[] = $caught['type'];
// construct image
if ($caught['image']) $ret[] = $caught['image'];
// construct position
if ($caught['position']) $ret[] = $caught['position'];
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,58 @@
<?php
/**
* Framework class for strings that involve multiple values.
*
* Certain CSS properties such as border-width and margin allow multiple
* lengths to be specified. This class can take a vanilla border-width
* definition and multiply it, usually into a max of four.
*
* @note Even though the CSS specification isn't clear about it, inherit
* can only be used alone: it will never manifest as part of a multi
* shorthand declaration. Thus, this class does not allow inherit.
*/
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
/**
* Instance of component definition to defer validation to.
* @todo Make protected
*/
public $single;
/**
* Max number of values allowed.
* @todo Make protected
*/
public $max;
/**
* @param $single HTMLPurifier_AttrDef to multiply
* @param $max Max number of values allowed (usually four)
*/
public function __construct($single, $max = 4) {
$this->single = $single;
$this->max = $max;
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
if ($string === '') return false;
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
$length = count($parts);
$final = '';
for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
if (ctype_space($parts[$i])) continue;
$result = $this->single->validate($parts[$i], $config, $context);
if ($result !== false) {
$final .= $result . ' ';
$num++;
}
}
if ($final === '') return false;
return rtrim($final);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,69 @@
<?php
/**
* Validates a number as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{
/**
* Bool indicating whether or not only positive values allowed.
*/
protected $non_negative = false;
/**
* @param $non_negative Bool indicating whether negatives are forbidden
*/
public function __construct($non_negative = false) {
$this->non_negative = $non_negative;
}
/**
* @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length
*/
public function validate($number, $config, $context) {
$number = $this->parseCDATA($number);
if ($number === '') return false;
if ($number === '0') return '0';
$sign = '';
switch ($number[0]) {
case '-':
if ($this->non_negative) return false;
$sign = '-';
case '+':
$number = substr($number, 1);
}
if (ctype_digit($number)) {
$number = ltrim($number, '0');
return $number ? $sign . $number : '0';
}
// Period is the only non-numeric character allowed
if (strpos($number, '.') === false) return false;
list($left, $right) = explode('.', $number, 2);
if ($left === '' && $right === '') return false;
if ($left !== '' && !ctype_digit($left)) return false;
$left = ltrim($left, '0');
$right = rtrim($right, '0');
if ($right === '') {
return $left ? $sign . $left : '0';
} elseif (!ctype_digit($right)) {
return false;
}
return $sign . $left . '.' . $right;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,40 @@
<?php
/**
* Validates a Percentage as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{
/**
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
*/
protected $number_def;
/**
* @param Bool indicating whether to forbid negative values
*/
public function __construct($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
if ($string === '') return false;
$length = strlen($string);
if ($length === 1) return false;
if ($string[$length - 1] !== '%') return false;
$number = substr($string, 0, $length - 1);
$number = $this->number_def->validate($number, $config, $context);
if ($number === false) return false;
return "$number%";
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,38 @@
<?php
/**
* Validates the value for the CSS property text-decoration
* @note This class could be generalized into a version that acts sort of
* like Enum except you can compound the allowed values.
*/
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
static $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true,
);
$string = strtolower($this->parseCDATA($string));
if ($string === 'none') return $string;
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {
if (isset($allowed_values[$part])) {
$final .= $part . ' ';
}
}
$final = rtrim($final);
if ($final === '') return false;
return $final;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,61 @@
<?php
/**
* Validates a URI in CSS syntax, which uses url('http://example.com')
* @note While theoretically speaking a URI in a CSS document could
* be non-embedded, as of CSS2 there is no such usage so we're
* generalizing it. This may need to be changed in the future.
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
* the separator, you cannot put a literal semicolon in
* in the URI. Try percent encoding it, in that case.
*/
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
public function __construct() {
parent::__construct(true); // always embedded
}
public function validate($uri_string, $config, $context) {
// parse the URI out of the string and then pass it onto
// the parent object
$uri_string = $this->parseCDATA($uri_string);
if (strpos($uri_string, 'url(') !== 0) return false;
$uri_string = substr($uri_string, 4);
$new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') return false;
$uri = trim(substr($uri_string, 0, $new_length));
if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0];
$new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false;
$uri = substr($uri, 1, $new_length - 1);
}
$uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
// suspicious characters are ()'; we're going to percent encode
// them for safety.
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
// there's an extra bug where ampersands lose their escaping on
// an innerHTML cycle, so a very unlucky query parameter could
// then change the meaning of the URL. Unfortunately, there's
// not much we can do about that...
return "url(\"$result\")";
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,28 @@
<?php
/**
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
* with make.
*/
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
/**
* What we're cloning
*/
protected $clone;
public function __construct($clone) {
$this->clone = $clone;
}
public function validate($v, $config, $context) {
return $this->clone->validate($v, $config, $context);
}
public function make($string) {
return clone $this->clone;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,65 @@
<?php
// Enum = Enumerated
/**
* Validates a keyword against a list of valid values.
* @warning The case-insensitive compare of this function uses PHP's
* built-in strtolower and ctype_lower functions, which may
* cause problems with international comparisons
*/
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{
/**
* Lookup table of valid values.
* @todo Make protected
*/
public $valid_values = array();
/**
* Bool indicating whether or not enumeration is case sensitive.
* @note In general this is always case insensitive.
*/
protected $case_sensitive = false; // values according to W3C spec
/**
* @param $valid_values List of valid values
* @param $case_sensitive Bool indicating whether or not case sensitive
*/
public function __construct(
$valid_values = array(), $case_sensitive = false
) {
$this->valid_values = array_flip($valid_values);
$this->case_sensitive = $case_sensitive;
}
public function validate($string, $config, $context) {
$string = trim($string);
if (!$this->case_sensitive) {
// we may want to do full case-insensitive libraries
$string = ctype_lower($string) ? $string : strtolower($string);
}
$result = isset($this->valid_values[$string]);
return $result ? $string : false;
}
/**
* @param $string In form of comma-delimited list of case-insensitive
* valid values. Example: "foo,bar,baz". Prepend "s:" to make
* case sensitive
*/
public function make($string) {
if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
$string = substr($string, 2);
$sensitive = true;
} else {
$sensitive = false;
}
$values = explode(',', $string);
return new HTMLPurifier_AttrDef_Enum($values, $sensitive);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,28 @@
<?php
/**
* Validates a boolean attribute
*/
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{
protected $name;
public $minimized = true;
public function __construct($name = false) {$this->name = $name;}
public function validate($string, $config, $context) {
if (empty($string)) return false;
return $this->name;
}
/**
* @param $string Name of attribute
*/
public function make($string) {
return new HTMLPurifier_AttrDef_HTML_Bool($string);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,34 @@
<?php
/**
* Implements special behavior for class attribute (normally NMTOKENS)
*/
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
protected function split($string, $config, $context) {
// really, this twiddle should be lazy loaded
$name = $config->getDefinition('HTML')->doctype->name;
if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
return parent::split($string, $config, $context);
} else {
return preg_split('/\s+/', $string);
}
}
protected function filter($tokens, $config, $context) {
$allowed = $config->get('Attr.AllowedClasses');
$forbidden = $config->get('Attr.ForbiddenClasses');
$ret = array();
foreach ($tokens as $token) {
if (
($allowed === null || isset($allowed[$token])) &&
!isset($forbidden[$token]) &&
// We need this O(n) check because of PHP's array
// implementation that casts -0 to 0.
!in_array($token, $ret, true)
) {
$ret[] = $token;
}
}
return $ret;
}
}

View File

@ -0,0 +1,32 @@
<?php
/**
* Validates a color according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$string = trim($string);
if (empty($string)) return false;
if (isset($colors[strtolower($string)])) return $colors[$string];
if ($string[0] === '#') $hex = substr($string, 1);
else $hex = $string;
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
return "#$hex";
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,21 @@
<?php
/**
* Special-case enum attribute definition that lazy loads allowed frame targets
*/
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{
public $valid_values = false; // uninitialized value
protected $case_sensitive = false;
public function __construct() {}
public function validate($string, $config, $context) {
if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
return parent::validate($string, $config, $context);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,80 @@
<?php
/**
* Validates the HTML attribute ID.
* @warning Even though this is the id processor, it
* will ignore the directive Attr:IDBlacklist, since it will only
* go according to the ID accumulator. Since the accumulator is
* automatically generated, it will have already absorbed the
* blacklist. If you're hacking around, make sure you use load()!
*/
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{
// selector is NOT a valid thing to use for IDREFs, because IDREFs
// *must* target IDs that exist, whereas selector #ids do not.
/**
* Determines whether or not we're validating an ID in a CSS
* selector context.
*/
protected $selector;
public function __construct($selector = false) {
$this->selector = $selector;
}
public function validate($id, $config, $context) {
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
if ($id === '') return false;
$prefix = $config->get('Attr.IDPrefix');
if ($prefix !== '') {
$prefix .= $config->get('Attr.IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
} elseif ($config->get('Attr.IDPrefixLocal') !== '') {
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
'%Attr.IDPrefix is set', E_USER_WARNING);
}
if (!$this->selector) {
$id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false;
}
// we purposely avoid using regex, hopefully this is faster
if (ctype_alpha($id)) {
$result = true;
} else {
if (!ctype_alpha(@$id[0])) return false;
$trim = trim( // primitive style of regexps, I suppose
$id,
'A..Za..z0..9:-._'
);
$result = ($trim === '');
}
$regexp = $config->get('Attr.IDBlacklistRegexp');
if ($regexp && preg_match($regexp, $id)) {
return false;
}
if (!$this->selector && $result) $id_accumulator->add($id);
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it
// valid, or return false.
return $result ? $id : false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,41 @@
<?php
/**
* Validates the HTML type length (not to be confused with CSS's length).
*
* This accepts integer pixels or percentages as lengths for certain
* HTML attributes.
*/
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
$parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result;
$length = strlen($string);
$last_char = $string[$length - 1];
if ($last_char !== '%') return false;
$points = substr($string, 0, $length - 1);
if (!is_numeric($points)) return false;
$points = (int) $points;
if ($points < 0) return '0%';
if ($points > 100) return '100%';
return ((string) $points) . '%';
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,53 @@
<?php
/**
* Validates a rel/rev link attribute against a directive of allowed values
* @note We cannot use Enum because link types allow multiple
* values.
* @note Assumes link types are ASCII text
*/
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
/** Name config attribute to pull. */
protected $name;
public function __construct($name) {
$configLookup = array(
'rel' => 'AllowedRel',
'rev' => 'AllowedRev'
);
if (!isset($configLookup[$name])) {
trigger_error('Unrecognized attribute name for link '.
'relationship.', E_USER_ERROR);
return;
}
$this->name = $configLookup[$name];
}
public function validate($string, $config, $context) {
$allowed = $config->get('Attr.' . $this->name);
if (empty($allowed)) return false;
$string = $this->parseCDATA($string);
$parts = explode(' ', $string);
// lookup to prevent duplicates
$ret_lookup = array();
foreach ($parts as $part) {
$part = strtolower(trim($part));
if (!isset($allowed[$part])) continue;
$ret_lookup[$part] = true;
}
if (empty($ret_lookup)) return false;
$string = implode(' ', array_keys($ret_lookup));
return $string;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,41 @@
<?php
/**
* Validates a MultiLength as defined by the HTML spec.
*
* A multilength is either a integer (pixel count), a percentage, or
* a relative number.
*/
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
$parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result;
$length = strlen($string);
$last_char = $string[$length - 1];
if ($last_char !== '*') return false;
$int = substr($string, 0, $length - 1);
if ($int == '') return '*';
if (!is_numeric($int)) return false;
$int = (int) $int;
if ($int < 0) return false;
if ($int == 0) return '0';
if ($int == 1) return '*';
return ((string) $int) . '*';
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,52 @@
<?php
/**
* Validates contents based on NMTOKENS attribute type.
*/
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$tokens = $this->split($string, $config, $context);
$tokens = $this->filter($tokens, $config, $context);
if (empty($tokens)) return false;
return implode(' ', $tokens);
}
/**
* Splits a space separated list of tokens into its constituent parts.
*/
protected function split($string, $config, $context) {
// OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation
// we don't support U+00A1 and up codepoints or
// escaping because I don't know how to do that with regexps
// and plus it would complicate optimization efforts (you never
// see that anyway).
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
'(?:(?=\s)|\z)/'; // look ahead for space or string end
preg_match_all($pattern, $string, $matches);
return $matches[1];
}
/**
* Template method for removing certain tokens based on arbitrary criteria.
* @note If we wanted to be really functional, we'd do an array_filter
* with a callback. But... we're not.
*/
protected function filter($tokens, $config, $context) {
return $tokens;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,48 @@
<?php
/**
* Validates an integer representation of pixels according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
protected $max;
public function __construct($max = null) {
$this->max = $max;
}
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '0') return $string;
if ($string === '') return false;
$length = strlen($string);
if (substr($string, $length - 2) == 'px') {
$string = substr($string, 0, $length - 2);
}
if (!is_numeric($string)) return false;
$int = (int) $string;
if ($int < 0) return '0';
// upper-bound value, extremely high values can
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
// WARNING, above link WILL crash you if you're using Windows
if ($this->max !== null && $int > $this->max) return (string) $this->max;
return (string) $int;
}
public function make($string) {
if ($string === '') $max = null;
else $max = (int) $string;
$class = get_class($this);
return new $class($max);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,73 @@
<?php
/**
* Validates an integer.
* @note While this class was modeled off the CSS definition, no currently
* allowed CSS uses this type. The properties that do are: widows,
* orphans, z-index, counter-increment, counter-reset. Some of the
* HTML attributes, however, find use for a non-negative version of this.
*/
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{
/**
* Bool indicating whether or not negative values are allowed
*/
protected $negative = true;
/**
* Bool indicating whether or not zero is allowed
*/
protected $zero = true;
/**
* Bool indicating whether or not positive values are allowed
*/
protected $positive = true;
/**
* @param $negative Bool indicating whether or not negative values are allowed
* @param $zero Bool indicating whether or not zero is allowed
* @param $positive Bool indicating whether or not positive values are allowed
*/
public function __construct(
$negative = true, $zero = true, $positive = true
) {
$this->negative = $negative;
$this->zero = $zero;
$this->positive = $positive;
}
public function validate($integer, $config, $context) {
$integer = $this->parseCDATA($integer);
if ($integer === '') return false;
// we could possibly simply typecast it to integer, but there are
// certain fringe cases that must not return an integer.
// clip leading sign
if ( $this->negative && $integer[0] === '-' ) {
$digits = substr($integer, 1);
if ($digits === '0') $integer = '0'; // rm minus sign for zero
} elseif( $this->positive && $integer[0] === '+' ) {
$digits = $integer = substr($integer, 1); // rm unnecessary plus
} else {
$digits = $integer;
}
// test if it's numeric
if (!ctype_digit($digits)) return false;
// perform scope tests
if (!$this->zero && $integer == 0) return false;
if (!$this->positive && $integer > 0) return false;
if (!$this->negative && $integer < 0) return false;
return $integer;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,73 @@
<?php
/**
* Validates the HTML attribute lang, effectively a language code.
* @note Built according to RFC 3066, which obsoleted RFC 1766
*/
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
if (!$string) return false;
$subtags = explode('-', $string);
$num_subtags = count($subtags);
if ($num_subtags == 0) return false; // sanity check
// process primary subtag : $subtags[0]
$length = strlen($subtags[0]);
switch ($length) {
case 0:
return false;
case 1:
if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) {
return false;
}
break;
case 2:
case 3:
if (! ctype_alpha($subtags[0]) ) {
return false;
} elseif (! ctype_lower($subtags[0]) ) {
$subtags[0] = strtolower($subtags[0]);
}
break;
default:
return false;
}
$new_string = $subtags[0];
if ($num_subtags == 1) return $new_string;
// process second subtag : $subtags[1]
$length = strlen($subtags[1]);
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
return $new_string;
}
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
$new_string .= '-' . $subtags[1];
if ($num_subtags == 2) return $new_string;
// process all other subtags, index 2 and up
for ($i = 2; $i < $num_subtags; $i++) {
$length = strlen($subtags[$i]);
if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
return $new_string;
}
if (!ctype_lower($subtags[$i])) {
$subtags[$i] = strtolower($subtags[$i]);
}
$new_string .= '-' . $subtags[$i];
}
return $new_string;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,34 @@
<?php
/**
* Decorator that, depending on a token, switches between two definitions.
*/
class HTMLPurifier_AttrDef_Switch
{
protected $tag;
protected $withTag, $withoutTag;
/**
* @param string $tag Tag name to switch upon
* @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
* @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
*/
public function __construct($tag, $with_tag, $without_tag) {
$this->tag = $tag;
$this->withTag = $with_tag;
$this->withoutTag = $without_tag;
}
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if (!$token || $token->name !== $this->tag) {
return $this->withoutTag->validate($string, $config, $context);
} else {
return $this->withTag->validate($string, $config, $context);
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,15 @@
<?php
/**
* Validates arbitrary text according to the HTML spec.
*/
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
return $this->parseCDATA($string);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,77 @@
<?php
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
*/
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
protected $parser;
protected $embedsResource;
/**
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
*/
public function __construct($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->embedsResource = (bool) $embeds_resource;
}
public function make($string) {
$embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds);
}
public function validate($uri, $config, $context) {
if ($config->get('URI.Disable')) return false;
$uri = $this->parseCDATA($uri);
// parse the URI
$uri = $this->parser->parse($uri);
if ($uri === false) return false;
// add embedded flag to context for validators
$context->register('EmbeddedURI', $this->embedsResource);
$ok = false;
do {
// generic validation
$result = $uri->validate($config, $context);
if (!$result) break;
// chained filtering
$uri_def = $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
// scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) break;
if ($this->embedsResource && !$scheme_obj->browsable) break;
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break;
// Post chained filtering
$result = $uri_def->postFilter($uri, $config, $context);
if (!$result) break;
// survived gauntlet
$ok = true;
} while (false);
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// back to string
return $uri->toString();
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,17 @@
<?php
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{
/**
* Unpacks a mailbox into its display-name and address
*/
function unpack($string) {
// needs to be implemented
}
}
// sub-implementations
// vim: et sw=4 sts=4

View File

@ -0,0 +1,21 @@
<?php
/**
* Primitive email validation class based on the regexp found at
* http://www.regular-expressions.info/email.html
*/
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{
public function validate($string, $config, $context) {
// no support for named mailboxes i.e. "Bob <bob@example.com>"
// that needs more percent encoding to be done
if ($string == '') return false;
$string = trim($string);
$result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
return $result ? $string : false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,101 @@
<?php
/**
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
*/
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
*/
protected $ipv4;
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
*/
protected $ipv6;
public function __construct() {
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
}
public function validate($string, $config, $context) {
$length = strlen($string);
// empty hostname is OK; it's usually semantically equivalent:
// the default host as defined by a URI scheme is used:
//
// If the URI scheme defines a default for host, then that
// default applies when the host subcomponent is undefined
// or when the registered name is empty (zero length).
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
$ip = substr($string, 1, $length - 2);
$valid = $this->ipv6->validate($ip, $config, $context);
if ($valid === false) return false;
return '['. $valid . ']';
}
// need to do checks on unusual encodings too
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// A regular domain name.
// This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
if ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
try {
$new_parts = array();
foreach ($parts as $part) {
$encodable = false;
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
if (ord($part[$i]) > 0x7a) {
$encodable = true;
break;
}
}
if (!$encodable) {
$new_parts[] = $part;
} else {
$new_parts[] = $idna->encode($part);
}
}
$string = implode('.', $new_parts);
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
} catch (Exception $e) {
// XXX error reporting
}
}
return false;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,39 @@
<?php
/**
* Validates an IPv4 address
* @author Feyd @ forums.devnetwork.net (public domain)
*/
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{
/**
* IPv4 regex, protected so that IPv6 can reuse it
*/
protected $ip4;
public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
}
return false;
}
/**
* Lazy load function to prevent regex from being stuffed in
* cache.
*/
protected function _loadRegex() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,99 @@
<?php
/**
* Validates an IPv6 address.
* @author Feyd @ forums.devnetwork.net (public domain)
* @note This function requires brackets to have been removed from address
* in URI.
*/
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{
public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
$original = $aIP;
$hex = '[0-9a-fA-F]';
$blk = '(?:' . $hex . '{1,4})';
$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
// prefix check
if (strpos($aIP, '/') !== false)
{
if (preg_match('#' . $pre . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
unset($find);
}
else
{
return false;
}
}
// IPv4-compatiblity check
if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
$ip = explode('.', $find[0]);
$ip = array_map('dechex', $ip);
$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
unset($find, $ip);
}
// compression check
$aIP = explode('::', $aIP);
$c = count($aIP);
if ($c > 2)
{
return false;
}
elseif ($c == 2)
{
list($first, $second) = $aIP;
$first = explode(':', $first);
$second = explode(':', $second);
if (count($first) + count($second) > 8)
{
return false;
}
while(count($first) < 8)
{
array_push($first, '0');
}
array_splice($first, 8 - count($second), 8, $second);
$aIP = $first;
unset($first,$second);
}
else
{
$aIP = explode(':', $aIP[0]);
}
$c = count($aIP);
if ($c != 8)
{
return false;
}
// All the pieces should be 16-bit hex strings. Are they?
foreach ($aIP as $piece)
{
if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
{
return false;
}
}
return $original;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,56 @@
<?php
/**
* Processes an entire attribute array for corrections needing multiple values.
*
* Occasionally, a certain attribute will need to be removed and popped onto
* another value. Instead of creating a complex return syntax for
* HTMLPurifier_AttrDef, we just pass the whole attribute array to a
* specialized object and have that do the special work. That is the
* family of HTMLPurifier_AttrTransform.
*
* An attribute transformation can be assigned to run before or after
* HTMLPurifier_AttrDef validation. See HTMLPurifier_HTMLDefinition for
* more details.
*/
abstract class HTMLPurifier_AttrTransform
{
/**
* Abstract: makes changes to the attributes dependent on multiple values.
*
* @param $attr Assoc array of attributes, usually from
* HTMLPurifier_Token_Tag::$attr
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_Context object
* @returns Processed attribute array.
*/
abstract public function transform($attr, $config, $context);
/**
* Prepends CSS properties to the style attribute, creating the
* attribute if it doesn't exist.
* @param $attr Attribute array to process (passed by reference)
* @param $css CSS to prepend
*/
public function prependCSS(&$attr, $css) {
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
$attr['style'] = $css . $attr['style'];
}
/**
* Retrieves and removes an attribute
* @param $attr Attribute array to process (passed by reference)
* @param $key Key of attribute to confiscate
*/
public function confiscateAttr(&$attr, $key) {
if (!isset($attr[$key])) return null;
$value = $attr[$key];
unset($attr[$key]);
return $value;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,23 @@
<?php
/**
* Pre-transform that changes proprietary background attribute to CSS.
*/
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['background'])) return $attr;
$background = $this->confiscateAttr($attr, 'background');
// some validation should happen here
$this->prependCSS($attr, "background-image:url($background);");
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,19 @@
<?php
// this MUST be placed in post, as it assumes that any value in dir is valid
/**
* Post-trasnform that ensures that bdo tags have the dir attribute set.
*/
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (isset($attr['dir'])) return $attr;
$attr['dir'] = $config->get('Attr.DefaultTextDir');
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,23 @@
<?php
/**
* Pre-transform that changes deprecated bgcolor attribute to CSS.
*/
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['bgcolor'])) return $attr;
$bgcolor = $this->confiscateAttr($attr, 'bgcolor');
// some validation should happen here
$this->prependCSS($attr, "background-color:$bgcolor;");
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,36 @@
<?php
/**
* Pre-transform that changes converts a boolean attribute to fixed CSS
*/
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform {
/**
* Name of boolean attribute that is trigger
*/
protected $attr;
/**
* CSS declarations to add to style, needs trailing semicolon
*/
protected $css;
/**
* @param $attr string attribute name to convert from
* @param $css string CSS declarations to add to style (needs semicolon)
*/
public function __construct($attr, $css) {
$this->attr = $attr;
$this->css = $css;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
unset($attr[$this->attr]);
$this->prependCSS($attr, $this->css);
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,18 @@
<?php
/**
* Pre-transform that changes deprecated border attribute to CSS.
*/
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['border'])) return $attr;
$border_width = $this->confiscateAttr($attr, 'border');
// some validation should happen here
$this->prependCSS($attr, "border:{$border_width}px solid;");
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,58 @@
<?php
/**
* Generic pre-transform that converts an attribute with a fixed number of
* values (enumerated) to CSS.
*/
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
/**
* Name of attribute to transform from
*/
protected $attr;
/**
* Lookup array of attribute values to CSS
*/
protected $enumToCSS = array();
/**
* Case sensitivity of the matching
* @warning Currently can only be guaranteed to work with ASCII
* values.
*/
protected $caseSensitive = false;
/**
* @param $attr String attribute name to transform from
* @param $enumToCSS Lookup array of attribute values to CSS
* @param $case_sensitive Boolean case sensitivity indicator, default false
*/
public function __construct($attr, $enum_to_css, $case_sensitive = false) {
$this->attr = $attr;
$this->enumToCSS = $enum_to_css;
$this->caseSensitive = (bool) $case_sensitive;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
$value = trim($attr[$this->attr]);
unset($attr[$this->attr]);
if (!$this->caseSensitive) $value = strtolower($value);
if (!isset($this->enumToCSS[$value])) {
return $attr;
}
$this->prependCSS($attr, $this->enumToCSS[$value]);
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,43 @@
<?php
// must be called POST validation
/**
* Transform that supplies default values for the src and alt attributes
* in img tags, as well as prevents the img tag from being removed
* because of a missing alt tag. This needs to be registered as both
* a pre and post attribute transform.
*/
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
$src = true;
if (!isset($attr['src'])) {
if ($config->get('Core.RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr.DefaultInvalidImage');
$src = false;
}
if (!isset($attr['alt'])) {
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}
} else {
$attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
}
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,44 @@
<?php
/**
* Pre-transform that changes deprecated hspace and vspace attributes to CSS
*/
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {
protected $attr;
protected $css = array(
'hspace' => array('left', 'right'),
'vspace' => array('top', 'bottom')
);
public function __construct($attr) {
$this->attr = $attr;
if (!isset($this->css[$attr])) {
trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
}
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
$width = $this->confiscateAttr($attr, $this->attr);
// some validation could happen here
if (!isset($this->css[$this->attr])) return $attr;
$style = '';
foreach ($this->css[$this->attr] as $suffix) {
$property = "margin-$suffix";
$style .= "$property:{$width}px;";
}
$this->prependCSS($attr, $style);
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,40 @@
<?php
/**
* Performs miscellaneous cross attribute validation and filtering for
* input elements. This is meant to be a post-transform.
*/
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {
protected $pixels;
public function __construct() {
$this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
}
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) $t = 'text';
else $t = strtolower($attr['type']);
if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') {
unset($attr['checked']);
}
if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') {
unset($attr['maxlength']);
}
if (isset($attr['size']) && $t !== 'text' && $t !== 'password') {
$result = $this->pixels->validate($attr['size'], $config, $context);
if ($result === false) unset($attr['size']);
else $attr['size'] = $result;
}
if (isset($attr['src']) && $t !== 'image') {
unset($attr['src']);
}
if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) {
$attr['value'] = '';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,28 @@
<?php
/**
* Post-transform that copies lang's value to xml:lang (and vice-versa)
* @note Theoretically speaking, this could be a pre-transform, but putting
* post is more efficient.
*/
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
$lang = isset($attr['lang']) ? $attr['lang'] : false;
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
if ($lang !== false && $xml_lang === false) {
$attr['xml:lang'] = $lang;
} elseif ($xml_lang !== false) {
$attr['lang'] = $xml_lang;
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,27 @@
<?php
/**
* Class for handling width/height length attribute transformations to CSS
*/
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{
protected $name;
protected $cssName;
public function __construct($name, $css_name = null) {
$this->name = $name;
$this->cssName = $css_name ? $css_name : $name;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->name])) return $attr;
$length = $this->confiscateAttr($attr, $this->name);
if(ctype_digit($length)) $length .= 'px';
$this->prependCSS($attr, $this->cssName . ":$length;");
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,21 @@
<?php
/**
* Pre-transform that changes deprecated name attribute to ID if necessary
*/
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Abort early if we're using relaxed definition of name
if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr;
if (!isset($attr['name'])) return $attr;
$id = $this->confiscateAttr($attr, 'name');
if ( isset($attr['id'])) return $attr;
$attr['id'] = $id;
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,27 @@
<?php
/**
* Post-transform that performs validation to the name attribute; if
* it is present with an equivalent id attribute, it is passed through;
* otherwise validation is performed.
*/
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{
public function __construct() {
$this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
}
public function transform($attr, $config, $context) {
if (!isset($attr['name'])) return $attr;
$name = $attr['name'];
if (isset($attr['id']) && $attr['id'] === $name) return $attr;
$result = $this->idDef->validate($name, $config, $context);
if ($result === false) unset($attr['name']);
else $attr['name'] = $result;
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,45 @@
<?php
// must be called POST validation
/**
* Adds rel="nofollow" to all outbound links. This transform is
* only attached if Attr.Nofollow is TRUE.
*/
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isLocal($config, $context)) {
if (isset($attr['rel'])) {
$rels = explode(' ', $attr);
if (!in_array('nofollow', $rels)) {
$rels[] = 'nofollow';
}
$attr['rel'] = implode(' ', $rels);
} else {
$attr['rel'] = 'nofollow';
}
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,15 @@
<?php
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
public $name = "SafeEmbed";
public function transform($attr, $config, $context) {
$attr['allowscriptaccess'] = 'never';
$attr['allownetworking'] = 'internal';
$attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,16 @@
<?php
/**
* Writes default type for all objects. Currently only supports flash.
*/
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
public $name = "SafeObject";
function transform($attr, $config, $context) {
if (!isset($attr['type'])) $attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,64 @@
<?php
/**
* Validates name/value pairs in param tags to be used in safe objects. This
* will only allow name values it recognizes, and pre-fill certain attributes
* with required values.
*
* @note
* This class only supports Flash. In the future, Quicktime support
* may be added.
*
* @warning
* This class expects an injector to add the necessary parameters tags.
*/
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
public $name = "SafeParam";
private $uri;
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
$this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
}
public function transform($attr, $config, $context) {
// If we add support for other objects, we'll need to alter the
// transforms.
switch ($attr['name']) {
// application/x-shockwave-flash
// Keep this synchronized with Injector/SafeObject.php
case 'allowScriptAccess':
$attr['value'] = 'never';
break;
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'allowFullScreen':
if ($config->get('HTML.FlashAllowFullScreen')) {
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
} else {
$attr['value'] = 'false';
}
break;
case 'wmode':
$attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,16 @@
<?php
/**
* Implements required attribute stipulation for <script>
*/
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) {
$attr['type'] = 'text/javascript';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,38 @@
<?php
// must be called POST validation
/**
* Adds target="blank" to all outbound links. This transform is
* only attached if Attr.TargetBlank is TRUE. This works regardless
* of whether or not Attr.AllowedFrameTargets
*/
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = 'blank';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,18 @@
<?php
/**
* Sets height/width defaults for <textarea>
*/
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Calculated from Firefox
if (!isset($attr['cols'])) $attr['cols'] = '22';
if (!isset($attr['rows'])) $attr['rows'] = '3';
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,91 @@
<?php
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
*/
class HTMLPurifier_AttrTypes
{
/**
* Lookup array of attribute string identifiers to concrete implementations
*/
protected $info = array();
/**
* Constructs the info array, supplying default implementations for attribute
* types.
*/
public function __construct() {
// XXX This is kind of poor, since we don't actually /clone/
// instances; instead, we use the supplied make() attribute. So,
// the underlying class must know how to deal with arguments.
// With the old implementation of Enum, that ignored its
// arguments when handling a make dispatch, the IAlign
// definition wouldn't work.
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
// unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
$this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
// "proprietary" types
$this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
private static function makeEnum($in) {
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
}
/**
* Retrieves a type
* @param $type String type name
* @return Object AttrDef for type
*/
public function get($type) {
// determine if there is any extra info tacked on
if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2);
else $string = '';
if (!isset($this->info[$type])) {
trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
return;
}
return $this->info[$type]->make($string);
}
/**
* Sets a new implementation for a type
* @param $type String type name
* @param $impl Object AttrDef for type
*/
public function set($type, $impl) {
$this->info[$type] = $impl;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,162 @@
<?php
/**
* Validates the attributes of a token. Doesn't manage required attributes
* very well. The only reason we factored this out was because RemoveForeignElements
* also needed it besides ValidateAttributes.
*/
class HTMLPurifier_AttrValidator
{
/**
* Validates the attributes of a token, returning a modified token
* that has valid tokens
* @param $token Reference to token to validate. We require a reference
* because the operation this class performs on the token are
* not atomic, so the context CurrentToken to be updated
* throughout
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
public function validateToken(&$token, &$config, $context) {
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
// initialize IDAccumulator if necessary
$ok =& $context->get('IDAccumulator', true);
if (!$ok) {
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
}
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
if (
!$token instanceof HTMLPurifier_Token_Start &&
!$token instanceof HTMLPurifier_Token_Empty
) return $token;
// create alias to global definition array, see also $defs
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// don't update token until the very end, to ensure an atomic update
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
foreach ($definition->info_attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// do local transformations only applicable to this element (pre)
// ex. <p align="right"> to <p style="text-align:right;">
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// create alias to this element's attribute definition array, see
// also $d_defs (global attribute definition array)
// DEFINITION CALL
$defs = $definition->info[$token->name]->attr;
$attr_key = false;
$context->register('CurrentAttr', $attr_key);
// iterate through all the attribute keypairs
// Watch out for name collisions: $key has previously been used
foreach ($attr as $attr_key => $value) {
// call the definition
if ( isset($defs[$attr_key]) ) {
// there is a local definition defined
if ($defs[$attr_key] === false) {
// We've explicitly been told not to allow this element.
// This is usually when there's a global definition
// that must be overridden.
// Theoretically speaking, we could have a
// AttrDef_DenyAll, but this is faster!
$result = false;
} else {
// validate according to the element's definition
$result = $defs[$attr_key]->validate(
$value, $config, $context
);
}
} elseif ( isset($d_defs[$attr_key]) ) {
// there is a global definition defined, validate according
// to the global definition
$result = $d_defs[$attr_key]->validate(
$value, $config, $context
);
} else {
// system never heard of the attribute? DELETE!
$result = false;
}
// put the results into effect
if ($result === false || $result === null) {
// this is a generic error message that should replaced
// with more specific ones when possible
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
// remove the attribute
unset($attr[$attr_key]);
} elseif (is_string($result)) {
// generally, if a substitution is happening, there
// was some sort of implicit correction going on. We'll
// delegate it to the attribute classes to say exactly what.
// simple substitution
$attr[$attr_key] = $result;
} else {
// nothing happens
}
// we'd also want slightly more complicated substitution
// involving an array as the return value,
// although we're not sure how colliding attributes would
// resolve (certain ones would be completely overriden,
// others would prepend themselves).
}
$context->destroy('CurrentAttr');
// post transforms
// global (error reporting untested)
foreach ($definition->info_attr_transform_post as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// local (error reporting untested)
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
$token->attr = $attr;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,104 @@
<?php
// constants are slow, so we use as few as possible
if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));
}
// accomodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (!defined('PHP_EOL')) {
switch (strtoupper(substr(PHP_OS, 0, 3))) {
case 'WIN':
define('PHP_EOL', "\r\n");
break;
case 'DAR':
define('PHP_EOL', "\r");
break;
default:
define('PHP_EOL', "\n");
}
}
/**
* Bootstrap class that contains meta-functionality for HTML Purifier such as
* the autoload function.
*
* @note
* This class may be used without any other files from HTML Purifier.
*/
class HTMLPurifier_Bootstrap
{
/**
* Autoload function for HTML Purifier
* @param $class Class to load
*/
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
// Technically speaking, it should be ok and more efficient to
// just do 'require', but Antonio Parraga reports that with
// Zend extensions such as Zend debugger and APC, this invariant
// may be broken. Since we have efficient alternatives, pay
// the cost here and avoid the bug.
require_once HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
/**
* Returns the path for a specific class.
*/
public static function getPath($class) {
if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
// Custom implementations
if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
$code = str_replace('_', '-', substr($class, 22));
$file = 'HTMLPurifier/Language/classes/' . $code . '.php';
} else {
$file = str_replace('_', '/', $class) . '.php';
}
if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
return $file;
}
/**
* "Pre-registers" our autoloader on the SPL stack.
*/
public static function registerAutoload() {
$autoload = array('HTMLPurifier_Bootstrap', 'autoload');
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
if (!$reflector->isStatic()) {
throw new Exception('
HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.
');
}
// Suprisingly, spl_autoload_register supports the
// Class::staticMethod callback format, although call_user_func doesn't
if ($compat) $func = implode('::', $func);
}
spl_autoload_unregister($func);
}
spl_autoload_register($autoload);
foreach ($funcs as $func) spl_autoload_register($func);
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,322 @@
<?php
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
public $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
*/
public $info = array();
/**
* Constructs the info array. The meat of this class.
*/
protected function doSetup($config) {
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'), false);
$border_style =
$this->info['border-bottom-style'] =
$this->info['border-right-style'] =
$this->info['border-left-style'] =
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
'groove', 'ridge', 'inset', 'outset'), false);
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right', 'both'), false);
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right'), false);
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'italic', 'oblique'), false);
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'), false);
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
new HTMLPurifier_AttrDef_CSS_URI()
)
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'), false);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array('disc', 'circle', 'square', 'decimal', 'lower-roman',
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
$this->info['list-style-image'] = $uri_or_none;
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
);
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
$border_color =
$this->info['border-top-color'] =
$this->info['border-bottom-color'] =
$this->info['border-left-color'] =
$this->info['border-right-color'] =
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('transparent')),
new HTMLPurifier_AttrDef_CSS_Color()
));
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
$border_width =
$this->info['border-top-width'] =
$this->info['border-bottom-width'] =
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
));
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
'small', 'medium', 'large', 'x-large', 'xx-large',
'larger', 'smaller')),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
$margin =
$this->info['margin-top'] =
$this->info['margin-bottom'] =
$this->info['margin-left'] =
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
// non-negative
$padding =
$this->info['padding-top'] =
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$max = $config->get('CSS.MaxImgLength');
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch('img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)),
// For everyone else:
$trusted_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
// this could use specialized code
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
'400', '500', '600', '700', '800', '900'), false);
// MUST be called after other font properties, as it references
// a CSSDefinition object
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
// same here
$this->info['border'] =
$this->info['border-bottom'] =
$this->info['border-top'] =
$this->info['border-left'] =
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
'collapse', 'separate'));
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
'top', 'bottom'));
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
'auto', 'fixed'));
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
if ($config->get('CSS.Trusted')) {
$this->doSetupTrusted($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
}
$this->setupConfigStuff($config);
}
protected function doSetupProprietary($config) {
// Internet Explorer only scrollbar colors
$this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
// technically not proprietary, but CSS3, and no one supports it
$this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
$this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
$this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
// only opacity, for now
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
}
protected function doSetupTricky($config) {
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
'inline', 'block', 'list-item', 'run-in', 'compact',
'marker', 'table', 'inline-table', 'table-row-group',
'table-header-group', 'table-footer-group', 'table-row',
'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
));
$this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array(
'visible', 'hidden', 'collapse'
));
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
}
protected function doSetupTrusted($config) {
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
'static', 'relative', 'absolute', 'fixed'
));
$this->info['top'] =
$this->info['left'] =
$this->info['right'] =
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Integer(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
}
/**
* Performs extra config-based processing. Based off of
* HTMLPurifier_HTMLDefinition.
* @todo Refactor duplicate elements into common class (probably using
* composition, not inheritance).
*/
protected function setupConfigStuff($config) {
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_properties = $config->get('CSS.AllowedProperties');
if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
unset($allowed_properties[$name]);
}
// emit errors
foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
if ($forbidden_properties !== null) {
foreach ($this->info as $name => $d) {
if (isset($forbidden_properties[$name])) {
unset($this->info[$name]);
}
}
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,48 @@
<?php
/**
* Defines allowed child nodes and validates tokens against it.
*/
abstract class HTMLPurifier_ChildDef
{
/**
* Type of child definition, usually right-most part of class name lowercase.
* Used occasionally in terms of context.
*/
public $type;
/**
* Bool that indicates whether or not an empty array of children is okay
*
* This is necessary for redundant checking when changes affecting
* a child node may cause a parent node to now be disallowed.
*/
public $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow
*/
public $elements = array();
/**
* Get lookup of tag names that should not close this element automatically.
* All other elements will do so.
*/
public function getAllowedElements($config) {
return $this->elements;
}
/**
* Validates nodes according to definition and returns modification.
*
* @param $tokens_of_children Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object
* @return bool true to leave nodes as is
* @return bool false to remove parent node
* @return array of replacement child tokens
*/
abstract public function validateChildren($tokens_of_children, $config, $context);
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,48 @@
<?php
/**
* Definition that uses different definitions depending on context.
*
* The del and ins tags are notable because they allow different types of
* elements depending on whether or not they're in a block or inline context.
* Chameleon allows this behavior to happen by using two different
* definitions depending on context. While this somewhat generalized,
* it is specifically intended for those two tags.
*/
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{
/**
* Instance of the definition object to use when inline. Usually stricter.
*/
public $inline;
/**
* Instance of the definition object to use when block.
*/
public $block;
public $type = 'chameleon';
/**
* @param $inline List of elements to allow when inline.
* @param $block List of elements to allow when block.
*/
public function __construct($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
$this->elements = $this->block->elements;
}
public function validateChildren($tokens_of_children, $config, $context) {
if ($context->get('IsInline') === false) {
return $this->block->validateChildren(
$tokens_of_children, $config, $context);
} else {
return $this->inline->validateChildren(
$tokens_of_children, $config, $context);
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,90 @@
<?php
/**
* Custom validation class, accepts DTD child definitions
*
* @warning Currently this class is an all or nothing proposition, that is,
* it will only give a bool return value.
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
public $type = 'custom';
public $allow_empty = false;
/**
* Allowed child pattern as defined by the DTD
*/
public $dtd_regex;
/**
* PCRE regex derived from $dtd_regex
* @private
*/
private $_pcre_regex;
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
public function __construct($dtd_regex) {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
protected function _compileRegex() {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
}
$el = '[#a-zA-Z0-9_.-]+';
$reg = $raw;
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there's problems, please report them.
// collect all elements into the $elements array
preg_match_all("/$el/", $reg, $matches);
foreach ($matches[0] as $match) {
$this->elements[$match] = true;
}
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);
// remove commas when they were not solicited
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
// remove all non-paranthetical commas: they are handled by first regex
$reg = preg_replace("/,\(/", '(', $reg);
$this->_pcre_regex = $reg;
}
public function validateChildren($tokens_of_children, $config, $context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) continue;
$is_child = ($nesting == 0); // direct
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
$list_of_children .= $token->name . ',';
}
}
// add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
'/^,?'.$this->_pcre_regex.'$/',
$list_of_children
);
return (bool) $okay;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,20 @@
<?php
/**
* Definition that disallows all elements.
* @warning validateChildren() in this class is actually never called, because
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
* before child definitions are parsed in earnest by
* HTMLPurifier_Strategy_FixNesting.
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
public $allow_empty = true;
public $type = 'empty';
public function __construct() {}
public function validateChildren($tokens_of_children, $config, $context) {
return array();
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,120 @@
<?php
/**
* Definition for list containers ul and ol.
*/
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
public $type = 'list';
// lying a little bit, so that we can handle ul and ol ourselves
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
public function validateChildren($tokens_of_children, $config, $context) {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
$seen_li = false;
$need_close_li = false;
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
if ($nesting == 1 && $need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
$nesting--;
$need_close_li = false;
}
$is_child = ($nesting == 0);
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
if ($token->name === 'li') {
// good
$seen_li = true;
} elseif ($token->name === 'ul' || $token->name === 'ol') {
// we want to tuck this into the previous li
$need_close_li = true;
$nesting++;
if (!$seen_li) {
// create a new li element
$result[] = new HTMLPurifier_Token_Start('li');
} else {
// backtrack until </li> found
while(true) {
$t = array_pop($result);
if ($t instanceof HTMLPurifier_Token_End) {
// XXX actually, these invariants could very plausibly be violated
// if we are doing silly things with modifying the set of allowed elements.
// FORTUNATELY, it doesn't make a difference, since the allowed
// elements are hard-coded here!
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
break;
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
// XXX this should have a helper for it...
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
break;
} else {
if (!$t->is_whitespace) {
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
}
}
}
} else {
// start wrapping (this doesn't precisely mimic
// browser behavior, but what browsers do is kind of
// hard to mimic in a standards compliant way
// XXX Actually, this has no impact in practice,
// because this gets handled earlier. Arguably,
// we should rip out all of that processing
$result[] = new HTMLPurifier_Token_Start('li');
$nesting++;
$seen_li = true;
$need_close_li = true;
}
}
$result[] = $token;
}
if ($need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
}
if (empty($result)) return false;
if ($all_whitespace) {
return false;
}
if ($tokens_of_children == $result) return true;
return $result;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,26 @@
<?php
/**
* Definition that allows a set of elements, and allows no children.
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
* really, one shouldn't inherit from the other. Only altered behavior
* is to overload a returned false with an array. Thus, it will never
* return false.
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
public $allow_empty = true;
public $type = 'optional';
public function validateChildren($tokens_of_children, $config, $context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
// we assume that $tokens_of_children is not modified
if ($result === false) {
if (empty($tokens_of_children)) return true;
elseif ($this->whitespace) return $tokens_of_children;
else return array();
}
return $result;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,117 @@
<?php
/**
* Definition that allows a set of elements, but disallows empty children.
*/
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
/**
* Lookup table of allowed elements.
* @public
*/
public $elements = array();
/**
* Whether or not the last passed node was all whitespace.
*/
protected $whitespace = false;
/**
* @param $elements List of allowed element names (lowercase).
*/
public function __construct($elements) {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
$keys = array_keys($elements);
if ($keys == array_keys($keys)) {
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
if (empty($i)) unset($elements[$i]); // remove blank
}
}
$this->elements = $elements;
}
public $allow_empty = false;
public $type = 'required';
public function validateChildren($tokens_of_children, $config, $context) {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// whether or not we're deleting a node
$is_deleting = false;
// whether or not parsed character data is allowed
// this controls whether or not we silently drop a tag
// or generate escaped HTML from it
$pcdata_allowed = isset($this->elements['#PCDATA']);
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
// generator
$gen = new HTMLPurifier_Generator($config, $context);
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
$is_child = ($nesting == 0);
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$gen->generateFromToken($token)
);
}
continue;
}
}
if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$gen->generateFromToken($token)
);
} else {
// drop silently
}
}
if (empty($result)) return false;
if ($all_whitespace) {
$this->whitespace = true;
return false;
}
if ($tokens_of_children == $result) return true;
return $result;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,88 @@
<?php
/**
* Takes the contents of blockquote when in strict and reformats for validation.
*/
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
protected $real_elements;
protected $fake_elements;
public $allow_empty = true;
public $type = 'strictblockquote';
protected $init = false;
/**
* @note We don't want MakeWellFormed to auto-close inline elements since
* they might be allowed.
*/
public function getAllowedElements($config) {
$this->init($config);
return $this->fake_elements;
}
public function validateChildren($tokens_of_children, $config, $context) {
$this->init($config);
// trick the parent class into thinking it allows more
$this->elements = $this->fake_elements;
$result = parent::validateChildren($tokens_of_children, $config, $context);
$this->elements = $this->real_elements;
if ($result === false) return array();
if ($result === true) $result = $tokens_of_children;
$def = $config->getHTMLDefinition();
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
$is_inline = false;
$depth = 0;
$ret = array();
// assuming that there are no comment tokens
foreach ($result as $i => $token) {
$token = $result[$i];
// ifs are nested for readability
if (!$is_inline) {
if (!$depth) {
if (
($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
(!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
}
}
} else {
if (!$depth) {
// starting tokens have been inline text / empty
if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
if (isset($this->elements[$token->name])) {
// ended
$ret[] = $block_wrap_end;
$is_inline = false;
}
}
}
}
$ret[] = $token;
if ($token instanceof HTMLPurifier_Token_Start) $depth++;
if ($token instanceof HTMLPurifier_Token_End) $depth--;
}
if ($is_inline) $ret[] = $block_wrap_end;
return $ret;
}
private function init($config) {
if (!$this->init) {
$def = $config->getHTMLDefinition();
// allow all inline elements
$this->real_elements = $this->elements;
$this->fake_elements = $def->info_content_sets['Flow'];
$this->fake_elements['#PCDATA'] = true;
$this->init = true;
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,227 @@
<?php
/**
* Definition for tables. The general idea is to extract out all of the
* essential bits, and then reconstruct it later.
*
* This is a bit confusing, because the DTDs and the W3C
* validators seem to disagree on the appropriate definition. The
* DTD claims:
*
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
*
* But actually, the HTML4 spec then has this to say:
*
* The TBODY start tag is always required except when the table
* contains only one table body and no table head or foot sections.
* The TBODY end tag may always be safely omitted.
*
* So the DTD is kind of wrong. The validator is, unfortunately, kind
* of on crack.
*
* The definition changed again in XHTML1.1; and in my opinion, this
* formulation makes the most sense.
*
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
*
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
* If we encounter a thead, tfoot or tbody, we are placed in the former
* mode, and we *must* wrap any stray tr segments with a tbody. But if
* we don't run into any of them, just have tr tags is OK.
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
public $allow_empty = false;
public $type = 'table';
public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
public function __construct() {}
public function validateChildren($tokens_of_children, $config, $context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing
// up. It's a little bit of a hack, but it works! Just make sure you
// get rid of the token later.
$tokens_of_children[] = false;
// only one of these elements is allowed in a table
$caption = false;
$thead = false;
$tfoot = false;
// as many of these as you want
$cols = array();
$content = array();
$nesting = 0; // current depth so we can determine nodes
$is_collecting = false; // are we globbing together tokens to package
// into one of the collectors?
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
$tbody_mode = false; // if true, then we need to wrap any stray
// <tr>s with a <tbody>.
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
if ($token === false) {
// terminating sequence started
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
// handle node collection
if ($is_collecting) {
if ($is_child) {
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tbody':
$tbody_mode = true;
case 'tr':
$content[] = $collection;
break;
case 'caption':
if ($caption !== false) break;
$caption = $collection;
break;
case 'thead':
case 'tfoot':
$tbody_mode = true;
// XXX This breaks rendering properties with
// Firefox, which never floats a <thead> to
// the top. Ever. (Our scheme will float the
// first <thead> to the top.) So maybe
// <thead>s that are not first should be
// turned into <tbody>? Very tricky, indeed.
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// Oops, there's a second one! What
// should we do? Current behavior is to
// transmutate the first and last entries into
// tbody tags, and then put into content.
// Maybe a better idea is to *attach
// it* to the existing thead or tfoot?
// We don't do this, because Firefox
// doesn't float an extra tfoot to the
// bottom like it does for the first one.
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
}
break;
case 'colgroup':
$cols[] = $collection;
break;
}
$collection = array();
$is_collecting = false;
$tag_index = 0;
} else {
// add the node to the collection
$collection[] = $token;
}
}
// terminate
if ($token === false) break;
if ($is_child) {
// determine what we're dealing with
if ($token->name == 'col') {
// the only empty tag in the possie, we can handle it
// immediately
$cols[] = array_merge($collection, array($token));
$collection = array();
$tag_index = 0;
continue;
}
switch($token->name) {
case 'caption':
case 'colgroup':
case 'thead':
case 'tfoot':
case 'tbody':
case 'tr':
$is_collecting = true;
$collection[] = $token;
continue;
default:
if (!empty($token->is_whitespace)) {
$collection[] = $token;
$tag_index++;
}
continue;
}
}
}
if (empty($content)) return false;
$ret = array();
if ($caption !== false) $ret = array_merge($ret, $caption);
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
if ($tbody_mode) {
// a little tricky, since the start of the collection may be
// whitespace
$inside_tbody = false;
foreach ($content as $token_array) {
// find the starting token
foreach ($token_array as $t) {
if ($t->name === 'tr' || $t->name === 'tbody') {
break;
}
} // iterator variable carries over
if ($t->name === 'tr') {
if ($inside_tbody) {
$ret = array_merge($ret, $token_array);
} else {
$ret[] = new HTMLPurifier_Token_Start('tbody');
$ret = array_merge($ret, $token_array);
$inside_tbody = true;
}
} elseif ($t->name === 'tbody') {
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
$inside_tbody = false;
$ret = array_merge($ret, $token_array);
} else {
$ret = array_merge($ret, $token_array);
}
} else {
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
}
}
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
}
} else {
foreach ($content as $token_array) {
// invariant: everything in here is <tr>s
$ret = array_merge($ret, $token_array);
}
}
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);
}
array_pop($tokens_of_children); // remove phantom token
return ($ret === $tokens_of_children) ? true : $ret;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,709 @@
<?php
/**
* Configuration object that triggers customizable behavior.
*
* @warning This class is strongly defined: that means that the class
* will fail if an undefined directive is retrieved or set.
*
* @note Many classes that could (although many times don't) use the
* configuration object make it a mandatory parameter. This is
* because a configuration object should always be forwarded,
* otherwise, you run the risk of missing a parameter and then
* being stumped when a configuration directive doesn't work.
*
* @todo Reconsider some of the public member variables
*/
class HTMLPurifier_Config
{
/**
* HTML Purifier's version
*/
public $version = '4.4.0';
/**
* Bool indicator whether or not to automatically finalize
* the object if a read operation is done
*/
public $autoFinalize = true;
// protected member variables
/**
* Namespace indexed array of serials for specific namespaces (see
* getSerial() for more info).
*/
protected $serials = array();
/**
* Serial for entire configuration object
*/
protected $serial;
/**
* Parser for variables
*/
protected $parser = null;
/**
* Reference HTMLPurifier_ConfigSchema for value checking
* @note This is public for introspective purposes. Please don't
* abuse!
*/
public $def;
/**
* Indexed array of definitions
*/
protected $definitions;
/**
* Bool indicator whether or not config is finalized
*/
protected $finalized = false;
/**
* Property list containing configuration directives.
*/
protected $plist;
/**
* Whether or not a set is taking place due to an
* alias lookup.
*/
private $aliasMode;
/**
* Set to false if you do not want line and file numbers in errors
* (useful when unit testing). This will also compress some errors
* and exceptions.
*/
public $chatty = true;
/**
* Current lock; only gets to this namespace are allowed.
*/
private $lock;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
public function __construct($definition, $parent = null) {
$parent = $parent ? $parent : $definition->defaultPlist;
$this->plist = new HTMLPurifier_PropertyList($parent);
$this->def = $definition; // keep a copy around for checking
$this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
* Convenience constructor that creates a config object based on a mixed var
* @param mixed $config Variable that defines the state of the config
* object. Can be: a HTMLPurifier_Config() object,
* an array of directives based on loadArray(),
* or a string filename of an ini file.
* @param HTMLPurifier_ConfigSchema Schema object
* @return Configured HTMLPurifier_Config object
*/
public static function create($config, $schema = null) {
if ($config instanceof HTMLPurifier_Config) {
// pass-through
return $config;
}
if (!$schema) {
$ret = HTMLPurifier_Config::createDefault();
} else {
$ret = new HTMLPurifier_Config($schema);
}
if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config);
return $ret;
}
/**
* Creates a new config object that inherits from a previous one.
* @param HTMLPurifier_Config $config Configuration object to inherit
* from.
* @return HTMLPurifier_Config object with $config as its parent.
*/
public static function inherit(HTMLPurifier_Config $config) {
return new HTMLPurifier_Config($config->def, $config->plist);
}
/**
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.
*/
public static function createDefault() {
$definition = HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition);
return $config;
}
/**
* Retreives a value from the configuration.
* @param $key String key
*/
public function get($key, $a = null) {
if ($a !== null) {
$this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
$key = "$key.$a";
}
if (!$this->finalized) $this->autoFinalize();
if (!isset($this->def->info[$key])) {
// can't add % due to SimpleTest bug
$this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
E_USER_WARNING);
return;
}
if (isset($this->def->info[$key]->isAlias)) {
$d = $this->def->info[$key];
$this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
E_USER_ERROR);
return;
}
if ($this->lock) {
list($ns) = explode('.', $key);
if ($ns !== $this->lock) {
$this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
return;
}
}
return $this->plist->get($key);
}
/**
* Retreives an array of directives to values from a given namespace
* @param $namespace String namespace
*/
public function getBatch($namespace) {
if (!$this->finalized) $this->autoFinalize();
$full = $this->getAll();
if (!isset($full[$namespace])) {
$this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
return $full[$namespace];
}
/**
* Returns a md5 signature of a segment of the configuration object
* that uniquely identifies that particular configuration
* @note Revision is handled specially and is removed from the batch
* before processing!
* @param $namespace Namespace to get serial for
*/
public function getBatchSerial($namespace) {
if (empty($this->serials[$namespace])) {
$batch = $this->getBatch($namespace);
unset($batch['DefinitionRev']);
$this->serials[$namespace] = md5(serialize($batch));
}
return $this->serials[$namespace];
}
/**
* Returns a md5 signature for the entire configuration object
* that uniquely identifies that particular configuration
*/
public function getSerial() {
if (empty($this->serial)) {
$this->serial = md5(serialize($this->getAll()));
}
return $this->serial;
}
/**
* Retrieves all directives, organized by namespace
* @warning This is a pretty inefficient function, avoid if you can
*/
public function getAll() {
if (!$this->finalized) $this->autoFinalize();
$ret = array();
foreach ($this->plist->squash() as $name => $value) {
list($ns, $key) = explode('.', $name, 2);
$ret[$ns][$key] = $value;
}
return $ret;
}
/**
* Sets a value to configuration.
* @param $key String key
* @param $value Mixed value
*/
public function set($key, $value, $a = null) {
if (strpos($key, '.') === false) {
$namespace = $key;
$directive = $value;
$value = $a;
$key = "$key.$directive";
$this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
} else {
list($namespace) = explode('.', $key);
}
if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$key])) {
$this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
E_USER_WARNING);
return;
}
$def = $this->def->info[$key];
if (isset($def->isAlias)) {
if ($this->aliasMode) {
$this->triggerError('Double-aliases not allowed, please fix '.
'ConfigSchema bug with' . $key, E_USER_ERROR);
return;
}
$this->aliasMode = true;
$this->set($def->key, $value);
$this->aliasMode = false;
$this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
return;
}
// Raw type might be negative when using the fully optimized form
// of stdclass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
$allow_null = true;
} else {
$type = $rtype;
$allow_null = isset($def->allow_null);
}
try {
$value = $this->parser->parse($value, $type, $allow_null);
} catch (HTMLPurifier_VarParserException $e) {
$this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
return;
}
if (is_string($value) && is_object($def)) {
// resolve value alias if defined
if (isset($def->aliases[$value])) {
$value = $def->aliases[$value];
}
// check to see if the value is allowed
if (isset($def->allowed) && !isset($def->allowed[$value])) {
$this->triggerError('Value not supported, valid values are: ' .
$this->_listify($def->allowed), E_USER_WARNING);
return;
}
}
$this->plist->set($key, $value);
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
$this->definitions[$namespace] = null;
}
$this->serials[$namespace] = false;
}
/**
* Convenience function for error reporting
*/
private function _listify($lookup) {
$list = array();
foreach ($lookup as $name => $b) $list[] = $name;
return implode(', ', $list);
}
/**
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawHTMLDefinition, which is more explicitly
* named, instead.
*/
public function getHTMLDefinition($raw = false, $optimized = false) {
return $this->getDefinition('HTML', $raw, $optimized);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawCSSDefinition, which is more explicitly
* named, instead.
*/
public function getCSSDefinition($raw = false, $optimized = false) {
return $this->getDefinition('CSS', $raw, $optimized);
}
/**
* Retrieves object reference to the URI definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawURIDefinition, which is more explicitly
* named, instead.
*/
public function getURIDefinition($raw = false, $optimized = false) {
return $this->getDefinition('URI', $raw, $optimized);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
* @param $optimized Only has an effect when $raw is true. Whether
* or not to return null if the result is already present in
* the cache. This is off by default for backwards
* compatibility reasons, but you need to do things this
* way in order to ensure that caching is done properly.
* Check out enduser-customize.html for more details.
* We probably won't ever change this default, as much as the
* maybe semantics is the "right thing to do."
*/
public function getDefinition($type, $raw = false, $optimized = false) {
if ($optimized && !$raw) {
throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
}
if (!$this->finalized) $this->autoFinalize();
// temporarily suspend locks, so we can handle recursive definition calls
$lock = $this->lock;
$this->lock = null;
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
$this->lock = $lock;
if (!$raw) {
// full definition
// ---------------
// check if definition is in memory
if (!empty($this->definitions[$type])) {
$def = $this->definitions[$type];
// check if the definition is setup
if ($def->setup) {
return $def;
} else {
$def->setup($this);
if ($def->optimized) $cache->add($def, $this);
return $def;
}
}
// check if definition is in cache
$def = $cache->get($this);
if ($def) {
// definition in cache, save to memory and return it
$this->definitions[$type] = $def;
return $def;
}
// initialize it
$def = $this->initDefinition($type);
// set it up
$this->lock = $type;
$def->setup($this);
$this->lock = null;
// save in cache
$cache->add($def, $this);
// return it
return $def;
} else {
// raw definition
// --------------
// check preconditions
$def = null;
if ($optimized) {
if (is_null($this->get($type . '.DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
}
if (!empty($this->definitions[$type])) {
$def = $this->definitions[$type];
if ($def->setup && !$optimized) {
$extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
}
if ($def->optimized === null) {
$extra = $this->chatty ? " (try flushing your cache)" : "";
throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
}
if ($def->optimized !== $optimized) {
$msg = $optimized ? "optimized" : "unoptimized";
$extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
}
}
// check if definition was in memory
if ($def) {
if ($def->setup) {
// invariant: $optimized === true (checked above)
return null;
} else {
return $def;
}
}
// if optimized, check if definition was in cache
// (because we do the memory check first, this formulation
// is prone to cache slamming, but I think
// guaranteeing that either /all/ of the raw
// setup code or /none/ of it is run is more important.)
if ($optimized) {
// This code path only gets run once; once we put
// something in $definitions (which is guaranteed by the
// trailing code), we always short-circuit above.
$def = $cache->get($this);
if ($def) {
// save the full definition for later, but don't
// return it yet
$this->definitions[$type] = $def;
return null;
}
}
// check invariants for creation
if (!$optimized) {
if (!is_null($this->get($type . '.DefinitionID'))) {
if ($this->chatty) {
$this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
} else {
$this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
}
}
}
// initialize it
$def = $this->initDefinition($type);
$def->optimized = $optimized;
return $def;
}
throw new HTMLPurifier_Exception("The impossible happened!");
}
private function initDefinition($type) {
// quick checks failed, let's create the object
if ($type == 'HTML') {
$def = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$def = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$def = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
$this->definitions[$type] = $def;
return $def;
}
public function maybeGetRawDefinition($name) {
return $this->getDefinition($name, true, true);
}
public function maybeGetRawHTMLDefinition() {
return $this->getDefinition('HTML', true, true);
}
public function maybeGetRawCSSDefinition() {
return $this->getDefinition('CSS', true, true);
}
public function maybeGetRawURIDefinition() {
return $this->getDefinition('URI', true, true);
}
/**
* Loads configuration values from an array with the following structure:
* Namespace.Directive => Value
* @param $config_array Configuration associative array
*/
public function loadArray($config_array) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
$this->set($key, $value);
} else {
$namespace = $key;
$namespace_values = $value;
foreach ($namespace_values as $directive => $value) {
$this->set($namespace .'.'. $directive, $value);
}
}
}
}
/**
* Returns a list of array(namespace, directive) for all directives
* that are allowed in a web-form context as per an allowed
* namespaces/directives list.
* @param $allowed List of allowed namespaces/directives
*/
public static function getAllowedDirectivesForForm($allowed, $schema = null) {
if (!$schema) {
$schema = HTMLPurifier_ConfigSchema::instance();
}
if ($allowed !== true) {
if (is_string($allowed)) $allowed = array($allowed);
$allowed_ns = array();
$allowed_directives = array();
$blacklisted_directives = array();
foreach ($allowed as $ns_or_directive) {
if (strpos($ns_or_directive, '.') !== false) {
// directive
if ($ns_or_directive[0] == '-') {
$blacklisted_directives[substr($ns_or_directive, 1)] = true;
} else {
$allowed_directives[$ns_or_directive] = true;
}
} else {
// namespace
$allowed_ns[$ns_or_directive] = true;
}
}
}
$ret = array();
foreach ($schema->info as $key => $def) {
list($ns, $directive) = explode('.', $key, 2);
if ($allowed !== true) {
if (isset($blacklisted_directives["$ns.$directive"])) continue;
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
if (isset($def->isAlias)) continue;
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
$ret[] = array($ns, $directive);
}
return $ret;
}
/**
* Loads configuration values from $_GET/$_POST that were posted
* via ConfigForm
* @param $array $_GET or $_POST array to import
* @param $index Index/name that the config variables are in
* @param $allowed List of allowed namespaces/directives
* @param $mq_fix Boolean whether or not to enable magic quotes fix
* @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy
*/
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
$config = HTMLPurifier_Config::create($ret, $schema);
return $config;
}
/**
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
* @note Same parameters as loadArrayFromForm
*/
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
$this->loadArray($ret);
}
/**
* Prepares an array from a form into something usable for the more
* strict parts of HTMLPurifier_Config
*/
public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
$mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
$allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
$ret = array();
foreach ($allowed as $key) {
list($ns, $directive) = $key;
$skey = "$ns.$directive";
if (!empty($array["Null_$skey"])) {
$ret[$ns][$directive] = null;
continue;
}
if (!isset($array[$skey])) continue;
$value = $mq ? stripslashes($array[$skey]) : $array[$skey];
$ret[$ns][$directive] = $value;
}
return $ret;
}
/**
* Loads configuration values from an ini file
* @param $filename Name of ini file
*/
public function loadIni($filename) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
$array = parse_ini_file($filename, true);
$this->loadArray($array);
}
/**
* Checks whether or not the configuration object is finalized.
* @param $error String error message, or false for no error
*/
public function isFinalized($error = false) {
if ($this->finalized && $error) {
$this->triggerError($error, E_USER_ERROR);
}
return $this->finalized;
}
/**
* Finalizes configuration only if auto finalize is on and not
* already finalized
*/
public function autoFinalize() {
if ($this->autoFinalize) {
$this->finalize();
} else {
$this->plist->squash(true);
}
}
/**
* Finalizes a configuration object, prohibiting further change
*/
public function finalize() {
$this->finalized = true;
$this->parser = null;
}
/**
* Produces a nicely formatted error message by supplying the
* stack frame information OUTSIDE of HTMLPurifier_Config.
*/
protected function triggerError($msg, $no) {
// determine previous stack frame
$extra = '';
if ($this->chatty) {
$trace = debug_backtrace();
// zip(tail(trace), trace) -- but PHP is not Haskell har har
for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
continue;
}
$frame = $trace[$i];
$extra = " invoked on line {$frame['line']} in file {$frame['file']}";
break;
}
}
trigger_error($msg . $extra, $no);
}
/**
* Returns a serialized form of the configuration object that can
* be reconstituted.
*/
public function serialize() {
$this->getDefinition('HTML');
$this->getDefinition('CSS');
$this->getDefinition('URI');
return serialize($this);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,164 @@
<?php
/**
* Configuration definition, defines directives and their defaults.
*/
class HTMLPurifier_ConfigSchema {
/**
* Defaults of the directives and namespaces.
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
*/
public $defaults = array();
/**
* The default property list. Do not edit this property list.
*/
public $defaultPlist;
/**
* Definition of the directives. The structure of this is:
*
* array(
* 'Namespace' => array(
* 'Directive' => new stdclass(),
* )
* )
*
* The stdclass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
* - allow_null: If set, this directive allows null values
* - aliases: If set, an associative array of value aliases to real values
* - allowed: If set, a lookup array of allowed (string) values
* - If isAlias is set:
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
* In certain degenerate cases, stdclass will actually be an integer. In
* that case, the value is equivalent to an stdclass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
* This class is friendly with HTMLPurifier_Config. If you need introspection
* about the schema, you're better of using the ConfigSchema_Interchange,
* which uses more memory but has much richer information.
*/
public $info = array();
/**
* Application-wide singleton
*/
static protected $singleton;
public function __construct() {
$this->defaultPlist = new HTMLPurifier_PropertyList();
}
/**
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
$contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
$r = unserialize($contents);
if (!$r) {
$hash = sha1($contents);
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
}
return $r;
}
/**
* Retrieves an instance of the application-wide configuration definition.
*/
public static function instance($prototype = null) {
if ($prototype !== null) {
HTMLPurifier_ConfigSchema::$singleton = $prototype;
} elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
}
return HTMLPurifier_ConfigSchema::$singleton;
}
/**
* Defines a directive for configuration
* @warning Will fail of directive's namespace is defined.
* @warning This method's signature is slightly different from the legacy
* define() static method! Beware!
* @param $namespace Namespace the directive is in
* @param $name Key of directive
* @param $default Default value of directive
* @param $type Allowed type of the directive. See
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $allow_null Whether or not to allow null values
*/
public function add($key, $default, $type, $allow_null) {
$obj = new stdclass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) $obj->allow_null = true;
$this->info[$key] = $obj;
$this->defaults[$key] = $default;
$this->defaultPlist->set($key, $default);
}
/**
* Defines a directive value alias.
*
* Directive value aliases are convenient for developers because it lets
* them set a directive to several values and get the same result.
* @param $namespace Directive's namespace
* @param $name Name of Directive
* @param $aliases Hash of aliased values to the real alias
*/
public function addValueAliases($key, $aliases) {
if (!isset($this->info[$key]->aliases)) {
$this->info[$key]->aliases = array();
}
foreach ($aliases as $alias => $real) {
$this->info[$key]->aliases[$alias] = $real;
}
}
/**
* Defines a set of allowed values for a directive.
* @warning This is slightly different from the corresponding static
* method definition.
* @param $namespace Namespace of directive
* @param $name Name of directive
* @param $allowed Lookup array of allowed values
*/
public function addAllowedValues($key, $allowed) {
$this->info[$key]->allowed = $allowed;
}
/**
* Defines a directive alias for backwards compatibility
* @param $namespace
* @param $name Directive that will be aliased
* @param $new_namespace
* @param $new_name Directive that the alias will be to
*/
public function addAlias($key, $new_key) {
$obj = new stdclass;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$key] = $obj;
}
/**
* Replaces any stdclass that only has the type property with type integer.
*/
public function postProcess() {
foreach ($this->info as $key => $v) {
if (count((array) $v) == 1) {
$this->info[$key] = $v->type;
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
$this->info[$key] = -$v->type;
}
}
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,44 @@
<?php
/**
* Converts HTMLPurifier_ConfigSchema_Interchange to our runtime
* representation used to perform checks on user configuration.
*/
class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
{
public function build($interchange) {
$schema = new HTMLPurifier_ConfigSchema();
foreach ($interchange->directives as $d) {
$schema->add(
$d->id->key,
$d->default,
$d->type,
$d->typeAllowsNull
);
if ($d->allowed !== null) {
$schema->addAllowedValues(
$d->id->key,
$d->allowed
);
}
foreach ($d->aliases as $alias) {
$schema->addAlias(
$alias->key,
$d->id->key
);
}
if ($d->valueAliases !== null) {
$schema->addValueAliases(
$d->id->key,
$d->valueAliases
);
}
}
$schema->postProcess();
return $schema;
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,106 @@
<?php
/**
* Converts HTMLPurifier_ConfigSchema_Interchange to an XML format,
* which can be further processed to generate documentation.
*/
class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
{
protected $interchange;
private $namespace;
protected function writeHTMLDiv($html) {
$this->startElement('div');
$purifier = HTMLPurifier::getInstance();
$html = $purifier->purify($html);
$this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
$this->writeRaw($html);
$this->endElement(); // div
}
protected function export($var) {
if ($var === array()) return 'array()';
return var_export($var, true);
}
public function build($interchange) {
// global access, only use as last resort
$this->interchange = $interchange;
$this->setIndent(true);
$this->startDocument('1.0', 'UTF-8');
$this->startElement('configdoc');
$this->writeElement('title', $interchange->name);
foreach ($interchange->directives as $directive) {
$this->buildDirective($directive);
}
if ($this->namespace) $this->endElement(); // namespace
$this->endElement(); // configdoc
$this->flush();
}
public function buildDirective($directive) {
// Kludge, although I suppose having a notion of a "root namespace"
// certainly makes things look nicer when documentation is built.
// Depends on things being sorted.
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
if ($this->namespace) $this->endElement(); // namespace
$this->namespace = $directive->id->getRootNamespace();
$this->startElement('namespace');
$this->writeAttribute('id', $this->namespace);
$this->writeElement('name', $this->namespace);
}
$this->startElement('directive');
$this->writeAttribute('id', $directive->id->toString());
$this->writeElement('name', $directive->id->getDirective());
$this->startElement('aliases');
foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
$this->endElement(); // aliases
$this->startElement('constraints');
if ($directive->version) $this->writeElement('version', $directive->version);
$this->startElement('type');
if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
$this->text($directive->type);
$this->endElement(); // type
if ($directive->allowed) {
$this->startElement('allowed');
foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value);
$this->endElement(); // allowed
}
$this->writeElement('default', $this->export($directive->default));
$this->writeAttribute('xml:space', 'preserve');
if ($directive->external) {
$this->startElement('external');
foreach ($directive->external as $project) $this->writeElement('project', $project);
$this->endElement();
}
$this->endElement(); // constraints
if ($directive->deprecatedVersion) {
$this->startElement('deprecated');
$this->writeElement('version', $directive->deprecatedVersion);
$this->writeElement('use', $directive->deprecatedUse->toString());
$this->endElement(); // deprecated
}
$this->startElement('description');
$this->writeHTMLDiv($directive->description);
$this->endElement(); // description
$this->endElement(); // directive
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,11 @@
<?php
/**
* Exceptions related to configuration schema
*/
class HTMLPurifier_ConfigSchema_Exception extends HTMLPurifier_Exception
{
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,42 @@
<?php
/**
* Generic schema interchange format that can be converted to a runtime
* representation (HTMLPurifier_ConfigSchema) or HTML documentation. Members
* are completely validated.
*/
class HTMLPurifier_ConfigSchema_Interchange
{
/**
* Name of the application this schema is describing.
*/
public $name;
/**
* Array of Directive ID => array(directive info)
*/
public $directives = array();
/**
* Adds a directive array to $directives
*/
public function addDirective($directive) {
if (isset($this->directives[$i = $directive->id->toString()])) {
throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'");
}
$this->directives[$i] = $directive;
}
/**
* Convenience function to perform standard validation. Throws exception
* on failed validation.
*/
public function validate() {
$validator = new HTMLPurifier_ConfigSchema_Validator();
return $validator->validate($this);
}
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,77 @@
<?php
/**
* Interchange component class describing configuration directives.
*/
class HTMLPurifier_ConfigSchema_Interchange_Directive
{
/**
* ID of directive, instance of HTMLPurifier_ConfigSchema_Interchange_Id.
*/
public $id;
/**
* String type, e.g. 'integer' or 'istring'.
*/
public $type;
/**
* Default value, e.g. 3 or 'DefaultVal'.
*/
public $default;
/**
* HTML description.
*/
public $description;
/**
* Boolean whether or not null is allowed as a value.
*/
public $typeAllowsNull = false;
/**
* Lookup table of allowed scalar values, e.g. array('allowed' => true).
* Null if all values are allowed.
*/
public $allowed;
/**
* List of aliases for the directive,
* e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))).
*/
public $aliases = array();
/**
* Hash of value aliases, e.g. array('alt' => 'real'). Null if value
* aliasing is disabled (necessary for non-scalar types).
*/
public $valueAliases;
/**
* Version of HTML Purifier the directive was introduced, e.g. '1.3.1'.
* Null if the directive has always existed.
*/
public $version;
/**
* ID of directive that supercedes this old directive, is an instance
* of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated.
*/
public $deprecatedUse;
/**
* Version of HTML Purifier this directive was deprecated. Null if not
* deprecated.
*/
public $deprecatedVersion;
/**
* List of external projects this directive depends on, e.g. array('CSSTidy').
*/
public $external = array();
}
// vim: et sw=4 sts=4

View File

@ -0,0 +1,37 @@
<?php
/**
* Represents a directive ID in the interchange format.
*/
class HTMLPurifier_ConfigSchema_Interchange_Id
{
public $key;
public function __construct($key) {
$this->key = $key;
}
/**
* @warning This is NOT magic, to ensure that people don't abuse SPL and
* cause problems for PHP 5.0 support.
*/
public function toString() {
return $this->key;
}
public function getRootNamespace() {
return substr($this->key, 0, strpos($this->key, "."));
}
public function getDirective() {
return substr($this->key, strpos($this->key, ".") + 1);
}
public static function make($id) {
return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
}
}
// vim: et sw=4 sts=4

Some files were not shown because too many files have changed in this diff Show More