1: <?php
2:
3: /*! @mainpage
4: *
5: * HTML Purifier is an HTML filter that will take an arbitrary snippet of
6: * HTML and rigorously test, validate and filter it into a version that
7: * is safe for output onto webpages. It achieves this by:
8: *
9: * -# Lexing (parsing into tokens) the document,
10: * -# Executing various strategies on the tokens:
11: * -# Removing all elements not in the whitelist,
12: * -# Making the tokens well-formed,
13: * -# Fixing the nesting of the nodes, and
14: * -# Validating attributes of the nodes; and
15: * -# Generating HTML from the purified tokens.
16: *
17: * However, most users will only need to interface with the HTMLPurifier
18: * and HTMLPurifier_Config classes.
19: */
20:
21: /*
22: HTML Purifier 4.3.0 - Standards Compliant HTML Filtering
23: Copyright (C) 2006-2008 Edward Z. Yang
24:
25: This library is free software; you can redistribute it and/or
26: modify it under the terms of the GNU Lesser General Public
27: License as published by the Free Software Foundation; either
28: version 2.1 of the License, or (at your option) any later version.
29:
30: This library is distributed in the hope that it will be useful,
31: but WITHOUT ANY WARRANTY; without even the implied warranty of
32: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
33: Lesser General Public License for more details.
34:
35: You should have received a copy of the GNU Lesser General Public
36: License along with this library; if not, write to the Free Software
37: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
38: */
39:
/**
 * Facade tying together HTML Purifier's subsystems (lexer, strategy,
 * generator and filters) so HTML can be purified through a single object.
 *
 * @note Configuration may be supplied at two points. From lowest to
 *       highest precedence they are:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       The two are entirely independent of each other: they are *not*
 *       merged, the invocation-level one simply replaces the
 *       instance-level one (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /** Version of HTML Purifier */
    public $version = '4.3.0';

    /** Constant with version of HTML Purifier */
    const VERSION = '4.3.0';

    /** Global configuration object */
    public $config;

    /**
     * Extra HTMLPurifier_Filter objects added through addFilter(); kept
     * for backwards compatibility and run after the configured filters.
     */
    private $filters = array();

    /** Single shared instance of HTML Purifier, see instance() */
    private static $instance;

    /** Strategy object that processes the token list, set in the constructor */
    protected $strategy;

    /** Generator that turns tokens back into HTML, rebuilt on every purify() */
    protected $generator;

    /**
     * Resultant HTMLPurifier_Context of last run purification. Is an array
     * of contexts if the last called method was purifyArray().
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier; if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config   = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come, first served.
     * Deprecated: emits an E_USER_WARNING each time it is called.
     *
     * @param $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * The context built for the run is left in $this->context afterwards.
     *
     * @param $html   String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation; if
     *                omitted (or otherwise falsy), the config object given
     *                at construction is used. The parameter can also be any
     *                type that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // the lexer implementation is partially environment dependent,
        // partially configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // set up the HTML generator and make it available via the context
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // error collection (and the locale it needs) is opt-in;
        // may get moved out if other facilities use it
        if ($config->get('Core.CollectErrors')) {
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // the ID accumulator lives in the context because AttrValidator
        // can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // assemble the filter list: flag-enabled built-ins first, then
        // Filter.Custom entries, then anything added through addFilter()
        $filter_flags   = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $active_filters = array();
        foreach ($filter_flags as $name => $enabled) {
            // only enabled entries whose name contains no dot map to a class
            if ($enabled && strpos($name, '.') === false) {
                $class = "HTMLPurifier_Filter_$name";
                $active_filters[] = new $class();
            }
        }
        foreach ($custom_filters as $custom) {
            // maybe "HTMLPurifier_Filter_$custom", but be consistent with AutoFormat
            $active_filters[] = $custom;
        }
        $active_filters = array_merge($active_filters, $this->filters);
        // maybe prepare(), but later

        // pre-filters run in list order
        foreach ($active_filters as $filter) {
            $html = $filter->preFilter($html, $config, $context);
        }

        // un-purified HTML -> un-purified tokens -> purified tokens -> purified HTML
        $raw_tokens   = $lexer->tokenizeHTML($html, $config, $context);
        $clean_tokens = $this->strategy->execute($raw_tokens, $config, $context);
        $html         = $this->generator->generateFromTokens($clean_tokens);

        // post-filters run in the reverse of the pre-filter order
        foreach (array_reverse($active_filters) as $filter) {
            $html = $filter->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

        // expose the context of this run (bound by reference)
        $this->context =& $context;

        return $html;
    }

    /**
     * Filters an array of HTML snippets.
     *
     * Every element is passed through purify(); keys are preserved. When
     * done, $this->context holds an array of the per-element contexts,
     * indexed by the same keys.
     *
     * @param $array_of_html Array of HTML snippets to purify
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $contexts = array();
        foreach ($array_of_html as $index => $snippet) {
            $array_of_html[$index] = $this->purify($snippet, $config);
            // purify() has just replaced $this->context with this run's context
            $contexts[$index] = $this->context;
        }
        $this->context = $contexts;

        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system.
     *
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with, or HTMLPurifier_Config
     *                   instance to configure the generated version with.
     * @return The shared HTMLPurifier instance
     */
    public static function instance($prototype = null)
    {
        if ($prototype instanceof HTMLPurifier) {
            // a ready-made purifier replaces whatever is currently stored
            self::$instance = $prototype;
        } elseif ($prototype) {
            // any other truthy value is used to configure a new purifier
            self::$instance = new HTMLPurifier($prototype);
        } elseif (!self::$instance) {
            // nothing supplied and nothing stored yet: build a default one
            self::$instance = new HTMLPurifier();
        }

        return self::$instance;
    }

    /**
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }

}
236:
237: // vim: et sw=4 sts=4
238: