1: <?php
2:
3: /**
4: * Abstract class for a set of proprietary modules that clean up (tidy)
5: * poorly written HTML.
6: * @todo Figure out how to protect some of these methods/properties
7: */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{

    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level.
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in. Disabled (null) by default;
     * when set, makeFixesForLevel() assigns every fix to this level.
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel(). Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     */
    public $fixesForLevel = array(
        'light'  => array(),
        'medium' => array(),
        'heavy'  => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix is kept when it is not listed in HTML.TidyRemove and is either
     * listed in HTML.TidyAdd or enabled by the configured HTML.TidyLevel.
     *
     * @param $config Configuration object; read through get() for the
     *        HTML.TidyLevel, HTML.TidyAdd and HTML.TidyRemove directives.
     *        The latter two are used as lookup tables keyed by fix name.
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config) {

        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        if (!is_array($fixes)) {
            // An override (or an empty method body) may return nothing;
            // treat that as "no fixes" instead of iterating over null.
            $fixes = array();
        }
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level        = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations
        $add_fixes    = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            $removed = isset($remove_fixes[$name]);
            $enabled = isset($add_fixes[$name]) || isset($fixes_lookup[$name]);
            if ($removed || !$enabled) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);

    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * Level names are compared strictly, so only an exact string match
     * against an entry of $levels is accepted. An unrecognized level
     * raises an E_USER_WARNING and yields an empty lookup.
     *
     * @param $level String level identifier, see $levels for valid values
     * @return Lookup table of fixes: array('fix-name' => true, ...)
     */
    public function getFixesForLevel($level) {
        if ($level === $this->levels[0]) {
            return array();
        }

        // Collect every level up to and including the requested one.
        $activated_levels = array();
        $recognized = false;
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] === $level) {
                $recognized = true;
                break;
            }
        }
        if (!$recognized) {
            trigger_error(
                'Tidy level ' . htmlspecialchars((string) $level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }

        $ret = array();
        foreach ($activated_levels as $activated) {
            // A level may have no fix list at all (e.g. customized $levels).
            if (empty($this->fixesForLevel[$activated])) {
                continue;
            }
            foreach ($this->fixesForLevel[$activated] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * Does nothing when $defaultLevel is unset. Otherwise the names of all
     * fixes replace the list stored for $defaultLevel; an E_USER_ERROR is
     * raised if that level has no entry in $fixesForLevel.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    public function makeFixesForLevel($fixes) {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it.
     *
     * The fix name (see getFixType()) decides where the implementation is
     * stored: on an element definition, in this module's global
     * info_attr_transform_pre/post tables, or in info_tag_transform.
     * An unsupported fix type raises an E_USER_ERROR.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    public function populate($fixes) {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        $e = $this->fetchOrAddElement($params['element']);
                    } else {
                        // No element given: register the transform in the
                        // module-wide info_attr_transform_* table.
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $e = $this->fetchOrAddElement($params['element']);
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Returns the entry for $element in $this->info, creating a blank one
     * through addBlankElement() when there is none (or it is empty).
     *
     * @param $element String element name
     * @return Element definition registered for $element
     */
    private function fetchOrAddElement($element) {
        if (empty($this->info[$element])) {
            return $this->addBlankElement($element);
        }
        return $this->info[$element];
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * The name has the shape "element@attr#property", every part optional:
     *  - "br@clear"    => attr_transform_pre  (element br, attr clear)
     *  - "@lang"       => attr_transform_pre  (attr lang, no element)
     *  - "a@name#post" => attr_transform_post (element a, attr name)
     *  - "font"        => tag_transform       (element font)
     *  - "blockquote#content_model_type" => content_model_type
     *
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependant, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name) {
        // parse it: strip "#property" first, then "@attr"
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            $parts    = explode('#', $name);
            $name     = $parts[0];
            $property = $parts[1];
        }
        if (strpos($name, '@') !== false) {
            $parts = explode('@', $name);
            $name  = $parts[0];
            $attr  = $parts[1];
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform ("pre" unless stated otherwise)
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);

    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes; subclasses override it.
     *
     * @return Associative array of fix name to fix implementation
     */
    public function makeFixes() {
        return array();
    }

}
206:
207: // vim: et sw=4 sts=4
208: