1: <?php
2:
3: /**
4: * Base class for all validating attribute definitions.
5: *
6: * This family of classes forms the core for not only HTML attribute validation,
7: * but also any sort of string that needs to be validated or cleaned (which
8: * means CSS properties and composite definitions are defined here too).
9: * Besides defining (through code) what precisely makes the string valid,
10: * subclasses are also responsible for cleaning the code if possible.
11: */
12:
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized. Has no
     * meaning in other contexts.
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required. Has no
     * meaning in other contexts.
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param $string String to be validated and cleaned.
     * @param $config Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory HTMLPurifier_AttrContext object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed first, and then every remaining line feed,
     * tab and carriage return is replaced by a single space. While most
     * useful for HTML attributes specified as CDATA, it can also be
     * applied to most CSS values.
     *
     * @note Line feeds are replaced with spaces here, whereas the HTML 4
     *       text linked above talks about ignoring them.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7. However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param $string String to be processed
     * @return Trimmed string with line feeds, tabs and carriage returns
     *         turned into spaces
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     *
     * The default implementation ignores $string and returns this very
     * object (a flyweight). If $string has an effect on the returned
     * object (i.e. you need to overload this method), it is best to clone
     * or instantiate new copies. (Instantiation is safer.)
     *
     * @param $string String construction info
     * @return Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Whitespace is stripped around the commas as well as directly inside
     * the parentheses, and each component may be an integer or an integer
     * percentage, e.g. "rgb( 10% , 20% , 30% )" becomes "rgb(10%,20%,30%)".
     * Anything that does not look like such an rgb() triple is left alone.
     *
     * @param $string String that may contain rgb() colors
     * @return String with whitespace removed from inside every rgb() match
     */
    protected function mungeRgb($string)
    {
        $munged = preg_replace(
            '/rgb\(\s*(\d+%?)\s*,\s*(\d+%?)\s*,\s*(\d+%?)\s*\)/',
            'rgb(\1,\2,\3)',
            $string
        );
        // preg_replace() yields null when PCRE fails; hand back the
        // untouched input instead of propagating null to the caller.
        return $munged === null ? $string : $munged;
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handled forms:
     *  - backslash + 1 to 6 hex digits: replaced by the character that
     *    HTMLPurifier_Encoder::unichr() produces for that code; a single
     *    whitespace character directly after the digits is swallowed.
     *    If HTMLPurifier_Encoder::cleanUTF8() reduces the character to
     *    an empty string, the escape contributes nothing to the output.
     *  - backslash + line feed: both are dropped.
     *  - backslash + any other character: that character is kept.
     *  - a lone backslash at the very end: kept as a backslash.
     *
     * @param $string CSS string, possibly containing escapes
     * @return String with the escapes expanded
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // nothing follows the backslash, keep it verbatim
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // collect up to six hex digits
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now sits on the first character after the digits.
                    // The outer loop is about to advance past it, which is
                    // what we want for a terminating whitespace character
                    // only; step back for anything else so that it is not
                    // lost. This has to happen before the validity check
                    // below, otherwise a rejected escape would also eat
                    // the character that follows it.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // escaped line feed: drop backslash and line feed
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }

}
122:
123: // vim: et sw=4 sts=4
124: