1: <?php
2:
/**
 * Validates contents based on NMTOKENS attribute type.
 *
 * The value is broken into whitespace-delimited tokens, tokens that do not
 * match the supported grammar are discarded, and the remaining tokens are
 * re-joined with single spaces.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Validates a whitespace-separated list of tokens.
     *
     * @param string $string  Raw attribute value.
     * @param mixed  $config  Not used here; forwarded to split() and filter().
     * @param mixed  $context Not used here; forwarded to split() and filter().
     * @return string|bool The accepted tokens joined by single spaces, or
     *                     false when no token is accepted.
     */
    public function validate($string, $config, $context)
    {
        $string = trim($string);

        // early abort: '' and '0' (strings that convert to false) are invalid
        if (!$string) {
            return false;
        }

        $tokens = $this->split($string, $config, $context);
        $tokens = $this->filter($tokens, $config, $context);
        if (empty($tokens)) {
            return false;
        }

        return implode(' ', $tokens);
    }

    /**
     * Splits a space separated list of tokens into its constituent parts.
     *
     * Only tokens that are delimited by whitespace (or the string boundaries)
     * and that match the pattern below are returned; everything else is
     * silently skipped.
     *
     * @param string $string  Trimmed attribute value.
     * @param mixed  $config  Not used by this implementation.
     * @param mixed  $context Not used by this implementation.
     * @return array List of matched tokens; empty when nothing matched or
     *               when the regular expression engine reported a failure.
     */
    protected function split($string, $config, $context)
    {
        // OPTIMIZABLE!
        // do the preg_match, capture all subpatterns for reformulation

        // we don't support U+00A1 and up codepoints or
        // escaping because I don't know how to do that with regexps
        // and plus it would complicate optimization efforts (you never
        // see that anyway).
        $pattern = '/(?:(?<=\s)|\A)' . // look behind for space or string start
            '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)' .
            '(?:(?=\s)|\z)/'; // look ahead for space or string end

        $matches = array();
        $count = preg_match_all($pattern, $string, $matches);

        // preg_match_all() returns false on failure (for example when a PCRE
        // limit is exceeded on a very long value). Do not index into
        // $matches in that case; report "no tokens" so the caller rejects
        // the value instead of working with an undefined offset.
        if ($count === false || !isset($matches[1])) {
            return array();
        }

        return $matches[1];
    }

    /**
     * Template method for removing certain tokens based on arbitrary criteria.
     *
     * This base implementation keeps every token.
     *
     * @note If we wanted to be really functional, we'd do an array_filter
     *       with a callback. But... we're not.
     *
     * @param array $tokens  Tokens produced by split().
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return array The tokens to keep (here: the input, unchanged).
     */
    protected function filter($tokens, $config, $context)
    {
        return $tokens;
    }

}
51:
52: // vim: et sw=4 sts=4
53: