1: <?php
2:
3: /**
4: * Class that handles operations involving percent-encoding in URIs.
5: *
6: * @warning
7: * Be careful when reusing instances of PercentEncoder. The object
8: * you use for normalize() SHOULD NOT be used for encode(), or
9: * vice-versa.
10: */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values (as returned by ord()) that are left
     * untouched by encode() and decoded by normalize(). Keys are the byte
     * values; every value is true.
     */
    protected $preserve = array();

    /**
     * Builds the lookup table of bytes that must never be percent-encoded.
     *
     * The table always contains the unreserved set: digits, ASCII letters,
     * dash, period, underscore and tilde.
     *
     * @param $preserve Optional string of additional characters to leave
     *                  unescaped; pass false (the default) to add none.
     */
    public function __construct($preserve = false) {
        // Inclusive byte ranges of the unreserved characters; the last four
        // entries are the single characters '-', '.', '_' and '~'.
        $unreserved = array(
            array(48, 57),   // digits
            array(65, 90),   // upper-case letters
            array(97, 122),  // lower-case letters
            array(45, 45),   // dash -
            array(46, 46),   // period .
            array(95, 95),   // underscore _
            array(126, 126), // tilde ~
        );
        foreach ($unreserved as $bounds) {
            for ($code = $bounds[0]; $code <= $bounds[1]; $code++) {
                $this->preserve[$code] = true;
            }
        }

        // No caller-supplied extras: the table is complete.
        if ($preserve === false) {
            return;
        }

        // Register every byte of the extra string as preserved.
        $count = strlen($preserve);
        for ($offset = 0; $offset < $count; $offset++) {
            $this->preserve[ord($preserve[$offset])] = true;
        }
    }

    /**
     * Replacement for urlencode(): percent-encodes every byte of the input
     * that is neither in the preserve table (unreserved characters plus any
     * extras given to the constructor) nor a percent sign.
     * @note
     *      The input is assumed to be normalized already, so that every
     *      percent sign starts a valid escape sequence. Percent signs are
     *      therefore always copied through as-is, whether or not '%' is
     *      in $preserve.
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string) {
        $encoded = '';
        $total = strlen($string);
        for ($pos = 0; $pos < $total; $pos++) {
            $char = $string[$pos];
            // Percent signs and preserved bytes are copied verbatim.
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                $encoded .= $char;
                continue;
            }
            // Everything else becomes an upper-case two-digit hex escape.
            $encoded .= sprintf('%%%02X', ord($char));
        }
        return $encoded;
    }

    /**
     * Repairs and canonicalizes the percent-encoding of a string:
     *  - a percent sign not followed by two hex digits is escaped as %25;
     *  - an escape whose byte is in the preserve table is decoded;
     *  - any other escape is kept, with its hex digits upper-cased.
     * @warning The preserve table consulted here includes the extra
     *          characters passed to the constructor, which is usually not
     *          what you want when normalizing. Be careful when reusing
     *          instances of PercentEncoder!
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string) {
        if ($string == '') {
            return '';
        }

        // Everything before the first '%' is literal text; each following
        // segment starts right after a percent sign.
        $segments = explode('%', $string);
        $normalized = array_shift($segments);

        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);

            // Stray percent sign: too short, or not followed by two hex
            // digits. Escape the percent sign itself and keep the rest.
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                $normalized .= '%25' . $segment;
                continue;
            }

            $tail = substr($segment, 2);
            $code = hexdec($hex);

            if (isset($this->preserve[$code])) {
                // Needlessly escaped character: decode it.
                $normalized .= chr($code) . $tail;
            } else {
                // Legitimate escape: keep it, with upper-case hex digits.
                $normalized .= '%' . strtoupper($hex) . $tail;
            }
        }

        return $normalized;
    }

}
97:
98: // vim: et sw=4 sts=4
99: