Parent Directory
|
Revision Log
Revision 1.9 - (view) (download)
| 1 : | stronk7 | 1.2 | <?php |
| 2 : | samhemelryk | 1.8 | /** |
| 3 : | * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes | ||
| 4 : | * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar | ||
| 5 : | * | ||
| 6 : | * This program is free software and open source software; you can redistribute | ||
| 7 : | * it and/or modify it under the terms of the GNU General Public License as | ||
| 8 : | * published by the Free Software Foundation; either version 3 of the License, | ||
| 9 : | * or (at your option) any later version. | ||
| 10 : | * | ||
| 11 : | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 12 : | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 13 : | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 14 : | * more details. | ||
| 15 : | * | ||
| 16 : | * You should have received a copy of the GNU General Public License along | ||
| 17 : | * with this program; if not, write to the Free Software Foundation, Inc., | ||
| 18 : | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit | ||
| 19 : | * http://www.gnu.org/licenses/gpl.html | ||
| 20 : | * | ||
| 21 : | * *** CONTACT INFORMATION *** | ||
| 22 : | * | ||
| 23 : | * E-mail: metaur at users dot sourceforge dot net | ||
| 24 : | * Web page: http://sourceforge.net/projects/kses | ||
| 25 : | * Paper mail: Ulf Harnhammar | ||
| 26 : | * Ymergatan 17 C | ||
| 27 : | * 753 25 Uppsala | ||
| 28 : | * SWEDEN | ||
| 29 : | * | ||
| 30 : | * [kses strips evil scripts!] | ||
| 31 : | * | ||
| 32 : | * @package moodlecore | ||
| 33 : | * @copyright Ulf Harnhammar {@link http://sourceforge.net/projects/kses} | ||
| 34 : | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | ||
| 35 : | */ | ||
| 36 : | |||
/**
 * Filters $string so that only whitelisted HTML elements, attributes and
 * attribute values, plus sane HTML entities, remain in it.
 *
 * Slashes added by PHP's magic quotes must be removed by the caller before
 * the text is passed in.
 *
 * @param string $string            text to filter
 * @param array  $allowed_html      element name => array(attribute name => checks)
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string the filtered text
 */
function kses($string, $allowed_html, $allowed_protocols =
              array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
                    'gopher', 'mailto'))
{
  # Clean-up passes; they run innermost first, i.e. NULs, then JavaScript
  # entities, then entity normalisation, then the user hook.
  $cleaned = kses_hook(
               kses_normalize_entities(
                 kses_js_entities(
                   kses_no_null($string))));

  # The whitelist is looked up with lower-cased names later on.
  $whitelist = kses_array_lc($allowed_html);

  return kses_split($cleaned, $whitelist, $allowed_protocols);
} # function kses
| 65 : | |||
| 66 : | |||
/**
 * Extension point for site-specific filtering; kses() runs it after entity
 * normalisation. Nothing is hooked in here, so the text passes through
 * unchanged.
 *
 * @param string $string text being filtered
 * @return string the same text
 */
function kses_hook($string)
{
  $hooked = $string;

  return $hooked;
} # function kses_hook
| 80 : | |||
/**
 * Reports which kses release this file corresponds to.
 *
 * @return string version number
 */
function kses_version()
{
  $version = '0.2.2';

  return $version;
} # function kses_version
| 93 : | |||
| 94 : | |||
/**
 * Finds everything that looks like an HTML tag, no matter how malformed, as
 * well as stray ">" characters, and replaces each match with whatever
 * kses_split2() returns for it.
 *
 * The original implementation used preg_replace() with the /e modifier, which
 * is deprecated since PHP 5.5 and removed in PHP 7. A callback is used
 * instead.
 *
 * @param string $string            text to filter
 * @param array  $allowed_html      lower-cased whitelist of elements/attributes
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string the filtered text
 */
function kses_split($string, $allowed_html, $allowed_protocols)
{
  $pattern = '%(<'.     # EITHER: <
             '[^>]*'.   # things that aren't >
             '(>|$)'.   # > or end of string
             '|>)%';    # OR: just a >

  return preg_replace_callback(
    $pattern,
    function ($matches) use ($allowed_html, $allowed_protocols)
    {
      # The /e modifier used to hand the match over with every " turned into
      # \", and kses_split2() starts by reversing exactly that. Reproduce the
      # escaping so that the round trip leaves the tag text untouched.
      $tag = str_replace('"', '\\"', $matches[1]);

      return kses_split2($tag, $allowed_html, $allowed_protocols);
    },
    $string);
} # function kses_split
| 118 : | |||
/**
 * Handles one match found by kses_split(): a tag-like string or a stray ">".
 *
 * Very malformed input such as <:::> and elements that are not in the
 * whitelist yield an empty string (no strip_tags() involved). Closing tags
 * are rebuilt without attributes; opening tags are split into element name
 * and attribute list and passed on to kses_attr().
 *
 * @param string $string            the matched text
 * @param array  $allowed_html      lower-cased whitelist of elements/attributes
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string replacement for the matched text
 */
function kses_split2($string, $allowed_html, $allowed_protocols)
{
  $tag = kses_stripslashes($string);

  # Not starting with "<" means kses_split() matched a lone ">". It has to
  # leave as an entity; returning it raw would put an unescaped ">" back
  # into the output.
  if (substr($tag, 0, 1) != '<')
  {
    return '&gt;';
  }

  # Groups: 1 = optional closing slash, 2 = element name, 3 = attribute list
  $parts = array();
  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $tag, $parts))
  {
    return '';   # seriously malformed
  }

  $slash    = trim($parts[1]);
  $elem     = $parts[2];
  $attrlist = $parts[3];

  if (!isset($allowed_html[strtolower($elem)]))
  {
    return '';   # element is not whitelisted
  }

  if ($slash != '')
  {
    return "<$slash$elem>";   # closing elements never carry attributes
  }

  return kses_attr($elem, $attrlist, $allowed_html, $allowed_protocols);
} # function kses_split2
| 163 : | |||
/**
 * Rebuilds an opening tag, keeping only the attributes that are whitelisted
 * for the element and that pass their configured checks.
 *
 * When the element has no whitelisted attributes, all attributes are dropped.
 * Otherwise kses_hair() parses the attribute list and the tag is reassembled
 * from its result. Any "<" or ">" left in the attributes is removed, and a
 * trailing XHTML slash in the input is carried over to the output.
 *
 * @param string $element           element name
 * @param string $attr              raw attribute list
 * @param array  $allowed_html      lower-cased whitelist of elements/attributes
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string the rebuilt tag
 */
function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
{
  $elemkey = strtolower($element);

  # Closing XHTML slash at the end of the attribute list?
  $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

  # No attributes allowed at all for this element
  if (@count($allowed_html[$elemkey]) == 0)
  {
    return "<$element$xhtml_slash>";
  }

  $kept = '';

  foreach (kses_hair($attr, $allowed_protocols) as $attribute)
  {
    $namekey = strtolower($attribute['name']);

    if (!@isset($allowed_html[$elemkey][$namekey]))
    {
      continue;   # the attribute is not allowed
    }

    $checks = $allowed_html[$elemkey][$namekey];
    $passes = true;

    # A non-array entry means "allowed without checks"
    if (is_array($checks))
    {
      foreach ($checks as $checkname => $checkvalue)
      {
        if (!kses_check_attr_val($attribute['value'], $attribute['vless'],
                                 $checkname, $checkvalue))
        {
          $passes = false;
          break;
        }
      }
    }

    if ($passes)
    {
      $kept .= ' '.$attribute['whole'];
    }
  }

  # Remove any "<" or ">" characters
  $kept = str_replace(array('<', '>'), '', $kept);

  return "<$element$kept$xhtml_slash>";
} # function kses_attr
| 241 : | |||
/**
 * Parses an attribute list into an array of attribute records, coping with
 * malformed input as well as it can.
 *
 * Each record has the keys 'name', 'value', 'whole' (the rebuilt
 * name="value" text) and 'vless' ('y' for valueless attributes, 'n'
 * otherwise). Unquoted values are given double quotes, and every value is
 * passed through kses_bad_protocol().
 *
 * @param string $attr              raw attribute list
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return array list of attribute records
 */
function kses_hair($attr, $allowed_protocols)
{
  $attrarr  = array();
  $attrname = '';

  # Parser state: 0 = expecting an attribute name,
  #               1 = expecting "=" or the end of a valueless attribute,
  #               2 = expecting an attribute value
  $mode = 0;

  # Accepted value forms, tried in this order, each paired with the quote
  # character used when the attribute is rebuilt.
  $valueforms = array(
    array('/^"([^"]*)"(\s+|$)/',   '"'),    # "value"
    array("/^'([^']*)'(\s+|$)/",   "'"),    # 'value'
    array("%^([^\s\"']+)(\s+|$)%", '"'),    # value (quotes get added)
  );

  while (strlen($attr) != 0)
  {
    $consumed = false;   # did this pass recognise anything?
    $found    = array();

    if ($mode == 0)
    {
      # attribute name, href for instance
      if (preg_match('/^([-a-zA-Z]+)/', $attr, $found))
      {
        $attrname = $found[1];
        $attr     = (string) substr($attr, strlen($found[0]));
        $mode     = 1;
        $consumed = true;
      }
    }
    elseif ($mode == 1)
    {
      if (preg_match('/^\s*=\s*/', $attr, $found))
      {
        # equals sign: a value follows
        $attr     = (string) substr($attr, strlen($found[0]));
        $mode     = 2;
        $consumed = true;
      }
      elseif (preg_match('/^\s+/', $attr, $found))
      {
        # whitespace: the attribute is valueless ("selected")
        $attrarr[] = array('name'  => $attrname,
                           'value' => '',
                           'whole' => $attrname,
                           'vless' => 'y');
        $attr     = (string) substr($attr, strlen($found[0]));
        $mode     = 0;
        $consumed = true;
      }
    }
    elseif ($mode == 2)
    {
      # attribute value, a URL after href= for instance
      foreach ($valueforms as $form)
      {
        list($pattern, $quote) = $form;

        if (!preg_match($pattern, $attr, $found))
        {
          continue;
        }

        $thisval = kses_bad_protocol($found[1], $allowed_protocols);

        $attrarr[] = array('name'  => $attrname,
                           'value' => $thisval,
                           'whole' => $attrname.'='.$quote.$thisval.$quote,
                           'vless' => 'n');
        $attr     = (string) substr($attr, strlen($found[0]));
        $mode     = 0;
        $consumed = true;
        break;
      }
    }

    if (!$consumed)
    {
      # not well formed: drop the offending piece and start over
      $attr = kses_html_error($attr);
      $mode = 0;
    }
  } # while

  # The list ended right after a name, i.e. with a valueless attribute
  if ($mode == 1)
  {
    $attrarr[] = array('name'  => $attrname,
                       'value' => '',
                       'whole' => $attrname,
                       'vless' => 'y');
  }

  return $attrarr;
} # function kses_hair
| 377 : | |||
/**
 * Runs a single named check against an attribute value.
 *
 * Supported check names (case-insensitive):
 *  - maxlen:    the value is not longer than $checkvalue
 *  - minlen:    the value is not shorter than $checkvalue
 *  - maxval:    the value is an integer of at most six digits, with at most
 *               six whitespace characters on either side, and is not greater
 *               than $checkvalue
 *  - minval:    same format rule, and the value is not smaller than
 *               $checkvalue
 *  - valueless: $checkvalue "y"/"Y" demands an attribute without a value,
 *               "n"/"N" demands one with a value
 * Unknown check names always pass.
 *
 * @param string $value      attribute value
 * @param string $vless      'y' if the attribute has no value, 'n' otherwise
 * @param string $checkname  name of the check
 * @param mixed  $checkvalue limit or flag the check compares against
 * @return bool true if the value passes the check
 */
function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
{
  # Short non-negative integer; the bounded repetitions keep out long runs
  # of digits or whitespace.
  $smallint = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

  switch (strtolower($checkname))
  {
    case 'maxlen':
      return !(strlen($value) > $checkvalue);

    case 'minlen':
      return !(strlen($value) < $checkvalue);

    case 'maxval':
      return preg_match($smallint, $value) && !($value > $checkvalue);

    case 'minval':
      return preg_match($smallint, $value) && !($value < $checkvalue);

    case 'valueless':
      return strtolower($checkvalue) == $vless;
  } # switch

  return true;
} # function kses_check_attr_val
| 453 : | |||
/**
 * Strips every non-allowed protocol from the beginning of $string.
 *
 * The stripping is repeated until the string stops changing, so nested input
 * such as "javascript:javascript:alert(57)" does not get through.
 *
 * @param string $string            attribute value to clean
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string the cleaned value
 */
function kses_bad_protocol($string, $allowed_protocols)
{
  $current = kses_no_null($string);

  # deals with Opera "feature" -- moodle utf8 fix: a 0xAD byte is only
  # dropped when the byte before it is outside 0xC3-0xCF
  $current = preg_replace('/([^\xc3-\xcf])\xad+/', '\\1', $current);

  do
  {
    $previous = $current;
    $current  = kses_bad_protocol_once($previous, $allowed_protocols);
  }
  while ($current != $previous);

  return $current;
} # function kses_bad_protocol
| 484 : | |||
/**
 * Removes NUL bytes from $string, and after that any literal backslash-zero
 * sequences.
 *
 * @param string $string text to clean
 * @return string text without NULs
 */
function kses_no_null($string)
{
  # preg_replace() applies the patterns one after the other, in this order
  $nulls = array('/\0+/',        # real NUL bytes
                 '/(\\\\0)+/');  # the two characters "\" and "0"

  return preg_replace($nulls, '', $string);
} # function kses_no_null
| 501 : | |||
| 502 : | |||
/**
 * Turns the two-character sequence \" into a plain " and leaves every other
 * backslash alone. kses_split2() calls this on each tag it receives; the
 * step exists because of the quoting that preg_replace(//e) applies.
 *
 * @param string $string text to unescape
 * @return string text with \" replaced by "
 */
function kses_stripslashes($string)
{
  return str_replace('\\"', '"', $string);
} # function kses_stripslashes
| 520 : | |||
| 521 : | |||
/**
 * Returns a copy of a two-level array in which the keys of both levels are
 * lower-cased. Values of the inner level are copied as they are.
 *
 * @param array $inarray array of arrays
 * @return array the same data with lower-case keys
 */
function kses_array_lc($inarray)
{
  $outarray = array();

  foreach ($inarray as $outerkey => $inner)
  {
    $lowered = array();

    foreach ($inner as $innerkey => $innerval)
    {
      $lowered[strtolower($innerkey)] = $innerval;
    }

    $outarray[strtolower($outerkey)] = $lowered;
  }

  return $outarray;
} # function kses_array_lc
| 549 : | |||
/**
 * Removes the HTML JavaScript entities -- &{...}; -- found in early versions
 * of Netscape 4. An entity that is never closed is removed up to the end of
 * the string.
 *
 * @param string $string text to clean
 * @return string text without JavaScript entities
 */
function kses_js_entities($string)
{
  $jsentity = '%&\s*\{[^}]*(\}\s*;?|$)%';

  return preg_replace($jsentity, '', $string);
} # function kses_js_entities
| 564 : | |||
/**
 * Recovery step for parsing errors in kses_hair(): drops everything up to and
 * including the next run of whitespace, treating quoted and apostrophed
 * stretches as units so that whitespace inside them does not end the skip.
 *
 * @param string $string remaining attribute list
 * @return string the list with its leading broken piece removed
 */
function kses_html_error($string)
{
  $broken = '/^('.
            '"[^"]*("|$)'.        # a quoted stretch, closed or not
            '|\'[^\']*(\'|$)'.    # an apostrophed stretch, closed or not
            '|\S'.                # any other non-whitespace character
            ')*\s*/';             # ... followed by the whitespace

  return preg_replace($broken, '', $string);
} # function kses_html_error
| 582 : | |||
/**
 * Looks for a URL protocol at the beginning of $string and lets
 * kses_bad_protocol_once2() decide whether it may stay.
 *
 * The string is split at the first colon. The colon may also be written as
 * the entity &#58; or &#x3a; (any letter case), so the entity forms have to
 * be part of the split pattern -- otherwise an entity-encoded colon would
 * slip past the protocol whitelist.
 *
 * @param string $string            attribute value
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string the value with a non-allowed leading protocol removed
 */
function kses_bad_protocol_once($string, $allowed_protocols)
{
  $parts = preg_split('/:|&#58;|&#x3a;/i', $string, 2);

  # Nothing is changed when there is no colon at all, or when the text
  # before the first colon contains "/?".
  if (isset($parts[1]) && !preg_match('%/\?%', $parts[0]))
  {
    $string = kses_bad_protocol_once2($parts[0], $allowed_protocols).
              trim($parts[1]);
  }

  return $string;
} # function kses_bad_protocol_once
| 604 : | |||
/**
 * Decides what to put in place of a protocol candidate: the normalised
 * protocol followed by a colon if it is in the whitelist, otherwise nothing.
 *
 * Before the comparison the candidate has its numeric entities decoded, its
 * whitespace, NULs and 0xAD bytes removed, and is lower-cased.
 *
 * @param string $string            text found in front of the colon
 * @param array  $allowed_protocols URL protocols accepted in attribute values
 * @return string "protocol:" or an empty string
 */
function kses_bad_protocol_once2($string, $allowed_protocols)
{
  $candidate = kses_decode_entities($string);
  $candidate = preg_replace('/\s/', '', $candidate);
  $candidate = kses_no_null($candidate);
  $candidate = preg_replace('/\xad+/', '', $candidate);
    # deals with Opera "feature"
  $candidate = strtolower($candidate);

  foreach ($allowed_protocols as $protocol)
  {
    if (strtolower($protocol) == $candidate)
    {
      return $candidate.':';
    }
  }

  return '';
} # function kses_bad_protocol_once2
| 639 : | |||
/**
 * Normalises HTML entities. It converts "AT&T" to the correct "AT&amp;T",
 * "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * Every "&" is first escaped to "&amp;", which disarms all entities. The
 * well-formed ones are then restored: named entities, decimal entities
 * (through kses_normalize_entities2()) and hexadecimal entities of two or
 * four digits. The decimal step uses a callback instead of the /e modifier,
 * which was removed in PHP 7.
 *
 * @param string $string text to normalise
 * @return string text with normalised entities
 */
function kses_normalize_entities($string)
{
  # Disarm all entities by converting & to &amp;

  $string = str_replace('&', '&amp;', $string);

  # Change back the allowed entities in our entity whitelist

  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
                         '&\\1;', $string);
  $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/',
                                  function ($matches)
                                  {
                                    return kses_normalize_entities2($matches[1]);
                                  },
                                  $string);
  $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
                         '&#\\1\\2;', $string);

  return $string;
} # function kses_normalize_entities
| 668 : | |||
/**
 * Helper for kses_normalize_entities(): accepts only 16 bit values for
 * &#number; entities. Larger numbers stay disarmed, i.e. their ampersand is
 * returned as "&amp;".
 *
 * @param int $i entity number (a string of digits works as well)
 * @return string "&#number;" if it fits into 16 bits, "&amp;#number;" if not
 */
function kses_normalize_entities2($i)
{
  return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
} # function kses_normalize_entities2
| 684 : | |||
/**
 * Decodes numeric HTML entities (&#65; and &#x41;). Other entities such as
 * &auml; are left alone; the URL protocol whitelisting does not need them.
 *
 * Each entity becomes the single byte that chr() produces for its number,
 * which means only the lowest eight bits of the number are used. Callbacks
 * replace the /e modifier, which was removed in PHP 7.
 *
 * @param string $string text containing entities
 * @return string text with its numeric entities decoded
 */
function kses_decode_entities($string)
{
  $string = preg_replace_callback('/&#([0-9]+);/',
                                  function ($matches)
                                  {
                                    # chr() wraps around at 256; reduce the
                                    # number explicitly to stay in range
                                    return chr(((int) $matches[1]) % 256);
                                  },
                                  $string);
  $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/',
                                  function ($matches)
                                  {
                                    # the last two hex digits are the low byte
                                    return chr(hexdec(substr($matches[1], -2)));
                                  },
                                  $string);

  return $string;
} # function kses_decode_entities
| 706 : | |||
| 707 : | ?> |
| Moodle CVS Admin | ViewVC Help |
| Powered by ViewVC 1.0.7 |