Code Coverage for /Users/ericduran/Sites/drupal/drupal/core/lib/Drupal/Component/Utility/Xss.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total	0.00% covered (danger)	0.00%	0 / 1	62.50% covered (warning)	62.50%	5 / 8	CRAP	95.05% covered (success)	95.05%	96 / 101
Xss	0.00% covered (danger)	0.00%	0 / 1	62.50% covered (warning)	62.50%	5 / 8	41	95.05% covered (success)	95.05%	96 / 101
filter				100.00% covered (success)	100.00%	1 / 1	3	100.00% covered (success)	100.00%	12 / 12
anonymous function				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	2 / 2
filterAdmin				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	1 / 1
split				0.00% covered (danger)	0.00%	0 / 1	10	95.83% covered (success)	95.83%	23 / 24
attributes				0.00% covered (danger)	0.00%	0 / 1	23.08	94.74% covered (success)	94.74%	54 / 57
needsRemoval				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	1 / 1
getAdminTagList				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	1 / 1
getHtmlTagList				0.00% covered (danger)	0.00%	0 / 1	2	0.00% covered (danger)	0.00%	0 / 1

1	<?php
2
3	/**
4	* @file
5	* Contains \Drupal\Component\Utility\Xss.
6	*/
7
8	namespace Drupal\Component\Utility;
9
10	/**
11	* Provides helper to filter for cross-site scripting.
12	*
13	* @ingroup utility
14	*/
15	class Xss {
16
17	/**
18	* The list of HTML tags allowed by filterAdmin().
19	*
20	* @var array
21	*
22	* @see \Drupal\Component\Utility\Xss::filterAdmin()
23	*/
24	protected static $adminTags = array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr');
25
26	/**
27	* The default list of HTML tags allowed by filter().
28	*
29	* @var array
30	*
31	* @see \Drupal\Component\Utility\Xss::filter()
32	*/
33	protected static $htmlTags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd');
34
35	/**
36	* Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
37	*
38	* Based on kses by Ulf Harnhammar, see http://sourceforge.net/projects/kses.
39	* For examples of various XSS attacks, see: http://ha.ckers.org/xss.html.
40	*
41	* This code does four things:
42	* - Removes characters and constructs that can trick browsers.
43	* - Makes sure all HTML entities are well-formed.
44	* - Makes sure all HTML tags and attributes are well-formed.
45	* - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g.
46	* javascript:).
47	*
48	* @param $string
49	* The string with raw HTML in it. It will be stripped of everything that
50	* can cause an XSS attack.
51	* @param array $html_tags
52	* An array of HTML tags.
53	*
54	* @return string
55	* An XSS safe version of $string, or an empty string if $string is not
56	* valid UTF-8.
57	*
58	* @see \Drupal\Component\Utility\Unicode::validateUtf8()
59	*
60	* @ingroup sanitization
61	*/
62	public static function filter($string, array $html_tags = NULL) {
63	if (is_null($html_tags)) {
64	$html_tags = static::$htmlTags;
65	}
66	// Only operate on valid UTF-8 strings. This is necessary to prevent cross
67	// site scripting issues on Internet Explorer 6.
68	if (!Unicode::validateUtf8($string)) {
69	return '';
70	}
71	// Remove NULL characters (ignored by some browsers).
72	$string = str_replace(chr(0), '', $string);
73	// Remove Netscape 4 JS entities.
74	$string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
75
76	// Defuse all HTML entities.
77	$string = str_replace('&', '&amp;', $string);
78	// Change back only well-formed entities in our whitelist:
79	// Decimal numeric entities.
80	$string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
81	// Hexadecimal numeric entities.
82	$string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
83	// Named entities.
84	$string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
85	$html_tags = array_flip($html_tags);
86	// Late static binding does not work inside anonymous functions.
87	$class = get_called_class();
88	$splitter = function ($matches) use ($html_tags, $class) {
89	return $class::split($matches[1], $html_tags, $class);
90	};
91	// Strip any tags that are not in the whitelist.
92	return preg_replace_callback('%
93	(
94	<(?=[^a-zA-Z!/]) # a lone <
95	| # or
96	<!--.*?--> # a comment
97	| # or
98	<[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
99	| # or
100	> # just a >
101	)%x', $splitter, $string);
102	}
103
104	/**
105	* Applies a very permissive XSS/HTML filter for admin-only use.
106	*
107	* Use only for fields where it is impractical to use the
108	* whole filter system, but where some (mainly inline) mark-up
109	* is desired (so \Drupal\Component\Utility\Html::escape() is
110	* not acceptable).
111	*
112	* Allows all tags that can be used inside an HTML body, save
113	* for scripts and styles.
114	*
115	* @param string $string
116	* The string to apply the filter to.
117	*
118	* @return string
119	* The filtered string.
120	*
121	* @ingroup sanitization
122	*
123	* @see \Drupal\Component\Utility\Xss::getAdminTagList()
124	*
125	*/
126	public static function filterAdmin($string) {
127	return static::filter($string, static::$adminTags);
128	}
129
130	/**
131	* Processes an HTML tag.
132	*
133	* @param string $string
134	* The HTML tag to process.
135	* @param array $html_tags
136	* An array where the keys are the allowed tags and the values are not
137	* used.
138	* @param string $class
139	* The called class. This method is called from an anonymous function which
140	* breaks late static binding. See https://bugs.php.net/bug.php?id=66622 for
141	* more information.
142	*
143	* @return string
144	* If the element isn't allowed, an empty string. Otherwise, the cleaned up
145	* version of the HTML element.
146	*/
147	protected static function split($string, $html_tags, $class) {
148	if (substr($string, 0, 1) != '<') {
149	// We matched a lone ">" character.
150	return '>';
151	}
152	elseif (strlen($string) == 1) {
153	// We matched a lone "<" character.
154	return '<';
155	}
156
157	if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9\-]+)\s*([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
158	// Seriously malformed.
159	return '';
160	}
161	$slash = trim($matches[1]);
162	$elem = &$matches[2];
163	$attrlist = &$matches[3];
164	$comment = &$matches[4];
165
166	if ($comment) {
167	$elem = '!--';
168	}
169
170	// When in whitelist mode, an element is disallowed when not listed.
171	if ($class::needsRemoval($html_tags, $elem)) {
172	return '';
173	}
174
175	if ($comment) {
176	return $comment;
177	}
178
179	if ($slash != '') {
180	return "</$elem>";
181	}
182
183	// Is there a closing XHTML slash at the end of the attributes?
184	$attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
185	$xhtml_slash = $count ? ' /' : '';
186
187	// Clean up attributes.
188	$attr2 = implode(' ', $class::attributes($attrlist));
189	$attr2 = preg_replace('/[<>]/', '', $attr2);
190	$attr2 = strlen($attr2) ? ' ' . $attr2 : '';
191
192	return "<$elem$attr2$xhtml_slash>";
193	}
194
195	/**
196	* Processes a string of HTML attributes.
197	*
198	* @param string $attributes
199	* The html attribute to process.
200	*
201	* @return string
202	* Cleaned up version of the HTML attributes.
203	*/
204	protected static function attributes($attributes) {
205	$attributes_array = array();
206	$mode = 0;
207	$attribute_name = '';
208	$skip = FALSE;
209	$skip_protocol_filtering = FALSE;
210
211	while (strlen($attributes) != 0) {
212	// Was the last operation successful?
213	$working = 0;
214
215	switch ($mode) {
216	case 0:
217	// Attribute name, href for instance.
218	if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
219	$attribute_name = strtolower($match[1]);
220	$skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on');
221
222	// Values for attributes of type URI should be filtered for
223	// potentially malicious protocols (for example, an href-attribute
224	// starting with "javascript:"). However, for some non-URI
225	// attributes performing this filtering causes valid and safe data
226	// to be mangled. We prevent this by skipping protocol filtering on
227	// such attributes.
228	// @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
229	// @see http://www.w3.org/TR/html4/index/attributes.html
230	$skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, array(
231	'title',
232	'alt',
233	));
234
235	$working = $mode = 1;
236	$attributes = preg_replace('/^[-a-zA-Z][-a-zA-Z0-9]*/', '', $attributes);
237	}
238	break;
239
240	case 1:
241	// Equals sign or valueless ("selected").
242	if (preg_match('/^\s*=\s*/', $attributes)) {
243	$working = 1; $mode = 2;
244	$attributes = preg_replace('/^\s*=\s*/', '', $attributes);
245	break;
246	}
247
248	if (preg_match('/^\s+/', $attributes)) {
249	$working = 1; $mode = 0;
250	if (!$skip) {
251	$attributes_array[] = $attribute_name;
252	}
253	$attributes = preg_replace('/^\s+/', '', $attributes);
254	}
255	break;
256
257	case 2:
258	// Attribute value, a URL after href= for instance.
259	if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
260	$thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);
261
262	if (!$skip) {
263	$attributes_array[] = "$attribute_name=\"$thisval\"";
264	}
265	$working = 1;
266	$mode = 0;
267	$attributes = preg_replace('/^"[^"]*"(\s+|$)/', '', $attributes);
268	break;
269	}
270
271	if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
272	$thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);
273
274	if (!$skip) {
275	$attributes_array[] = "$attribute_name='$thisval'";
276	}
277	$working = 1; $mode = 0;
278	$attributes = preg_replace("/^'[^']*'(\s+|$)/", '', $attributes);
279	break;
280	}
281
282	if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
283	$thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);
284
285	if (!$skip) {
286	$attributes_array[] = "$attribute_name=\"$thisval\"";
287	}
288	$working = 1; $mode = 0;
289	$attributes = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attributes);
290	}
291	break;
292	}
293
294	if ($working == 0) {
295	// Not well formed; remove and try again.
296	$attributes = preg_replace('/
297	^
298	(
299	"[^"]*("|$) # - a string that starts with a double quote, up until the next double quote or the end of the string
300	| # or
301	\'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
302	| # or
303	\S # - a non-whitespace character
304	)* # any number of the above three
305	\s* # any number of whitespaces
306	/x', '', $attributes);
307	$mode = 0;
308	}
309	}
310
311	// The attribute list ends with a valueless attribute like "selected".
312	if ($mode == 1 && !$skip) {
313	$attributes_array[] = $attribute_name;
314	}
315	return $attributes_array;
316	}
317
318	/**
319	* Whether this element needs to be removed altogether.
320	*
321	* @param $html_tags
322	* The list of HTML tags.
323	* @param $elem
324	* The name of the HTML element.
325	*
326	* @return bool
327	* TRUE if this element needs to be removed.
328	*/
329	protected static function needsRemoval($html_tags, $elem) {
330	return !isset($html_tags[strtolower($elem)]);
331	}
332
333	/**
334	* Gets the list of HTML tags allowed by Xss::filterAdmin().
335	*
336	* @return array
337	* The list of HTML tags allowed by filterAdmin().
338	*/
339	public static function getAdminTagList() {
340	return static::$adminTags;
341	}
342
343	/**
344	* Gets the standard list of HTML tags allowed by Xss::filter().
345	*
346	* @return array
347	* The list of HTML tags allowed by Xss::filter().
348	*/
349	public static function getHtmlTagList() {
350	return static::$htmlTags;
351	}
352	}