Code Coverage for /Users/ericduran/Sites/drupal/drupal/core/lib/Drupal/Component/Utility/Html.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total	0.00% covered (danger)	0.00%	0 / 1	66.67% covered (warning)	66.67%	8 / 12	CRAP	74.19% covered (warning)	74.19%	46 / 62
Html	0.00% covered (danger)	0.00%	0 / 1	66.67% covered (warning)	66.67%	8 / 12	35.74	74.19% covered (warning)	74.19%	46 / 62
getClass				100.00% covered (success)	100.00%	1 / 1	2	100.00% covered (success)	100.00%	3 / 3
cleanCssIdentifier				100.00% covered (success)	100.00%	1 / 1	3	100.00% covered (success)	100.00%	11 / 11
setIsAjax				0.00% covered (danger)	0.00%	0 / 1	2	0.00% covered (danger)	0.00%	0 / 2
getUniqueId				100.00% covered (success)	100.00%	1 / 1	5	100.00% covered (success)	100.00%	11 / 11
getId				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	4 / 4
resetSeenIds				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	2 / 2
normalize				0.00% covered (danger)	0.00%	0 / 1	2	0.00% covered (danger)	0.00%	0 / 2
load				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	5 / 5
serialize				0.00% covered (danger)	0.00%	0 / 1	5.20	80.00% covered (warning)	80.00%	8 / 10
escapeCdataElement				0.00% covered (danger)	0.00%	0 / 1	12	0.00% covered (danger)	0.00%	0 / 10
decodeEntities				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	1 / 1
escape				100.00% covered (success)	100.00%	1 / 1	1	100.00% covered (success)	100.00%	1 / 1

1	<?php
2
3	/**
4	* @file
5	* Contains \Drupal\Component\Utility\Html.
6	*/
7
8	namespace Drupal\Component\Utility;
9
10	/**
11	* Provides DOMDocument helpers for parsing and serializing HTML strings.
12	*
13	* @ingroup utility
14	*/
15	class Html {
16
17	/**
18	* Cache of cleaned HTML class names, keyed by the raw name given to getClass().
19	*
20	* @var array
21	*/
22	protected static $classes = array();
23
24	/**
25	* The initial list of IDs; getUniqueId() copies it into $seenIds when unset.
26	*
27	* @var array
28	*/
29	protected static $seenIdsInit;
30
31	/**
32	* IDs seen so far; getUniqueId() keeps a per-ID use counter in this array.
33	* @var array
34	*/
35	protected static $seenIds;
36
37	/**
38	* Whether the current request is an Ajax request; set through setIsAjax().
39	*
40	* @var bool
41	*/
42	protected static $isAjax = FALSE;
43
44	/**
45	* Converts a single class name into a lowercased, valid CSS class.
46	*
47	* Pass exactly one class per call: spaces are turned into dashes, so the
48	* input "one two" comes back as the single class "one-two".
49	*
50	* @param string $class
51	* The raw class name.
52	*
53	* @return string
54	* The cleaned class name; results are cached per raw input string.
55	*/
56	public static function getClass($class) {
57	if (isset(static::$classes[$class])) {
58	return static::$classes[$class];
59	}
60	return static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
61	}
62
63	/**
64	* Prepares a string for use as a CSS identifier (element, class, or ID name).
65	*
66	* http://www.w3.org/TR/CSS21/syndata.html#characters shows the syntax for
67	* valid CSS identifiers (including element names, classes, and IDs in
68	* selectors.)
69	*
70	* @param string $identifier
71	* The identifier to clean.
72	* @param array $filter
73	* An array of string replacements to use on the identifier.
74	*
75	* @return string
76	* The cleaned identifier.
77	*/
78	public static function cleanCssIdentifier($identifier, array $filter = array(
79	' ' => '-',
80	'_' => '-',
81	'/' => '-',
82	'[' => '-',
83	']' => '',
84	)) {
85	// We could also use strtr() here but it's much slower than str_replace(). To
86	// keep '__' intact we first swap it for a temporary placeholder, but only
87	// when the caller has not defined '__' as a filter key of its own.
88	$double_underscore_replacements = 0;
89	if (!isset($filter['__'])) {
90	$identifier = str_replace('__', '##', $identifier, $double_underscore_replacements);
91	}
92	$identifier = str_replace(array_keys($filter), array_values($filter), $identifier);
93	// Replace temporary placeholder '##' with '__' only if the original
94	// $identifier contained '__'.
95	if ($double_underscore_replacements > 0) {
96	$identifier = str_replace('##', '__', $identifier);
97	}
98
99	// Valid characters in a CSS identifier are:
100	// - the hyphen (U+002D)
101	// - 0-9 (U+0030 - U+0039)
102	// - A-Z (U+0041 - U+005A)
103	// - the underscore (U+005F)
104	// - a-z (U+0061 - U+007A)
105	// - ISO 10646 characters U+00A1 and higher
106	// We strip out any character not in the above list.
107	$identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);
108	// Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
109	$identifier = preg_replace(array(
110	'/^[0-9]/',
111	'/^(-[0-9])|^(--)/'
112	), array('_', '__'), $identifier);
113	return $identifier;
114	}
115
116	/**
117	* Records whether this request is an Ajax request; getUniqueId() reads the flag.
118	*
119	* @param bool $is_ajax
120	* TRUE if this request is an Ajax request, FALSE otherwise.
121	*/
122	public static function setIsAjax($is_ajax) {
123	static::$isAjax = $is_ajax;
124	}
125
126	/**
127	* Prepares a string for use as a valid HTML ID and guarantees uniqueness.
128	*
129	* This function ensures that each passed HTML ID value only exists once on
130	* the page. By tracking the already returned ids, this function enables
131	* forms, blocks, and other content to be output multiple times on the same
132	* page, without breaking (X)HTML validation.
133	*
134	* For already existing IDs, a counter is appended to the ID string.
135	* Therefore, JavaScript and CSS code should not rely on any value that was
136	* generated by this function and instead should rely on manually added CSS
137	* classes or similarly reliable constructs.
138	*
139	* Two consecutive hyphens separate the counter from the original ID. To
140	* manage uniqueness across multiple Ajax requests on the same page, Ajax
141	* requests POST an array of all IDs currently present on the page, which are
142	* used to prime this function's cache upon first invocation.
143	*
144	* To allow reverse-parsing of IDs submitted via Ajax, any multiple
145	* consecutive hyphens in the originally passed $id are replaced with a
146	* single hyphen.
147	*
148	* @param string $id
149	* The ID to clean.
150	*
151	* @return string
152	* The cleaned ID.
153	*/
154	public static function getUniqueId($id) {
155	// Markup built during an Ajax request is merged into a page whose IDs this
156	// request cannot see, so a per-request counter is not enough there. Append
157	// a random suffix to the cleaned ID instead.
158	if (static::$isAjax) {
159	$ajax_base = static::getId($id);
160	return $ajax_base . '--' . Crypt::randomBytesBase64(8);
161	}
162
163	// @todo Remove all that code once we switch over to random IDs only,
164	// see https://www.drupal.org/node/1090592.
165	// Lazily create the initial ID list, then seed the working list from it
166	// whenever the working list is unset (on first use, or after a call to
167	// resetSeenIds()).
168	static::$seenIdsInit = isset(static::$seenIdsInit) ? static::$seenIdsInit : array();
169	static::$seenIds = isset(static::$seenIds) ? static::$seenIds : static::$seenIdsInit;
170
171	$clean_id = static::getId($id);
172
173	// The first request for an ID returns it unchanged and starts its counter.
174	if (!isset(static::$seenIds[$clean_id])) {
175	static::$seenIds[$clean_id] = 1;
176	return $clean_id;
177	}
178
179	// Every repeat gets the incremented counter appended after a '--'
180	// delimiter. getId() collapses runs of hyphens into one, so the delimiter
181	// never occurs inside $clean_id itself and a suffixed value cannot be
182	// mistaken for a plain cleaned ID.
183	$occurrence = ++static::$seenIds[$clean_id];
184	return $clean_id . '--' . $occurrence;
185	}
186
187	/**
188	* Prepares a string for use as a valid HTML ID.
189	*
190	* Only use this function when you want to intentionally skip the uniqueness
191	* guarantee of self::getUniqueId().
192	*
193	* @param string $id
194	* The ID to clean.
195	*
196	* @return string
197	* The cleaned ID.
198	*
199	* @see self::getUniqueId()
200	*/
201	public static function getId($id) {
202	// Lowercase first, then map separators to hyphens and drop closing brackets.
203	$lowercased = Unicode::strtolower($id);
204	$dashed = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], $lowercased);
205
206	// HTML4 (http://www.w3.org/TR/html4/types.html#type-name) permits letters,
207	// digits, hyphens, underscores, colons and periods in IDs, but CSS
208	// (http://www.w3.org/TR/CSS21/syndata.html#characters) rejects colons and
209	// periods, so only letters, digits, hyphens and underscores survive here.
210	$stripped = preg_replace('/[^A-Za-z0-9\-_]/', '', $dashed);
211
212	// Collapse each run of hyphens into a single hyphen.
213	$collapsed = preg_replace('/\-+/', '-', $stripped);
214	return $collapsed;
215	}
216
217	/**
218	* Clears the working list of seen IDs so getUniqueId() re-seeds it on next use.
219	*/
220	public static function resetSeenIds() {
221	static::$seenIds = NULL;
222	}
223
224	/**
225	* Normalizes an HTML snippet.
226	*
227	* This function is essentially \DOMDocument::normalizeDocument(), but
228	* operates on an HTML string instead of a \DOMDocument.
229	*
230	* @param string $html
231	* The HTML string to normalize.
232	*
233	* @return string
234	* The normalized HTML string.
235	*/
236	public static function normalize($html) {
237	// Round-trip the snippet: parse it with load(), write it back with serialize().
238	return static::serialize(static::load($html));
239	}
240
241	/**
242	* Parses an HTML snippet and returns it as a DOM object.
243	*
244	* This function loads the body part of a partial (X)HTML document and returns
245	* a full \DOMDocument object that represents this document.
246	*
247	* Use \Drupal\Component\Utility\Html::serialize() to serialize this
248	* \DOMDocument back to a string.
249	*
250	* @param string $html
251	* The partial (X)HTML snippet to load. Invalid markup will be corrected on
252	* import.
253	*
254	* @return \DOMDocument
255	* A \DOMDocument that represents the loaded (X)HTML snippet.
256	*/
257	public static function load($html) {
258	$document = <<<EOD
259	<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
260	<html xmlns="http://www.w3.org/1999/xhtml">
261	<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
262	<body>!html</body>
263	</html>
264	EOD;
265	// PHP's \DOMDocument serialization adds extra whitespace when the wrapping
266	// document contains newlines, so strip them while injecting the HTML body.
267	// strtr() never rescans replaced text, so newlines inside $html survive.
268	$document = strtr($document, array("\n" => '', '!html' => $html));
269
270	$dom = new \DOMDocument();
271	// The @ operator silences the warnings loadHTML() raises on HTML tag soup.
272	@$dom->loadHTML($document);
273
274	return $dom;
275	}
276
277	/**
278	* Converts the body of a \DOMDocument back to an HTML snippet.
279	*
280	* Every child of the first <body> element is written out with saveXML().
281	* Beforehand, CDATA inside <script> elements is wrapped in JavaScript line
282	* comments and CDATA inside <style> elements in CSS block comments.
283	*
284	* @param \DOMDocument $document
285	* A \DOMDocument object to serialize, only the tags below the first <body>
286	* node will be converted.
287	*
288	* @return string
289	* The serialized snippet, or an empty string if there is no <body> element.
290	*/
291	public static function serialize(\DOMDocument $document) {
292	$body_node = $document->getElementsByTagName('body')->item(0);
293	$html = '';
294	// Without a <body> element there is nothing to serialize.
295	if ($body_node !== NULL) {
296	foreach ($body_node->getElementsByTagName('script') as $node) {
297	static::escapeCdataElement($node);
298	}
299	foreach ($body_node->getElementsByTagName('style') as $node) {
300	static::escapeCdataElement($node, '/*', '*/');
301	}
302	foreach ($body_node->childNodes as $node) {
303	$html .= $document->saveXML($node);
304	}
305	}
306	return $html;
307	}
308
309	/**
310	* Adds comments around a <![CDATA[ section in a \DOMNode.
311	*
312	* \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
313	* CDATA sections from the contents of inline script and style tags. This can
314	* cause HTML4 browsers to throw exceptions.
315	*
316	* Each CDATA child is removed and a \DOMDocumentFragment wrapping its data in
317	* comment markers is appended. NOTE(review): children change while iterated.
318	*
319	* @param \DOMNode $node
320	* The element potentially containing a CDATA node.
321	* @param string $comment_start
322	* (optional) A string to use as a comment start marker to escape the CDATA
323	* declaration. Defaults to '//'.
324	* @param string $comment_end
325	* (optional) A string to use as a comment end marker to escape the CDATA
326	* declaration. Defaults to an empty string.
327	*/
328	public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
329	foreach ($node->childNodes as $child_node) {
330	if ($child_node instanceof \DOMCdataSection) {
331	$embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
332	$embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";
333
334	// A literal ']]>' in the data would close the CDATA section early, so it is
335	// split across two sections. This is the same behavior as found in libxml2.
336	// Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
337	// Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
338	$data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);
339
340	$fragment = $node->ownerDocument->createDocumentFragment();
341	$fragment->appendXML($embed_prefix . $data . $embed_suffix);
342	$node->appendChild($fragment);
343	$node->removeChild($child_node);
344	}
345	}
346	}
347
348	/**
349	* Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
350	*
351	* Double-escaped entities will only be decoded once ("&amp;lt;" becomes
352	* "&lt;", not "<"). Be careful when using this function, as it will revert
353	* previous sanitization efforts (&lt;script&gt; will become <script>).
354	*
355	* This method is not the opposite of Html::escape(). For example, this method
356	* will convert "&eacute;" to "é", whereas Html::escape() will not convert "é"
357	* to "&eacute;".
358	*
359	* @param string $text
360	* The text to decode entities in.
361	*
362	* @return string
363	* The input $text, with all HTML entities decoded once.
364	*
365	* @see html_entity_decode()
366	* @see \Drupal\Component\Utility\Html::escape()
367	*/
368	public static function decodeEntities($text) {
369	return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
370	}
371
372	/**
373	* Escapes text by converting special characters to HTML entities.
374	*
375	* This method escapes HTML for sanitization purposes by replacing the
376	* following special characters with their HTML entity equivalents:
377	* - & (ampersand) becomes &amp;
378	* - " (double quote) becomes &quot;
379	* - ' (single quote) becomes &#039;
380	* - < (less than) becomes &lt;
381	* - > (greater than) becomes &gt;
382	* Special characters that have already been escaped will be double-escaped
383	* (for example, "&lt;" becomes "&amp;lt;"), and invalid UTF-8 encoding
384	* will be converted to the Unicode replacement character ("�").
385	*
386	* This method is not the opposite of Html::decodeEntities(). For example,
387	* this method will not encode "é" to "&eacute;", whereas
388	* Html::decodeEntities() will convert all HTML entities to UTF-8 bytes,
389	* including "&eacute;" and "&lt;" to "é" and "<".
390	*
391	* When constructing @link theme_render render arrays @endlink passing the output of Html::escape() to
392	* '#markup' is not recommended. Use the '#plain_text' key instead and the
393	* renderer will autoescape the text.
394	*
395	* @param string $text
396	* The input text.
397	*
398	* @return string
399	* The text with all HTML special characters converted.
400	*
401	* @see htmlspecialchars()
402	* @see \Drupal\Component\Utility\Html::decodeEntities()
403	*
404	* @ingroup sanitization
405	*/
406	public static function escape($text) {
407	return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
408	}
409
410	}