Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
| Total | |
0.00% |
0 / 1 |
|
66.67% |
8 / 12 |
CRAP | |
74.19% |
46 / 62 |
| Html | |
0.00% |
0 / 1 |
|
66.67% |
8 / 12 |
35.74 | |
74.19% |
46 / 62 |
| getClass | |
100.00% |
1 / 1 |
2 | |
100.00% |
3 / 3 |
|||
| cleanCssIdentifier | |
100.00% |
1 / 1 |
3 | |
100.00% |
11 / 11 |
|||
| setIsAjax | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 2 |
|||
| getUniqueId | |
100.00% |
1 / 1 |
5 | |
100.00% |
11 / 11 |
|||
| getId | |
100.00% |
1 / 1 |
1 | |
100.00% |
4 / 4 |
|||
| resetSeenIds | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| normalize | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 2 |
|||
| load | |
100.00% |
1 / 1 |
1 | |
100.00% |
5 / 5 |
|||
| serialize | |
0.00% |
0 / 1 |
5.20 | |
80.00% |
8 / 10 |
|||
| escapeCdataElement | |
0.00% |
0 / 1 |
12 | |
0.00% |
0 / 10 |
|||
| decodeEntities | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| escape | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| <?php | |
| /** | |
| * @file | |
| * Contains \Drupal\Component\Utility\Html. | |
| */ | |
| namespace Drupal\Component\Utility; | |
/**
 * Provides DOMDocument helpers for parsing and serializing HTML strings.
 *
 * Also provides helpers for building CSS identifiers and HTML IDs, and for
 * escaping and decoding HTML entities.
 *
 * @ingroup utility
 */
class Html {

  /**
   * An array of previously cleaned HTML classes.
   *
   * Keyed by the raw class name passed to getClass(); each value is the
   * corresponding cleaned class name.
   *
   * @var array
   */
  protected static $classes = [];

  /**
   * An array of the initial IDs used in one request.
   *
   * @var array
   */
  protected static $seenIdsInit;

  /**
   * An array of IDs, including incremented versions when an ID is duplicated.
   *
   * Keyed by cleaned ID; each value is the number of times getUniqueId() has
   * been asked for that ID.
   *
   * @var array
   */
  protected static $seenIds;

  /**
   * Stores whether the current request was sent via AJAX.
   *
   * @var bool
   */
  protected static $isAjax = FALSE;

  /**
   * Prepares a string for use as a valid class name.
   *
   * Do not pass one string containing multiple classes as they will be
   * incorrectly concatenated with dashes, i.e. "one two" will become "one-two".
   *
   * Results are memoized in static::$classes, keyed by the raw input.
   *
   * @param string $class
   *   The class name to clean.
   *
   * @return string
   *   The cleaned class name.
   */
  public static function getClass($class) {
    if (!isset(static::$classes[$class])) {
      static::$classes[$class] = static::cleanCssIdentifier(Unicode::strtolower($class));
    }
    return static::$classes[$class];
  }

  /**
   * Prepares a string for use as a CSS identifier (element, class, or ID name).
   *
   * http://www.w3.org/TR/CSS21/syndata.html#characters shows the syntax for
   * valid CSS identifiers (including element names, classes, and IDs in
   * selectors.)
   *
   * @param string $identifier
   *   The identifier to clean.
   * @param array $filter
   *   An array of string replacements to use on the identifier. Keys are the
   *   strings to search for, values are their replacements.
   *
   * @return string
   *   The cleaned identifier.
   */
  public static function cleanCssIdentifier($identifier, array $filter = array(
    ' ' => '-',
    '_' => '-',
    '/' => '-',
    '[' => '-',
    ']' => '',
  )) {
    $search = array_keys($filter);
    $replace = array_values($filter);

    if (isset($filter['__'])) {
      // The caller explicitly filters double underscores, so there is nothing
      // to protect.
      $identifier = str_replace($search, $replace, $identifier);
    }
    else {
      // Keep '__' as '__' (the single-underscore filter would otherwise turn
      // it into '--'). The identifier is split on '__', each segment is
      // filtered on its own, and the segments are glued back together. Unlike
      // a temporary '##' placeholder, this cannot collide with a literal '##'
      // that is already part of the input.
      $segments = explode('__', $identifier);
      $identifier = implode('__', str_replace($search, $replace, $segments));
    }

    // Characters kept by the pattern below:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - characters U+00A1 through U+FFFF
    // Every other character is stripped.
    $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed
    // by a digit. A leading digit is replaced by '_'; a leading '--' or
    // hyphen-plus-digit is replaced by '__'.
    $identifier = preg_replace([
      '/^[0-9]/',
      '/^(-[0-9])|^(--)/',
    ], ['_', '__'], $identifier);

    return $identifier;
  }

  /**
   * Sets if this request is an Ajax request.
   *
   * @param bool $is_ajax
   *   TRUE if this request is an Ajax request, FALSE otherwise.
   */
  public static function setIsAjax($is_ajax) {
    static::$isAjax = $is_ajax;
  }

  /**
   * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
   *
   * This function ensures that each passed HTML ID value only exists once on
   * the page. By tracking the already returned ids, this function enables
   * forms, blocks, and other content to be output multiple times on the same
   * page, without breaking (X)HTML validation.
   *
   * For already existing IDs, a counter is appended to the ID string,
   * separated from the base ID by two consecutive hyphens. When the request
   * has been flagged as Ajax via setIsAjax(), the value returned by
   * Crypt::randomBytesBase64(8) is appended instead of a counter, because the
   * returned markup will be merged into a page whose IDs are not known here.
   *
   * Therefore, JavaScript and CSS code should not rely on any value that was
   * generated by this function and instead should rely on manually added CSS
   * classes or similarly reliable constructs.
   *
   * Because getId() collapses any run of consecutive hyphens in the passed
   * $id into a single hyphen, the '--' delimiter never occurs in the base ID.
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   */
  public static function getUniqueId($id) {
    // If this is an Ajax request, then content returned by this page request
    // will be merged with content already on the base page. The HTML IDs must
    // be unique for the fully merged content. Therefore use unique IDs.
    if (static::$isAjax) {
      return static::getId($id) . '--' . Crypt::randomBytesBase64(8);
    }

    // @todo Remove all that code once we switch over to random IDs only,
    // see https://www.drupal.org/node/1090592.
    if (!isset(static::$seenIdsInit)) {
      static::$seenIdsInit = [];
    }
    // resetSeenIds() sets $seenIds back to NULL, so it is re-seeded here.
    if (!isset(static::$seenIds)) {
      static::$seenIds = static::$seenIdsInit;
    }

    $id = static::getId($id);

    // Ensure IDs are unique by appending a counter after the first occurrence.
    // The counter needs to be appended with a delimiter that does not exist in
    // the base ID.
    if (isset(static::$seenIds[$id])) {
      $id = $id . '--' . ++static::$seenIds[$id];
    }
    else {
      static::$seenIds[$id] = 1;
    }
    return $id;
  }

  /**
   * Prepares a string for use as a valid HTML ID.
   *
   * Only use this function when you want to intentionally skip the uniqueness
   * guarantee of self::getUniqueId().
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   *
   * @see self::getUniqueId()
   */
  public static function getId($id) {
    $id = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], Unicode::strtolower($id));

    // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs
    // can only contain letters, digits ([0-9]), hyphens ("-"), underscores
    // ("_"), colons (":"), and periods ("."). We strip out any character not
    // in that list. Note that the CSS spec doesn't allow colons or periods in
    // identifiers (http://www.w3.org/TR/CSS21/syndata.html#characters), so we
    // strip those two characters as well.
    $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id);

    // Collapse runs of consecutive hyphens into a single hyphen.
    $id = preg_replace('/\-+/', '-', $id);
    return $id;
  }

  /**
   * Resets the list of seen IDs.
   */
  public static function resetSeenIds() {
    static::$seenIds = NULL;
  }

  /**
   * Normalizes an HTML snippet.
   *
   * The snippet is parsed with static::load() and written back out with
   * static::serialize().
   *
   * @param string $html
   *   The HTML string to normalize.
   *
   * @return string
   *   The normalized HTML string.
   */
  public static function normalize($html) {
    $document = static::load($html);
    return static::serialize($document);
  }

  /**
   * Parses an HTML snippet and returns it as a DOM object.
   *
   * This function loads the body part of a partial (X)HTML document and returns
   * a full \DOMDocument object that represents this document.
   *
   * Use \Drupal\Component\Utility\Html::serialize() to serialize this
   * \DOMDocument back to a string.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   *
   * @return \DOMDocument
   *   A \DOMDocument that represents the loaded (X)HTML snippet.
   */
  public static function load($html) {
    // The wrapper is built without any newlines between its parts: PHP's
    // \DOMDocument serialization adds extra whitespace when the markup of the
    // wrapping document contains newlines. The snippet itself is injected
    // untouched.
    $document = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
      . '<html xmlns="http://www.w3.org/1999/xhtml">'
      . '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>'
      . '<body>' . $html . '</body>'
      . '</html>';

    $dom = new \DOMDocument();

    // Ignore warnings during HTML soup loading. Parser errors are buffered by
    // libxml rather than hidden with the "@" operator, which would still
    // invoke custom error handlers.
    $previous = libxml_use_internal_errors(TRUE);
    $dom->loadHTML($document);
    if (!$previous) {
      // Only discard the buffer when the caller was not collecting libxml
      // errors itself.
      libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    return $dom;
  }

  /**
   * Converts the body of a \DOMDocument back to an HTML snippet.
   *
   * The function serializes the body part of a \DOMDocument back to an (X)HTML
   * snippet. The resulting (X)HTML snippet will be properly formatted to be
   * compatible with HTML user agents.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the tags below the first <body>
   *   node will be converted.
   *
   * @return string
   *   A valid (X)HTML snippet, as a string. An empty string is returned when
   *   the document has no <body> element.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    if ($body_node === NULL) {
      return '';
    }

    // Comment out the CDATA markers of inline scripts and styles, using the
    // comment syntax of the respective language.
    foreach ($body_node->getElementsByTagName('script') as $node) {
      static::escapeCdataElement($node);
    }
    foreach ($body_node->getElementsByTagName('style') as $node) {
      static::escapeCdataElement($node, '/*', '*/');
    }

    $html = '';
    foreach ($body_node->childNodes as $node) {
      $html .= $document->saveXML($node);
    }
    return $html;
  }

  /**
   * Adds comments around a <!CDATA section in a \DOMNode.
   *
   * \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
   * CDATA sections from the contents of inline script and style tags. This can
   * cause HTML4 browsers to throw exceptions.
   *
   * This function attempts to solve the problem by creating a
   * \DOMDocumentFragment to comment the CDATA tag. Each CDATA child of $node
   * is replaced, at its original position, by the commented equivalent.
   *
   * @param \DOMNode $node
   *   The element potentially containing a CDATA node.
   * @param string $comment_start
   *   (optional) A string to use as a comment start marker to escape the CDATA
   *   declaration. Defaults to '//'.
   * @param string $comment_end
   *   (optional) A string to use as a comment end marker to escape the CDATA
   *   declaration. Defaults to an empty string.
   */
  public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
    if (!$node->hasChildNodes()) {
      return;
    }

    $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
    $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

    // $node->childNodes is a live list. Take a snapshot first so that
    // inserting and removing children below cannot disturb the iteration.
    foreach (iterator_to_array($node->childNodes, FALSE) as $child_node) {
      if (!($child_node instanceof \DOMCdataSection)) {
        continue;
      }

      // Prevent invalid cdata escaping as this would throw a DOM error.
      // This is the same behavior as found in libxml2.
      // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
      // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting
      $data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);

      $fragment = $node->ownerDocument->createDocumentFragment();
      $fragment->appendXML($embed_prefix . $data . $embed_suffix);

      // Put the escaped content where the CDATA section was, so the order of
      // any sibling nodes is preserved, then drop the original section.
      $node->insertBefore($fragment, $child_node);
      $node->removeChild($child_node);
    }
  }

  /**
   * Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
   *
   * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
   * "&lt;", not "<"). Be careful when using this function, as it will revert
   * previous sanitization efforts (&lt;script&gt; will become <script>).
   *
   * This method is not the opposite of Html::escape(). For example, this method
   * will convert "&eacute;" to "é", whereas Html::escape() will not convert "é"
   * to "&eacute;".
   *
   * @param string $text
   *   The text to decode entities in.
   *
   * @return string
   *   The input $text, with all HTML entities decoded once.
   *
   * @see html_entity_decode()
   * @see \Drupal\Component\Utility\Html::escape()
   */
  public static function decodeEntities($text) {
    return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  }

  /**
   * Escapes text by converting special characters to HTML entities.
   *
   * This method escapes HTML for sanitization purposes by replacing the
   * following special characters with their HTML entity equivalents:
   * - & (ampersand) becomes &amp;
   * - " (double quote) becomes &quot;
   * - ' (single quote) becomes &#039;
   * - < (less than) becomes &lt;
   * - > (greater than) becomes &gt;
   * Special characters that have already been escaped will be double-escaped
   * (for example, "&lt;" becomes "&amp;lt;"), and invalid UTF-8 encoding
   * will be converted to the Unicode replacement character ("�").
   *
   * This method is not the opposite of Html::decodeEntities(). For example,
   * this method will not encode "é" to "&eacute;", whereas
   * Html::decodeEntities() will convert all HTML entities to UTF-8 bytes,
   * including "&eacute;" and "&lt;" to "é" and "<".
   *
   * When constructing @link theme_render render arrays @endlink passing the
   * output of Html::escape() to '#markup' is not recommended. Use the
   * '#plain_text' key instead and the renderer will autoescape the text.
   *
   * @param string $text
   *   The input text.
   *
   * @return string
   *   The text with all HTML special characters converted.
   *
   * @see htmlspecialchars()
   * @see \Drupal\Component\Utility\Html::decodeEntities()
   *
   * @ingroup sanitization
   */
  public static function escape($text) {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
  }

}