class-emogrifier.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555
  1. <?php
  2. /**
  3. * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
  4. *
  5. * For more information, please see the README.md file.
  6. *
  7. * @version 1.2.0
  8. *
  9. * @author Cameron Brooks
  10. * @author Jaime Prado
  11. * @author Oliver Klee <typo3-coding@oliverklee.de>
  12. * @author Roman Ožana <ozana@omdesign.cz>
  13. * @author Sander Kruger <s.kruger@invessel.com>
  14. */
  15. // @codingStandardsIgnoreFile
  16. class Emogrifier
  17. {
  18. /**
  19. * @var int
  20. */
  21. const CACHE_KEY_CSS = 0;
  22. /**
  23. * @var int
  24. */
  25. const CACHE_KEY_SELECTOR = 1;
  26. /**
  27. * @var int
  28. */
  29. const CACHE_KEY_XPATH = 2;
  30. /**
  31. * @var int
  32. */
  33. const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 3;
  34. /**
  35. * @var int
  36. */
  37. const CACHE_KEY_COMBINED_STYLES = 4;
  38. /**
  39. * for calculating nth-of-type and nth-child selectors
  40. *
  41. * @var int
  42. */
  43. const INDEX = 0;
  44. /**
  45. * for calculating nth-of-type and nth-child selectors
  46. *
  47. * @var int
  48. */
  49. const MULTIPLIER = 1;
  50. /**
  51. * @var string
  52. */
  53. const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';
  54. /**
  55. * @var string
  56. */
  57. const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';
  58. /**
  59. * @var string
  60. */
  61. const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
  62. /**
  63. * @var string
  64. */
  65. const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
  66. /**
  67. * @var string
  68. */
  69. private $html = '';
  70. /**
  71. * @var string
  72. */
  73. private $css = '';
  74. /**
  75. * @var bool[]
  76. */
  77. private $excludedSelectors = array();
  78. /**
  79. * @var string[]
  80. */
  81. private $unprocessableHtmlTags = array( 'wbr' );
  82. /**
  83. * @var bool[]
  84. */
  85. private $allowedMediaTypes = array( 'all' => true, 'screen' => true, 'print' => true );
  86. /**
  87. * @var mixed[]
  88. */
  89. private $caches = array(
  90. self::CACHE_KEY_CSS => array(),
  91. self::CACHE_KEY_SELECTOR => array(),
  92. self::CACHE_KEY_XPATH => array(),
  93. self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => array(),
  94. self::CACHE_KEY_COMBINED_STYLES => array(),
  95. );
  96. /**
  97. * the visited nodes with the XPath paths as array keys
  98. *
  99. * @var \DOMElement[]
  100. */
  101. private $visitedNodes = array();
  102. /**
  103. * the styles to apply to the nodes with the XPath paths as array keys for the outer array
  104. * and the attribute names/values as key/value pairs for the inner array
  105. *
  106. * @var string[][]
  107. */
  108. private $styleAttributesForNodes = array();
  109. /**
   * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved.
  111. * If set to false, the value of the style attributes will be discarded.
  112. *
  113. * @var bool
  114. */
  115. private $isInlineStyleAttributesParsingEnabled = true;
  116. /**
  117. * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
  118. *
  119. * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML
  120. * via inline styles.
  121. *
  122. * If set to false, the <style> blocks will be left as they are in the HTML.
  123. *
  124. * @var bool
  125. */
  126. private $isStyleBlocksParsingEnabled = true;
  127. /**
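   * Determines whether elements with the `display: none` property are
   * removed from the DOM. Despite the property name, `true` (the default)
   * means such elements ARE removed; disableInvisibleNodeRemoval() sets it
   * to `false`, which leaves them in place.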
  130. *
  131. * @var bool
  132. */
  133. private $shouldKeepInvisibleNodes = true;
  134. /**
  135. * @var string[]
  136. */
  137. private $xPathRules = array(
  138. // child
  139. '/\\s*>\\s*/' => '/',
  140. // adjacent sibling
  141. '/\\s+\\+\\s+/' => '/following-sibling::*[1]/self::',
  142. // descendant
  143. '/\\s+(?=.*[^\\]]{1}$)/' => '//',
  144. // :first-child
  145. '/([^\\/]+):first-child/i' => '*[1]/self::\\1',
  146. // :last-child
  147. '/([^\\/]+):last-child/i' => '*[last()]/self::\\1',
  148. // attribute only
  149. '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/' => '*[@\\1]',
  150. // attribute
  151. '/(\\w)\\[(\\w+)\\]/' => '\\1[@\\2]',
  152. // exact attribute
  153. '/(\\w)\\[(\\w+)\\=[\'"]?([\\w\\s]+)[\'"]?\\]/' => '\\1[@\\2="\\3"]',
  154. // element attribute~=
  155. '/([\\w\\*]+)\\[(\\w+)[\\s]*\\~\\=[\\s]*[\'"]?([\\w-_\\/]+)[\'"]?\\]/'
  156. => '\\1[contains(concat(" ", @\\2, " "), concat(" ", "\\3", " "))]',
  157. // element attribute^=
  158. '/([\\w\\*]+)\\[(\\w+)[\\s]*\\^\\=[\\s]*[\'"]?([\\w-_\\/]+)[\'"]?\\]/' => '\\1[starts-with(@\\2, "\\3")]',
  159. // element attribute*=
  160. '/([\\w\\*]+)\\[(\\w+)[\\s]*\\*\\=[\\s]*[\'"]?([\\w-_\\s\\/:;]+)[\'"]?\\]/' => '\\1[contains(@\\2, "\\3")]',
  161. // element attribute$=
  162. '/([\\w\\*]+)\\[(\\w+)[\\s]*\\$\\=[\\s]*[\'"]?([\\w-_\\s\\/]+)[\'"]?\\]/'
  163. => '\\1[substring(@\\2, string-length(@\\2) - string-length("\\3") + 1) = "\\3"]',
  164. // element attribute|=
  165. '/([\\w\\*]+)\\[(\\w+)[\\s]*\\|\\=[\\s]*[\'"]?([\\w-_\\s\\/]+)[\'"]?\\]/'
  166. => '\\1[@\\2="\\3" or starts-with(@\\2, concat("\\3", "-"))]',
  167. );
  168. /**
  169. * Determines whether CSS styles that have an equivalent HTML attribute
  170. * should be mapped and attached to those elements.
  171. *
  172. * @var bool
  173. */
  174. private $shouldMapCssToHtml = false;
  175. /**
  176. * This multi-level array contains simple mappings of CSS properties to
  177. * HTML attributes. If a mapping only applies to certain HTML nodes or
   * only for certain values, the mapping array additionally carries a
   * whitelist under the 'nodes' and/or 'values' keys.
  180. *
  181. * @var mixed[][]
  182. */
  183. private $cssToHtmlMap = array(
  184. 'background-color' => array(
  185. 'attribute' => 'bgcolor',
  186. ),
  187. 'text-align' => array(
  188. 'attribute' => 'align',
  189. 'nodes' => array('p', 'div', 'td'),
  190. 'values' => array('left', 'right', 'center', 'justify'),
  191. ),
  192. 'float' => array(
  193. 'attribute' => 'align',
  194. 'nodes' => array('table', 'img'),
  195. 'values' => array('left', 'right'),
  196. ),
  197. 'border-spacing' => array(
  198. 'attribute' => 'cellspacing',
  199. 'nodes' => array('table'),
  200. ),
  201. );
  202. public static $_media = '';
  /**
   * Creates an instance, optionally pre-loaded with markup and styles.
   *
   * Both arguments are handed straight to setHtml() and setCss().
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   * @param string $css the CSS to merge, must be UTF-8-encoded
   */
  public function __construct($html = '', $css = '')
  {
      $this->setHtml($html);
      $this->setCss($css);
  }
  /**
   * Drops the per-node bookkeeping (visited nodes and their saved style
   * declarations) when the instance is destroyed.
   */
  public function __destruct()
  {
      $this->purgeVisitedNodes();
  }
  /**
   * Stores the markup that the next emogrify call will work on.
   *
   * The value is stored as given; no validation happens here.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   *
   * @return void
   */
  public function setHtml($html)
  {
      $this->html = $html;
  }
  /**
   * Stores the stylesheet text that will be merged into the HTML.
   *
   * The value is stored as given; no validation happens here.
   *
   * @param string $css the CSS to merge, must be UTF-8-encoded
   *
   * @return void
   */
  public function setCss($css)
  {
      $this->css = $css;
  }
  /**
   * Inlines the configured CSS into the configured HTML and returns the
   * complete resulting document as an HTML string.
   *
   * @return string the serialized document after processing
   *
   * @throws \BadMethodCallException if no HTML has been set (empty string)
   */
  public function emogrify()
  {
      $hasNoHtml = ($this->html === '');
      if ($hasNoHtml) {
          throw new BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
      }

      // Clear the shared static $_media value before each run.
      self::$_media = '';

      $document = $this->createXmlDocument();
      $this->process($document);

      return $document->saveHTML();
  }
  /**
   * Inlines the configured CSS into the configured HTML and returns only the
   * markup found inside the <body> element.
   *
   * The children of <body> are imported into a fresh DOMDocument, serialized,
   * and the result is passed through html_entity_decode().
   *
   * @return string the body content, or an empty string if the processed
   *                document has no <body> element
   *
   * @throws \BadMethodCallException if no HTML has been set (empty string)
   */
  public function emogrifyBodyContent()
  {
      if ($this->html === '') {
          throw new BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
      }

      // Same reset that emogrify() performs before running the pipeline, so
      // both entry points start from the same static state.
      // NOTE(review): the consumer of $_media is outside this excerpt - confirm.
      self::$_media = '';

      $xmlDocument = $this->createXmlDocument();
      $this->process($xmlDocument);

      // Guard against documents without a root or without a <body>; calling
      // ->childNodes on a missing node would otherwise be a fatal error.
      $documentElement = $xmlDocument->documentElement;
      $bodyElement = null;
      if ($documentElement !== null) {
          $bodyElement = $documentElement->getElementsByTagName('body')->item(0);
      }
      if ($bodyElement === null) {
          return '';
      }

      $innerDocument = new DOMDocument();
      foreach ($bodyElement->childNodes as $childNode) {
          $innerDocument->appendChild($innerDocument->importNode($childNode, true));
      }

      return html_entity_decode($innerDocument->saveHTML());
  }
  /**
   * Applies $this->css (plus, if enabled, the CSS from the document's own
   * <style> blocks) to $xmlDocument by writing inline "style" attributes.
   *
   * Steps, in order:
   *  1. clear all caches and the visited-node bookkeeping;
   *  2. normalize (or, if inline-style parsing is disabled, remove) every
   *     existing "style" attribute;
   *  3. split the CSS into its 'css' and 'media' parts and parse the rules;
   *  4. for every rule, merge its declarations into each matching node that
   *     is not excluded;
   *  5. re-apply the originally inline styles, optionally remove
   *     `display: none` nodes, and hand the 'media' part to
   *     copyCssWithMediaToStyleNode().
   *
   * While steps 2-4 run, handleXpathError() is installed as the E_WARNING
   * handler. It is restored afterwards, including when an exception escapes.
   *
   * NOTE(review): the local variable names in this method are deliberately
   * left unchanged. On PHP < 8 an error handler can receive the calling
   * scope's variables; whether handleXpathError() relies on that cannot be
   * seen from here - confirm before renaming.
   *
   * @param \DOMDocument $xmlDocument the document to modify in place
   *
   * @return void
   *
   * @throws \InvalidArgumentException
   */
  protected function process(DOMDocument $xmlDocument)
  {
      $xPath = new DOMXPath($xmlDocument);
      $this->clearAllCaches();
      // Before we begin processing the CSS file, parse the document and normalize all existing CSS attributes.
      // This changes 'DISPLAY: none' to 'display: none'.
      // We wouldn't have to do this if DOMXPath supported XPath 2.0.
      // Also store a reference of nodes with existing inline styles so we don't overwrite them.
      $this->purgeVisitedNodes();
      set_error_handler(array($this, 'handleXpathError'), E_WARNING);
      try {
          $nodesWithStyleAttributes = $xPath->query('//*[@style]');
          if ($nodesWithStyleAttributes !== false) {
              /** @var \DOMElement $node */
              foreach ($nodesWithStyleAttributes as $node) {
                  if ($this->isInlineStyleAttributesParsingEnabled) {
                      $this->normalizeStyleAttributes($node);
                  } else {
                      $node->removeAttribute('style');
                  }
              }
          }
          // grab any existing style blocks from the html and append them to the existing CSS
          // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
          $allCss = $this->css;
          if ($this->isStyleBlocksParsingEnabled) {
              $allCss .= $this->getCssFromAllStyleNodes($xPath);
          }
          $cssParts = $this->splitCssAndMediaQuery($allCss);
          $excludedNodes = $this->getNodesToExclude($xPath);
          $cssRules = $this->parseCssRules($cssParts['css']);
          foreach ($cssRules as $cssRule) {
              // query the body for the xpath selector
              $nodesMatchingCssSelectors = $xPath->query($this->translateCssToXpath($cssRule['selector']));
              // ignore invalid selectors
              if ($nodesMatchingCssSelectors === false) {
                  continue;
              }
              /** @var \DOMElement $node */
              foreach ($nodesMatchingCssSelectors as $node) {
                  if (in_array($node, $excludedNodes, true)) {
                      continue;
                  }
                  // if it has a style attribute, get it, process it, and append (overwrite) new stuff
                  if ($node->hasAttribute('style')) {
                      // break it up into an associative array
                      $oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
                  } else {
                      $oldStyleDeclarations = array();
                  }
                  $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']);
                  if ($this->shouldMapCssToHtml) {
                      $this->mapCssToHtmlAttributes($newStyleDeclarations, $node);
                  }
                  $node->setAttribute(
                      'style',
                      $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
                  );
              }
          }
      } catch (Exception $exception) {
          // Do not leave our handler installed for the rest of the request
          // when something in the section above throws.
          restore_error_handler();
          throw $exception;
      }
      restore_error_handler();
      if ($this->isInlineStyleAttributesParsingEnabled) {
          $this->fillStyleAttributesWithMergedStyles();
      }
      // Despite its name, a true $shouldKeepInvisibleNodes means "remove them".
      if ($this->shouldKeepInvisibleNodes) {
          $this->removeInvisibleNodes($xPath);
      }
      $this->copyCssWithMediaToStyleNode($xmlDocument, $xPath, $cssParts['media']);
  }
  /**
   * Maps each CSS declaration in $styles to an HTML attribute on $node where
   * a mapping exists.
   *
   * Any `!important` marker is stripped from the value before mapping. The
   * match is case-insensitive and tolerates whitespace after the "!", since
   * CSS accepts both `!IMPORTANT` and `! important`.
   *
   * @param string[] $styles the new CSS styles taken from the global styles to be applied to this node
   *                         (property name => value)
   * @param \DOMNode $node node to apply styles to
   *
   * @return void
   */
  private function mapCssToHtmlAttributes(array $styles, DOMNode $node)
  {
      foreach ($styles as $property => $value) {
          // Strip the !important indicator in any spelling CSS allows.
          $value = trim(preg_replace('/!\\s*important/i', '', $value));
          $this->mapCssToHtmlAttribute($property, $value, $node);
      }
  }
  /**
   * Maps one CSS declaration to an HTML attribute on $node.
   *
   * The simple lookup table is tried first; the special-case handling in
   * mapComplexCssProperty() runs only if the table did not apply.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMNode $node node to apply styles to
   *
   * @return void
   */
  private function mapCssToHtmlAttribute($property, $value, DOMNode $node)
  {
      $handledByTable = $this->mapSimpleCssProperty($property, $value, $node);
      if ($handledByTable) {
          return;
      }

      $this->mapComplexCssProperty($property, $value, $node);
  }
  /**
   * Applies the entry of $this->cssToHtmlMap for $property, if there is one
   * and its optional node-name and value whitelists both accept the input.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMNode $node node to apply styles to
   *
   * @return bool true if the attribute was set from the simple mapping table, false otherwise
   */
  private function mapSimpleCssProperty($property, $value, DOMNode $node)
  {
      if (!isset($this->cssToHtmlMap[$property])) {
          return false;
      }

      $rule = $this->cssToHtmlMap[$property];

      // A missing whitelist means "no restriction".
      if (isset($rule['nodes']) && !in_array($node->nodeName, $rule['nodes'], true)) {
          return false;
      }
      if (isset($rule['values']) && !in_array($value, $rule['values'], true)) {
          return false;
      }

      $node->setAttribute($rule['attribute'], $value);

      return true;
  }
  /**
   * Handles the CSS properties whose HTML attribute equivalent needs more
   * than a table lookup:
   *
   *  - background: the first space-separated token becomes "bgcolor" unless
   *    it starts with a digit or with "url";
   *  - width/height: whole-number px or % values become the attribute of the
   *    same name, with the "px" unit removed;
   *  - margin (table/img only): left and right both "auto" sets align="center";
   *  - border (table/img only): "none" or "0" sets border="0".
   *
   * All other properties are ignored.
   *
   * @param string $property the name of the CSS property to map
   * @param string $value the value of the style rule to map
   * @param \DOMNode $node node to apply styles to
   *
   * @return void
   */
  private function mapComplexCssProperty($property, $value, DOMNode $node)
  {
      $tagName = $node->nodeName;
      $isTableOrImage = ($tagName === 'table' || $tagName === 'img');

      switch ($property) {
          case 'background':
              // Only the first token is inspected for a colour.
              $tokens = explode(' ', $value);
              $leadingToken = $tokens[0];
              $startsWithDigit = is_numeric(substr($leadingToken, 0, 1));
              $isUrl = (substr($leadingToken, 0, 3) === 'url');
              if (!$startsWithDigit && !$isUrl) {
                  // Neither a position nor an image, so treat it as a colour.
                  $node->setAttribute('bgcolor', $leadingToken);
              }
              break;
          case 'width':
              // intentional fall-through
          case 'height':
              // Values such as "auto" are skipped; only px and % are mapped.
              if (preg_match('/^\d+(px|%)$/', $value)) {
                  // Keep digits, dots and the percent sign; this drops "px".
                  $node->setAttribute($property, preg_replace('/[^0-9.%]/', '', $value));
              }
              break;
          case 'margin':
              if (!$isTableOrImage) {
                  break;
              }
              $margins = $this->parseCssShorthandValue($value);
              if ($margins['left'] === 'auto' && $margins['right'] === 'auto') {
                  $node->setAttribute('align', 'center');
              }
              break;
          case 'border':
              if ($isTableOrImage && ($value === 'none' || $value === '0')) {
                  $node->setAttribute('border', '0');
              }
              break;
          default:
              break;
      }
  }
  /**
   * Expands a CSS box shorthand value (1 to 4 whitespace-separated parts)
   * into its four sides.
   *
   * Missing parts follow the CSS rules: "right" falls back to "top",
   * "bottom" falls back to "top", and "left" falls back to "right".
   * For example, '0 auto' yields top: 0, right: auto, bottom: 0, left: auto.
   *
   * @param string $value a string of CSS value with 1, 2, 3 or 4 sizes
   *
   * @return string[] an array of values for top, right, bottom and left (using these as associative array keys)
   */
  private function parseCssShorthandValue($value)
  {
      $parts = preg_split('/\\s+/', $value);
      $partCount = count($parts);

      $top = $parts[0];
      $right = ($partCount > 1) ? $parts[1] : $top;
      $bottom = ($partCount > 2) ? $parts[2] : $top;
      $left = ($partCount > 3) ? $parts[3] : $right;

      return array(
          'top' => $top,
          'right' => $right,
          'bottom' => $bottom,
          'left' => $left,
      );
  }
  /**
   * Splits raw CSS into one entry per selector and sorts the entries with
   * sortBySelectorPrecedence().
   *
   * Rules with an empty declarations block are dropped. Selectors containing
   * a pseudo-element ("::") are dropped, as are selectors with a pseudo-class
   * other than the "-child" / "-type(" forms. The result is cached per CSS
   * string, keyed by its md5 hash.
   *
   * @param string $css a string of raw CSS code
   *
   * @return string[][] an array of string sub-arrays with the keys
   *         "selector" (a single CSS selector, e.g., "*" or "h1"),
   *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
   *         e.g., "color: red; height: 4px;"),
   *         and "line" (the index of the rule within the CSS, e.g. 42)
   */
  private function parseCssRules($css)
  {
      $cacheId = md5($css);
      if (isset($this->caches[self::CACHE_KEY_CSS][$cacheId])) {
          return $this->caches[self::CACHE_KEY_CSS][$cacheId];
      }

      // Capture group 1 is the selector list, group 2 the declarations.
      preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $css, $ruleMatches, PREG_SET_ORDER);

      $parsedRules = array();
      /** @var string[] $ruleMatch */
      foreach ($ruleMatches as $position => $ruleMatch) {
          $declarations = trim($ruleMatch[2]);
          if ($declarations === '') {
              continue;
          }

          foreach (explode(',', $ruleMatch[1]) as $rawSelector) {
              // Pseudo-elements are never processed.
              if (strpos($rawSelector, '::') !== false) {
                  continue;
              }

              // Of the pseudo-classes, only the structural ones are allowed.
              $usesPseudoClass = (bool) preg_match('/:[a-zA-Z]/', $rawSelector);
              $usesSupportedPseudoClass = (bool) preg_match('/:\\S+\\-(child|type\\()/i', $rawSelector);
              if ($usesPseudoClass && !$usesSupportedPseudoClass) {
                  continue;
              }

              $parsedRules[] = array(
                  'selector' => trim($rawSelector),
                  'declarationsBlock' => $declarations,
                  // position within the CSS, since order is important
                  'line' => $position,
              );
          }
      }

      usort($parsedRules, array($this, 'sortBySelectorPrecedence'));
      $this->caches[self::CACHE_KEY_CSS][$cacheId] = $parsedRules;

      return $parsedRules;
  }
  /**
   * Turns off the parsing of inline styles.
   *
   * With parsing off, process() removes the existing "style" attributes
   * instead of normalizing and preserving them.
   *
   * @return void
   */
  public function disableInlineStyleAttributesParsing()
  {
      $this->isInlineStyleAttributesParsingEnabled = false;
  }
  /**
   * Turns off the parsing of <style> blocks.
   *
   * With parsing off, process() no longer appends the CSS taken from the
   * document's own <style> nodes to the CSS being applied.
   *
   * @return void
   */
  public function disableStyleBlocksParsing()
  {
      $this->isStyleBlocksParsingEnabled = false;
  }
  /**
   * Turns off the removal of elements styled with `display: none`.
   *
   * This sets $shouldKeepInvisibleNodes to false. Despite the property name,
   * process() calls removeInvisibleNodes() only while that flag is true.
   *
   * @return void
   */
  public function disableInvisibleNodeRemoval()
  {
      $this->shouldKeepInvisibleNodes = false;
  }
  /**
   * Turns on the mapping of CSS properties to HTML attributes.
   *
   * Once enabled, process() calls mapCssToHtmlAttributes() for every node
   * that a CSS rule is applied to.
   *
   * @return void
   */
  public function enableCssToHtmlMapping()
  {
      $this->shouldMapCssToHtml = true;
  }
  /**
   * Empties every cache bucket, one clearCache() call per cache key.
   *
   * @return void
   */
  private function clearAllCaches()
  {
      $cacheKeys = array(
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      );

      foreach ($cacheKeys as $cacheKey) {
          $this->clearCache($cacheKey);
      }
  }
  /**
   * Empties the cache bucket identified by $key.
   *
   * @param int $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH,
   *                 CACHE_KEY_CSS_DECLARATIONS_BLOCK or CACHE_KEY_COMBINED_STYLES
   *
   * @return void
   *
   * @throws \InvalidArgumentException if $key is not one of the keys above (compared strictly)
   */
  private function clearCache($key)
  {
      $knownKeys = array(
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      );

      $isKnownKey = in_array($key, $knownKeys, true);
      if (!$isKnownKey) {
          throw new InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
      }

      $this->caches[$key] = array();
  }
  /**
   * Forgets all visited nodes together with the style declarations that
   * were saved for them.
   *
   * @return void
   */
  private function purgeVisitedNodes()
  {
      $this->styleAttributesForNodes = array();
      $this->visitedNodes = array();
  }
  /**
   * Adds a tag name to the list of tags marked for removal.
   *
   * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
   * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
   *
   * Note: The tags will not be removed if they have any content.
   *
   * No de-duplication is done here; adding a name twice stores it twice.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function addUnprocessableHtmlTag($tagName)
  {
      array_push($this->unprocessableHtmlTags, $tagName);
  }
  /**
   * Takes a tag name off the removal list again.
   *
   * Only the first strictly equal entry is removed; a name that is not on
   * the list is ignored.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function removeUnprocessableHtmlTag($tagName)
  {
      $position = array_search($tagName, $this->unprocessableHtmlTags, true);
      if ($position === false) {
          return;
      }

      unset($this->unprocessableHtmlTags[$position]);
  }
  /**
   * Adds a media type to the set of allowed media types.
   *
   * The set is keyed by media type name, so adding a name twice has no
   * further effect.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   *
   * @return void
   */
  public function addAllowedMediaType($mediaName)
  {
      $this->allowedMediaTypes[$mediaName] = true;
  }
  /**
   * Removes a media type from the set of allowed media types.
   *
   * A name that is not in the set is ignored.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   *
   * @return void
   */
  public function removeAllowedMediaType($mediaName)
  {
      if (!isset($this->allowedMediaTypes[$mediaName])) {
          return;
      }

      unset($this->allowedMediaTypes[$mediaName]);
  }
  /**
   * Registers a selector whose matching nodes are skipped during
   * emogrification.
   *
   * Any nodes that match the selector will not have their style altered.
   * The set is keyed by selector, so registering one twice has no further
   * effect.
   *
   * @param string $selector the selector to exclude, e.g., ".editor"
   *
   * @return void
   */
  public function addExcludedSelector($selector)
  {
      $this->excludedSelectors[$selector] = true;
  }
  /**
   * Unregisters a previously excluded selector, so matching nodes are
   * processed again.
   *
   * A selector that was never registered is ignored.
   *
   * @param string $selector the selector to no longer exclude, e.g., ".editor"
   *
   * @return void
   */
  public function removeExcludedSelector($selector)
  {
      if (!isset($this->excludedSelectors[$selector])) {
          return;
      }

      unset($this->excludedSelectors[$selector]);
  }
  /**
   * Removes every element whose "style" attribute contains display:none.
   *
   * DOMDocument only supports XPath 1.0, so lower-case() is not available.
   * The query instead strips the spaces from @style, lower-cases just the
   * letters N, O and E with translate(), and then searches for the text
   * "display:none". The property name itself is not lower-cased here.
   *
   * @param \DOMXPath $xPath
   *
   * @return void
   */
  private function removeInvisibleNodes(DOMXPath $xPath)
  {
      $query = '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]';
      $hiddenNodes = $xPath->query($query);
      if ($hiddenNodes->length === 0) {
          return;
      }

      /** @var \DOMNode $hiddenNode */
      foreach ($hiddenNodes as $hiddenNode) {
          $parent = $hiddenNode->parentNode;
          // A node may have lost its parent because an ancestor was already
          // removed; skip those instead of calling removeChild on nothing.
          if (!$parent || !is_callable(array($parent, 'removeChild'))) {
              continue;
          }

          $parent->removeChild($hiddenNode);
      }
  }
  /**
   * Callback for preg_replace_callback() in normalizeStyleAttributes():
   * returns the whole matched text in lower case.
   *
   * @param string[] $m the regular expression match, with the full match at index 0
   *
   * @return string
   */
  private function normalizeStyleAttributes_callback($m)
  {
      $matchedText = $m[0];

      return strtolower($matchedText);
  }
  /**
   * Lower-cases the property names inside the node's "style" attribute and
   * writes the result back to the node.
   *
   * On the first visit of a node (identified by its node path), the parsed
   * declarations and the node itself are also remembered, so the original
   * inline styles can be re-applied later.
   *
   * Note: the character class [A-z] in the pattern also covers the ASCII
   * punctuation between "Z" and "a", and the pattern matches any such run
   * directly followed by a colon, wherever it appears in the attribute.
   *
   * @param \DOMElement $node
   *
   * @return void
   */
  private function normalizeStyleAttributes(DOMElement $node)
  {
      $originalStyle = $node->getAttribute('style');
      $normalizedStyle = preg_replace_callback(
          '/[A-z\\-]+(?=\\:)/S',
          array($this, 'normalizeStyleAttributes_callback'),
          $originalStyle
      );

      // Save the original HTML styles once, so they are not overwritten.
      $path = $node->getNodePath();
      $isFirstVisit = !isset($this->styleAttributesForNodes[$path]);
      if ($isFirstVisit) {
          $this->styleAttributesForNodes[$path] = $this->parseCssDeclarationsBlock($normalizedStyle);
          $this->visitedNodes[$path] = $node;
      }

      $node->setAttribute('style', $normalizedStyle);
  }
  /**
   * Re-applies the originally inline styles on top of whatever each visited
   * node's "style" attribute holds now.
   *
   * For every remembered node, the current attribute is parsed and merged
   * with the saved declarations; the saved ones are passed as the "new"
   * styles to generateStyleStringFromDeclarationsArrays().
   *
   * @return void
   */
  private function fillStyleAttributesWithMergedStyles()
  {
      foreach ($this->styleAttributesForNodes as $path => $savedDeclarations) {
          $element = $this->visitedNodes[$path];
          $currentDeclarations = $this->parseCssDeclarationsBlock($element->getAttribute('style'));
          $mergedStyle = $this->generateStyleStringFromDeclarationsArrays(
              $currentDeclarations,
              $savedDeclarations
          );
          $element->setAttribute('style', $mergedStyle);
      }
  }
  /**
   * Merges two name/value arrays of CSS declarations and renders them as an inline style string.
   *
   * Declarations from $newStyles override those from $oldStyles, unless the old value is
   * marked as !important and the new value is not. Results are cached by the serialized
   * merged array.
   *
   * @param string[] $oldStyles
   * @param string[] $newStyles
   *
   * @return string declarations in the form "name: value;", separated by single spaces
   */
  private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
  {
      $mergedStyles = array_merge($oldStyles, $newStyles);
      $cacheKey = serialize($mergedStyles);

      if (!isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
          // An old !important value wins against a new value that is not !important.
          foreach ($oldStyles as $name => $oldValue) {
              if (isset($newStyles[$name])
                  && $this->attributeValueIsImportant($oldValue)
                  && !$this->attributeValueIsImportant($newStyles[$name])
              ) {
                  $mergedStyles[$name] = $oldValue;
              }
          }

          $declarations = array();
          foreach ($mergedStyles as $name => $value) {
              $declarations[] = strtolower(trim($name)) . ': ' . trim($value) . ';';
          }

          $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = implode(' ', $declarations);
      }

      return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
  }
  /**
   * Tells whether a declaration value ends with "!important" (case-insensitively,
   * ignoring surrounding whitespace).
   *
   * @param string $attributeValue
   *
   * @return bool
   */
  private function attributeValueIsImportant($attributeValue)
  {
      $trimmedValue = trim($attributeValue);
      // "!important" is exactly ten characters long
      $valueEnding = strtolower(substr($trimmedValue, -10));

      return $valueEnding === '!important';
  }
  /**
   * Adds a style element to $xmlDocument that contains those media queries from $css
   * which have at least one rule whose selector matches an element of the document.
   *
   * Nothing is added if $css is empty.
   *
   * @param \DOMDocument $xmlDocument the document to match against
   * @param \DOMXPath $xPath
   * @param string $css a string of CSS
   *
   * @return void
   */
  private function copyCssWithMediaToStyleNode(DOMDocument $xmlDocument, DOMXPath $xPath, $css)
  {
      if ($css === '') {
          return;
      }

      $cssOfRelevantMediaQueries = '';
      foreach ($this->extractMediaQueriesFromCss($css) as $mediaQuery) {
          foreach ($this->parseCssRules($mediaQuery['css']) as $cssRule) {
              if (!$this->existsMatchForCssSelector($xPath, $cssRule['selector'])) {
                  continue;
              }

              // One matching rule is enough to keep the complete media query.
              $cssOfRelevantMediaQueries .= $mediaQuery['query'];
              break;
          }
      }

      $this->addStyleElementToDocument($xmlDocument, $cssOfRelevantMediaQueries);
  }
  /**
   * Finds all @media blocks in $css, leaving out those with an empty body.
   *
   * @param string $css
   *
   * @return string[][] numeric array of sub-arrays with the keys "css" (the content between the
   *                    outer curly braces) and "query" (the complete @media block)
   */
  private function extractMediaQueriesFromCss($css)
  {
      $matches = array();
      preg_match_all('/@media\\b[^{]*({((?:[^{}]+|(?1))*)})/', $css, $matches, PREG_SET_ORDER);

      $mediaQueries = array();
      foreach ($matches as $match) {
          if ($match[2] === '') {
              continue;
          }

          $mediaQueries[] = array(
              'css' => $match[2],
              'query' => $match[0],
          );
      }

      return $mediaQueries;
  }
  /**
   * Tells whether $cssSelector matches at least one element.
   *
   * A failed XPath query (query() returning false) counts as "no match".
   *
   * @param \DOMXPath $xPath
   * @param string $cssSelector
   *
   * @return bool
   */
  private function existsMatchForCssSelector(DOMXPath $xPath, $cssSelector)
  {
      $xPathExpression = $this->translateCssToXpath($cssSelector);
      $matchingNodes = $xPath->query($xPathExpression);
      if ($matchingNodes === false) {
          return false;
      }

      return $matchingNodes->length !== 0;
  }
  /**
   * Collects the CSS from all style elements and removes these elements from the document.
   *
   * @param \DOMXPath $xPath
   *
   * @return string the contents of all style elements, each one prefixed with two line feeds;
   *                an empty string if the query fails or there are no style elements
   */
  private function getCssFromAllStyleNodes(DOMXPath $xPath)
  {
      $collectedCss = '';

      $styleNodes = $xPath->query('//style');
      if ($styleNodes !== false) {
          /** @var \DOMNode $styleNode */
          foreach ($styleNodes as $styleNode) {
              $collectedCss .= "\n\n" . $styleNode->nodeValue;
              $styleNode->parentNode->removeChild($styleNode);
          }
      }

      return $collectedCss;
  }
  /**
   * Adds a style element (with type="text/css") containing $css to the head of $document.
   *
   * This method is protected to allow overriding.
   *
   * @see https://github.com/jjriv/emogrifier/issues/103
   *
   * @param \DOMDocument $document
   * @param string $css
   *
   * @return void
   */
  protected function addStyleElementToDocument(DOMDocument $document, $css)
  {
      $styleElement = $document->createElement('style');
      if ($css !== '') {
          // The value argument of createElement() treats "&" as the start of an entity reference,
          // which breaks CSS such as url(image.png?a=1&b=2). A text node stores the CSS verbatim.
          $styleElement->appendChild($document->createTextNode($css));
      }
      $styleElement->setAttribute('type', 'text/css');

      $head = $this->getOrCreateHeadElement($document);
      $head->appendChild($styleElement);
  }
  /**
   * Provides the head element of $document.
   *
   * If the document has no head element yet, a new one is created and inserted into
   * the first html element, in front of the first body element.
   *
   * @param \DOMDocument $document
   *
   * @return \DOMNode the head element
   */
  private function getOrCreateHeadElement(DOMDocument $document)
  {
      $existingHead = $document->getElementsByTagName('head')->item(0);
      if ($existingHead !== null) {
          return $existingHead;
      }

      $newHead = $document->createElement('head');
      $htmlElement = $document->getElementsByTagName('html')->item(0);
      $bodyElement = $document->getElementsByTagName('body')->item(0);
      $htmlElement->insertBefore($newHead, $bodyElement);

      return $newHead;
  }
  /**
   * Splits input CSS code into an array where:
   *
   * - key "css" contains the clean CSS code (without comments, @import directives and @media blocks)
   * - key "media" contains all media queries for the allowed media types
   *
   * Example:
   *
   * The CSS code
   *
   * "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
   *
   * will be parsed into the following array:
   *
   * "css" => "h1 { color:red; }"
   * "media" => "@media { h1 {}}"
   *
   * @param string $css
   *
   * @return string[]
   */
  private function splitCssAndMediaQuery($css)
  {
      // The callback appends to a static buffer. Start from an empty buffer so that media
      // queries collected by earlier calls do not end up in this call's result.
      self::$_media = '';

      $cssWithoutComments = preg_replace('/\\/\\*.*\\*\\//sU', '', $css);

      $mediaTypesExpression = '';
      if (!empty($this->allowedMediaTypes)) {
          $mediaTypesExpression = '|' . implode('|', array_keys($this->allowedMediaTypes));
      }

      // The callback returns nothing, so each matched media query is cut out of the CSS
      // while being collected in self::$_media.
      $cssForAllowedMediaTypes = preg_replace_callback(
          '#@media\\s+(?:only\\s)?(?:[\\s{\\(]' . $mediaTypesExpression . ')\\s?[^{]+{.*}\\s*}\\s*#misU',
          array( $this, '_media_concat' ),
          $cssWithoutComments
      );

      // filter the CSS
      $search = array(
          'import directives' => '/^\\s*@import\\s[^;]+;/misU',
          'remaining media enclosures' => '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
      );
      $cleanedCss = preg_replace($search, '', $cssForAllowedMediaTypes);

      return array('css' => $cleanedCss, 'media' => self::$_media);
  }
  /**
   * Callback for preg_replace_callback() as used by splitCssAndMediaQuery():
   * appends the complete matched text to the static buffer self::$_media.
   *
   * Nothing is returned, so the matched text gets replaced with an empty string.
   *
   * @param string[] $matches the regular expression match; element 0 holds the complete matched text
   *
   * @return void
   */
  private function _media_concat( $matches ) {
      self::$_media = self::$_media . $matches[0];
  }
  /**
   * Builds a DOMDocument (UTF-8, formatted output, normalized) from the unified HTML.
   *
   * libxml errors raised while loading the HTML are collected internally and discarded;
   * the previous libxml error handling setting is restored afterwards.
   *
   * @return \DOMDocument
   */
  private function createXmlDocument()
  {
      $unifiedHtml = $this->getUnifiedHtml();

      $document = new DOMDocument;
      $document->encoding = 'UTF-8';
      $document->strictErrorChecking = false;
      $document->formatOutput = true;

      $previousErrorSetting = libxml_use_internal_errors(true);
      $document->loadHTML($unifiedHtml);
      libxml_clear_errors();
      libxml_use_internal_errors($previousErrorSetting);

      $document->normalizeDocument();

      return $document;
  }
  /**
   * Prepares the current HTML for parsing: strips the unprocessable tags, then makes sure
   * there is a document type, then makes sure there is a Content-Type meta tag.
   *
   * @return string the unified HTML
   */
  private function getUnifiedHtml()
  {
      return $this->addContentTypeMetaTag(
          $this->ensureDocumentType(
              $this->removeUnprocessableTags($this->html)
          )
      );
  }
  /**
   * Removes the opening and closing tags of the unprocessable elements from $html
   * (if any unprocessable tags are configured). The content between the tags is kept.
   *
   * @param string $html
   *
   * @return string the reworked HTML with the unprocessable tags removed
   */
  private function removeUnprocessableTags($html)
  {
      if (empty($this->unprocessableHtmlTags)) {
          return $html;
      }

      $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags);

      // The lookahead requires the tag name to end right there. Without it, a configured
      // tag "b" would also strip "<body>" and "<br>".
      return preg_replace(
          '/<\\/?(?:' . $unprocessableHtmlTags . ')(?=[\\s\\/>])[^>]*>/i',
          '',
          $html
      );
  }
  /**
   * Prepends the default document type to $html unless it already contains
   * "<!DOCTYPE" (in any letter case).
   *
   * @param string $html
   *
   * @return string HTML with document type
   */
  private function ensureDocumentType($html)
  {
      if (stripos($html, '<!DOCTYPE') === false) {
          return self::DEFAULT_DOCUMENT_TYPE . $html;
      }

      return $html;
  }
  /**
   * Adds a Content-Type meta tag for the charset, unless $html already contains "Content-Type".
   *
   * The meta tag is inserted right after the first opening head tag. If there is none, a head
   * element containing the meta tag is inserted right after the first opening html tag. If there
   * is none either, the meta tag is prepended to the HTML.
   *
   * @param string $html
   *
   * @return string the HTML with the meta tag added
   */
  private function addContentTypeMetaTag($html)
  {
      if (stripos($html, 'Content-Type') !== false) {
          return $html;
      }

      // We are trying to insert the meta tag to the right spot in the DOM.
      // If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
      //
      // The lookaheads make sure that only the actual tag is matched (not e.g. "<header>"),
      // "[^>]*" also covers attributes spread over several lines, and only the first
      // occurrence gets replaced.
      $numberOfReplacements = 0;
      $reworkedHtml = preg_replace(
          '/<head(?=[\\s\\/>])([^>]*)>/i',
          '<head$1>' . self::CONTENT_TYPE_META_TAG,
          $html,
          1,
          $numberOfReplacements
      );
      if ($numberOfReplacements > 0) {
          return $reworkedHtml;
      }

      $reworkedHtml = preg_replace(
          '/<html(?=[\\s\\/>])([^>]*)>/i',
          '<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
          $html,
          1,
          $numberOfReplacements
      );
      if ($numberOfReplacements > 0) {
          return $reworkedHtml;
      }

      return self::CONTENT_TYPE_META_TAG . $html;
  }
  /**
   * Comparison function that orders CSS rules ascending by the precedence of their selector,
   * so that rules with greater precedence get processed later. Rules with the same precedence
   * are ordered by their "line" element.
   *
   * @param string[] $a CSS rule with the keys "selector" and "line"
   * @param string[] $b CSS rule with the keys "selector" and "line"
   *
   * @return int -1 if $a is to be sorted before $b, 1 otherwise (never 0)
   */
  private function sortBySelectorPrecedence(array $a, array $b)
  {
      $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
      $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

      if ($a['line'] < $b['line']) {
          $orderByLine = -1;
      } else {
          $orderByLine = 1;
      }

      if ($precedenceA < $precedenceB) {
          $orderByPrecedence = -1;
      } else {
          $orderByPrecedence = 1;
      }

      if ($precedenceA === $precedenceB) {
          return $orderByLine;
      }

      return $orderByPrecedence;
  }
  /**
   * Calculates a (rough) precedence value for $selector: each ID counts 100, each class
   * counts 10 and each remaining name counts 1. Results are cached per selector.
   *
   * @param string $selector
   *
   * @return int
   */
  private function getCssSelectorPrecedence($selector)
  {
      $selectorKey = md5($selector);
      if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
          // ids: worth 100, classes: worth 10, elements: worth 1
          $weightsByPrefix = array('\\#' => 100, '\\.' => 10, '' => 1);

          $precedence = 0;
          $remainingSelector = $selector;
          foreach ($weightsByPrefix as $prefixPattern => $weight) {
              if (trim($remainingSelector) === '') {
                  break;
              }

              // Names may contain hyphens. Matching only "\w+" would leave "-bar" of ".foo-bar"
              // behind, and "bar" would then be counted again as an element.
              $numberOfMatches = 0;
              $remainingSelector = preg_replace(
                  '/' . $prefixPattern . '[\\w\\-]+/',
                  '',
                  $remainingSelector,
                  -1,
                  $numberOfMatches
              );
              $precedence += ($weight * $numberOfMatches);
          }

          $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
      }

      return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
  }
  /**
   * Callback for preg_replace_callback() as used by translateCssToXpath().
   *
   * @param string[] $matches the regular expression match; element 0 holds the complete matched text
   *
   * @return string the complete matched text in lowercase
   */
  private function translateCssToXpath_callback( $matches ) {
      $matchedText = $matches[0];

      return strtolower($matchedText);
  }
  /**
   * Maps a CSS selector to an XPath query string.
   *
   * Names that stand on their own (surrounded by whitespace) are lowercased first.
   * Results are cached per lowercased selector.
   *
   * @see http://plasmasturm.org/log/444/
   *
   * @param string $cssSelector a CSS selector
   *
   * @return string the corresponding XPath selector
   */
  private function translateCssToXpath($cssSelector)
  {
      $paddedSelector = ' ' . $cssSelector . ' ';
      $lowercasePaddedSelector = preg_replace_callback(
          '/\\s+\\w+\\s+/',
          array( $this, 'translateCssToXpath_callback' ),
          $paddedSelector
      );
      $trimmedLowercaseSelector = trim($lowercasePaddedSelector);
      $xPathKey = md5($trimmedLowercaseSelector);

      // The XPath cache must be used for the lookup as well as for storing and returning.
      // Storing XPath strings under the selector cache key would never produce a cache hit here,
      // and would put XPath strings into the cache that getCssSelectorPrecedence() reads from.
      if (!isset($this->caches[self::CACHE_KEY_XPATH][$xPathKey])) {
          $roughXpath = '//' . preg_replace(
              array_keys($this->xPathRules),
              $this->xPathRules,
              $trimmedLowercaseSelector
          );
          $xPathWithIdAttributeMatchers = preg_replace_callback(
              self::ID_ATTRIBUTE_MATCHER,
              array($this, 'matchIdAttributes'),
              $roughXpath
          );
          $xPathWithIdAttributeAndClassMatchers = preg_replace_callback(
              self::CLASS_ATTRIBUTE_MATCHER,
              array($this, 'matchClassAttributes'),
              $xPathWithIdAttributeMatchers
          );

          // Advanced selectors are going to require a bit more advanced emogrification.
          // Method callbacks are used here instead of closures.
          $xPathWithIdAttributeAndClassMatchers = preg_replace_callback(
              '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
              array($this, 'translateNthChild'),
              $xPathWithIdAttributeAndClassMatchers
          );
          $finalXpath = preg_replace_callback(
              '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
              array($this, 'translateNthOfType'),
              $xPathWithIdAttributeAndClassMatchers
          );

          $this->caches[self::CACHE_KEY_XPATH][$xPathKey] = $finalXpath;
      }

      return $this->caches[self::CACHE_KEY_XPATH][$xPathKey];
  }
  /**
   * Builds the XPath expression for an ID selector match.
   *
   * @param string[] $match element 1 is the element name (may be empty, which means any element),
   *                        element 2 is the ID
   *
   * @return string
   */
  private function matchIdAttributes(array $match)
  {
      $elementName = $match[1];
      if ($elementName === '') {
          $elementName = '*';
      }

      return $elementName . '[@id="' . $match[2] . '"]';
  }
  /**
   * Builds the XPath expression for a class selector match: one "contains" predicate
   * on the class attribute per class name.
   *
   * @param string[] $match element 1 is the element name (may be empty, which means any element),
   *                        element 2 is the dot-separated list of class names with a leading dot
   *
   * @return string
   */
  private function matchClassAttributes(array $match)
  {
      $xPathExpression = ($match[1] !== '') ? $match[1] : '*';

      $classNames = explode('.', substr($match[2], 1));
      foreach ($classNames as $className) {
          $xPathExpression .= '[contains(concat(" ",@class," "),concat(" ","' . $className . '"," "))]';
      }

      return $xPathExpression;
  }
  /**
   * Builds the XPath expression for an :nth-child() match.
   *
   * @param string[] $match element 1 is the part in front of ":nth-child", the remaining
   *                        elements are passed on to parseNth()
   *
   * @return string
   */
  private function translateNthChild(array $match)
  {
      $parseResult = $this->parseNth($match);

      // Without a multiplier, this is a plain positional match.
      if (!isset($parseResult[self::MULTIPLIER])) {
          return sprintf('*[%u]/self::%s', $parseResult[self::INDEX], $match[1]);
      }

      $multiplier = $parseResult[self::MULTIPLIER];
      if ($multiplier < 0) {
          // Negative multipliers are matched counting from the last position.
          $format = '*[(last() - position()) mod %u = %u]/self::%s';
          $multiplier = abs($multiplier);
      } else {
          $format = '*[position() mod %u = %u]/self::%s';
      }

      return sprintf($format, $multiplier, $parseResult[self::INDEX], $match[1]);
  }
  /**
   * Builds the XPath expression for an :nth-of-type() match.
   *
   * @param string[] $match element 1 is the part in front of ":nth-of-type", the remaining
   *                        elements are passed on to parseNth()
   *
   * @return string
   */
  private function translateNthOfType(array $match)
  {
      $parseResult = $this->parseNth($match);

      // Without a multiplier, this is a plain positional match.
      if (!isset($parseResult[self::MULTIPLIER])) {
          return sprintf('%s[%u]', $match[1], $parseResult[self::INDEX]);
      }

      $multiplier = $parseResult[self::MULTIPLIER];
      if ($multiplier < 0) {
          // Negative multipliers are matched counting from the last position.
          $format = '%s[(last() - position()) mod %u = %u]';
          $multiplier = abs($multiplier);
      } else {
          $format = '%s[position() mod %u = %u]';
      }

      return sprintf($format, $match[1], $multiplier, $parseResult[self::INDEX]);
  }
  /**
   * Parses the argument of an :nth-child() or :nth-of-type() selector.
   *
   * @param string[] $match element 2 is the complete argument ("odd", "even", "3", "2n", "2n+1", "-n+3" ...),
   *                        element 3 (if set) is the trailing offset part of an "an+b" argument
   *
   * @return int[] array with the key self::INDEX and, for arguments with a non-zero multiplier,
   *               also the key self::MULTIPLIER
   */
  private function parseNth(array $match)
  {
      $argument = $match[2];
      $lowercaseArgument = strtolower($argument);

      if (in_array($lowercaseArgument, array('even', 'odd'), true)) {
          // we have "even" or "odd"
          $index = $lowercaseArgument === 'even' ? 0 : 1;
          return array(self::MULTIPLIER => 2, self::INDEX => $index);
      }

      if (stripos($argument, 'n') === false) {
          // there is no multiplier, just a plain index
          $index = (int) str_replace(' ', '', $argument);
          return array(self::INDEX => $index);
      }

      if (isset($match[3])) {
          // The offset is the end of the argument. Cut it off there instead of replacing it,
          // as the same characters may also occur in front of the "n" (e.g. "-1n-1").
          $multipleTerm = substr($argument, 0, -strlen($match[3]));
          $index = (int) str_replace(' ', '', $match[3]);
      } else {
          $multipleTerm = $argument;
          $index = 0;
      }

      $multiplierTerm = str_ireplace('n', '', $multipleTerm);
      if ($multiplierTerm === '' || $multiplierTerm === '+') {
          $multiplier = 1;
      } elseif ($multiplierTerm === '-') {
          // "-n" means a multiplier of -1 (the integer cast of a lone sign would be 0)
          $multiplier = -1;
      } else {
          $multiplier = (int) $multiplierTerm;
      }

      if ($multiplier === 0) {
          // A zero multiplier leaves only the index. Continuing would produce "mod 0"
          // and the loop below would never terminate for a negative index.
          return array(self::INDEX => $index);
      }

      while ($index < 0) {
          $index += abs($multiplier);
      }

      return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
  }
  /**
   * Splits a CSS declarations block into its property names and values.
   *
   * Example:
   *
   * The declarations block
   *
   * "color: #000; font-weight: bold;"
   *
   * results in this array:
   *
   * "color" => "#000"
   * "font-weight" => "bold"
   *
   * Property names are lowercased. Parts that do not look like "name: value" are skipped.
   * Results are cached per declarations block.
   *
   * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
   *
   * @return string[]
   *         the CSS declarations with the property names as array keys and the property values as array values
   */
  private function parseCssDeclarationsBlock($cssDeclarationsBlock)
  {
      if (!isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
          $properties = array();

          // A semicolon directly followed by "base64" or "charset" does not end a declaration.
          foreach (preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
              $matches = array();
              $isDeclaration = preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/', trim($declaration), $matches);
              if ($isDeclaration) {
                  $properties[strtolower($matches[1])] = $matches[2];
              }
          }

          $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;
      }

      return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
  }
  /**
   * Finds the nodes that are not to be emogrified, i.e., all nodes that match
   * one of the excluded selectors.
   *
   * @param \DOMXPath $xPath
   *
   * @return \DOMElement[]
   */
  private function getNodesToExclude(DOMXPath $xPath)
  {
      $excludedNodes = array();
      foreach (array_keys($this->excludedSelectors) as $selectorToExclude) {
          $matchingNodes = $xPath->query($this->translateCssToXpath($selectorToExclude));
          if ($matchingNodes === false) {
              // query() returns false instead of a node list if the expression cannot be
              // evaluated; there is nothing to iterate over then.
              continue;
          }

          foreach ($matchingNodes as $node) {
              $excludedNodes[] = $node;
          }
      }

      return $excludedNodes;
  }
  /**
   * Error handler that turns a warning into an \InvalidArgumentException naming the
   * offending selector, provided that the error context contains a CSS rule with a selector.
   *
   * All other errors are left to the normal error handling.
   *
   * @param int $type
   * @param string $message
   * @param string $file
   * @param int $line
   * @param array $context the variables in the scope where the error occurred; optional because
   *                       PHP 8 no longer passes a fifth argument to error handlers
   *
   * @return bool always false
   *
   * @throws \InvalidArgumentException
   */
  public function handleXpathError($type, $message, $file, $line, array $context = array())
  {
      if ($type === E_WARNING && isset($context['cssRule']['selector'])) {
          throw new InvalidArgumentException(
              sprintf(
                  '%s in selector >> %s << in %s on line %s',
                  $message,
                  $context['cssRule']['selector'],
                  $file,
                  $line
              )
          );
      }

      // the normal error handling continues when handler return false
      return false;
  }
  1388. }