MailFormatHelper.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php
  2. namespace Drupal\Core\Mail;
  3. use Drupal\Component\Utility\Html;
  4. use Drupal\Component\Utility\Unicode;
  5. use Drupal\Component\Utility\Xss;
  6. use Drupal\Core\Site\Settings;
  /**
   * Defines a class containing utility methods for formatting mail messages.
   *
   * The public entry points are wrapMail(), which soft-wraps plain text, and
   * htmlToText(), which converts a supported subset of HTML to plain text.
   */
  class MailFormatHelper {

    /**
     * Internal array of urls replaced with tokens.
     *
     * @var array
     */
    protected static $urls = [];

    /**
     * Quoted regex expression based on base path.
     *
     * @var string
     */
    protected static $regexp;

    /**
     * Array of tags supported.
     *
     * @var array
     */
    protected static $supportedTags = [];

    /**
     * Performs format=flowed soft wrapping for mail (RFC 3676).
     *
     * We use delsp=yes wrapping, but only break non-spaced languages when
     * absolutely necessary to avoid compatibility issues.
     *
     * We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
     *
     * @param string $text
     *   The plain text to process.
     * @param string $indent
     *   (optional) A string to indent the text with. Only '>' characters are
     *   repeated on subsequent wrapped lines. Others are replaced by spaces.
     *
     * @return string
     *   The content of the email as a string with formatting applied.
     */
    public static function wrapMail($text, $indent = '') {
      // Convert CRLF into LF.
      $text = str_replace("\r", '', $text);

      // Soft-wrapping is only allowed when the cleaned indentation contains no
      // spaces, i.e. the text is unindented or purely '>'-quoted.
      $clean_indent = static::htmlToTextClean($indent);
      $wrap_options = [
        'soft' => strpos($clean_indent, ' ') === FALSE,
        'length' => strlen($indent),
      ];

      if (strpos($text, "\n") !== FALSE) {
        // Remove trailing spaces to make existing breaks hard, but leave the
        // signature marker untouched (RFC 3676, Section 4.3). Directly after a
        // line-leading "--" at least two spaces are required for a match, so
        // the single trailing space of the marker "-- " survives.
        $text = preg_replace('/(?(?<!^--) +\n|  +\n)/m', "\n", $text);

        // Wrap each line at the needed width.
        $lines = explode("\n", $text);
        foreach ($lines as $key => &$line) {
          static::wrapMailLine($line, $key, $wrap_options);
        }
        // Drop the reference so later writes cannot touch the last element.
        unset($line);
        $text = implode("\n", $lines);
      }
      else {
        // Wrap this single line.
        static::wrapMailLine($text, 0, $wrap_options);
      }

      // Empty lines with nothing but spaces.
      $text = preg_replace('/^ +\n/m', "\n", $text);
      // Space-stuff special lines.
      $text = preg_replace('/^(>| |From)/m', ' $1', $text);
      // Apply indentation. We only include non-'>' indentation on the first
      // line: every line gets the cleaned indent, then the first one is swapped
      // for the original indent string.
      $indented = preg_replace('/^/m', $clean_indent, $text);
      return $indent . substr($indented, strlen($indent));
    }

    /**
     * Transforms an HTML string into plain text, preserving its structure.
     *
     * The output will be suitable for use as 'format=flowed; delsp=yes' text
     * (RFC 3676) and can be passed directly to MailManagerInterface::mail() for
     * sending.
     *
     * We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
     *
     * This function provides suitable alternatives for the following tags:
     * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
     * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
     *
     * @param string $string
     *   The string to be transformed.
     * @param array $allowed_tags
     *   (optional) If supplied, a list of tags that will be transformed. If
     *   omitted, all supported tags are transformed.
     *
     * @return string
     *   The transformed string.
     */
    public static function htmlToText($string, $allowed_tags = NULL) {
      // Cache list of supported tags.
      if (empty(static::$supportedTags)) {
        static::$supportedTags = ['a', 'em', 'i', 'strong', 'b', 'br', 'p',
          'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3',
          'h4', 'h5', 'h6', 'hr',
        ];
      }

      // Make sure only supported tags are kept.
      $allowed_tags = isset($allowed_tags) ? array_intersect(static::$supportedTags, $allowed_tags) : static::$supportedTags;

      // Make sure tags, entities and attributes are well-formed and properly
      // nested.
      $string = Html::normalize(Xss::filter($string, $allowed_tags));

      // Apply inline styles.
      $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
      $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);

      // Replace inline <a> tags with the text of link and a footnote.
      // 'See <a href="https://www.drupal.org">the Drupal site</a>' becomes
      // 'See the Drupal site [1]' with the URL included as a footnote.
      static::htmlToMailUrls(NULL, TRUE);
      $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
      // An array callable is used because the 'static::method' string form is
      // deprecated as of PHP 8.2.
      $string = preg_replace_callback($pattern, [static::class, 'htmlToMailUrls'], $string);
      $urls = static::htmlToMailUrls();
      $footnotes = '';
      if (count($urls)) {
        $footnotes .= "\n";
        for ($i = 0, $max = count($urls); $i < $max; $i++) {
          $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
        }
      }

      // Split tags from text.
      $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
      // Note: PHP ensures the array consists of alternating delimiters and
      // literals and begins and ends with a literal (inserting $null as
      // required).
      // Odd/even counter (tag or no tag).
      $tag = FALSE;
      // Whether text chunks are currently converted to upper case (h1/h2).
      $uppercase = FALSE;
      $output = '';
      // All current indentation string chunks.
      $indent = [];
      // Array of counters for opened lists.
      $lists = [];
      // Line ending appended after each chunk; looked up on first use only.
      $line_endings = NULL;

      foreach ($split as $value) {
        // Holds a string ready to be formatted and output.
        $chunk = NULL;

        // Process HTML tags (but don't output any literally).
        if ($tag) {
          list($tagname) = explode(' ', strtolower($value), 2);
          switch ($tagname) {
            // List counters.
            case 'ul':
              array_unshift($lists, '*');
              break;

            case 'ol':
              array_unshift($lists, 1);
              break;

            case '/ul':
            case '/ol':
              array_shift($lists);
              // Ensure blank new-line.
              $chunk = '';
              break;

            // Quotation/list markers, non-fancy headers.
            case 'blockquote':
              // Format=flowed indentation cannot be mixed with lists.
              $indent[] = count($lists) ? ' "' : '>';
              break;

            case 'li':
              $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
              break;

            case 'dd':
              $indent[] = ' ';
              break;

            case 'h3':
              $indent[] = '.... ';
              break;

            case 'h4':
              $indent[] = '.. ';
              break;

            case '/blockquote':
              if (count($lists)) {
                // Append closing quote for inline quotes (immediately).
                $output = rtrim($output, "> \n") . "\"\n";
                // Ensure blank new-line.
                $chunk = '';
              }
              // Intentional fall-through to the processing for '/li' and '/dd'.
            case '/li':
            case '/dd':
              array_pop($indent);
              break;

            case '/h3':
            case '/h4':
              array_pop($indent);
              // Intentional fall-through to the processing for '/h5' and '/h6'.
            case '/h5':
            case '/h6':
              // Ensure blank new-line.
              $chunk = '';
              break;

            // Fancy headers.
            case 'h1':
              $indent[] = '======== ';
              $uppercase = TRUE;
              break;

            case 'h2':
              $indent[] = '-------- ';
              $uppercase = TRUE;
              break;

            case '/h1':
            case '/h2':
              $uppercase = FALSE;
              // Pad the line with dashes.
              $output = static::htmlToTextPad($output, ($tagname == '/h1') ? '=' : '-', ' ');
              array_pop($indent);
              // Ensure blank new-line.
              $chunk = '';
              break;

            // Horizontal rulers.
            case 'hr':
              // Insert immediately.
              $output .= static::wrapMail('', implode('', $indent)) . "\n";
              $output = static::htmlToTextPad($output, '-');
              break;

            // Paragraphs and definition lists.
            case '/p':
            case '/dl':
              // Ensure blank new-line.
              $chunk = '';
              break;
          }
        }
        // Process blocks of text.
        else {
          // Convert inline HTML text to plain text; not removing line-breaks or
          // white-space, since that breaks newlines when sanitizing plain-text.
          $value = trim(Html::decodeEntities($value));
          if (Unicode::strlen($value)) {
            $chunk = $value;
          }
        }

        // See if there is something waiting to be output.
        if (isset($chunk)) {
          // Apply any necessary case conversion.
          if ($uppercase) {
            $chunk = Unicode::strtoupper($chunk);
          }
          // The setting does not change while looping, so read it only once.
          if (!isset($line_endings)) {
            $line_endings = Settings::get('mail_line_endings', PHP_EOL);
          }
          // Format it and apply the current indentation.
          $output .= static::wrapMail($chunk, implode('', $indent)) . $line_endings;
          // Remove non-quotation markers from indentation.
          $indent = array_map([static::class, 'htmlToTextClean'], $indent);
        }

        $tag = !$tag;
      }

      return $output . $footnotes;
    }

    /**
     * Wraps words on a single line.
     *
     * Called for every line by
     * \Drupal\Core\Mail\MailFormatHelper::wrapMail().
     *
     * Note that we are skipping MIME content header lines, because attached
     * files, especially applications, could have long MIME types or long
     * filenames which result in line length longer than the 77 characters limit
     * and wrapping that line will break the email format. For instance, the
     * attached file hello_drupal.docx will produce the following Content-Type:
     * @code
     * Content-Type:
     * application/vnd.openxmlformats-officedocument.wordprocessingml.document;
     * name="hello_drupal.docx"
     * @endcode
     *
     * @param string $line
     *   The line to wrap; modified in place.
     * @param int $key
     *   The index of the line within the text. Not used by this method.
     * @param array $values
     *   Wrapping options with the keys:
     *   - soft: TRUE to insert soft breaks (" \n"), FALSE for hard breaks
     *     ("\n").
     *   - length: The length of the indentation, subtracted from the maximum
     *     line width.
     */
    protected static function wrapMailLine(&$line, $key, $values) {
      $break = $values['soft'] ? " \n" : "\n";

      // Do not break MIME headers which could be longer than 77 characters.
      $line_is_mime_header = FALSE;
      $mime_headers = [
        'Content-Type',
        'Content-Transfer-Encoding',
        'Content-Disposition',
        'Content-Description',
      ];
      foreach ($mime_headers as $header) {
        if (strpos($line, $header . ': ') === 0) {
          $line_is_mime_header = TRUE;
          break;
        }
      }

      if (!$line_is_mime_header) {
        // Use soft-breaks only for purely quoted or unindented text.
        $line = wordwrap($line, 77 - $values['length'], $break);
      }
      // Break really long words at the maximum width allowed. This also applies
      // to MIME header lines.
      $line = wordwrap($line, 996 - $values['length'], $break, TRUE);
    }

    /**
     * Keeps track of URLs and replaces them with placeholder tokens.
     *
     * Callback for preg_replace_callback() within
     * \Drupal\Core\Mail\MailFormatHelper::htmlToText().
     *
     * @param array|null $match
     *   (optional) A match from preg_replace_callback() whose index 2 holds the
     *   URL and index 3 the link label. If omitted, nothing is recorded.
     * @param bool $reset
     *   (optional) TRUE to empty the internal URL list.
     *
     * @return string|array
     *   The label followed by its footnote number, e.g. 'label [1]', when a
     *   match was recorded; otherwise the list of URLs collected so far.
     */
    protected static function htmlToMailUrls($match = NULL, $reset = FALSE) {
      // @todo Use request context instead.
      global $base_url, $base_path;

      if ($reset) {
        // Reset internal URL list.
        static::$urls = [];
        return static::$urls;
      }

      if (empty(static::$regexp)) {
        static::$regexp = '@^' . preg_quote($base_path, '@') . '@';
      }

      if ($match) {
        list(, , $url, $label) = $match;
        // Ensure all URLs are absolute. A URL counts as absolute when '://'
        // occurs after its first character.
        $is_absolute = (bool) strpos($url, '://');
        static::$urls[] = $is_absolute ? $url : preg_replace(static::$regexp, $base_url . '/', $url);
        return $label . ' [' . count(static::$urls) . ']';
      }

      return static::$urls;
    }

    /**
     * Replaces non-quotation markers from a piece of indentation with spaces.
     *
     * Callback for array_map() within
     * \Drupal\Core\Mail\MailFormatHelper::htmlToText().
     *
     * @param string $indent
     *   The indentation string to clean.
     *
     * @return string
     *   The indentation with every character other than '>' replaced by a
     *   space.
     */
    protected static function htmlToTextClean($indent) {
      return preg_replace('/[^>]/', ' ', $indent);
    }

    /**
     * Pads the last line with the given character.
     *
     * @param string $text
     *   The text to pad.
     * @param string $pad
     *   The character to pad the end of the string with.
     * @param string $prefix
     *   (optional) Prefix to add to the string.
     *
     * @return string
     *   The padded string.
     *
     * @see \Drupal\Core\Mail\MailFormatHelper::htmlToText()
     */
    protected static function htmlToTextPad($text, $pad, $prefix = '') {
      // Remove last line break. htmlToText() appends the configurable
      // 'mail_line_endings' value, so a CRLF has to be removed (and restored
      // below) as a unit; otherwise a stray "\r" would stay in front of the
      // padding and be counted as part of the line.
      if (substr($text, -2) === "\r\n") {
        $line_break = "\r\n";
        $text = substr($text, 0, -2);
      }
      else {
        $line_break = "\n";
        $text = substr($text, 0, -1);
      }

      // Calculate needed padding space and add it.
      if (($p = strrpos($text, "\n")) === FALSE) {
        $p = -1;
      }
      $n = max(0, 79 - (strlen($text) - $p) - strlen($prefix));

      // Add prefix and padding, and restore linebreak.
      return $text . $prefix . str_repeat($pad, $n) . $line_break;
    }

  }