autoseo.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. <?php
  2. namespace Grav\Plugin;
  3. use Grav\Common\Plugin;
  4. /**
  5. * Class AutoSeoPlugin
  6. * @package Grav\Plugin
  7. */
  8. class AutoSeoPlugin extends Plugin
  9. {
  10. /**
  11. * Initialize plugin and subsequent events
  12. * @return array
  13. */
  14. public static function getSubscribedEvents() {
  15. return [
  16. 'onPluginsInitialized' => ['onPluginsInitialized', 0],
  17. ];
  18. }
  19. /**
  20. * Register events with Grav
  21. * @return void
  22. */
  23. public function onPluginsInitialized()
  24. {
  25. // deactivate plugin in admin
  26. if ( !$this->isAdmin()
  27. and $this->config->get('plugins.autoseo.enabled')
  28. ) {
  29. $this->enable([
  30. 'onPageInitialized' => ['onPageInitialized', 0]
  31. ]);
  32. }
  33. }
  34. /**
  35. * Add content after page content was read into the system.
  36. *
  37. */
  38. public function onPageInitialized()
  39. {
  40. $page = $this->grav['page'];
  41. $config = $this->mergeConfig($page);
  42. if ( !$config['enabled']) return;
  43. $updateDescription = $config['description.enabled'];
  44. $updatekeywords = $config['keywords.enabled'];
  45. $updateFacebook = $config['facebook.enabled'];
  46. $updateTwitter = $config['twitter.enabled'];
  47. if (!$updateDescription && !$updatekeywords && !$updateFacebook && !$updateTwitter) return;
  48. $meta = $page->metadata();
  49. $metaSite = $this->config->get('site')['metadata'];
  50. // limit the content size to reduce the performance impact
  51. $content = mb_substr(strip_tags($page->content()),0, 1000 );
  52. $cleanContent = $this->cleanText ($content, $config); // here because we don't want to make this call several times
  53. $cleanTitle = $this->cleanString ($page->title()); // here because we don't want to make this call several times
  54. if ($updateDescription) $meta = $this->getMetaDescription ($meta, $metaSite, $config, $cleanContent);
  55. if ($updatekeywords) $meta = $this->getMetaKeywords ($meta, $metaSite, $config);
  56. if ($updateFacebook) $meta = $this->getMetaOpenGraph ($meta, $metaSite, $config, $cleanContent, $cleanTitle);
  57. if ($updateTwitter) $meta = $this->getMetaTwitter ($meta, $metaSite, $config, $cleanContent, $cleanTitle);
  58. $page->metadata ($meta);
  59. }
  60. // PROCESS for the description metadata
  61. private function getMetaDescription ($meta, $metaSite, $config, $cleanContent) {
  62. if (array_key_exists('description', $metaSite)) { $metaSiteContent = htmlspecialchars($metaSite['description'], ENT_QUOTES, 'UTF-8'); } else { $metaSiteContent = ''; }
  63. // if the page has a meta that is different from the default one, we return its value
  64. if (!empty($meta['description']['content']) && $meta['description']['content'] != $metaSiteContent) return $meta;
  65. $metaPageContent = $cleanContent;
  66. if (empty($metaPageContent)) $metaPageContent = $metaSiteContent;
  67. $meta['description'] = [ 'name' => 'description', 'content' => $metaPageContent];
  68. return $meta;
  69. }
  70. // PROCESS for the keywords metadata
  71. private function getMetaKeywords ($meta, $metaSite, $config) {
  72. $page = $this->grav['page'];
  73. if (array_key_exists('keywords', $metaSite)) { $metaSiteContent = htmlspecialchars($metaSite['keywords'], ENT_QUOTES, 'UTF-8'); } else { $metaSiteContent = ''; }
  74. // if the page has a meta that is different from the default one, we return its value
  75. if (!empty($meta['keywords']['content']) && $meta['keywords']['content'] != $metaSiteContent) return $meta;
  76. $length = $config['keywords.length'];
  77. if ($length <=1 ) $length=20;
  78. // we create a keywords list using the page tags and categories
  79. if (array_key_exists( 'category', $page->taxonomy() )) { $categories = $page->taxonomy()['category']; } else { $categories = []; }
  80. if (array_key_exists( 'tag', $page->taxonomy() )) { $tags = $page->taxonomy()['tag']; } else { $tags = []; }
  81. $content = array_merge ($categories, $tags) ;
  82. $content = array_unique ($content);
  83. $content = array_slice($content, 0, $length);
  84. $content = join(',',$content);
  85. $content = $this->cleanString($content);
  86. $metaPageContent = $content;
  87. if (empty($metaPageContent)) $metaPageContent = $metaSiteContent;
  88. $meta['keywords'] = [ 'name' => 'keywords', 'content' => $metaPageContent];
  89. return $meta;
  90. }
  91. // PROCESS for the OpenGraph metadata
  92. private function getMetaOpenGraph ($meta, $metaSite, $config, $cleanContent, $cleanTitle) {
  93. $page = $this->grav['page'];
  94. $meta['og:sitename']['name'] = 'og:sitename';
  95. $meta['og:sitename']['property'] = 'og:sitename';
  96. $meta['og:sitename']['content'] = $this->config->get('site.title');
  97. $meta['og:title']['name'] = 'og:title';
  98. $meta['og:title']['property'] = 'og:title';
  99. $meta['og:title']['content'] = $cleanTitle;
  100. $meta['og:type']['name'] = 'og:type';
  101. $meta['og:type']['property'] = 'og:type';
  102. $meta['og:type']['content'] = 'article';
  103. $meta['og:url']['name'] = 'og:url';
  104. $meta['og:url']['property'] = 'og:url';
  105. $meta['og:url']['content'] = $this->grav['uri']->url(true);
  106. $meta['og:description']['name'] = 'og:description';
  107. $meta['og:description']['property'] = 'og:description';
  108. if (empty($cleanContent)) $cleanContent = $meta['description']['content'];
  109. else {
  110. if (array_key_exists('description', $metaSite)) { $metaSiteContent = htmlspecialchars($metaSite['description'], ENT_QUOTES, 'UTF-8'); } else { $metaSiteContent = ''; }
  111. if ($meta['description']['content'] != $metaSiteContent) $cleanContent = $meta['description']['content'];
  112. }
  113. $meta['og:description']['content'] = $cleanContent;
  114. if (!empty($page->value('media.image'))) {
  115. $images = $page->media()->images();
  116. $image = array_shift($images);
  117. $meta['og:image']['name'] = 'og:image';
  118. $meta['og:image']['property'] = 'og:image';
  119. $meta['og:image']['content'] = $this->grav['uri']->base() . $image->url();
  120. }
  121. return $meta;
  122. }
  123. // PROCESS for the twitter metadata
  124. private function getMetaTwitter ($meta, $metaSite, $config, $cleanContent, $cleanTitle) {
  125. $page = $this->grav['page'];
  126. if (!isset($meta['twitter:card'])) {
  127. $meta['twitter:card']['name'] = 'twitter:card';
  128. $meta['twitter:card']['property'] = 'twitter:card';
  129. $meta['twitter:card']['content'] = 'summary_large_image';
  130. }
  131. if (!isset($meta['twitter:title'])) {
  132. $meta['twitter:title']['name'] = 'twitter:title';
  133. $meta['twitter:title']['property'] = 'twitter:title';
  134. $meta['twitter:title']['content'] = $cleanTitle;
  135. }
  136. if (!isset($meta['twitter:description'])) {
  137. $meta['twitter:description']['name'] = 'twitter:description';
  138. $meta['twitter:description']['property'] = 'twitter:description';
  139. if (empty($cleanContent))
  140. $cleanContent = $meta['description']['content'];
  141. else {
  142. if (array_key_exists('description', $metaSite)) { $metaSiteContent = htmlspecialchars($metaSite['description'], ENT_QUOTES, 'UTF-8'); } else { $metaSiteContent = ''; }
  143. if ($meta['description']['content'] != $metaSiteContent) $cleanContent = $meta['description']['content'];
  144. }
  145. $meta['twitter:description']['content'] = mb_substr($cleanContent,0,140);
  146. }
  147. if (!isset($meta['twitter:image'])) {
  148. if (!empty($page->value('media.image'))) {
  149. $images = $page->media()->images();
  150. $image = array_shift($images);
  151. $meta['twitter:image']['name'] = 'twitter:image';
  152. $meta['twitter:image']['property'] = 'twitter:image';
  153. $meta['twitter:image']['content'] = $this->grav['uri']->base() . $image->url();
  154. }
  155. }
  156. return $meta;
  157. }
  158. private function cleanMarkdown($text){
  159. $rules = array (
  160. '/(#+)(.*)/' => '\2', // headers
  161. '/(&lt;|<)!--\n((.*|\n)*)\n--(&gt;|\>)/' => '', // comments
  162. '/(\*|-|_){3}/' => '', // hr
  163. '/!\[([^\[]+)\]\(([^\)]+)\)/' => '', // images
  164. '/\[([^\[]+)\]\(([^\)]+)\)/' => '\1', // links
  165. '/(\*\*|__)(.*?)\1/' => '\2', // bold
  166. '/(\*|_)(.*?)\1/' => '\2', // emphasis
  167. '/\~\~(.*?)\~\~/' => '\1', // del
  168. '/\:\"(.*?)\"\:/' => '\1', // quote
  169. '/```(.*)\n((.*|\n)+)\n```/' => '\2', // fence code
  170. '/`(.*?)`/' => '\1', // inline code
  171. '/(\*|\+|-)(.*)/' => '\2', // ul lists
  172. '/\n[0-9]+\.(.*)/' => '\2', // ol lists
  173. '/(&gt;|\>)+(.*)/' => '\2', // blockquotes
  174. );
  175. foreach ($rules as $regex => $replacement) {
  176. if (is_callable ( $replacement)) {
  177. $text = preg_replace_callback ($regex, $replacement, $text);
  178. } else {
  179. $text = preg_replace ($regex, $replacement, $text);
  180. }
  181. }
  182. $text=str_replace(".\n", '.', $text);
  183. $text=str_replace("\n", '.', $text);
  184. $text=str_replace('"', '', $text);
  185. return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
  186. }
  187. private function cleanText ($content, $config) {
  188. try {
  189. $length = $config['description.length'];
  190. if ($length <=1 ) $length=20;
  191. $content = $this->cleanMarkdown($content);
  192. // truncate the content to the number of words set in config
  193. $contentSmall = mb_ereg_replace('((\w+\W*){'.$length.'}(\w+))(.*)', '\\1', $content); // beware if content is less than length words, it will be nulled
  194. if ($contentSmall == '' ) $contentSmall = $content;
  195. return $contentSmall;
  196. } catch (\Throwable $e) {
  197. $this->grav['log']->addError($e->getMessage());
  198. return $content;
  199. }
  200. }
  201. private function cleanString ($content) {
  202. // remove some annoying characters
  203. $content = str_replace("&nbsp;",' ',$content);
  204. $content = str_replace('"',"'",$content);
  205. $content = trim($content);
  206. // Removes special chars.
  207. // $content = \Grav\Plugin\Admin\Utils::slug($content);
  208. return $content;
  209. }
  210. }