Content.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. <?php
  2. namespace PHPHtmlParser;
  3. /**
  4. * Class Content
  5. *
  6. * @package PHPHtmlParser
  7. */
  8. class Content
  9. {
  10. /**
  11. * The content string.
  12. *
  13. * @var string
  14. */
  15. protected $content;
  16. /**
  17. * The size of the content.
  18. *
  19. * @var integer
  20. */
  21. protected $size;
  22. /**
  23. * The current position we are in the content.
  24. *
  25. * @var integer
  26. */
  27. protected $pos;
  28. /**
  29. * The following 4 strings are tags that are important to us.
  30. *
  31. * @var string
  32. */
  33. protected $blank = " \t\r\n";
  34. protected $equal = ' =/>';
  35. protected $slash = " />\r\n\t";
  36. protected $attr = ' >';
  37. /**
  38. * Content constructor.
  39. *
  40. * @param string $content
  41. */
  42. public function __construct(string $content = '')
  43. {
  44. $this->content = $content;
  45. $this->size = strlen($content);
  46. $this->pos = 0;
  47. }
  48. /**
  49. * Returns the current position of the content.
  50. *
  51. * @return int
  52. */
  53. public function getPosition(): int
  54. {
  55. return $this->pos;
  56. }
  57. /**
  58. * Gets the current character we are at.
  59. *
  60. * @param int $char
  61. * @return string
  62. */
  63. public function char(int $char = null): string
  64. {
  65. $pos = $this->pos;
  66. if ( ! is_null($char)) {
  67. $pos = $char;
  68. }
  69. if ( ! isset($this->content[$pos])) {
  70. return '';
  71. }
  72. return $this->content[$pos];
  73. }
  74. /**
  75. * Moves the current position forward.
  76. *
  77. * @param int $count
  78. * @return Content
  79. * @chainable
  80. */
  81. public function fastForward(int $count): Content
  82. {
  83. $this->pos += $count;
  84. return $this;
  85. }
  86. /**
  87. * Moves the current position backward.
  88. *
  89. * @param int $count
  90. * @return Content
  91. * @chainable
  92. */
  93. public function rewind(int $count): Content
  94. {
  95. $this->pos -= $count;
  96. if ($this->pos < 0) {
  97. $this->pos = 0;
  98. }
  99. return $this;
  100. }
  101. /**
  102. * Copy the content until we find the given string.
  103. *
  104. * @param string $string
  105. * @param bool $char
  106. * @param bool $escape
  107. * @return string
  108. */
  109. public function copyUntil(string $string, bool $char = false, bool $escape = false): string
  110. {
  111. if ($this->pos >= $this->size) {
  112. // nothing left
  113. return '';
  114. }
  115. if ($escape) {
  116. $position = $this->pos;
  117. $found = false;
  118. while ( ! $found) {
  119. $position = strpos($this->content, $string, $position);
  120. if ($position === false) {
  121. // reached the end
  122. $found = true;
  123. continue;
  124. }
  125. if ($this->char($position - 1) == '\\') {
  126. // this character is escaped
  127. ++$position;
  128. continue;
  129. }
  130. $found = true;
  131. }
  132. } elseif ($char) {
  133. $position = strcspn($this->content, $string, $this->pos);
  134. $position += $this->pos;
  135. } else {
  136. $position = strpos($this->content, $string, $this->pos);
  137. }
  138. if ($position === false) {
  139. // could not find character, just return the remaining of the content
  140. $return = substr($this->content, $this->pos, $this->size - $this->pos);
  141. $this->pos = $this->size;
  142. return $return;
  143. }
  144. if ($position == $this->pos) {
  145. // we are at the right place
  146. return '';
  147. }
  148. $return = substr($this->content, $this->pos, $position - $this->pos);
  149. // set the new position
  150. $this->pos = $position;
  151. return $return;
  152. }
  153. /**
  154. * Copies the content until the string is found and return it
  155. * unless the 'unless' is found in the substring.
  156. *
  157. * @param string $string
  158. * @param string $unless
  159. * @return string
  160. */
  161. public function copyUntilUnless(string $string, string $unless)
  162. {
  163. $lastPos = $this->pos;
  164. $this->fastForward(1);
  165. $foundString = $this->copyUntil($string, true, true);
  166. $position = strcspn($foundString, $unless);
  167. if ($position == strlen($foundString)) {
  168. return $string.$foundString;
  169. }
  170. // rewind changes and return nothing
  171. $this->pos = $lastPos;
  172. return '';
  173. }
  174. /**
  175. * Copies the content until it reaches the token string.,
  176. *
  177. * @param string $token
  178. * @param bool $char
  179. * @param bool $escape
  180. * @return string
  181. * @uses $this->copyUntil()
  182. */
  183. public function copyByToken(string $token, bool $char = false, bool $escape = false)
  184. {
  185. $string = $this->$token;
  186. return $this->copyUntil($string, $char, $escape);
  187. }
  188. /**
  189. * Skip a given set of characters.
  190. *
  191. * @param string $string
  192. * @param bool $copy
  193. * @return Content|string
  194. */
  195. public function skip(string $string, bool $copy = false)
  196. {
  197. $len = strspn($this->content, $string, $this->pos);
  198. // make it chainable if they don't want a copy
  199. $return = $this;
  200. if ($copy) {
  201. $return = substr($this->content, $this->pos, $len);
  202. }
  203. // update the position
  204. $this->pos += $len;
  205. return $return;
  206. }
  207. /**
  208. * Skip a given token of pre-defined characters.
  209. *
  210. * @param string $token
  211. * @param bool $copy
  212. * @return Content|string
  213. * @uses $this->skip()
  214. */
  215. public function skipByToken(string $token, bool $copy = false)
  216. {
  217. $string = $this->$token;
  218. return $this->skip($string, $copy);
  219. }
  220. }