EmailLexer.php 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. <?php
  2. namespace Egulias\EmailValidator;
  3. use Doctrine\Common\Lexer\AbstractLexer;
  /**
   * Lexer that splits an e-mail address into typed tokens.
   *
   * Extends Doctrine's AbstractLexer and supplies the pieces it asks for:
   * the regular-expression fragments used to cut the input, the PCRE
   * modifiers, and the mapping from a matched string to a token type.
   * On top of that it remembers the token that was current before the last
   * moveNext() call and records whether any token was classified as INVALID.
   */
  class EmailLexer extends AbstractLexer
  {
      // Token type identifiers.
      //
      // C_DEL, C_NUL, S_AT, S_BACKSLASH, S_DOT and S_DQUOTE equal the ASCII
      // code of the character they name. Every other S_* value is only an
      // identifier and is NOT an ASCII code.
      // NOTE(review): S_OPENPARENTHESIS is 49 (ASCII '1'), not 40 ('('). The
      // value is kept unchanged because external code may compare against it;
      // confirm before "correcting" it.
      const C_DEL = 127;
      const C_NUL = 0;
      const S_AT = 64;
      const S_BACKSLASH = 92;
      const S_DOT = 46;
      const S_DQUOTE = 34;
      const S_OPENPARENTHESIS = 49;
      const S_CLOSEPARENTHESIS = 261;
      const S_OPENBRACKET = 262;
      const S_CLOSEBRACKET = 263;
      const S_HYPHEN = 264;
      const S_COLON = 265;
      const S_DOUBLECOLON = 266;
      const S_SP = 267;
      const S_HTAB = 268;
      const S_CR = 269;
      const S_LF = 270;
      const S_IPV6TAG = 271;
      const S_LOWERTHAN = 272;
      const S_GREATERTHAN = 273;
      const S_COMMA = 274;
      const S_SEMICOLON = 275;
      const S_OPENQBRACKET = 276;
      const S_CLOSEQBRACKET = 277;
      const S_SLASH = 278;
      const S_EMPTY = null;

      // Types for tokens that are not single special characters.
      const GENERIC = 300;
      const CRLF = 301;
      const INVALID = 302;

      // Not referenced inside this class; presumably consumed by callers.
      const ASCII_INVALID_FROM = 127;
      const ASCII_INVALID_TO = 199;

      /**
       * Maps an exact token string to its token type.
       *
       * Mostly US-ASCII visible characters not valid for atext
       * (@link http://tools.ietf.org/html/rfc5322#section-3.2.3), plus
       * whitespace, line breaks and the "IPv6" tag.
       *
       * @var array
       */
      protected $charValue = [
          '(' => self::S_OPENPARENTHESIS,
          ')' => self::S_CLOSEPARENTHESIS,
          '<' => self::S_LOWERTHAN,
          '>' => self::S_GREATERTHAN,
          '[' => self::S_OPENBRACKET,
          ']' => self::S_CLOSEBRACKET,
          ':' => self::S_COLON,
          ';' => self::S_SEMICOLON,
          '@' => self::S_AT,
          '\\' => self::S_BACKSLASH,
          '/' => self::S_SLASH,
          ',' => self::S_COMMA,
          '.' => self::S_DOT,
          '"' => self::S_DQUOTE,
          '-' => self::S_HYPHEN,
          '::' => self::S_DOUBLECOLON,
          ' ' => self::S_SP,
          "\t" => self::S_HTAB,
          "\r" => self::S_CR,
          "\n" => self::S_LF,
          "\r\n" => self::CRLF,
          'IPv6' => self::S_IPV6TAG,
          '{' => self::S_OPENQBRACKET,
          '}' => self::S_CLOSEQBRACKET,
          // S_EMPTY is null, so isset() in isValid() reports false for this
          // key: the empty string is never resolved through this table.
          '' => self::S_EMPTY,
          // Double-quoted on purpose: "\0" is the NUL byte, whereas a
          // single-quoted '\0' is the two characters backslash and zero.
          "\0" => self::C_NUL,
      ];

      /**
       * Set to true once getType() has classified a token as INVALID.
       *
       * @var bool
       */
      protected $hasInvalidTokens = false;

      /**
       * The token that was current before the most recent moveNext() call.
       *
       * @var array
       *
       * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
       */
      protected $previous = [];

      /**
       * The last matched/seen token.
       *
       * @var array
       *
       * @psalm-var array{value:string, type:null|int, position:int}
       */
      public $token;

      /**
       * The next token in the input.
       *
       * @var array|null
       */
      public $lookahead;

      /**
       * Placeholder used for $token and $previous when there is no real token.
       *
       * @psalm-var array{value:'', type:null, position:0}
       */
      private static $nullToken = [
          'value' => '',
          'type' => null,
          'position' => 0,
      ];

      /**
       * Starts with placeholder current/previous tokens and no lookahead.
       */
      public function __construct()
      {
          $this->previous = $this->token = self::$nullToken;
          $this->lookahead = null;
      }

      /**
       * Clears the invalid-token flag, resets the parent lexer and restores
       * the placeholder current/previous tokens.
       *
       * @return void
       */
      public function reset()
      {
          $this->hasInvalidTokens = false;
          parent::reset();
          $this->previous = $this->token = self::$nullToken;
      }

      /**
       * Tells whether any token has been classified as INVALID since the
       * last reset().
       *
       * @return bool
       */
      public function hasInvalidTokens()
      {
          return $this->hasInvalidTokens;
      }

      /**
       * Asserts that a token of the given type can be reached from here.
       *
       * The search runs skipUntil() on a clone, so this lexer itself is not
       * advanced. If the clone has no lookahead afterwards the type is
       * reported as missing.
       *
       * @param int $type Token type to look for (one of the class constants).
       * @throws \UnexpectedValueException When the type is not found.
       * @return bool Always true; absence is signalled by the exception.
       *
       * @psalm-suppress InvalidScalarArgument
       */
      public function find($type)
      {
          $search = clone $this;
          $search->skipUntil($type);

          if (!$search->lookahead) {
              throw new \UnexpectedValueException($type . ' not found');
          }

          return true;
      }

      /**
       * Returns the token that was current before the last moveNext() call.
       *
       * @return array
       */
      public function getPrevious()
      {
          return $this->previous;
      }

      /**
       * Remembers the current token as "previous", then delegates to the
       * parent's moveNext().
       *
       * @return bool Whatever parent::moveNext() returned.
       */
      public function moveNext()
      {
          $this->previous = $this->token;
          $hasNext = parent::moveNext();
          // Keep $token an array even when the parent leaves it empty.
          $this->token = $this->token ?: self::$nullToken;

          return $hasNext;
      }

      /**
       * Lexical catchable patterns.
       *
       * Order matters only insofar as the parent combines these fragments;
       * the list is returned exactly in this order.
       *
       * @return string[]
       */
      protected function getCatchablePatterns()
      {
          return [
              '[a-zA-Z_]+[46]?', //ASCII and domain literal
              '[^\x00-\x7F]', //UTF-8
              '[0-9]+',
              '\r\n',
              '::',
              '\s+?',
              '.',
          ];
      }

      /**
       * Lexical non-catchable patterns.
       *
       * @return string[]
       */
      protected function getNonCatchablePatterns()
      {
          return ['[\xA0-\xff]+'];
      }

      /**
       * Retrieve token type.
       *
       * Resolution order: the NUL byte, then an exact match in $charValue,
       * then control characters (flagged as INVALID and recorded in
       * $hasInvalidTokens), and finally GENERIC for everything else.
       *
       * @param string $value Matched token text; taken by reference to match
       *                      the signature declared here, but not modified.
       * @return int One of the token type constants.
       */
      protected function getType(&$value)
      {
          if ($this->isNullType($value)) {
              return self::C_NUL;
          }

          if ($this->isValid($value)) {
              return $this->charValue[$value];
          }

          if ($this->isUTF8Invalid($value)) {
              $this->hasInvalidTokens = true;

              return self::INVALID;
          }

          return self::GENERIC;
      }

      /**
       * Tells whether $value has a non-null entry in $charValue.
       *
       * @param string $value
       *
       * @return bool
       */
      protected function isValid($value)
      {
          return isset($this->charValue[$value]);
      }

      /**
       * Tells whether $value is exactly the NUL byte.
       *
       * @param string $value
       * @return bool
       */
      protected function isNullType($value)
      {
          return $value === "\0";
      }

      /**
       * Tells whether $value contains at least one control character
       * (Unicode category Cc).
       *
       * preg_match() yields 1 on a match, 0 on no match and false on error;
       * only a match counts as "invalid" here.
       *
       * @param string $value
       * @return bool
       */
      protected function isUTF8Invalid($value)
      {
          return preg_match('/\p{Cc}+/u', $value) === 1;
      }

      /**
       * PCRE modifiers handed to the parent lexer: case-insensitive, UTF-8.
       *
       * @return string
       */
      protected function getModifiers()
      {
          return 'iu';
      }
  }