HostRecord.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri;
  12. use BackedEnum;
  13. use Exception;
  14. use JsonSerializable;
  15. use League\Uri\Contracts\UriComponentInterface;
  16. use League\Uri\Exceptions\SyntaxError;
  17. use League\Uri\Idna\Converter as IdnConverter;
  18. use Stringable;
  19. use Throwable;
  20. use function array_key_first;
  21. use function count;
  22. use function explode;
  23. use function filter_var;
  24. use function get_object_vars;
  25. use function in_array;
  26. use function inet_pton;
  27. use function is_object;
  28. use function preg_match;
  29. use function rawurldecode;
  30. use function strpos;
  31. use function strtolower;
  32. use function substr;
  33. use const FILTER_FLAG_IPV4;
  34. use const FILTER_FLAG_IPV6;
  35. use const FILTER_VALIDATE_IP;
  36. /**
  37. * @phpstan-type HostRecordSerializedShape array{0: array{host: ?string}, 1: array{}}
  38. */
  39. final class HostRecord implements JsonSerializable
  40. {
  41. /**
  42. * Maximum number of host cached.
  43. *
  44. * @var int
  45. */
  46. private const MAXIMUM_HOST_CACHED = 100;
  47. private const REGEXP_NON_ASCII_PATTERN = '/[^\x20-\x7f]/';
  48. /**
  49. * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
  50. *
  51. * invalid characters in host regular expression
  52. */
  53. private const REGEXP_INVALID_HOST_CHARS = '/
  54. [:\/?#\[\]@ ] # gen-delims characters as well as the space character
  55. /ix';
  56. /**
  57. * General registered name regular expression.
  58. *
  59. * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
  60. * @see https://regex101.com/r/fptU8V/1
  61. */
  62. private const REGEXP_REGISTERED_NAME = '/
  63. (?(DEFINE)
  64. (?<unreserved>[a-z0-9_~\-]) # . is missing as it is used to separate labels
  65. (?<sub_delims>[!$&\'()*+,;=])
  66. (?<encoded>%[A-F0-9]{2})
  67. (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
  68. )
  69. ^(?:(?&reg_name)\.)*(?&reg_name)\.?$
  70. /ix';
  71. /**
  72. * Domain name regular expression.
  73. *
  74. * Everything but the domain name length is validated
  75. *
  76. * @see https://tools.ietf.org/html/rfc1034#section-3.5
  77. * @see https://tools.ietf.org/html/rfc1123#section-2.1
  78. * @see https://regex101.com/r/71j6rt/1
  79. */
  80. private const REGEXP_DOMAIN_NAME = '/
  81. (?(DEFINE)
  82. (?<let_dig> [a-z0-9]) # alpha digit
  83. (?<let_dig_hyp> [a-z0-9-]) # alpha digit and hyphen
  84. (?<ldh_str> (?&let_dig_hyp){0,61}(?&let_dig)) # domain label end
  85. (?<label> (?&let_dig)((?&ldh_str))?) # domain label
  86. (?<domain> (?&label)(\.(?&label)){0,126}\.?) # domain name
  87. )
  88. ^(?&domain)$
  89. /ix';
  90. /**
  91. * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
  92. *
  93. * IPvFuture regular expression
  94. */
  95. private const REGEXP_IP_FUTURE = '/^
  96. v(?<version>[A-F\d])+\.
  97. (?:
  98. (?<unreserved>[a-z\d_~\-\.])|
  99. (?<sub_delims>[!$&\'()*+,;=:]) # also include the : character
  100. )+
  101. $/ix';
  102. private const REGEXP_GEN_DELIMS = '/[:\/?#\[\]@ ]/';
  103. private const ADDRESS_BLOCK = "\xfe\x80";
  104. private ?bool $isDomainName = null;
  105. private ?bool $hasZoneIdentifier = null;
  106. private bool $asciiIsLoaded = false;
  107. private ?string $hostAsAscii = null;
  108. private bool $unicodeIsLoaded = false;
  109. private ?string $hostAsUnicode = null;
  110. private bool $isIpVersionLoaded = false;
  111. private ?string $ipVersion = null;
  112. private bool $isIpValueLoaded = false;
  113. private ?string $ipValue = null;
  114. private function __construct(
  115. public readonly ?string $value,
  116. public readonly HostType $type,
  117. public readonly HostFormat $format
  118. ) {
  119. }
  120. public function hasZoneIdentifier(): bool
  121. {
  122. return $this->hasZoneIdentifier ??= HostType::Ipv6 === $this->type && str_contains((string) $this->value, '%');
  123. }
  124. public function toAscii(): ?string
  125. {
  126. if (!$this->asciiIsLoaded) {
  127. $this->asciiIsLoaded = true;
  128. $this->hostAsAscii = (function (): ?string {
  129. if (HostType::RegisteredName !== $this->type || null === $this->value) {
  130. return $this->value;
  131. }
  132. $formattedHost = rawurldecode($this->value);
  133. if ($formattedHost === $this->value) {
  134. return $this->isDomainType() ? IdnConverter::toAscii($this->value)->domain() : strtolower($formattedHost);
  135. }
  136. return Encoder::normalizeHost($this->value);
  137. })();
  138. }
  139. return $this->hostAsAscii;
  140. }
  141. public function toUnicode(): ?string
  142. {
  143. if (!$this->unicodeIsLoaded) {
  144. $this->unicodeIsLoaded = true;
  145. $this->hostAsUnicode = $this->isDomainType() && null !== $this->value ? IdnConverter::toUnicode($this->value)->domain() : $this->value;
  146. }
  147. return $this->hostAsUnicode;
  148. }
  149. public function isDomainType(): bool
  150. {
  151. return $this->isDomainName ??= match (true) {
  152. HostType::RegisteredName !== $this->type, '' === $this->value => false,
  153. null === $this->value => true,
  154. default => is_object($result = IdnConverter::toAscii($this->value))
  155. && !$result->hasErrors()
  156. && self::isValidDomain($result->domain()),
  157. };
  158. }
  159. public function ipVersion(): ?string
  160. {
  161. if (!$this->isIpVersionLoaded) {
  162. $this->isIpVersionLoaded = true;
  163. $this->ipVersion = match (true) {
  164. HostType::Ipv4 === $this->type => '4',
  165. HostType::Ipv6 === $this->type => '6',
  166. 1 === preg_match(self::REGEXP_IP_FUTURE, substr((string) $this->value, 1, -1), $matches) => $matches['version'],
  167. default => null,
  168. };
  169. }
  170. return $this->ipVersion;
  171. }
  172. public function ipValue(): ?string
  173. {
  174. if (!$this->isIpValueLoaded) {
  175. $this->isIpValueLoaded = true;
  176. $this->ipValue = (function (): ?string {
  177. if (HostType::RegisteredName === $this->type) {
  178. return null;
  179. }
  180. if (HostType::Ipv4 === $this->type) {
  181. return $this->value;
  182. }
  183. $ip = substr((string) $this->value, 1, -1);
  184. if (HostType::Ipv6 !== $this->type) {
  185. return substr($ip, (int) strpos($ip, '.') + 1);
  186. }
  187. $pos = strpos($ip, '%');
  188. if (false === $pos) {
  189. return $ip;
  190. }
  191. return substr($ip, 0, $pos).'%'.rawurldecode(substr($ip, $pos + 3));
  192. })();
  193. }
  194. return $this->ipValue;
  195. }
  196. public static function isValid(BackedEnum|Stringable|string|null $host): bool
  197. {
  198. try {
  199. HostRecord::from($host);
  200. return true;
  201. } catch (Throwable) {
  202. return false;
  203. }
  204. }
  205. public static function isIpv4(Stringable|string|null $host): bool
  206. {
  207. try {
  208. return HostType::Ipv4 === HostRecord::from($host)->type;
  209. } catch (Throwable) {
  210. return false;
  211. }
  212. }
  213. public static function isIpv6(Stringable|string|null $host): bool
  214. {
  215. try {
  216. return HostType::Ipv6 === HostRecord::from($host)->type;
  217. } catch (Throwable) {
  218. return false;
  219. }
  220. }
  221. public static function isIpvFuture(Stringable|string|null $host): bool
  222. {
  223. try {
  224. return HostType::IpvFuture === HostRecord::from($host)->type;
  225. } catch (Throwable) {
  226. return false;
  227. }
  228. }
  229. public static function isIp(Stringable|string|null $host): bool
  230. {
  231. return !self::isRegisteredName($host);
  232. }
  233. public static function isRegisteredName(Stringable|string|null $host): bool
  234. {
  235. try {
  236. return HostType::RegisteredName === HostRecord::from($host)->type;
  237. } catch (Throwable) {
  238. return false;
  239. }
  240. }
  241. public static function isDomain(Stringable|string|null $host): bool
  242. {
  243. try {
  244. return HostRecord::from($host)->isDomainType();
  245. } catch (Throwable) {
  246. return false;
  247. }
  248. }
  249. /**
  250. * @throws SyntaxError
  251. */
  252. public static function from(BackedEnum|Stringable|string|null $host): self
  253. {
  254. if ($host instanceof BackedEnum) {
  255. $host = $host->value;
  256. }
  257. if ($host instanceof UriComponentInterface) {
  258. $host = $host->value();
  259. }
  260. if (null === $host) {
  261. return new self(
  262. value: null,
  263. type: HostType::RegisteredName,
  264. format: HostFormat::Ascii,
  265. );
  266. }
  267. $host = (string) $host;
  268. if ('' === $host) {
  269. return new self(
  270. value: '',
  271. type: HostType::RegisteredName,
  272. format: HostFormat::Ascii,
  273. );
  274. }
  275. static $inMemoryCache = [];
  276. if (isset($inMemoryCache[$host])) {
  277. return $inMemoryCache[$host];
  278. }
  279. if (self::MAXIMUM_HOST_CACHED < count($inMemoryCache)) {
  280. unset($inMemoryCache[array_key_first($inMemoryCache)]);
  281. }
  282. if ($host === filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
  283. return $inMemoryCache[$host] = new self(
  284. value: $host,
  285. type: HostType::Ipv4,
  286. format: HostFormat::Ascii,
  287. );
  288. }
  289. if (str_starts_with($host, '[')) {
  290. str_ends_with($host, ']') || throw new SyntaxError('The host '.$host.' is not a valid IPv6 host.');
  291. $ipHost = substr($host, 1, -1);
  292. if (1 === preg_match(self::REGEXP_IP_FUTURE, $ipHost, $matches)) {
  293. return !in_array($matches['version'], ['4', '6'], true) ? ($inMemoryCache[$host] = new self(
  294. value: $host,
  295. type: HostType::IpvFuture,
  296. format: HostFormat::Ascii,
  297. )) : throw new SyntaxError('The host '.$host.' is not a valid IPvFuture host.');
  298. }
  299. if (self::isValidIpv6Hostname($ipHost)) {
  300. return $inMemoryCache[$host] = new self(
  301. value: $host,
  302. type: HostType::Ipv6,
  303. format: HostFormat::Ascii,
  304. );
  305. }
  306. throw new SyntaxError('The host '.$host.' is not a valid IPv6 host.');
  307. }
  308. $domainName = rawurldecode($host);
  309. $format = HostFormat::Unicode;
  310. if (1 !== preg_match(self::REGEXP_NON_ASCII_PATTERN, $domainName)) {
  311. $domainName = strtolower($domainName);
  312. $format = HostFormat::Ascii;
  313. }
  314. if (1 === preg_match(self::REGEXP_REGISTERED_NAME, $domainName)) {
  315. return $inMemoryCache[$host] = new self(
  316. value: $host,
  317. type: HostType::RegisteredName,
  318. format: $format,
  319. );
  320. }
  321. (HostFormat::Ascii !== $format && 1 !== preg_match(self::REGEXP_INVALID_HOST_CHARS, $domainName)) || throw new SyntaxError('`'.$host.'` is an invalid domain name : the host contains invalid characters.');
  322. IdnConverter::toAsciiOrFail($domainName);
  323. return $inMemoryCache[$host] = new self(
  324. value: $host,
  325. type: HostType::RegisteredName,
  326. format: $format,
  327. );
  328. }
  329. /**
  330. * Tells whether the registered name is a valid domain name according to RFC1123.
  331. *
  332. * @see http://man7.org/linux/man-pages/man7/hostname.7.html
  333. * @see https://tools.ietf.org/html/rfc1123#section-2.1
  334. */
  335. private static function isValidDomain(string $hostname): bool
  336. {
  337. $domainMaxLength = str_ends_with($hostname, '.') ? 254 : 253;
  338. return !isset($hostname[$domainMaxLength])
  339. && 1 === preg_match(self::REGEXP_DOMAIN_NAME, $hostname);
  340. }
  341. /**
  342. * Validates an Ipv6 as Host.
  343. *
  344. * @see http://tools.ietf.org/html/rfc6874#section-2
  345. * @see http://tools.ietf.org/html/rfc6874#section-4
  346. */
  347. private static function isValidIpv6Hostname(string $host): bool
  348. {
  349. [$ipv6, $scope] = explode('%', $host, 2) + [1 => null];
  350. if (null === $scope) {
  351. return (bool) filter_var($ipv6, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6);
  352. }
  353. $scope = rawurldecode('%'.$scope);
  354. return 1 !== preg_match(self::REGEXP_NON_ASCII_PATTERN, $scope)
  355. && 1 !== preg_match(self::REGEXP_GEN_DELIMS, $scope)
  356. && false !== filter_var($ipv6, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)
  357. && str_starts_with((string)inet_pton((string)$ipv6), self::ADDRESS_BLOCK);
  358. }
  359. public function jsonSerialize(): ?string
  360. {
  361. return $this->value;
  362. }
  363. /**
  364. * @return HostRecordSerializedShape
  365. */
  366. public function __serialize(): array
  367. {
  368. return [['host' => $this->value], []];
  369. }
  370. /**
  371. * @param HostRecordSerializedShape $data
  372. *
  373. * @throws Exception|SyntaxError
  374. */
  375. public function __unserialize(array $data): void
  376. {
  377. [$properties] = $data;
  378. $record = self::from($properties['host'] ?? throw new Exception('The `host` property is missing from the serialized object.'));
  379. //if the Host computed value are already cache this avoid recomputing them
  380. foreach (get_object_vars($record) as $prop => $value) {
  381. /* @phpstan-ignore-next-line */
  382. $this->{$prop} = $value;
  383. }
  384. }
  385. }