Converter.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri\Idna;
  12. use BackedEnum;
  13. use League\Uri\Exceptions\ConversionFailed;
  14. use League\Uri\Exceptions\SyntaxError;
  15. use League\Uri\FeatureDetection;
  16. use Stringable;
  17. use function idn_to_ascii;
  18. use function idn_to_utf8;
  19. use function rawurldecode;
  20. use function strtolower;
  21. use const INTL_IDNA_VARIANT_UTS46;
  22. /**
  23. * @see https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uidna_8h.html
  24. */
  25. final class Converter
  26. {
  27. private const REGEXP_IDNA_PATTERN = '/[^\x20-\x7f]/';
  28. private const MAX_DOMAIN_LENGTH = 253;
  29. private const MAX_LABEL_LENGTH = 63;
  30. /**
  31. * General registered name regular expression.
  32. *
  33. * @see https://tools.ietf.org/html/rfc3986#section-3.2.2
  34. * @see https://regex101.com/r/fptU8V/1
  35. */
  36. private const REGEXP_REGISTERED_NAME = '/
  37. (?(DEFINE)
  38. (?<unreserved>[a-z0-9_~\-]) # . is missing as it is used to separate labels
  39. (?<sub_delims>[!$&\'()*+,;=])
  40. (?<encoded>%[A-F0-9]{2})
  41. (?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded))*)
  42. )
  43. ^(?:(?&reg_name)\.)*(?&reg_name)\.?$
  44. /ix';
  45. /**
  46. * Converts the input to its IDNA ASCII form or throw on failure.
  47. *
  48. * @see Converter::toAscii()
  49. *
  50. * @throws SyntaxError if the string cannot be converted to UNICODE using IDN UTS46 algorithm
  51. * @throws ConversionFailed if the conversion returns error
  52. */
  53. public static function toAsciiOrFail(BackedEnum|Stringable|string $domain, Option|int|null $options = null): string
  54. {
  55. $result = self::toAscii($domain, $options);
  56. return match (true) {
  57. $result->hasErrors() => throw ConversionFailed::dueToIdnError($domain, $result),
  58. default => $result->domain(),
  59. };
  60. }
  61. /**
  62. * Converts the input to its IDNA ASCII form.
  63. *
  64. * This method returns the string converted to IDN ASCII form
  65. *
  66. * @throws SyntaxError if the string cannot be converted to ASCII using IDN UTS46 algorithm
  67. */
  68. public static function toAscii(BackedEnum|Stringable|string $domain, Option|int|null $options = null): Result
  69. {
  70. if ($domain instanceof BackedEnum) {
  71. $domain = $domain->value;
  72. }
  73. $domain = rawurldecode((string) $domain);
  74. if (1 === preg_match(self::REGEXP_IDNA_PATTERN, $domain)) {
  75. FeatureDetection::supportsIdn();
  76. $flags = match (true) {
  77. null === $options => Option::forIDNA2008Ascii(),
  78. $options instanceof Option => $options,
  79. default => Option::new($options),
  80. };
  81. idn_to_ascii($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);
  82. if ([] === $idnaInfo) {
  83. return Result::fromIntl([
  84. 'result' => strtolower($domain),
  85. 'isTransitionalDifferent' => false,
  86. 'errors' => self::validateDomainAndLabelLength($domain),
  87. ]);
  88. }
  89. return Result::fromIntl($idnaInfo);
  90. }
  91. $error = Error::NONE->value;
  92. if (1 !== preg_match(self::REGEXP_REGISTERED_NAME, $domain)) {
  93. $error |= Error::DISALLOWED->value;
  94. }
  95. return Result::fromIntl([
  96. 'result' => strtolower($domain),
  97. 'isTransitionalDifferent' => false,
  98. 'errors' => self::validateDomainAndLabelLength($domain) | $error,
  99. ]);
  100. }
  101. /**
  102. * Converts the input to its IDNA UNICODE form or throw on failure.
  103. *
  104. * @see Converter::toUnicode()
  105. *
  106. * @throws ConversionFailed if the conversion returns error
  107. */
  108. public static function toUnicodeOrFail(BackedEnum|Stringable|string $domain, Option|int|null $options = null): string
  109. {
  110. $result = self::toUnicode($domain, $options);
  111. return match (true) {
  112. $result->hasErrors() => throw ConversionFailed::dueToIdnError($domain, $result),
  113. default => $result->domain(),
  114. };
  115. }
  116. /**
  117. * Converts the input to its IDNA UNICODE form.
  118. *
  119. * This method returns the string converted to IDN UNICODE form
  120. *
  121. * @throws SyntaxError if the string cannot be converted to UNICODE using IDN UTS46 algorithm
  122. */
  123. public static function toUnicode(BackedEnum|Stringable|string $domain, Option|int|null $options = null): Result
  124. {
  125. if ($domain instanceof BackedEnum) {
  126. $domain = $domain->value;
  127. }
  128. $domain = rawurldecode((string) $domain);
  129. if (false === stripos($domain, 'xn--')) {
  130. return Result::fromIntl(['result' => strtolower($domain), 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
  131. }
  132. FeatureDetection::supportsIdn();
  133. $flags = match (true) {
  134. null === $options => Option::forIDNA2008Unicode(),
  135. $options instanceof Option => $options,
  136. default => Option::new($options),
  137. };
  138. idn_to_utf8($domain, $flags->toBytes(), INTL_IDNA_VARIANT_UTS46, $idnaInfo);
  139. if ([] === $idnaInfo) {
  140. return Result::fromIntl(['result' => strtolower($domain), 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]);
  141. }
  142. return Result::fromIntl($idnaInfo);
  143. }
  144. /**
  145. * Tells whether the submitted host is a valid IDN regardless of its format.
  146. *
  147. * Returns false if the host is invalid or if its conversion yields the same result
  148. */
  149. public static function isIdn(BackedEnum|Stringable|string|null $domain): bool
  150. {
  151. if ($domain instanceof BackedEnum) {
  152. $domain = $domain->value;
  153. }
  154. $domain = strtolower(rawurldecode((string) $domain));
  155. $result = match (1) {
  156. preg_match(self::REGEXP_IDNA_PATTERN, $domain) => self::toAscii($domain),
  157. default => self::toUnicode($domain),
  158. };
  159. return match (true) {
  160. $result->hasErrors() => false,
  161. default => $result->domain() !== $domain,
  162. };
  163. }
  164. /**
  165. * Adapted from https://github.com/TRowbotham/idna.
  166. *
  167. * @see https://github.com/TRowbotham/idna/blob/master/src/Idna.php#L236
  168. */
  169. private static function validateDomainAndLabelLength(string $domain): int
  170. {
  171. $error = Error::NONE->value;
  172. $labels = explode('.', $domain);
  173. $maxDomainSize = self::MAX_DOMAIN_LENGTH;
  174. $length = count($labels);
  175. // If the last label is empty, and it is not the first label, then it is the root label.
  176. // Increase the max size by 1, making it 254, to account for the root label's "."
  177. // delimiter. This also means we don't need to check the last label's length for being too
  178. // long.
  179. if ($length > 1 && '' === $labels[$length - 1]) {
  180. ++$maxDomainSize;
  181. array_pop($labels);
  182. }
  183. if (strlen($domain) > $maxDomainSize) {
  184. $error |= Error::DOMAIN_NAME_TOO_LONG->value;
  185. }
  186. foreach ($labels as $label) {
  187. if (strlen($label) > self::MAX_LABEL_LENGTH) {
  188. $error |= Error::LABEL_TOO_LONG->value;
  189. break;
  190. }
  191. }
  192. return $error;
  193. }
  194. }