UriString.php 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri;
  12. use BackedEnum;
  13. use Deprecated;
  14. use League\Uri\Exceptions\SyntaxError;
  15. use League\Uri\Idna\Converter as IdnaConverter;
  16. use Stringable;
  17. use Throwable;
  18. use function array_map;
  19. use function array_merge;
  20. use function array_pop;
  21. use function array_reduce;
  22. use function defined;
  23. use function explode;
  24. use function filter_var;
  25. use function function_exists;
  26. use function implode;
  27. use function preg_match;
  28. use function sprintf;
  29. use function str_replace;
  30. use function strpos;
  31. use function strtolower;
  32. use function substr;
  33. use const FILTER_FLAG_IPV4;
  34. use const FILTER_VALIDATE_IP;
  35. /**
  36. * A class to parse a URI string according to RFC3986.
  37. *
  38. * @link https://tools.ietf.org/html/rfc3986
  39. * @package League\Uri
  40. * @author Ignace Nyamagana Butera <nyamsprod@gmail.com>
  41. * @since 6.0.0
  42. *
  43. * @phpstan-type AuthorityMap array{user: ?string, pass: ?string, host: ?string, port: ?int}
  44. * @phpstan-type ComponentMap array{scheme: ?string, user: ?string, pass: ?string, host: ?string, port: ?int, path: string, query: ?string, fragment: ?string}
  45. * @phpstan-type InputComponentMap array{scheme? : ?string, user? : ?string, pass? : ?string, host? : ?string, port? : ?int, path? : ?string, query? : ?string, fragment? : ?string}
  46. */
  47. final class UriString
  48. {
  /**
   * Baseline component map.
   *
   * Every component is undefined (`null`) except the path which
   * defaults to the empty string.
   *
   * @var ComponentMap
   */
  private const URI_COMPONENTS = [
      'scheme' => null,
      'user' => null,
      'pass' => null,
      'host' => null,
      'port' => null,
      'path' => '',
      'query' => null,
      'fragment' => null,
  ];

  /**
   * Trivial URI strings mapped to the only components they define.
   *
   * These entries are merged over URI_COMPONENTS so no parsing is required.
   *
   * @var array<string, array<string>>
   */
  private const URI_SHORTCUTS = [
      '' => ['path' => ''],
      '#' => ['fragment' => ''],
      '?' => ['query' => ''],
      '?#' => ['query' => '', 'fragment' => ''],
      '/' => ['path' => '/'],
      '//' => ['host' => ''],
      '///' => ['host' => '', 'path' => '/'],
  ];

  /**
   * Matches a string exclusively made of characters allowed in a RFC3986 URI.
   *
   * NOTE(review): `%` belongs to the character class, therefore a lone `%`
   * matches too and the `%[0-9A-Fa-f]{2}` alternative never changes the
   * outcome - confirm this is intended.
   *
   * @var string
   */
  private const REGEXP_VALID_URI_RFC3986_CHARS = '/^(?:[A-Za-z0-9\-._~:\/?#[\]@!$&\'()*+,;=%]|%[0-9A-Fa-f]{2})*$/';

  /**
   * Matches any character rejected by the parser: ASCII control
   * characters, DEL and whitespace.
   *
   * @var string
   */
  private const REGEXP_INVALID_URI_RFC3987_CHARS = '/[\x00-\x1f\x7f\s]/';

  /**
   * RFC3986 regular expression used to split a URI into its main parts.
   *
   * Written with the extended (`x`) flag: whitespace and `#` comments
   * inside the pattern are ignored by the regular expression engine.
   *
   * @link https://tools.ietf.org/html/rfc3986#appendix-B
   * @var string
   */
  private const REGEXP_URI_PARTS = ',^
      (?<scheme>(?<scontent>[^:/?\#]+):)? # URI scheme component
      (?<authority>//(?<acontent>[^/?\#]*))? # URI authority part
      (?<path>[^?\#]*) # URI path component
      (?<query>\?(?<qcontent>[^\#]*))? # URI query component
      (?<fragment>\#(?<fcontent>.*))? # URI fragment component
  ,x';

  /**
   * Matches an empty string or a syntactically valid URI scheme (case-insensitive).
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.1
   * @var string
   */
  private const REGEXP_URI_SCHEME = '/^([a-z][a-z\d+.-]*)?$/i';

  /**
   * Detects a path whose first segment contains a colon and which
   * contains a `/` after that colon.
   *
   * Such a path is rejected when the URI has neither scheme nor authority.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.3
   * @var string
   */
  private const REGEXP_INVALID_PATH = ',^(([^/]*):)(.*)?/,';

  /**
   * Splits the host (bracketed or not) from the optional port.
   *
   * @var string
   */
  private const REGEXP_HOST_PORT = ',^(?<host>\[.*\]|[^:]*)(:(?<port>.*))?$,';

  /**
   * Lookup table of the dot segments.
   *
   * @var array<string,int>
   */
  private const DOT_SEGMENTS = [
      '.' => 1,
      '..' => 1,
  ];
  /**
   * Generates the IRI string representation (RFC3987) of a URI.
   *
   * The URI is parsed with UriString::parse(); the host goes through
   * IdnaConverter::toUnicode() and the path, user, pass, query and fragment
   * go through their matching Encoder::decode* method. Every `%20` sequence
   * still present afterwards is replaced by a space character before the
   * string is rebuilt with UriString::build().
   *
   * @link https://tools.ietf.org/html/rfc3986#section-5.3
   * @link https://tools.ietf.org/html/rfc3986#section-7.5
   */
  public static function toIriString(BackedEnum|Stringable|string $uri): string
  {
      $components = self::parse($uri);

      // The port is an integer: keep it apart so that only nullable
      // strings go through the textual transformations below.
      $port = isset($components['port']) ? (int) $components['port'] : null;
      unset($components['port']);

      if (null !== $components['host']) {
          $components['host'] = IdnaConverter::toUnicode($components['host'])->domain();
      }

      $components['path'] = Encoder::decodePath($components['path']);
      $components['user'] = Encoder::decodeNecessary($components['user']);
      $components['pass'] = Encoder::decodeNecessary($components['pass']);
      $components['query'] = Encoder::decodeQuery($components['query']);
      $components['fragment'] = Encoder::decodeFragment($components['fragment']);

      $restoreSpaces = static fn (?string $value): ?string => null === $value
          ? null
          : str_replace('%20', ' ', $value);

      $iriComponents = array_map($restoreSpaces, $components);
      $iriComponents['port'] = $port;

      return self::build($iriComponents);
  }
  /**
   * Generates a URI string from a component map such as the one returned
   * by UriString::parse() or by PHP's parse_url().
   *
   * When the array is hand-made, the caller is responsible for supplying
   * valid components stripped of their URI delimiters. Missing keys are
   * treated as undefined components.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-5.3
   * @link https://tools.ietf.org/html/rfc3986#section-7.5
   *
   * @param InputComponentMap $components
   */
  public static function build(array $components): string
  {
      $scheme = $components['scheme'] ?? null;
      $authority = self::buildAuthority($components);

      return self::buildUri(
          scheme: $scheme,
          authority: $authority,
          path: $components['path'] ?? null,
          query: $components['query'] ?? null,
          fragment: $components['fragment'] ?? null,
      );
  }
  /**
   * Assembles a URI string following the RFC3986 recomposition algorithm.
   *
   * Each component MUST be supplied properly encoded and without its URI
   * delimiter; a `null` component is considered undefined and is skipped
   * together with its delimiter. The scheme, authority and path combination
   * is checked with validateComponents() before anything is assembled.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-5.3
   * @link https://tools.ietf.org/html/rfc3986#section-7.5
   */
  public static function buildUri(
      ?string $scheme = null,
      ?string $authority = null,
      ?string $path = null,
      ?string $query = null,
      ?string $fragment = null,
  ): string {
      self::validateComponents($scheme, $authority, $path);

      return (null === $scheme ? '' : $scheme.':')
          .(null === $authority ? '' : '//'.$authority)
          .(string) $path
          .(null === $query ? '' : '?'.$query)
          .(null === $fragment ? '' : '#'.$fragment);
  }
  /**
   * Generates the URI authority string from a component map.
   *
   * Returns `null` when the host is undefined. In that situation neither
   * the user info (user, pass) nor the port may be defined.
   *
   * @param InputComponentMap $components
   *
   * @throws SyntaxError if the user info or the port is set while the host is not
   */
  public static function buildAuthority(array $components): ?string
  {
      $host = $components['host'] ?? null;
      $user = $components['user'] ?? null;
      $pass = $components['pass'] ?? null;
      $port = $components['port'] ?? null;

      if (null === $host) {
          if (null !== $user || null !== $pass) {
              throw new SyntaxError('The user info component must not be set if the host is not defined.');
          }

          if (null !== $port) {
              throw new SyntaxError('The port component must not be set if the host is not defined.');
          }

          return null;
      }

      $userInfo = $user;
      if (null !== $pass) {
          // a password without a user still yields a `:pass` user info
          $userInfo .= ':'.$pass;
      }

      $authority = (null === $userInfo ? '' : $userInfo.'@').$host;
      if (null !== $port) {
          $authority .= ':'.$port;
      }

      return $authority;
  }
  /**
   * Parses the URI and returns its normalized components.
   *
   * The scheme is lowercased, the host goes through normalizeHost(), dot
   * segments are removed from the path when applicable and the path, query,
   * fragment, user and pass go through their Encoder::normalize* method.
   *
   * @throws SyntaxError if the URI is not parsable
   *
   * @return ComponentMap
   */
  public static function parseNormalized(Stringable|string $uri): array
  {
      $components = self::parse($uri);

      $scheme = $components['scheme'];
      if (null !== $scheme) {
          $components['scheme'] = strtolower($scheme);
      }

      $components['host'] = self::normalizeHost($components['host']);
      $authority = self::buildAuthority($components);
      $path = $components['path'];

      // Dot segments are only removed when the path is absolute
      // or when the scheme and/or the authority are defined.
      $isAbsolutePath = str_starts_with($path, '/');
      if ($isAbsolutePath || '' !== $components['scheme'].$authority) {
          $path = self::removeDotSegments($path);
      }

      // With an authority, a non-empty path must be absolute.
      if (null !== $authority && '' !== $path && '/' !== $path[0]) {
          $path = '/'.$path;
      }

      return [
          ...$components,
          'path' => (string) Encoder::normalizePath($path),
          'query' => Encoder::normalizeQuery($components['query']),
          'fragment' => Encoder::normalizeFragment($components['fragment']),
          'user' => Encoder::normalizeUser($components['user']),
          'pass' => Encoder::normalizePassword($components['pass']),
      ];
  }
  /**
   * Returns the normalized string representation of the URI.
   *
   * The URI is parsed and normalized by parseNormalized() and the
   * resulting components are reassembled with build().
   *
   * @throws SyntaxError if the URI is not parsable
   */
  public static function normalize(Stringable|string $uri): string
  {
      $normalizedComponents = self::parseNormalized($uri);

      return self::build($normalizedComponents);
  }
  /**
   * Returns the normalized string representation of a URI authority.
   *
   * A `null` authority is returned untouched. Otherwise the authority is
   * parsed, its host goes through normalizeHost(), its user and pass go
   * through the Encoder and the authority string is rebuilt.
   *
   * @throws SyntaxError if the authority is not parsable
   */
  public static function normalizeAuthority(Stringable|string|null $authority): ?string
  {
      if (null === $authority) {
          return null;
      }

      $parsed = self::parseAuthority($authority);
      $normalized = [
          ...$parsed,
          'host' => self::normalizeHost($parsed['host'] ?? null),
          'user' => Encoder::normalizeUser($parsed['user']),
          'pass' => Encoder::normalizePassword($parsed['pass']),
      ];

      return (string) self::buildAuthority($normalized);
  }
  /**
   * Resolves a URI against a base URI using RFC3986 rules and returns
   * the resulting URI string.
   *
   * When the URI is the empty string the base URI is used in its place.
   * When no base URI is given, or when it is identical to the URI, the
   * URI is resolved against itself and must therefore be absolute.
   *
   * Exceptions thrown while parsing or building are not caught.
   *
   * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5
   *
   * @throws SyntaxError if the BaseUri is not absolute or in absence of a BaseUri if the uri is not absolute
   */
  public static function resolve(BackedEnum|Stringable|string $uri, BackedEnum|Stringable|string|null $baseUri = null): string
  {
      $uri = (string) ($uri instanceof BackedEnum ? $uri->value : $uri);
      if ($baseUri instanceof BackedEnum) {
          $baseUri = (string) $baseUri->value;
      }

      if ('' === $uri) {
          $uri = $baseUri ?? throw new SyntaxError('The uri can not be the empty string when there\'s no base URI.');
      }

      $reference = self::parse($uri);
      $base = $reference;
      if (null !== $baseUri && (string) $uri !== (string) $baseUri) {
          $base = self::parse($baseUri);
      }

      $baseHasLeadingSlash = str_starts_with($base['path'], '/');
      if (null === $base['scheme']) {
          throw new SyntaxError('The base URI must be an absolute URI or null; If the base URI is null the URI must be an absolute URI.');
      }

      // Removes the dot segments, then restores the leading slash when
      // the base path is absolute and the cleaned path lost (or lacks) it.
      $cleanPath = static function (string $path) use ($baseHasLeadingSlash): string {
          $path = self::removeDotSegments($path);
          if ($baseHasLeadingSlash && '' !== $path && '/' !== $path[0]) {
              return '/'.$path;
          }

          return $path;
      };

      // The reference carries its own scheme: only its path is cleaned.
      if (null !== $reference['scheme'] && '' !== $reference['scheme']) {
          $reference['path'] = $cleanPath($reference['path']);

          return self::build($reference);
      }

      // The reference carries an authority: it only inherits the base scheme.
      if (null !== self::buildAuthority($reference)) {
          $reference['scheme'] = $base['scheme'];
          $reference['path'] = $cleanPath($reference['path']);

          return self::build($reference);
      }

      // Otherwise scheme and authority come from the base URI while
      // the path and the query are merged.
      [$mergedPath, $mergedQuery] = self::resolvePathAndQuery($reference, $base);
      $base['path'] = $cleanPath($mergedPath);
      $base['query'] = $mergedQuery;
      $base['fragment'] = $reference['fragment'];

      return self::build($base);
  }
  /**
   * Removes the dot segments (`.` and `..`) from a path according to RFC3986.
   *
   * A path without any `.` character is returned unchanged. When the last
   * segment of the submitted path is a dot segment the returned path ends
   * with a `/`.
   *
   * For an absolute path, a `..` segment never climbs above the root: the
   * leading `/` is always preserved (`/a/../../b` becomes `/b`).
   *
   * @see http://tools.ietf.org/html/rfc3986#section-5.2.4
   */
  public static function removeDotSegments(Stringable|string $path): string
  {
      $path = (string) $path;
      if (!str_contains($path, '.')) {
          return $path;
      }

      $isAbsolute = str_starts_with($path, '/');
      $oldSegments = explode('/', $path);
      $newSegments = [];
      foreach ($oldSegments as $segment) {
          if ('..' === $segment) {
              // For an absolute path the first (empty) segment stands for the
              // root `/`; popping it would turn the path into a relative one.
              if (!$isAbsolute || [''] !== $newSegments) {
                  array_pop($newSegments);
              }

              continue;
          }

          if (!isset(self::DOT_SEGMENTS[$segment])) {
              $newSegments[] = $segment;
          }
      }

      $newPath = implode('/', $newSegments);
      if (isset(self::DOT_SEGMENTS[$oldSegments[array_key_last($oldSegments)]])) {
          // a trailing dot segment designates a directory
          $newPath .= '/';
      }

      return $newPath;
  }
  /**
   * Merges the path and the query of a reference URI with those of a base URI.
   *
   * - an absolute reference path is kept along with the reference query;
   * - an empty reference path yields the base path, and the base query
   *   when the reference query is undefined;
   * - otherwise the reference path replaces whatever follows the last `/`
   *   of the base path (or is prefixed with `/` when the base URI has an
   *   authority and an empty path).
   *
   * @param ComponentMap $uri
   * @param ComponentMap $baseUri
   *
   * @return array{0:string, 1:string|null}
   */
  private static function resolvePathAndQuery(array $uri, array $baseUri): array
  {
      $path = $uri['path'];
      $query = $uri['query'];

      if (str_starts_with($path, '/')) {
          return [$path, $query];
      }

      $basePath = $baseUri['path'];
      if ('' === $path) {
          return [$basePath, $query ?? $baseUri['query']];
      }

      $baseHasAuthority = null !== self::buildAuthority($baseUri);
      if ('' === $basePath) {
          return [$baseHasAuthority ? '/'.$path : $path, $query];
      }

      // keep the base path up to and including its last slash, if any
      $lastSlash = strrpos($basePath, '/');
      if (false !== $lastSlash) {
          $path = substr($basePath, 0, $lastSlash + 1).$path;
      }

      return [$path, $query];
  }
  /**
   * Tells whether the whole string matches REGEXP_VALID_URI_RFC3986_CHARS,
   * i.e. is only made of characters accepted in a RFC3986 URI.
   */
  public static function containsRfc3986Chars(Stringable|string $uri): bool
  {
      $matched = preg_match(self::REGEXP_VALID_URI_RFC3986_CHARS, (string) $uri);

      return 1 === $matched;
  }
  /**
   * Tells whether the string is free of the characters matched by
   * REGEXP_INVALID_URI_RFC3987_CHARS (control characters, DEL, whitespace).
   */
  public static function containsRfc3987Chars(Stringable|string $uri): bool
  {
      $foundInvalidChar = 1 === preg_match(self::REGEXP_INVALID_URI_RFC3987_CHARS, (string) $uri);

      return !$foundInvalidChar;
  }
  /**
   * Parses a URI string into its components.
   *
   * The returned associative array always contains every URI component.
   *
   * <code>
   * $components = UriString::parse('http://foo@test.example.com:42?query#');
   * var_export($components);
   * //will display
   * array(
   *   'scheme' => 'http',           // the URI scheme component
   *   'user' => 'foo',              // the URI user component
   *   'pass' => null,               // the URI pass component
   *   'host' => 'test.example.com', // the URI host component
   *   'port' => 42,                 // the URI port component
   *   'path' => '',                 // the URI path component
   *   'query' => 'query',           // the URI query component
   *   'fragment' => '',             // the URI fragment component
   * );
   * </code>
   *
   * Differences with PHP's parse_url return value:
   *
   * <ul>
   * <li>All components are always present in the returned array</li>
   * <li>Empty and undefined components are treated differently: an empty component is
   * set to the empty string while an undefined component is set to the `null` value.</li>
   * <li>The path component is never undefined</li>
   * <li>The method parses the URI following the RFC3986 rules, but you are still
   * required to validate the returned components against its related scheme specific rules.</li>
   * </ul>
   *
   * @link https://tools.ietf.org/html/rfc3986
   *
   * @throws SyntaxError if the URI contains invalid characters
   * @throws SyntaxError if the URI contains an invalid scheme
   * @throws SyntaxError if the URI contains an invalid path
   *
   * @return ComponentMap
   */
  public static function parse(BackedEnum|Stringable|string|int $uri): array
  {
      $uri = (string) ($uri instanceof BackedEnum ? $uri->value : $uri);

      // trivial URIs are resolved from a lookup table
      $shortcut = self::URI_SHORTCUTS[$uri] ?? null;
      if (null !== $shortcut) {
          /** @var ComponentMap $components */
          $components = [...self::URI_COMPONENTS, ...$shortcut];

          return $components;
      }

      if (!self::containsRfc3987Chars($uri)) {
          throw new SyntaxError(sprintf('The uri `%s` contains invalid characters', $uri));
      }

      // When the first character is a known URI delimiter the parsing is simplified.
      // The URI is not empty at this point: the empty string is a shortcut.
      if (str_starts_with($uri, '#')) {
          // fragment only URI
          return [...self::URI_COMPONENTS, 'fragment' => substr($uri, 1)];
      }

      if (str_starts_with($uri, '?')) {
          // query with an optional fragment
          $pieces = explode('#', substr($uri, 1), 2);

          return [...self::URI_COMPONENTS, 'query' => $pieces[0], 'fragment' => $pieces[1] ?? null];
      }

      // general case: split the URI with the RFC3986 regular expression
      preg_match(self::REGEXP_URI_PARTS, $uri, $parts);
      $parts += ['query' => '', 'fragment' => ''];

      $schemePart = $parts['scheme'] ?? null;
      $authorityPart = $parts['authority'] ?? null;
      $pathPart = $parts['path'] ?? '';

      $hasInvalidScheme = ':' === $schemePart
          || 1 !== preg_match(self::REGEXP_URI_SCHEME, $parts['scontent'] ?? '');
      if ($hasInvalidScheme) {
          throw new SyntaxError(sprintf('The uri `%s` contains an invalid scheme', $uri));
      }

      $hasInvalidPath = '' === $schemePart.$authorityPart
          && 1 === preg_match(self::REGEXP_INVALID_PATH, $pathPart);
      if ($hasInvalidPath) {
          throw new SyntaxError(sprintf('The uri `%s` contains an invalid path.', $uri));
      }

      $authorityMap = '' === $authorityPart ? [] : self::parseAuthority($parts['acontent'] ?? null);

      /** @var ComponentMap $components */
      $components = [
          ...self::URI_COMPONENTS,
          ...$authorityMap,
          'path' => $pathPart,
          'scheme' => '' === $schemePart ? null : ($parts['scontent'] ?? null),
          'query' => '' === $parts['query'] ? null : ($parts['qcontent'] ?? null),
          'fragment' => '' === $parts['fragment'] ? null : ($parts['fcontent'] ?? null),
      ];

      return $components;
  }
  /**
   * Asserts that the scheme, authority and path can be combined into a URI.
   *
   * - with an authority, the path must be empty or start with a `/`;
   * - without an authority, the path cannot start with `//`;
   * - without a scheme and an authority, the first path segment cannot
   *   contain a colon.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3
   * @link https://tools.ietf.org/html/rfc3986#section-3.3
   *
   * @throws SyntaxError
   */
  private static function validateComponents(?string $scheme, ?string $authority, ?string $path): void
  {
      // an undefined path and an empty path are handled alike
      $path ??= '';

      if (null !== $authority) {
          if ('' !== $path && '/' !== $path[0]) {
              throw new SyntaxError('If an authority is present the path must be empty or start with a `/`.');
          }

          return;
      }

      if ('' === $path) {
          return;
      }

      if (str_starts_with($path, '//')) {
          throw new SyntaxError('If there is no authority the path `'.$path.'` cannot start with a `//`.');
      }

      if (null !== $scheme) {
          return;
      }

      // without a scheme, a colon in the first segment would be read as a scheme delimiter
      $firstSegment = explode('/', $path, 2)[0];
      if (str_contains($firstSegment, ':')) {
          throw new SyntaxError('In absence of a scheme and an authority the first path segment cannot contain a colon (":") character.');
      }
  }
  /**
   * Parses the URI authority part into its user, pass, host and port.
   *
   * A `null` authority yields four undefined components; an empty
   * authority only defines the host as the empty string. The user info
   * is whatever precedes the first `@` character.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.2
   *
   * @throws SyntaxError If the port or the host component is invalid
   *
   * @return AuthorityMap
   */
  public static function parseAuthority(BackedEnum|Stringable|string|null $authority): array
  {
      if (null === $authority) {
          return ['user' => null, 'pass' => null, 'host' => null, 'port' => null];
      }

      $authority = (string) ($authority instanceof BackedEnum ? $authority->value : $authority);
      if ('' === $authority) {
          return ['user' => null, 'pass' => null, 'host' => '', 'port' => null];
      }

      $user = null;
      $pass = null;
      $hostAndPort = $authority;

      $delimiter = strpos($authority, '@');
      if (false !== $delimiter) {
          $userInfo = substr($authority, 0, $delimiter);
          $hostAndPort = substr($authority, $delimiter + 1);
          [$user, $pass] = explode(':', $userInfo, 2) + [1 => null];
      }

      preg_match(self::REGEXP_HOST_PORT, $hostAndPort, $matches);

      // the port is checked before the host
      $port = self::filterPort($matches['port'] ?? '');
      $host = self::filterHost($matches['host'] ?? '');

      return ['user' => $user, 'pass' => $pass, 'host' => $host, 'port' => $port];
  }
  /**
   * Filters and formats the port component.
   *
   * The empty string designates an undefined port (`null`); a string only
   * made of digits is converted to an integer.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
   *
   * @throws SyntaxError if the port is not exclusively made of digits
   */
  private static function filterPort(string $port): ?int
  {
      if ('' === $port) {
          return null;
      }

      if (1 !== preg_match('/^\d*$/', $port)) {
          throw new SyntaxError(sprintf('The port `%s` is invalid', $port));
      }

      return (int) $port;
  }
  /**
   * Validates the host and returns the `value` of the HostRecord built from it.
   *
   * Any failure raised while building the HostRecord is converted into a
   * SyntaxError; the original failure stays reachable through
   * Throwable::getPrevious().
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.2.2
   *
   * @throws SyntaxError if the host is invalid
   */
  private static function filterHost(Stringable|string|null $host): ?string
  {
      try {
          return HostRecord::from($host)->value;
      } catch (Throwable $exception) {
          // chain the cause so that the real reason of the rejection is not lost
          throw new SyntaxError(
              sprintf('Host `%s` is invalid : the IP host is malformed', $host),
              previous: $exception,
          );
      }
  }
  /**
   * Tells whether the scheme component is valid.
   *
   * An undefined (`null`) scheme is valid. Otherwise the scheme must start
   * with an ASCII letter optionally followed by ASCII letters, digits,
   * `+`, `-` or `.` characters.
   *
   * @link https://tools.ietf.org/html/rfc3986#section-3.1
   */
  public static function isValidScheme(BackedEnum|Stringable|string|null $scheme): bool
  {
      if ($scheme instanceof BackedEnum) {
          $scheme = $scheme->value;
      }

      if (null === $scheme) {
          return true;
      }

      // The `D` modifier anchors `$` at the very end of the subject; without
      // it a scheme followed by a trailing newline would be accepted.
      return 1 === preg_match('/^[A-Za-z][-A-Za-z\d+.]*$/D', (string) $scheme);
  }
  /**
   * Normalizes the host component.
   *
   * An undefined host and an IPv4 host are returned as is (cast to string).
   * Any other host goes through Encoder::normalizeHost() and, when the
   * `idn_to_ascii` function and the `INTL_IDNA_VARIANT_UTS46` constant are
   * available, through IdnaConverter::toAscii(); the converted domain is
   * only used when the conversion reports no error.
   */
  private static function normalizeHost(BackedEnum|Stringable|string|null $host): ?string
  {
      // IDNA support is detected once per process
      static $isSupported = null;

      if (null === $host) {
          return null;
      }

      $host = (string) ($host instanceof BackedEnum ? $host->value : $host);
      if (false !== filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
          return $host;
      }

      $host = (string) Encoder::normalizeHost($host);

      $isSupported ??= (function_exists('\idn_to_ascii') && defined('\INTL_IDNA_VARIANT_UTS46'));
      if (!$isSupported) {
          return $host;
      }

      $converted = IdnaConverter::toAscii($host);

      return $converted->hasErrors() ? $host : $converted->domain();
  }
  /**
   * DEPRECATION WARNING! This method will be removed in the next major point release.
   *
   * Tells whether the host is valid by delegating to HostRecord::isValid().
   *
   * NOTE(review): the deprecation message advertises HostRecord::validate()
   * while this method calls HostRecord::isValid() - confirm which one is
   * the intended replacement.
   *
   * @deprecated Since version 7.6.0
   * @codeCoverageIgnore
   * @see HostRecord::validate()
   */
  #[Deprecated(message:'use League\Uri\HostRecord::validate() instead', since:'league/uri:7.6.0')]
  public static function isValidHost(Stringable|string|null $host): bool
  {
      $isValid = HostRecord::isValid($host);

      return $isValid;
  }
  652. }