Encoder.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. <?php
  2. /**
  3. * League.Uri (https://uri.thephpleague.com)
  4. *
  5. * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. declare(strict_types=1);
  11. namespace League\Uri;
  12. use BackedEnum;
  13. use Closure;
  14. use Deprecated;
  15. use League\Uri\Exceptions\SyntaxError;
  16. use League\Uri\IPv6\Converter as IPv6Converter;
  17. use SensitiveParameter;
  18. use Stringable;
  19. use Throwable;
  20. use function explode;
  21. use function filter_var;
  22. use function gettype;
  23. use function in_array;
  24. use function preg_match;
  25. use function preg_replace_callback;
  26. use function rawurldecode;
  27. use function rawurlencode;
  28. use function sprintf;
  29. use function str_starts_with;
  30. use function strtolower;
  31. use function strtoupper;
  32. use const FILTER_FLAG_IPV4;
  33. use const FILTER_VALIDATE_IP;
  34. final class Encoder
  35. {
  36. private const REGEXP_CHARS_INVALID = '/[\x00-\x1f\x7f]/';
  37. private const REGEXP_CHARS_ENCODED = ',%[A-Fa-f0-9]{2},';
  38. private const REGEXP_CHARS_PREVENTS_DECODING = ',%
  39. 2[A-F|1-2|4-9]|
  40. 3[0-9|B|D]|
  41. 4[1-9|A-F]|
  42. 5[0-9|A|F]|
  43. 6[1-9|A-F]|
  44. 7[0-9|E]
  45. ,ix';
  46. private const REGEXP_PART_SUBDELIM = "\!\$&'\(\)\*\+,;\=%";
  47. private const REGEXP_PART_UNRESERVED = 'A-Za-z\d_\-.~';
  48. private const REGEXP_PART_ENCODED = '%(?![A-Fa-f\d]{2})';
  49. /**
  50. * Unreserved characters.
  51. *
  52. * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3
  53. */
  54. private const REGEXP_UNRESERVED_CHARACTERS = ',%(2[DdEe]|3[0-9]|4[1-9A-Fa-f]|5[AaFf]|6[1-9A-Fa-f]|7[0-9A-Ea-e]),';
  55. /**
  56. * Tell whether the user component is correctly encoded.
  57. */
  58. public static function isUserEncoded(BackedEnum|Stringable|string|null $encoded): bool
  59. {
  60. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.']+|'.self::REGEXP_PART_ENCODED.'/';
  61. if ($encoded instanceof BackedEnum) {
  62. $encoded = $encoded->value;
  63. }
  64. return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
  65. }
  66. /**
  67. * Encode User.
  68. *
  69. * All generic delimiters MUST be encoded
  70. */
  71. public static function encodeUser(BackedEnum|Stringable|string|null $user): ?string
  72. {
  73. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.']+|'.self::REGEXP_PART_ENCODED.'/';
  74. return self::encode($user, $pattern);
  75. }
  76. /**
  77. * Normalize user component.
  78. *
  79. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  80. * any characters. To determine what characters to encode, please refer to
  81. * RFC 3986.
  82. */
  83. public static function normalizeUser(BackedEnum|Stringable|string|null $user): ?string
  84. {
  85. return self::normalize(self::encodeUser(self::decodeUnreservedCharacters($user)));
  86. }
  87. private static function normalize(?string $component): ?string
  88. {
  89. if (null === $component) {
  90. return null;
  91. }
  92. return (string) preg_replace_callback(
  93. '/%[0-9a-f]{2}/i',
  94. static fn (array $found) => strtoupper($found[0]),
  95. $component
  96. );
  97. }
  98. /**
  99. * Tell whether the password component is correctly encoded.
  100. */
  101. public static function isPasswordEncoded(#[SensitiveParameter] BackedEnum|Stringable|string|null $encoded): bool
  102. {
  103. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':]+|'.self::REGEXP_PART_ENCODED.'/';
  104. if ($encoded instanceof BackedEnum) {
  105. $encoded = $encoded->value;
  106. }
  107. return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
  108. }
  109. /**
  110. * Encode Password.
  111. *
  112. * Generic delimiters ":" MUST NOT be encoded
  113. */
  114. public static function encodePassword(#[SensitiveParameter] BackedEnum|Stringable|string|null $component): ?string
  115. {
  116. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':]+|'.self::REGEXP_PART_ENCODED.'/';
  117. return self::encode($component, $pattern);
  118. }
  119. /**
  120. * Normalize password component.
  121. *
  122. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  123. * any characters. To determine what characters to encode, please refer to
  124. * RFC 3986.
  125. */
  126. public static function normalizePassword(#[SensitiveParameter] BackedEnum|Stringable|string|null $password): ?string
  127. {
  128. return self::normalize(self::encodePassword(self::decodeUnreservedCharacters($password)));
  129. }
  130. /**
  131. * Tell whether the userInfo component is correctly encoded.
  132. */
  133. public static function isUserInfoEncoded(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): bool
  134. {
  135. if (null === $userInfo) {
  136. return true;
  137. }
  138. if ($userInfo instanceof BackedEnum) {
  139. $userInfo = $userInfo->value;
  140. }
  141. [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
  142. return self::isUserEncoded($user)
  143. && self::isPasswordEncoded($password);
  144. }
  145. public static function encodeUserInfo(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): ?string
  146. {
  147. if (null === $userInfo) {
  148. return null;
  149. }
  150. if ($userInfo instanceof BackedEnum) {
  151. $userInfo = $userInfo->value;
  152. }
  153. [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
  154. $userInfo = self::encodeUser($user);
  155. if (null === $password) {
  156. return $userInfo;
  157. }
  158. return $userInfo.':'.self::encodePassword($password);
  159. }
  160. public static function normalizeUserInfo(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): ?string
  161. {
  162. if (null === $userInfo) {
  163. return null;
  164. }
  165. if ($userInfo instanceof BackedEnum) {
  166. $userInfo = $userInfo->value;
  167. }
  168. [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
  169. $userInfo = self::normalizeUser($user);
  170. if (null === $password) {
  171. return $userInfo;
  172. }
  173. return $userInfo.':'.self::normalizePassword($password);
  174. }
  175. /**
  176. * Decodes all the URI component characters.
  177. */
  178. public static function decodeAll(BackedEnum|Stringable|string|null $component): ?string
  179. {
  180. return self::decode($component, static fn (array $matches): string => rawurldecode($matches[0]));
  181. }
  182. /**
  183. * Decodes the URI component without decoding the unreserved characters which are already encoded.
  184. */
  185. public static function decodeNecessary(BackedEnum|Stringable|string|int|null $component): ?string
  186. {
  187. $decoder = static function (array $matches): string {
  188. if (1 === preg_match(self::REGEXP_CHARS_PREVENTS_DECODING, $matches[0])) {
  189. return strtoupper($matches[0]);
  190. }
  191. return rawurldecode($matches[0]);
  192. };
  193. return self::decode($component, $decoder);
  194. }
  195. /**
  196. * Decodes the component unreserved characters.
  197. */
  198. public static function decodeUnreservedCharacters(BackedEnum|Stringable|string|null $str): ?string
  199. {
  200. if ($str instanceof BackedEnum) {
  201. $str = $str->value;
  202. }
  203. if (null === $str) {
  204. return null;
  205. }
  206. return preg_replace_callback(
  207. self::REGEXP_UNRESERVED_CHARACTERS,
  208. static fn (array $matches): string => rawurldecode($matches[0]),
  209. (string) $str
  210. );
  211. }
  212. /**
  213. * Tell whether the path component is correctly encoded.
  214. */
  215. public static function isPathEncoded(BackedEnum|Stringable|string|null $encoded): bool
  216. {
  217. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/]+|'.self::REGEXP_PART_ENCODED.'/';
  218. if ($encoded instanceof BackedEnum) {
  219. $encoded = $encoded->value;
  220. }
  221. return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
  222. }
  223. /**
  224. * Encode Path.
  225. *
  226. * Generic delimiters ":", "@", and "/" MUST NOT be encoded
  227. */
  228. public static function encodePath(BackedEnum|Stringable|string|null $component): string
  229. {
  230. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/]+|'.self::REGEXP_PART_ENCODED.'/';
  231. return (string) self::encode($component, $pattern);
  232. }
  233. /**
  234. * Decodes the path component while preserving characters that should not be decoded in the context of a full valid URI.
  235. */
  236. public static function decodePath(BackedEnum|Stringable|string|null $path): ?string
  237. {
  238. $decoder = static function (array $matches): string {
  239. $encodedChar = strtoupper($matches[0]);
  240. return in_array($encodedChar, ['%2F', '%20', '%3F', '%23'], true) ? $encodedChar : rawurldecode($encodedChar);
  241. };
  242. return self::decode($path, $decoder);
  243. }
  244. /**
  245. * Normalize path component.
  246. *
  247. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  248. * any characters. To determine what characters to encode, please refer to
  249. * RFC 3986.
  250. */
  251. public static function normalizePath(BackedEnum|Stringable|string|null $component): ?string
  252. {
  253. return self::normalize(self::encodePath(self::decodePath($component)));
  254. }
  255. /**
  256. * Tell whether the query component is correctly encoded.
  257. */
  258. public static function isQueryEncoded(BackedEnum|Stringable|string|null $encoded): bool
  259. {
  260. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.'\/?%]+|'.self::REGEXP_PART_ENCODED.'/';
  261. if ($encoded instanceof BackedEnum) {
  262. $encoded = $encoded->value;
  263. }
  264. return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
  265. }
  266. /**
  267. * Decodes the query component while preserving characters that should not be decoded in the context of a full valid URI.
  268. */
  269. public static function decodeQuery(BackedEnum|Stringable|string|null $path): ?string
  270. {
  271. $decoder = static function (array $matches): string {
  272. $encodedChar = strtoupper($matches[0]);
  273. return in_array($encodedChar, ['%26', '%3D', '%20', '%23', '%3F'], true) ? $encodedChar : rawurldecode($encodedChar);
  274. };
  275. return self::decode($path, $decoder);
  276. }
  277. /**
  278. * Normalize the query component.
  279. *
  280. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  281. * any characters. To determine what characters to encode, please refer to
  282. * RFC 3986.
  283. */
  284. public static function normalizeQuery(BackedEnum|Stringable|string|null $query): ?string
  285. {
  286. return self::normalize(self::encodeQueryOrFragment(self::decodeQuery($query)));
  287. }
  288. /**
  289. * Tell whether the query component is correctly encoded.
  290. */
  291. public static function isFragmentEncoded(BackedEnum|Stringable|string|null $encoded): bool
  292. {
  293. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/?%]|'.self::REGEXP_PART_ENCODED.'/';
  294. if ($encoded instanceof BackedEnum) {
  295. $encoded = $encoded->value;
  296. }
  297. return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
  298. }
  299. /**
  300. * Decodes the fragment component while preserving characters that should not be decoded in the context of a full valid URI.
  301. */
  302. public static function decodeFragment(BackedEnum|Stringable|string|null $path): ?string
  303. {
  304. return self::decode($path, static fn (array $matches): string => '%20' === $matches[0] ? $matches[0] : rawurldecode($matches[0]));
  305. }
  306. /**
  307. * Normalize the fragment component.
  308. *
  309. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  310. * any characters. To determine what characters to encode, please refer to
  311. * RFC 3986.
  312. */
  313. public static function normalizeFragment(BackedEnum|Stringable|string|null $fragment): ?string
  314. {
  315. return self::normalize(self::encodeQueryOrFragment(self::decodeFragment($fragment)));
  316. }
  317. /**
  318. * Normalize the host component.
  319. *
  320. * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2
  321. *
  322. * The value returned MUST be percent-encoded, but MUST NOT double-encode
  323. * any characters. To determine what characters to encode, please refer to
  324. * RFC 3986.
  325. */
  326. public static function normalizeHost(BackedEnum|Stringable|string|null $host): ?string
  327. {
  328. if ($host instanceof BackedEnum) {
  329. $host = (string) $host->value;
  330. }
  331. if ($host instanceof Stringable) {
  332. $host = (string) $host;
  333. }
  334. if (null === $host || '' === $host || false !== filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
  335. return $host;
  336. }
  337. if (str_starts_with($host, '[')) {
  338. return IPv6Converter::normalize($host);
  339. }
  340. $host = strtolower($host);
  341. return (!str_contains($host, '%')) ? $host : preg_replace_callback(
  342. '/%[a-f0-9]{2}/',
  343. fn (array $matches) => 1 === preg_match('/%([0-7][0-9a-f])/', $matches[0]) ? rawurldecode($matches[0]) : strtoupper($matches[0]),
  344. $host
  345. );
  346. }
  347. /**
  348. * Encode Query or Fragment.
  349. *
  350. * Generic delimiters ":", "@", "?", and "/" MUST NOT be encoded
  351. */
  352. public static function encodeQueryOrFragment(BackedEnum|Stringable|string|null $component): ?string
  353. {
  354. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/?]+|'.self::REGEXP_PART_ENCODED.'/';
  355. return self::encode($component, $pattern);
  356. }
  357. public static function encodeQueryKeyValue(mixed $component): ?string
  358. {
  359. static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.']+|'.self::REGEXP_PART_ENCODED.'/';
  360. $encoder = static fn (array $found): string => 1 === preg_match('/[^'.self::REGEXP_PART_UNRESERVED.']/', rawurldecode($found[0])) ? rawurlencode($found[0]) : $found[0];
  361. $filteredComponent = self::filterComponent($component);
  362. return match (true) {
  363. null === $filteredComponent => throw new SyntaxError(sprintf('A pair key/value must be a scalar value `%s` given.', gettype($component))),
  364. 1 === preg_match(self::REGEXP_CHARS_INVALID, $filteredComponent) => rawurlencode($filteredComponent),
  365. default => (string) preg_replace_callback($pattern, $encoder, $filteredComponent),
  366. };
  367. }
  368. private static function filterComponent(mixed $component): ?string
  369. {
  370. try {
  371. return StringCoercionMode::Native->coerce($component);
  372. } catch (Throwable $exception) {
  373. throw new SyntaxError(
  374. sprintf('The component must be a scalar value `%s` given.', gettype($component)),
  375. previous: $exception
  376. );
  377. }
  378. }
  379. /**
  380. * Encodes the URI component characters using a regular expression to find which characters need encoding.
  381. */
  382. private static function encode(BackedEnum|Stringable|string|int|bool|null $component, string $pattern): ?string
  383. {
  384. $component = self::filterComponent($component);
  385. if (null === $component || '' === $component) {
  386. return $component;
  387. }
  388. return (string) preg_replace_callback(
  389. $pattern,
  390. static fn (array $found): string => 1 === preg_match('/[^'.self::REGEXP_PART_UNRESERVED.']/', rawurldecode($found[0])) ? rawurlencode($found[0]) : $found[0],
  391. $component
  392. );
  393. }
  394. /**
  395. * Decodes the URI component characters using a closure.
  396. */
  397. private static function decode(BackedEnum|Stringable|string|int|null $component, Closure $decoder): ?string
  398. {
  399. $component = self::filterComponent($component);
  400. if (null === $component || '' === $component) {
  401. return $component;
  402. }
  403. if (1 === preg_match(self::REGEXP_CHARS_INVALID, $component)) {
  404. throw new SyntaxError('Invalid component string: '.$component.'.');
  405. }
  406. if (1 === preg_match(self::REGEXP_CHARS_ENCODED, $component)) {
  407. return (string) preg_replace_callback(self::REGEXP_CHARS_ENCODED, $decoder, $component);
  408. }
  409. return $component;
  410. }
  411. /**
  412. * Decodes the URI component without decoding the unreserved characters which are already encoded.
  413. *
  414. * DEPRECATION WARNING! This method will be removed in the next major point release.
  415. *
  416. * @deprecated Since version 7.6.0
  417. * @codeCoverageIgnore
  418. * @see Encoder::decodeNecessary()
  419. *
  420. * Create a new instance from the environment.
  421. */
  422. #[Deprecated(message:'use League\Uri\Encoder::decodeNecessary() instead', since:'league/uri:7.6.0')]
  423. public static function decodePartial(BackedEnum|Stringable|string|int|null $component): ?string
  424. {
  425. return self::decodeNecessary($component);
  426. }
  427. }