| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506 |
- <?php
- /**
- * League.Uri (https://uri.thephpleague.com)
- *
- * (c) Ignace Nyamagana Butera <nyamsprod@gmail.com>
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
- declare(strict_types=1);
- namespace League\Uri;
- use BackedEnum;
- use Closure;
- use Deprecated;
- use League\Uri\Exceptions\SyntaxError;
- use League\Uri\IPv6\Converter as IPv6Converter;
- use SensitiveParameter;
- use Stringable;
- use Throwable;
- use function explode;
- use function filter_var;
- use function gettype;
- use function in_array;
- use function preg_match;
- use function preg_replace_callback;
- use function rawurldecode;
- use function rawurlencode;
- use function sprintf;
- use function str_starts_with;
- use function strtolower;
- use function strtoupper;
- use const FILTER_FLAG_IPV4;
- use const FILTER_VALIDATE_IP;
- final class Encoder
- {
- private const REGEXP_CHARS_INVALID = '/[\x00-\x1f\x7f]/';
- private const REGEXP_CHARS_ENCODED = ',%[A-Fa-f0-9]{2},';
- private const REGEXP_CHARS_PREVENTS_DECODING = ',%
- 2[A-F|1-2|4-9]|
- 3[0-9|B|D]|
- 4[1-9|A-F]|
- 5[0-9|A|F]|
- 6[1-9|A-F]|
- 7[0-9|E]
- ,ix';
- private const REGEXP_PART_SUBDELIM = "\!\$&'\(\)\*\+,;\=%";
- private const REGEXP_PART_UNRESERVED = 'A-Za-z\d_\-.~';
- private const REGEXP_PART_ENCODED = '%(?![A-Fa-f\d]{2})';
- /**
- * Unreserved characters.
- *
- * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3
- */
- private const REGEXP_UNRESERVED_CHARACTERS = ',%(2[DdEe]|3[0-9]|4[1-9A-Fa-f]|5[AaFf]|6[1-9A-Fa-f]|7[0-9A-Ea-e]),';
- /**
- * Tell whether the user component is correctly encoded.
- */
- public static function isUserEncoded(BackedEnum|Stringable|string|null $encoded): bool
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.']+|'.self::REGEXP_PART_ENCODED.'/';
- if ($encoded instanceof BackedEnum) {
- $encoded = $encoded->value;
- }
- return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
- }
- /**
- * Encode User.
- *
- * All generic delimiters MUST be encoded
- */
- public static function encodeUser(BackedEnum|Stringable|string|null $user): ?string
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.']+|'.self::REGEXP_PART_ENCODED.'/';
- return self::encode($user, $pattern);
- }
- /**
- * Normalize user component.
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizeUser(BackedEnum|Stringable|string|null $user): ?string
- {
- return self::normalize(self::encodeUser(self::decodeUnreservedCharacters($user)));
- }
- private static function normalize(?string $component): ?string
- {
- if (null === $component) {
- return null;
- }
- return (string) preg_replace_callback(
- '/%[0-9a-f]{2}/i',
- static fn (array $found) => strtoupper($found[0]),
- $component
- );
- }
- /**
- * Tell whether the password component is correctly encoded.
- */
- public static function isPasswordEncoded(#[SensitiveParameter] BackedEnum|Stringable|string|null $encoded): bool
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':]+|'.self::REGEXP_PART_ENCODED.'/';
- if ($encoded instanceof BackedEnum) {
- $encoded = $encoded->value;
- }
- return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
- }
- /**
- * Encode Password.
- *
- * Generic delimiters ":" MUST NOT be encoded
- */
- public static function encodePassword(#[SensitiveParameter] BackedEnum|Stringable|string|null $component): ?string
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':]+|'.self::REGEXP_PART_ENCODED.'/';
- return self::encode($component, $pattern);
- }
- /**
- * Normalize password component.
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizePassword(#[SensitiveParameter] BackedEnum|Stringable|string|null $password): ?string
- {
- return self::normalize(self::encodePassword(self::decodeUnreservedCharacters($password)));
- }
- /**
- * Tell whether the userInfo component is correctly encoded.
- */
- public static function isUserInfoEncoded(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): bool
- {
- if (null === $userInfo) {
- return true;
- }
- if ($userInfo instanceof BackedEnum) {
- $userInfo = $userInfo->value;
- }
- [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
- return self::isUserEncoded($user)
- && self::isPasswordEncoded($password);
- }
- public static function encodeUserInfo(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): ?string
- {
- if (null === $userInfo) {
- return null;
- }
- if ($userInfo instanceof BackedEnum) {
- $userInfo = $userInfo->value;
- }
- [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
- $userInfo = self::encodeUser($user);
- if (null === $password) {
- return $userInfo;
- }
- return $userInfo.':'.self::encodePassword($password);
- }
- public static function normalizeUserInfo(#[SensitiveParameter] BackedEnum|Stringable|string|null $userInfo): ?string
- {
- if (null === $userInfo) {
- return null;
- }
- if ($userInfo instanceof BackedEnum) {
- $userInfo = $userInfo->value;
- }
- [$user, $password] = explode(':', (string) $userInfo, 2) + [1 => null];
- $userInfo = self::normalizeUser($user);
- if (null === $password) {
- return $userInfo;
- }
- return $userInfo.':'.self::normalizePassword($password);
- }
- /**
- * Decodes all the URI component characters.
- */
- public static function decodeAll(BackedEnum|Stringable|string|null $component): ?string
- {
- return self::decode($component, static fn (array $matches): string => rawurldecode($matches[0]));
- }
- /**
- * Decodes the URI component without decoding the unreserved characters which are already encoded.
- */
- public static function decodeNecessary(BackedEnum|Stringable|string|int|null $component): ?string
- {
- $decoder = static function (array $matches): string {
- if (1 === preg_match(self::REGEXP_CHARS_PREVENTS_DECODING, $matches[0])) {
- return strtoupper($matches[0]);
- }
- return rawurldecode($matches[0]);
- };
- return self::decode($component, $decoder);
- }
- /**
- * Decodes the component unreserved characters.
- */
- public static function decodeUnreservedCharacters(BackedEnum|Stringable|string|null $str): ?string
- {
- if ($str instanceof BackedEnum) {
- $str = $str->value;
- }
- if (null === $str) {
- return null;
- }
- return preg_replace_callback(
- self::REGEXP_UNRESERVED_CHARACTERS,
- static fn (array $matches): string => rawurldecode($matches[0]),
- (string) $str
- );
- }
- /**
- * Tell whether the path component is correctly encoded.
- */
- public static function isPathEncoded(BackedEnum|Stringable|string|null $encoded): bool
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/]+|'.self::REGEXP_PART_ENCODED.'/';
- if ($encoded instanceof BackedEnum) {
- $encoded = $encoded->value;
- }
- return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
- }
- /**
- * Encode Path.
- *
- * Generic delimiters ":", "@", and "/" MUST NOT be encoded
- */
- public static function encodePath(BackedEnum|Stringable|string|null $component): string
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/]+|'.self::REGEXP_PART_ENCODED.'/';
- return (string) self::encode($component, $pattern);
- }
- /**
- * Decodes the path component while preserving characters that should not be decoded in the context of a full valid URI.
- */
- public static function decodePath(BackedEnum|Stringable|string|null $path): ?string
- {
- $decoder = static function (array $matches): string {
- $encodedChar = strtoupper($matches[0]);
- return in_array($encodedChar, ['%2F', '%20', '%3F', '%23'], true) ? $encodedChar : rawurldecode($encodedChar);
- };
- return self::decode($path, $decoder);
- }
- /**
- * Normalize path component.
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizePath(BackedEnum|Stringable|string|null $component): ?string
- {
- return self::normalize(self::encodePath(self::decodePath($component)));
- }
- /**
- * Tell whether the query component is correctly encoded.
- */
- public static function isQueryEncoded(BackedEnum|Stringable|string|null $encoded): bool
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.'\/?%]+|'.self::REGEXP_PART_ENCODED.'/';
- if ($encoded instanceof BackedEnum) {
- $encoded = $encoded->value;
- }
- return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
- }
- /**
- * Decodes the query component while preserving characters that should not be decoded in the context of a full valid URI.
- */
- public static function decodeQuery(BackedEnum|Stringable|string|null $path): ?string
- {
- $decoder = static function (array $matches): string {
- $encodedChar = strtoupper($matches[0]);
- return in_array($encodedChar, ['%26', '%3D', '%20', '%23', '%3F'], true) ? $encodedChar : rawurldecode($encodedChar);
- };
- return self::decode($path, $decoder);
- }
- /**
- * Normalize the query component.
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizeQuery(BackedEnum|Stringable|string|null $query): ?string
- {
- return self::normalize(self::encodeQueryOrFragment(self::decodeQuery($query)));
- }
- /**
- * Tell whether the query component is correctly encoded.
- */
- public static function isFragmentEncoded(BackedEnum|Stringable|string|null $encoded): bool
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/?%]|'.self::REGEXP_PART_ENCODED.'/';
- if ($encoded instanceof BackedEnum) {
- $encoded = $encoded->value;
- }
- return null === $encoded || 1 !== preg_match($pattern, (string) $encoded);
- }
- /**
- * Decodes the fragment component while preserving characters that should not be decoded in the context of a full valid URI.
- */
- public static function decodeFragment(BackedEnum|Stringable|string|null $path): ?string
- {
- return self::decode($path, static fn (array $matches): string => '%20' === $matches[0] ? $matches[0] : rawurldecode($matches[0]));
- }
- /**
- * Normalize the fragment component.
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizeFragment(BackedEnum|Stringable|string|null $fragment): ?string
- {
- return self::normalize(self::encodeQueryOrFragment(self::decodeFragment($fragment)));
- }
- /**
- * Normalize the host component.
- *
- * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2
- *
- * The value returned MUST be percent-encoded, but MUST NOT double-encode
- * any characters. To determine what characters to encode, please refer to
- * RFC 3986.
- */
- public static function normalizeHost(BackedEnum|Stringable|string|null $host): ?string
- {
- if ($host instanceof BackedEnum) {
- $host = (string) $host->value;
- }
- if ($host instanceof Stringable) {
- $host = (string) $host;
- }
- if (null === $host || '' === $host || false !== filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
- return $host;
- }
- if (str_starts_with($host, '[')) {
- return IPv6Converter::normalize($host);
- }
- $host = strtolower($host);
- return (!str_contains($host, '%')) ? $host : preg_replace_callback(
- '/%[a-f0-9]{2}/',
- fn (array $matches) => 1 === preg_match('/%([0-7][0-9a-f])/', $matches[0]) ? rawurldecode($matches[0]) : strtoupper($matches[0]),
- $host
- );
- }
- /**
- * Encode Query or Fragment.
- *
- * Generic delimiters ":", "@", "?", and "/" MUST NOT be encoded
- */
- public static function encodeQueryOrFragment(BackedEnum|Stringable|string|null $component): ?string
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.self::REGEXP_PART_SUBDELIM.':@\/?]+|'.self::REGEXP_PART_ENCODED.'/';
- return self::encode($component, $pattern);
- }
- public static function encodeQueryKeyValue(mixed $component): ?string
- {
- static $pattern = '/[^'.self::REGEXP_PART_UNRESERVED.']+|'.self::REGEXP_PART_ENCODED.'/';
- $encoder = static fn (array $found): string => 1 === preg_match('/[^'.self::REGEXP_PART_UNRESERVED.']/', rawurldecode($found[0])) ? rawurlencode($found[0]) : $found[0];
- $filteredComponent = self::filterComponent($component);
- return match (true) {
- null === $filteredComponent => throw new SyntaxError(sprintf('A pair key/value must be a scalar value `%s` given.', gettype($component))),
- 1 === preg_match(self::REGEXP_CHARS_INVALID, $filteredComponent) => rawurlencode($filteredComponent),
- default => (string) preg_replace_callback($pattern, $encoder, $filteredComponent),
- };
- }
- private static function filterComponent(mixed $component): ?string
- {
- try {
- return StringCoercionMode::Native->coerce($component);
- } catch (Throwable $exception) {
- throw new SyntaxError(
- sprintf('The component must be a scalar value `%s` given.', gettype($component)),
- previous: $exception
- );
- }
- }
- /**
- * Encodes the URI component characters using a regular expression to find which characters need encoding.
- */
- private static function encode(BackedEnum|Stringable|string|int|bool|null $component, string $pattern): ?string
- {
- $component = self::filterComponent($component);
- if (null === $component || '' === $component) {
- return $component;
- }
- return (string) preg_replace_callback(
- $pattern,
- static fn (array $found): string => 1 === preg_match('/[^'.self::REGEXP_PART_UNRESERVED.']/', rawurldecode($found[0])) ? rawurlencode($found[0]) : $found[0],
- $component
- );
- }
- /**
- * Decodes the URI component characters using a closure.
- */
- private static function decode(BackedEnum|Stringable|string|int|null $component, Closure $decoder): ?string
- {
- $component = self::filterComponent($component);
- if (null === $component || '' === $component) {
- return $component;
- }
- if (1 === preg_match(self::REGEXP_CHARS_INVALID, $component)) {
- throw new SyntaxError('Invalid component string: '.$component.'.');
- }
- if (1 === preg_match(self::REGEXP_CHARS_ENCODED, $component)) {
- return (string) preg_replace_callback(self::REGEXP_CHARS_ENCODED, $decoder, $component);
- }
- return $component;
- }
- /**
- * Decodes the URI component without decoding the unreserved characters which are already encoded.
- *
- * DEPRECATION WARNING! This method will be removed in the next major point release.
- *
- * @deprecated Since version 7.6.0
- * @codeCoverageIgnore
- * @see Encoder::decodeNecessary()
- *
- * Create a new instance from the environment.
- */
- #[Deprecated(message:'use League\Uri\Encoder::decodeNecessary() instead', since:'league/uri:7.6.0')]
- public static function decodePartial(BackedEnum|Stringable|string|int|null $component): ?string
- {
- return self::decodeNecessary($component);
- }
- }
|