| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507 |
- <?php
- declare(strict_types=1);
- namespace Doctrine\Inflector;
- use RuntimeException;
- use function chr;
- use function function_exists;
- use function lcfirst;
- use function mb_strtolower;
- use function ord;
- use function preg_match;
- use function preg_replace;
- use function sprintf;
- use function str_replace;
- use function strlen;
- use function strtolower;
- use function strtr;
- use function trim;
- use function ucwords;
/**
 * String inflection helpers: case/format conversion (tableize, classify,
 * camelize, capitalize), accent removal, URL slug generation, and
 * singular/plural conversion delegated to two injected WordInflector objects.
 */
class Inflector
{
    /**
     * UTF-8 accented/special characters and their ASCII replacements, used by
     * unaccent() through strtr() when the input looks like UTF-8.
     */
    private const ACCENTED_CHARACTERS = [
        'À' => 'A',
        'Á' => 'A',
        'Â' => 'A',
        'Ã' => 'A',
        'Ä' => 'Ae',
        'Æ' => 'Ae',
        'Å' => 'Aa',
        // NOTE(review): 'Æ' maps to 'Ae' and the single-byte branch of
        // unaccent() maps 0xE6 to 'ae', so 'a' looks like a typo. Left as-is
        // because changing it would alter slugs that were already generated.
        'æ' => 'a',
        'Ç' => 'C',
        'È' => 'E',
        'É' => 'E',
        'Ê' => 'E',
        'Ë' => 'E',
        'Ì' => 'I',
        'Í' => 'I',
        'Î' => 'I',
        'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O',
        'Ó' => 'O',
        'Ô' => 'O',
        'Õ' => 'O',
        'Ö' => 'Oe',
        'Ù' => 'U',
        'Ú' => 'U',
        'Û' => 'U',
        'Ü' => 'Ue',
        'Ý' => 'Y',
        'ß' => 'ss',
        'à' => 'a',
        'á' => 'a',
        'â' => 'a',
        'ã' => 'a',
        'ä' => 'ae',
        'å' => 'aa',
        'ç' => 'c',
        'è' => 'e',
        'é' => 'e',
        'ê' => 'e',
        'ë' => 'e',
        'ì' => 'i',
        'í' => 'i',
        'î' => 'i',
        'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o',
        'ó' => 'o',
        'ô' => 'o',
        'õ' => 'o',
        'ö' => 'oe',
        'ù' => 'u',
        'ú' => 'u',
        'û' => 'u',
        'ü' => 'ue',
        'ý' => 'y',
        'ÿ' => 'y',
        'Ā' => 'A',
        'ā' => 'a',
        'Ă' => 'A',
        'ă' => 'a',
        'Ą' => 'A',
        'ą' => 'a',
        'Ć' => 'C',
        'ć' => 'c',
        'Ĉ' => 'C',
        'ĉ' => 'c',
        'Ċ' => 'C',
        'ċ' => 'c',
        'Č' => 'C',
        'č' => 'c',
        'Ď' => 'D',
        'ď' => 'd',
        'Đ' => 'D',
        'đ' => 'd',
        'Ē' => 'E',
        'ē' => 'e',
        'Ĕ' => 'E',
        'ĕ' => 'e',
        'Ė' => 'E',
        'ė' => 'e',
        'Ę' => 'E',
        'ę' => 'e',
        'Ě' => 'E',
        'ě' => 'e',
        'Ĝ' => 'G',
        'ĝ' => 'g',
        'Ğ' => 'G',
        'ğ' => 'g',
        'Ġ' => 'G',
        'ġ' => 'g',
        'Ģ' => 'G',
        'ģ' => 'g',
        'Ĥ' => 'H',
        'ĥ' => 'h',
        'Ħ' => 'H',
        'ħ' => 'h',
        'Ĩ' => 'I',
        'ĩ' => 'i',
        'Ī' => 'I',
        'ī' => 'i',
        'Ĭ' => 'I',
        'ĭ' => 'i',
        'Į' => 'I',
        'į' => 'i',
        'İ' => 'I',
        'ı' => 'i',
        // U+0132 / U+0133 (IJ ligatures). Written as escapes so the keys cannot
        // silently degrade into the plain ASCII pairs 'IJ' / 'ij', which would
        // turn these two entries into no-ops.
        "\u{0132}" => 'IJ',
        "\u{0133}" => 'ij',
        'Ĵ' => 'J',
        'ĵ' => 'j',
        'Ķ' => 'K',
        'ķ' => 'k',
        'ĸ' => 'k',
        'Ĺ' => 'L',
        'ĺ' => 'l',
        'Ļ' => 'L',
        'ļ' => 'l',
        'Ľ' => 'L',
        'ľ' => 'l',
        'Ŀ' => 'L',
        'ŀ' => 'l',
        'Ł' => 'L',
        'ł' => 'l',
        'Ń' => 'N',
        'ń' => 'n',
        'Ņ' => 'N',
        'ņ' => 'n',
        'Ň' => 'N',
        'ň' => 'n',
        // NOTE(review): this key is two characters (U+02BC followed by 'n');
        // possibly meant to be the single character U+0149 - confirm.
        'ʼn' => 'N',
        // Upper-case Eng maps to 'N' and lower-case eng to 'n', following the
        // case convention of every other pair in this table.
        'Ŋ' => 'N',
        'ŋ' => 'n',
        'Ō' => 'O',
        'ō' => 'o',
        'Ŏ' => 'O',
        'ŏ' => 'o',
        'Ő' => 'O',
        'ő' => 'o',
        'Œ' => 'OE',
        'œ' => 'oe',
        'Ø' => 'O',
        'ø' => 'o',
        'Ŕ' => 'R',
        'ŕ' => 'r',
        'Ŗ' => 'R',
        'ŗ' => 'r',
        'Ř' => 'R',
        'ř' => 'r',
        'Ś' => 'S',
        'ś' => 's',
        'Ŝ' => 'S',
        'ŝ' => 's',
        'Ş' => 'S',
        'ş' => 's',
        'Š' => 'S',
        'š' => 's',
        'Ţ' => 'T',
        'ţ' => 't',
        'Ť' => 'T',
        'ť' => 't',
        'Ŧ' => 'T',
        'ŧ' => 't',
        'Ũ' => 'U',
        'ũ' => 'u',
        'Ū' => 'U',
        'ū' => 'u',
        'Ŭ' => 'U',
        'ŭ' => 'u',
        'Ů' => 'U',
        'ů' => 'u',
        'Ű' => 'U',
        'ű' => 'u',
        'Ų' => 'U',
        'ų' => 'u',
        'Ŵ' => 'W',
        'ŵ' => 'w',
        'Ŷ' => 'Y',
        'ŷ' => 'y',
        'Ÿ' => 'Y',
        'Ź' => 'Z',
        'ź' => 'z',
        'Ż' => 'Z',
        'ż' => 'z',
        'Ž' => 'Z',
        'ž' => 'z',
        'ſ' => 's',
        '€' => 'E',
        '£' => '',
    ];

    /** @var WordInflector */
    private $singularizer;

    /** @var WordInflector */
    private $pluralizer;

    /**
     * @param WordInflector $singularizer Object that singularize() delegates to.
     * @param WordInflector $pluralizer   Object that pluralize() delegates to.
     */
    public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
    {
        $this->singularizer = $singularizer;
        $this->pluralizer   = $pluralizer;
    }

    /**
     * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
     *
     * An underscore is inserted before every ASCII upper-case letter that is
     * preceded by a word character, then the whole string is lower-cased.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function tableize(string $word): string
    {
        $tableized = preg_replace('~(?<=\\w)([A-Z])~u', '_$1', $word);

        if ($tableized === null) {
            throw new RuntimeException(sprintf(
                'preg_replace returned null for value "%s"',
                $word
            ));
        }

        return mb_strtolower($tableized);
    }

    /**
     * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
     *
     * Spaces, underscores and hyphens act as word separators and are removed
     * from the result.
     */
    public function classify(string $word): string
    {
        return str_replace([' ', '_', '-'], '', ucwords($word, ' _-'));
    }

    /**
     * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
     */
    public function camelize(string $word): string
    {
        return lcfirst($this->classify($word));
    }

    /**
     * Uppercases words with configurable delimiters between words.
     *
     * Takes a string and capitalizes all of the words, like PHP's built-in
     * ucwords function. This extends that behavior, however, by allowing the
     * word delimiters to be configured, rather than only separating on
     * whitespace.
     *
     * Here is an example:
     * <code>
     * $string = 'top-o-the-morning to all_of_you!';
     * echo $inflector->capitalize($string);
     * // Top-O-The-Morning To All_of_you!
     *
     * echo $inflector->capitalize($string, '-_ ');
     * // Top-O-The-Morning To All_Of_You!
     * </code>
     *
     * @param string $string     The string to operate on.
     * @param string $delimiters A list of word separators.
     *
     * @return string The string with all delimiter-separated words capitalized.
     */
    public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-"): string
    {
        return ucwords($string, $delimiters);
    }

    /**
     * Checks if the given string seems like it has utf8 characters in it.
     *
     * This is a structural check only: every lead byte must be followed by the
     * number of 10bbbbbb continuation bytes its bit pattern announces. Lead
     * bytes announcing up to five continuation bytes are accepted. A pure
     * ASCII (or empty) string passes.
     *
     * @param string $string The string to check for utf8 characters in.
     *
     * @return bool False as soon as a byte does not fit the expected pattern
     *              or the string ends inside a multi-byte sequence.
     */
    public function seemsUtf8(string $string): bool
    {
        // Loop-invariant: computed once instead of on every loop condition.
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $byte = ord($string[$i]);

            if ($byte < 0x80) {
                continue; // 0bbbbbbb
            }

            if (($byte & 0xE0) === 0xC0) {
                $n = 1; // 110bbbbb
            } elseif (($byte & 0xF0) === 0xE0) {
                $n = 2; // 1110bbbb
            } elseif (($byte & 0xF8) === 0xF0) {
                $n = 3; // 11110bbb
            } elseif (($byte & 0xFC) === 0xF8) {
                $n = 4; // 111110bb
            } elseif (($byte & 0xFE) === 0xFC) {
                $n = 5; // 1111110b
            } else {
                return false; // Does not match any model
            }

            // The next $n bytes must all be continuation bytes (10bbbbbb).
            for ($j = 0; $j < $n; $j++) {
                if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Remove any illegal characters, accents, etc.
     *
     * Strings without any byte >= 0x80 are returned untouched. Strings that
     * pass seemsUtf8() are transliterated with ACCENTED_CHARACTERS; anything
     * else is treated as a single-byte encoding and mapped byte by byte.
     *
     * @param string $string String to unaccent
     *
     * @return string Unaccented string
     */
    public function unaccent(string $string): string
    {
        // preg_match() returns 0 when nothing matches and false only on error,
        // so the fast path must test for "not 1" to actually skip ASCII input.
        if (preg_match('/[\x80-\xff]/', $string) !== 1) {
            return $string;
        }

        if ($this->seemsUtf8($string)) {
            return strtr($string, self::ACCENTED_CHARACTERS);
        }

        // Assume ISO-8859-1 if not UTF-8. Each byte in $singleBytesIn is
        // replaced by the character at the same offset in $singleBytesOut.
        $singleBytesIn =
            chr(128)
            . chr(131)
            . chr(138)
            . chr(142)
            . chr(154)
            . chr(158)
            . chr(159)
            . chr(162)
            . chr(165)
            . chr(181)
            . chr(192)
            . chr(193)
            . chr(194)
            . chr(195)
            . chr(196)
            . chr(197)
            . chr(199)
            . chr(200)
            . chr(201)
            . chr(202)
            . chr(203)
            . chr(204)
            . chr(205)
            . chr(206)
            . chr(207)
            . chr(209)
            . chr(210)
            . chr(211)
            . chr(212)
            . chr(213)
            . chr(214)
            . chr(216)
            . chr(217)
            . chr(218)
            . chr(219)
            . chr(220)
            . chr(221)
            . chr(224)
            . chr(225)
            . chr(226)
            . chr(227)
            . chr(228)
            . chr(229)
            . chr(231)
            . chr(232)
            . chr(233)
            . chr(234)
            . chr(235)
            . chr(236)
            . chr(237)
            . chr(238)
            . chr(239)
            . chr(241)
            . chr(242)
            . chr(243)
            . chr(244)
            . chr(245)
            . chr(246)
            . chr(248)
            . chr(249)
            . chr(250)
            . chr(251)
            . chr(252)
            . chr(253)
            . chr(255);

        $singleBytesOut = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

        $string = strtr($string, $singleBytesIn, $singleBytesOut);

        // Bytes whose replacement is two characters long cannot go through the
        // one-to-one strtr() form above.
        $doubleCharsIn = [
            chr(140),
            chr(156),
            chr(198),
            chr(208),
            chr(222),
            chr(223),
            chr(230),
            chr(240),
            chr(254),
        ];

        $doubleCharsOut = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

        return str_replace($doubleCharsIn, $doubleCharsOut, $string);
    }

    /**
     * Convert any passed string to a url friendly string.
     * Converts 'My first blog post' to 'my-first-blog-post'
     *
     * @param string $string String to urlize.
     *
     * @return string Urlized string.
     *
     * @throws RuntimeException When preg_replace() fails (returns null).
     */
    public function urlize(string $string): string
    {
        // Remove all non url friendly characters with the unaccent function
        $unaccented = $this->unaccent($string);

        if (function_exists('mb_strtolower')) {
            $lowered = mb_strtolower($unaccented);
        } else {
            $lowered = strtolower($unaccented);
        }

        // Applied in order. NOTE(review): the two patterns that look for
        // [A-Z] run on an already lower-cased string, so they cannot match
        // here; they are kept unchanged to preserve the existing output.
        $replacements = [
            '/\W/'                   => ' ',
            '/([A-Z]+)([A-Z][a-z])/' => '\1_\2',
            '/([a-z\d])([A-Z])/'     => '\1_\2',
            '/[^A-Z^a-z^0-9^\/]+/'   => '-',
        ];

        $urlized = $lowered;

        foreach ($replacements as $pattern => $replacement) {
            $replaced = preg_replace($pattern, $replacement, $urlized);

            if ($replaced === null) {
                throw new RuntimeException(sprintf(
                    'preg_replace returned null for value "%s"',
                    $urlized
                ));
            }

            $urlized = $replaced;
        }

        return trim($urlized, '-');
    }

    /**
     * Returns a word in singular form.
     *
     * @param string $word The word in plural form.
     *
     * @return string The word in singular form.
     */
    public function singularize(string $word): string
    {
        return $this->singularizer->inflect($word);
    }

    /**
     * Returns a word in plural form.
     *
     * @param string $word The word in singular form.
     *
     * @return string The word in plural form.
     */
    public function pluralize(string $word): string
    {
        return $this->pluralizer->inflect($word);
    }
}
|