Inflector.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. <?php
  2. declare(strict_types=1);
  3. namespace Doctrine\Inflector;
  4. use RuntimeException;
  5. use function chr;
  6. use function function_exists;
  7. use function lcfirst;
  8. use function mb_strtolower;
  9. use function ord;
  10. use function preg_match;
  11. use function preg_replace;
  12. use function sprintf;
  13. use function str_replace;
  14. use function strlen;
  15. use function strtolower;
  16. use function strtr;
  17. use function trim;
  18. use function ucwords;
  /**
   * String inflection helpers: naming-convention conversions (table, class and
   * camel case), accent removal, URL slug generation, and singular/plural
   * conversion delegated to two injected WordInflector instances.
   */
  class Inflector
  {
      /**
       * UTF-8 accented characters mapped to their ASCII transliteration.
       *
       * Passed to strtr(), so every key is replaced in a single pass and the
       * replacement text is never scanned again.
       */
      private const ACCENTED_CHARACTERS = [
          'À' => 'A',
          'Á' => 'A',
          'Â' => 'A',
          'Ã' => 'A',
          'Ä' => 'Ae',
          'Æ' => 'Ae',
          'Å' => 'Aa',
          // NOTE(review): 'Æ' maps to 'Ae' and the single-byte table maps 0xE6 to 'ae';
          // 'a' is kept here because changing it would alter already generated slugs.
          'æ' => 'a',
          'Ç' => 'C',
          'È' => 'E',
          'É' => 'E',
          'Ê' => 'E',
          'Ë' => 'E',
          'Ì' => 'I',
          'Í' => 'I',
          'Î' => 'I',
          'Ï' => 'I',
          'Ñ' => 'N',
          'Ò' => 'O',
          'Ó' => 'O',
          'Ô' => 'O',
          'Õ' => 'O',
          'Ö' => 'Oe',
          'Ù' => 'U',
          'Ú' => 'U',
          'Û' => 'U',
          'Ü' => 'Ue',
          'Ý' => 'Y',
          'ß' => 'ss',
          'à' => 'a',
          'á' => 'a',
          'â' => 'a',
          'ã' => 'a',
          'ä' => 'ae',
          'å' => 'aa',
          'ç' => 'c',
          'è' => 'e',
          'é' => 'e',
          'ê' => 'e',
          'ë' => 'e',
          'ì' => 'i',
          'í' => 'i',
          'î' => 'i',
          'ï' => 'i',
          'ñ' => 'n',
          'ò' => 'o',
          'ó' => 'o',
          'ô' => 'o',
          'õ' => 'o',
          'ö' => 'oe',
          'ù' => 'u',
          'ú' => 'u',
          'û' => 'u',
          'ü' => 'ue',
          'ý' => 'y',
          'ÿ' => 'y',
          'Ā' => 'A',
          'ā' => 'a',
          'Ă' => 'A',
          'ă' => 'a',
          'Ą' => 'A',
          'ą' => 'a',
          'Ć' => 'C',
          'ć' => 'c',
          'Ĉ' => 'C',
          'ĉ' => 'c',
          'Ċ' => 'C',
          'ċ' => 'c',
          'Č' => 'C',
          'č' => 'c',
          'Ď' => 'D',
          'ď' => 'd',
          'Đ' => 'D',
          'đ' => 'd',
          'Ē' => 'E',
          'ē' => 'e',
          'Ĕ' => 'E',
          'ĕ' => 'e',
          'Ė' => 'E',
          'ė' => 'e',
          'Ę' => 'E',
          'ę' => 'e',
          'Ě' => 'E',
          'ě' => 'e',
          'Ĝ' => 'G',
          'ĝ' => 'g',
          'Ğ' => 'G',
          'ğ' => 'g',
          'Ġ' => 'G',
          'ġ' => 'g',
          'Ģ' => 'G',
          'ģ' => 'g',
          'Ĥ' => 'H',
          'ĥ' => 'h',
          'Ħ' => 'H',
          'ħ' => 'h',
          'Ĩ' => 'I',
          'ĩ' => 'i',
          'Ī' => 'I',
          'ī' => 'i',
          'Ĭ' => 'I',
          'ĭ' => 'i',
          'Į' => 'I',
          'į' => 'i',
          'İ' => 'I',
          'ı' => 'i',
          // The IJ ligatures are spelled as code point escapes so the keys cannot be
          // confused with the plain two-letter sequences (which would be no-op entries).
          "\u{0132}" => 'IJ',
          "\u{0133}" => 'ij',
          'Ĵ' => 'J',
          'ĵ' => 'j',
          'Ķ' => 'K',
          'ķ' => 'k',
          'ĸ' => 'k',
          'Ĺ' => 'L',
          'ĺ' => 'l',
          'Ļ' => 'L',
          'ļ' => 'l',
          'Ľ' => 'L',
          'ľ' => 'l',
          'Ŀ' => 'L',
          'ŀ' => 'l',
          'Ł' => 'L',
          'ł' => 'l',
          'Ń' => 'N',
          'ń' => 'n',
          'Ņ' => 'N',
          'ņ' => 'n',
          'Ň' => 'N',
          'ň' => 'n',
          // "n preceded by apostrophe" is a lower-case letter; both its precomposed
          // form (U+0149) and its decomposed spelling (U+02BC followed by "n") are covered.
          "\u{0149}" => 'n',
          "\u{02BC}n" => 'n',
          // Eng: upper case maps to upper case, lower case to lower case.
          'Ŋ' => 'N',
          'ŋ' => 'n',
          'Ō' => 'O',
          'ō' => 'o',
          'Ŏ' => 'O',
          'ŏ' => 'o',
          'Ő' => 'O',
          'ő' => 'o',
          'Œ' => 'OE',
          'œ' => 'oe',
          'Ø' => 'O',
          'ø' => 'o',
          'Ŕ' => 'R',
          'ŕ' => 'r',
          'Ŗ' => 'R',
          'ŗ' => 'r',
          'Ř' => 'R',
          'ř' => 'r',
          'Ś' => 'S',
          'ś' => 's',
          'Ŝ' => 'S',
          'ŝ' => 's',
          'Ş' => 'S',
          'ş' => 's',
          'Š' => 'S',
          'š' => 's',
          'Ţ' => 'T',
          'ţ' => 't',
          'Ť' => 'T',
          'ť' => 't',
          'Ŧ' => 'T',
          'ŧ' => 't',
          'Ũ' => 'U',
          'ũ' => 'u',
          'Ū' => 'U',
          'ū' => 'u',
          'Ŭ' => 'U',
          'ŭ' => 'u',
          'Ů' => 'U',
          'ů' => 'u',
          'Ű' => 'U',
          'ű' => 'u',
          'Ų' => 'U',
          'ų' => 'u',
          'Ŵ' => 'W',
          'ŵ' => 'w',
          'Ŷ' => 'Y',
          'ŷ' => 'y',
          'Ÿ' => 'Y',
          'Ź' => 'Z',
          'ź' => 'z',
          'Ż' => 'Z',
          'ż' => 'z',
          'Ž' => 'Z',
          'ž' => 'z',
          'ſ' => 's',
          '€' => 'E',
          '£' => '',
      ];

      /**
       * Single-byte characters (0x80-0xFF) mapped to their ASCII transliteration.
       *
       * Used when the input is not valid UTF-8 and is therefore assumed to be
       * ISO-8859-1 (the 0x80-0x9F entries follow the Windows-1252 layout).
       * All keys are one byte long and all values are ASCII, so a single strtr()
       * pass over this map cannot cascade.
       */
      private const SINGLE_BYTE_CHARACTERS = [
          "\x80" => 'E', "\x83" => 'f', "\x8A" => 'S', "\x8E" => 'Z', "\x9A" => 's',
          "\x9E" => 'z', "\x9F" => 'Y', "\xA2" => 'c', "\xA5" => 'Y', "\xB5" => 'u',
          "\xC0" => 'A', "\xC1" => 'A', "\xC2" => 'A', "\xC3" => 'A', "\xC4" => 'A', "\xC5" => 'A',
          "\xC7" => 'C',
          "\xC8" => 'E', "\xC9" => 'E', "\xCA" => 'E', "\xCB" => 'E',
          "\xCC" => 'I', "\xCD" => 'I', "\xCE" => 'I', "\xCF" => 'I',
          "\xD1" => 'N',
          "\xD2" => 'O', "\xD3" => 'O', "\xD4" => 'O', "\xD5" => 'O', "\xD6" => 'O', "\xD8" => 'O',
          "\xD9" => 'U', "\xDA" => 'U', "\xDB" => 'U', "\xDC" => 'U',
          "\xDD" => 'Y',
          "\xE0" => 'a', "\xE1" => 'a', "\xE2" => 'a', "\xE3" => 'a', "\xE4" => 'a', "\xE5" => 'a',
          "\xE7" => 'c',
          "\xE8" => 'e', "\xE9" => 'e', "\xEA" => 'e', "\xEB" => 'e',
          "\xEC" => 'i', "\xED" => 'i', "\xEE" => 'i', "\xEF" => 'i',
          "\xF1" => 'n',
          "\xF2" => 'o', "\xF3" => 'o', "\xF4" => 'o', "\xF5" => 'o', "\xF6" => 'o', "\xF8" => 'o',
          "\xF9" => 'u', "\xFA" => 'u', "\xFB" => 'u', "\xFC" => 'u',
          "\xFD" => 'y', "\xFF" => 'y',
          // Characters transliterated to two letters.
          "\x8C" => 'OE', "\x9C" => 'oe', "\xC6" => 'AE', "\xD0" => 'DH', "\xDE" => 'TH',
          "\xDF" => 'ss', "\xE6" => 'ae', "\xF0" => 'dh', "\xFE" => 'th',
      ];

      /** @var WordInflector */
      private $singularizer;

      /** @var WordInflector */
      private $pluralizer;

      /**
       * @param WordInflector $singularizer Inflector used by singularize().
       * @param WordInflector $pluralizer   Inflector used by pluralize().
       */
      public function __construct(WordInflector $singularizer, WordInflector $pluralizer)
      {
          $this->singularizer = $singularizer;
          $this->pluralizer   = $pluralizer;
      }

      /**
       * Converts a word into the format for a Doctrine table name. Converts 'ModelName' to 'model_name'.
       *
       * @throws RuntimeException When the regular expression engine fails (e.g. on invalid UTF-8 input).
       */
      public function tableize(string $word): string
      {
          // Insert an underscore before every ASCII capital that follows a word character.
          return mb_strtolower($this->replacePattern('~(?<=\\w)([A-Z])~u', '_$1', $word));
      }

      /**
       * Converts a word into the format for a Doctrine class name. Converts 'table_name' to 'TableName'.
       */
      public function classify(string $word): string
      {
          // Upper-case the first letter after each separator, then drop the separators.
          $capitalized = ucwords($word, ' _-');

          return str_replace([' ', '_', '-'], '', $capitalized);
      }

      /**
       * Camelizes a word. This uses the classify() method and turns the first character to lowercase.
       */
      public function camelize(string $word): string
      {
          return lcfirst($this->classify($word));
      }

      /**
       * Uppercases words with configurable delimiters between words.
       *
       * Takes a string and capitalizes all of the words, like PHP's built-in
       * ucwords function. This extends that behavior, however, by allowing the
       * word delimiters to be configured, rather than only separating on
       * whitespace.
       *
       * Here is an example:
       * <code>
       * <?php
       * $string = 'top-o-the-morning to all_of_you!';
       * echo $inflector->capitalize($string);
       * // Top-O-The-Morning To All_of_you!
       *
       * echo $inflector->capitalize($string, '-_ ');
       * // Top-O-The-Morning To All_Of_You!
       * ?>
       * </code>
       *
       * @param string $string     The string to operate on.
       * @param string $delimiters A list of word separators.
       *
       * @return string The string with all delimiter-separated words capitalized.
       */
      public function capitalize(string $string, string $delimiters = " \n\t\r\0\x0B-"): string
      {
          return ucwords($string, $delimiters);
      }

      /**
       * Checks if the given string seems like it has utf8 characters in it.
       *
       * Every byte sequence must be structurally well formed: a lead byte followed
       * by the number of 10bbbbbb continuation bytes it announces. Lead bytes for
       * 5- and 6-byte sequences are accepted, as are pure ASCII and empty strings.
       *
       * @param string $string The string to check for utf8 characters in.
       */
      public function seemsUtf8(string $string): bool
      {
          $length = strlen($string);

          for ($i = 0; $i < $length; $i++) {
              $byte = ord($string[$i]);

              if ($byte < 0x80) {
                  continue; // 0bbbbbbb
              }

              if (($byte & 0xE0) === 0xC0) {
                  $continuationBytes = 1; // 110bbbbb
              } elseif (($byte & 0xF0) === 0xE0) {
                  $continuationBytes = 2; // 1110bbbb
              } elseif (($byte & 0xF8) === 0xF0) {
                  $continuationBytes = 3; // 11110bbb
              } elseif (($byte & 0xFC) === 0xF8) {
                  $continuationBytes = 4; // 111110bb
              } elseif (($byte & 0xFE) === 0xFC) {
                  $continuationBytes = 5; // 1111110b
              } else {
                  return false; // Does not match any model
              }

              // The announced number of 10bbbbbb bytes must follow; running off the
              // end of the string means the sequence is truncated.
              for ($j = 0; $j < $continuationBytes; $j++) {
                  if (++$i === $length || (ord($string[$i]) & 0xC0) !== 0x80) {
                      return false;
                  }
              }
          }

          return true;
      }

      /**
       * Remove any illegal characters, accents, etc.
       *
       * Input that passes seemsUtf8() is transliterated with the UTF-8 table;
       * anything else is treated as a single-byte (ISO-8859-1) string.
       *
       * @param string $string String to unaccent
       *
       * @return string Unaccented string
       */
      public function unaccent(string $string): string
      {
          // preg_match() returns 0 when nothing matches and false on failure; in
          // both cases there is no high byte to transliterate.
          if (preg_match('/[\x80-\xff]/', $string) !== 1) {
              return $string;
          }

          if ($this->seemsUtf8($string)) {
              return strtr($string, self::ACCENTED_CHARACTERS);
          }

          // Assume ISO-8859-1 if not UTF-8
          return strtr($string, self::SINGLE_BYTE_CHARACTERS);
      }

      /**
       * Convert any passed string to a url friendly string.
       * Converts 'My first blog post' to 'my-first-blog-post'
       *
       * @param string $string String to urlize.
       *
       * @return string Urlized string.
       *
       * @throws RuntimeException When the regular expression engine fails.
       */
      public function urlize(string $string): string
      {
          // Remove all non url friendly characters with the unaccent function
          $unaccented = $this->unaccent($string);

          $urlized = function_exists('mb_strtolower')
              ? mb_strtolower($unaccented)
              : strtolower($unaccented);

          // Applied in order. The two patterns looking for [A-Z] cannot match the
          // lower-cased input; they are kept so the rule set stays unchanged.
          $replacements = [
              '/\W/'                    => ' ',
              '/([A-Z]+)([A-Z][a-z])/'  => '\1_\2',
              '/([a-z\d])([A-Z])/'      => '\1_\2',
              '/[^A-Z^a-z^0-9^\/]+/'    => '-',
          ];

          foreach ($replacements as $pattern => $replacement) {
              $urlized = $this->replacePattern($pattern, $replacement, $urlized);
          }

          return trim($urlized, '-');
      }

      /**
       * Returns a word in singular form.
       *
       * @param string $word The word in plural form.
       *
       * @return string The word in singular form.
       */
      public function singularize(string $word): string
      {
          return $this->singularizer->inflect($word);
      }

      /**
       * Returns a word in plural form.
       *
       * @param string $word The word in singular form.
       *
       * @return string The word in plural form.
       */
      public function pluralize(string $word): string
      {
          return $this->pluralizer->inflect($word);
      }

      /**
       * Runs preg_replace() and turns its null failure result into an exception.
       *
       * @param string $pattern     Regular expression, including delimiters and flags.
       * @param string $replacement Replacement text.
       * @param string $subject     String to search in.
       *
       * @return string The subject with all matches replaced.
       *
       * @throws RuntimeException When preg_replace() returns null.
       */
      private function replacePattern(string $pattern, string $replacement, string $subject): string
      {
          $result = preg_replace($pattern, $replacement, $subject);

          if ($result === null) {
              throw new RuntimeException(sprintf(
                  'preg_replace returned null for value "%s"',
                  $subject
              ));
          }

          return $result;
      }
  }