PseudoLocalizationTranslator.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation;
  11. use Symfony\Component\Translation\Exception\LogicException;
  12. use Symfony\Contracts\Translation\TranslatorInterface;
  /**
   * Decorates another translator and pseudo-localizes every message it returns:
   * characters are optionally replaced by accented look-alikes, the message is
   * optionally padded to simulate longer translations, and the result is
   * optionally wrapped in brackets.
   *
   * This translator should only be used in a development environment.
   */
  final class PseudoLocalizationTranslator implements TranslatorInterface, TranslatorBagInterface
  {
      private const EXPANSION_CHARACTER = '~';

      /**
       * HTML void elements: they have no content and no end tag, so none must be
       * written back when the parsed markup is re-serialized (a "</br>" end tag
       * is even interpreted by browsers as an additional "<br>").
       */
      private const VOID_HTML_ELEMENTS = [
          'area' => true,
          'base' => true,
          'br' => true,
          'col' => true,
          'embed' => true,
          'hr' => true,
          'img' => true,
          'input' => true,
          'link' => true,
          'meta' => true,
          'source' => true,
          'track' => true,
          'wbr' => true,
      ];

      private bool $accents;
      private float $expansionFactor;
      private bool $brackets;
      private bool $parseHTML;

      /**
       * @var string[]
       */
      private array $localizableHTMLAttributes;

      /**
       * Available options:
       *  * accents:
       *      type: boolean
       *      default: true
       *      description: replace ASCII characters of the translated string with accented versions or similar characters
       *      example: if true, "foo" => "ƒöö".
       *
       *  * expansion_factor:
       *      type: float
       *      default: 1
       *      validation: it must be greater than or equal to 1
       *      description: expand the translated string by the given factor by appending spaces and tildes - when the "brackets" option is enabled, the two brackets count toward the expanded length
       *      example: if 2, and with the "accents" and "brackets" options set to false, "foo" => "foo ~~"
       *
       *  * brackets:
       *      type: boolean
       *      default: true
       *      description: wrap the translated string with brackets
       *      example: if true, "foo" => "[foo]"
       *
       *  * parse_html:
       *      type: boolean
       *      default: false
       *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but it preserves them from alterations - it also makes it possible to compute the length of the visible text, which is needed to expand the string correctly when it contains HTML
       *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
       *
       *  * localizable_html_attributes:
       *      type: string[]
       *      default: []
       *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
       *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
       *
       * @param TranslatorInterface $translator the wrapped translator providing the real messages
       * @param array               $options    see the list above
       *
       * @throws \InvalidArgumentException when the "expansion_factor" option is lower than 1
       */
      public function __construct(
          private TranslatorInterface $translator,
          array $options = [],
      ) {
          // $translator is a promoted property: it is already assigned at this point.
          $this->accents = $options['accents'] ?? true;

          if (1.0 > ($this->expansionFactor = $options['expansion_factor'] ?? 1.0)) {
              throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
          }

          $this->brackets = $options['brackets'] ?? true;

          $this->parseHTML = $options['parse_html'] ?? false;
          if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
              // Neither accents nor expansion would be applied, so parsing the
              // HTML would be pure overhead.
              $this->parseHTML = false;
          }

          $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
      }

      /**
       * Translates the message with the wrapped translator, then applies the
       * configured accents, expansion and brackets to the result.
       */
      public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
      {
          $trans = '';
          $visibleText = '';

          foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
              if ($visible) {
                  // Only the visible text is used to compute the expansion length.
                  $visibleText .= $text;
              }

              if (!$localizable) {
                  $trans .= $text;

                  continue;
              }

              $this->addAccents($trans, $text);
          }

          $this->expand($trans, $visibleText);
          $this->addBrackets($trans);

          return $trans;
      }

      /**
       * Returns the locale of the wrapped translator.
       */
      public function getLocale(): string
      {
          return $this->translator->getLocale();
      }

      /**
       * Returns the catalogue of the wrapped translator for the given locale.
       *
       * @throws LogicException when the wrapped translator is not a TranslatorBagInterface
       */
      public function getCatalogue(?string $locale = null): MessageCatalogueInterface
      {
          return $this->getTranslatorBag(__METHOD__)->getCatalogue($locale);
      }

      /**
       * Returns all the catalogues of the wrapped translator.
       *
       * @throws LogicException when the wrapped translator is not a TranslatorBagInterface
       */
      public function getCatalogues(): array
      {
          return $this->getTranslatorBag(__METHOD__)->getCatalogues();
      }

      /**
       * Returns the wrapped translator once it is known to expose catalogues.
       *
       * @param string $method the public method being served, reported in the exception message
       *
       * @throws LogicException when the wrapped translator is not a TranslatorBagInterface
       */
      private function getTranslatorBag(string $method): TranslatorBagInterface
      {
          if (!$this->translator instanceof TranslatorBagInterface) {
              throw new LogicException(\sprintf('The "%s()" method cannot be called as the wrapped translator class "%s" does not implement the "%s".', $method, $this->translator::class, TranslatorBagInterface::class));
          }

          return $this->translator;
      }

      /**
       * Splits the translated message into parts.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}> a list of [visible, localizable, text] tuples
       */
      private function getParts(string $originalTrans): array
      {
          if (!$this->parseHTML) {
              return [[true, true, $originalTrans]];
          }

          // Characters from 0x80 upward are turned into numeric entities before
          // the string is handed to the HTML parser.
          $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

          // Parser errors are collected and discarded; the previous libxml
          // error-handling mode is restored afterwards.
          $useInternalErrors = libxml_use_internal_errors(true);

          $dom = new \DOMDocument();
          $dom->loadHTML('<trans>'.$html.'</trans>');

          libxml_clear_errors();
          libxml_use_internal_errors($useInternalErrors);

          // Look the wrapper up by name instead of assuming a fixed
          // doctype > html > body > trans layout of the parsed document.
          $root = $dom->getElementsByTagName('trans')->item(0);
          if (null === $root) {
              // The markup could not be parsed into the expected wrapper:
              // handle the message as plain text rather than failing.
              return [[true, true, $originalTrans]];
          }

          return $this->parseNode($root);
      }

      /**
       * Recursively serializes the children of a DOM node into parts.
       *
       * Tags, attribute names and attribute delimiters are neither visible nor
       * localizable. Attribute values are never visible and are localizable only
       * when the attribute is listed in the "localizable_html_attributes" option.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}> a list of [visible, localizable, text] tuples
       */
      private function parseNode(\DOMNode $node): array
      {
          $parts = [];

          foreach ($node->childNodes as $childNode) {
              if (!$childNode instanceof \DOMElement) {
                  // NOTE(review): every non-element node is handled as visible
                  // text and its value is emitted as returned by the DOM, i.e.
                  // presumably entity-decoded ("&lt;" comes back as "<") - confirm
                  // whether text should be re-escaped when accents are disabled.
                  $parts[] = [true, true, $childNode->nodeValue];

                  continue;
              }

              $parts[] = [false, false, '<'.$childNode->tagName];

              foreach ($childNode->attributes as $attribute) {
                  $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                  $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);

                  // The escaped value is split around HTML entities: with
                  // PREG_SPLIT_DELIM_CAPTURE the entities land on odd indexes and
                  // must stay untouched, the text in between lands on even indexes.
                  // NOTE(review): ";+" also swallows extra semicolons following an
                  // entity - possibly "(?:...;)+" was intended; left unchanged.
                  foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                      if ('' === $match) {
                          continue;
                      }

                      $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                  }

                  $parts[] = [false, false, '"'];
              }

              $parts[] = [false, false, '>'];

              // A void element without any child gets no end tag. If the parser
              // nevertheless attached children to it, fall through so that no
              // content is lost.
              if (isset(self::VOID_HTML_ELEMENTS[strtolower($childNode->tagName)]) && !$childNode->hasChildNodes()) {
                  continue;
              }

              $parts = array_merge($parts, $this->parseNode($childNode));

              $parts[] = [false, false, '</'.$childNode->tagName.'>'];
          }

          return $parts;
      }

      /**
       * Appends the text to the translation, replacing printable ASCII
       * characters with accented or similar-looking characters when the
       * "accents" option is enabled.
       */
      private function addAccents(string &$trans, string $text): void
      {
          if (!$this->accents) {
              $trans .= $text;

              return;
          }

          // NOTE(review): the replacement for the space is presumably a
          // look-alike Unicode space - confirm the literal survived copy/paste.
          $trans .= strtr($text, [
              ' ' => ' ',
              '!' => '¡',
              '"' => '″',
              '#' => '♯',
              '$' => '€',
              '%' => '‰',
              '&' => '⅋',
              '\'' => '´',
              '(' => '{',
              ')' => '}',
              '*' => '⁎',
              '+' => '⁺',
              ',' => '،',
              '-' => '‐',
              '.' => '·',
              '/' => '⁄',
              '0' => '⓪',
              '1' => '①',
              '2' => '②',
              '3' => '③',
              '4' => '④',
              '5' => '⑤',
              '6' => '⑥',
              '7' => '⑦',
              '8' => '⑧',
              '9' => '⑨',
              ':' => '∶',
              ';' => '⁏',
              '<' => '≤',
              '=' => '≂',
              '>' => '≥',
              '?' => '¿',
              '@' => '՞',
              'A' => 'Å',
              'B' => 'Ɓ',
              'C' => 'Ç',
              'D' => 'Ð',
              'E' => 'É',
              'F' => 'Ƒ',
              'G' => 'Ĝ',
              'H' => 'Ĥ',
              'I' => 'Î',
              'J' => 'Ĵ',
              'K' => 'Ķ',
              'L' => 'Ļ',
              'M' => 'Ṁ',
              'N' => 'Ñ',
              'O' => 'Ö',
              'P' => 'Þ',
              'Q' => 'Ǫ',
              'R' => 'Ŕ',
              'S' => 'Š',
              'T' => 'Ţ',
              'U' => 'Û',
              'V' => 'Ṽ',
              'W' => 'Ŵ',
              'X' => 'Ẋ',
              'Y' => 'Ý',
              'Z' => 'Ž',
              '[' => '⁅',
              '\\' => '∖',
              ']' => '⁆',
              '^' => '˄',
              '_' => '‿',
              '`' => '‵',
              'a' => 'å',
              'b' => 'ƀ',
              'c' => 'ç',
              'd' => 'ð',
              'e' => 'é',
              'f' => 'ƒ',
              'g' => 'ĝ',
              'h' => 'ĥ',
              'i' => 'î',
              'j' => 'ĵ',
              'k' => 'ķ',
              'l' => 'ļ',
              'm' => 'ɱ',
              'n' => 'ñ',
              'o' => 'ö',
              'p' => 'þ',
              'q' => 'ǫ',
              'r' => 'ŕ',
              's' => 'š',
              't' => 'ţ',
              'u' => 'û',
              'v' => 'ṽ',
              'w' => 'ŵ',
              'x' => 'ẋ',
              'y' => 'ý',
              'z' => 'ž',
              '{' => '(',
              '|' => '¦',
              '}' => ')',
              '~' => '˞',
          ]);
      }

      /**
       * Appends filler "words" made of the expansion character until the
       * visible length reaches the original visible length multiplied by the
       * expansion factor.
       *
       * The filler words mimic the lengths of the words of the visible text: a
       * length is drawn at random, weighted by how many visible words have it.
       */
      private function expand(string &$trans, string $visibleText): void
      {
          if (1.0 >= $this->expansionFactor) {
              return;
          }

          $visibleLength = $this->strlen($visibleText);
          $missingLength = (int) ceil($visibleLength * $this->expansionFactor) - $visibleLength;
          if ($this->brackets) {
              // The two brackets added afterwards count toward the expansion.
              $missingLength -= 2;
          }

          if (0 >= $missingLength) {
              return;
          }

          // Map of word length => number of visible words having that length,
          // restricted to lengths that still fit once the separating space is
          // taken into account.
          $words = [];
          $wordsCount = 0;

          foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
              $wordLength = $this->strlen($word);

              if ($wordLength >= $missingLength) {
                  continue;
              }

              $words[$wordLength] = ($words[$wordLength] ?? 0) + 1;
              ++$wordsCount;
          }

          if (!$words) {
              $trans .= $this->getTailFiller($missingLength);

              return;
          }

          arsort($words, \SORT_NUMERIC);

          $longestWordLength = max(array_keys($words));

          while (true) {
              // Weighted draw: each length is picked with a probability
              // proportional to its number of occurrences.
              $r = mt_rand(1, $wordsCount);

              foreach ($words as $length => $count) {
                  $r -= $count;
                  if ($r <= 0) {
                      break;
                  }
              }

              $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

              $missingLength -= $length + 1;

              if (0 === $missingLength) {
                  return;
              }

              // Drop the lengths that no longer fit in the remaining space.
              while ($longestWordLength >= $missingLength) {
                  $wordsCount -= $words[$longestWordLength];
                  unset($words[$longestWordLength]);

                  if (!$words) {
                      $trans .= $this->getTailFiller($missingLength);

                      return;
                  }

                  $longestWordLength = max(array_keys($words));
              }
          }
      }

      /**
       * Builds the last filler, used when no word length fits anymore: a single
       * expansion character, or a space followed by expansion characters, so
       * that exactly $missingLength characters are added.
       */
      private function getTailFiller(int $missingLength): string
      {
          if (1 === $missingLength) {
              return self::EXPANSION_CHARACTER;
          }

          return ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);
      }

      /**
       * Wraps the translation in square brackets when the "brackets" option is
       * enabled.
       */
      private function addBrackets(string &$trans): void
      {
          if (!$this->brackets) {
              return;
          }

          $trans = '['.$trans.']';
      }

      /**
       * Returns the length of the string in characters when its encoding can
       * be detected, in bytes otherwise.
       */
      private function strlen(string $s): int
      {
          $encoding = mb_detect_encoding($s, null, true);

          return false === $encoding ? \strlen($s) : mb_strlen($s, $encoding);
      }
  }