436 lines
17KB

  1. <?php
  2. /**
  3. * @link http://www.yiiframework.com/
  4. * @copyright Copyright (c) 2008 Yii Software LLC
  5. * @license http://www.yiiframework.com/license/
  6. */
  7. namespace yii\i18n;
  8. use Yii;
  9. use yii\base\Component;
  10. use yii\base\NotSupportedException;
  11. /**
  12. * MessageFormatter allows formatting messages via [ICU message format](http://userguide.icu-project.org/formatparse/messages)
  13. *
  14. * This class enhances the message formatter class provided by the PHP intl extension.
  15. *
  16. * The following enhancements are provided:
  17. *
  18. * - It accepts named arguments and mixed numeric and named arguments.
  19. * - Issues no error when an insufficient number of arguments have been provided. Instead, the placeholders will not be
  20. * substituted.
  21. * - Fixes PHP 5.5 weird placeholder replacement in case no arguments are provided at all (https://bugs.php.net/bug.php?id=65920).
  22. * - Offers limited support for message formatting in case PHP intl extension is not installed.
  23. * However it is highly recommended that you install [PHP intl extension](http://php.net/manual/en/book.intl.php) if you want
  24. * to use MessageFormatter features.
  25. *
  26. * The fallback implementation only supports the following message formats:
 * - plural formatting for English ('one' and 'other' selectors)
  28. * - select format
  29. * - simple parameters
  30. * - integer number parameters
  31. *
  32. * The fallback implementation does NOT support the ['apostrophe-friendly' syntax](http://www.php.net/manual/en/messageformatter.formatmessage.php).
  33. * Also messages that are working with the fallback implementation are not necessarily compatible with the
  34. * PHP intl MessageFormatter so do not rely on the fallback if you are able to install intl extension somehow.
  35. *
  36. * @property string $errorCode Code of the last error. This property is read-only.
  37. * @property string $errorMessage Description of the last error. This property is read-only.
  38. *
  39. * @author Alexander Makarov <sam@rmcreative.ru>
  40. * @author Carsten Brandt <mail@cebe.cc>
  41. * @since 2.0
  42. */
  43. class MessageFormatter extends Component
  44. {
  /**
   * @var int code of the last error. It is reset to `0` at the start of format() and parse() and is
   * only changed afterwards when the operation fails.
   */
  private $_errorCode = 0;
  /**
   * @var string description of the last error. It is reset to an empty string at the start of format()
   * and parse() and is only changed afterwards when the operation fails.
   */
  private $_errorMessage = '';
  /**
   * Get the error code from the last operation.
   *
   * The stored value is `0` unless the last format() or parse() call failed. Failures detected by this
   * class while tokenizing a pattern are stored as `-1`; other failures store the code reported by the
   * intl extension (or by the exception it raised).
   *
   * @link http://php.net/manual/en/messageformatter.geterrorcode.php
   * @return int Code of the last error.
   */
  public function getErrorCode()
  {
      return $this->_errorCode;
  }
  /**
   * Get the error text from the last operation.
   *
   * The stored value is an empty string unless the last format() or parse() call failed.
   *
   * @link http://php.net/manual/en/messageformatter.geterrormessage.php
   * @return string Description of the last error.
   */
  public function getErrorMessage()
  {
      return $this->_errorMessage;
  }
  /**
   * Formats a message via [ICU message format](http://userguide.icu-project.org/formatparse/messages)
   *
   * It uses the PHP intl extension's [MessageFormatter](http://www.php.net/manual/en/class.messageformatter.php)
   * and works around some issues.
   * If PHP intl is not installed a fallback will be used that supports a subset of the ICU message format.
   *
   * The error code and message are reset on every call. When `$params` is an empty array the pattern is
   * returned as is, without being parsed.
   *
   * @param string $pattern The pattern string to insert parameters into.
   * @param array $params The array of name value pairs to insert into the format string.
   * @param string $language The locale to use for formatting locale-dependent parts
   * @return string|boolean The formatted pattern string or `FALSE` if an error occurred
   */
  public function format($pattern, $params, $language)
  {
      $this->_errorCode = 0;
      $this->_errorMessage = '';

      if ($params === []) {
          return $pattern;
      }

      if (!class_exists('MessageFormatter', false)) {
          return $this->fallbackFormat($pattern, $params, $language);
      }

      // replace named arguments (https://github.com/yiisoft/yii2/issues/9678)
      $newParams = [];
      $pattern = $this->replaceNamedArguments($pattern, $params, $newParams);
      if ($pattern === false) {
          // The pattern could not be tokenized. Without this check `false` would be coerced to an
          // empty pattern and the call would silently produce an empty string.
          $this->_errorCode = -1;
          $this->_errorMessage = 'Message pattern is invalid.';

          return false;
      }
      $params = $newParams;

      try {
          $formatter = new \MessageFormatter($language, $pattern);
          if ($formatter === null) {
              // formatter may be null in PHP 5.x
              $this->_errorCode = intl_get_error_code();
              $this->_errorMessage = 'Message pattern is invalid: ' . intl_get_error_message();

              return false;
          }
      } catch (\Exception $e) {
          // IntlException (a subclass of Exception) is thrown since PHP 7,
          // a plain Exception is thrown by HHVM
          $this->_errorCode = $e->getCode();
          $this->_errorMessage = 'Message pattern is invalid: ' . $e->getMessage();

          return false;
      }

      $result = $formatter->format($params);
      if ($result === false) {
          $this->_errorCode = $formatter->getErrorCode();
          $this->_errorMessage = $formatter->getErrorMessage();

          return false;
      }

      return $result;
  }
  /**
   * Parses an input string according to an [ICU message format](http://userguide.icu-project.org/formatparse/messages) pattern.
   *
   * It uses the PHP intl extension's [MessageFormatter::parse()](http://www.php.net/manual/en/messageformatter.parsemessage.php)
   * and adds support for named arguments.
   * Usage of this method requires PHP intl extension to be installed.
   *
   * Only top-level placeholders are renumbered; each distinct name is assigned the next free index in
   * order of first appearance, and the parsed values are returned keyed by the original names.
   *
   * @param string $pattern The pattern to use for parsing the message.
   * @param string $message The message to parse, conforming to the pattern.
   * @param string $language The locale to use for formatting locale-dependent parts
   * @return array|boolean An array containing items extracted, or `FALSE` on error.
   * @throws \yii\base\NotSupportedException when PHP intl extension is not installed.
   */
  public function parse($pattern, $message, $language)
  {
      $this->_errorCode = 0;
      $this->_errorMessage = '';

      if (!class_exists('MessageFormatter', false)) {
          throw new NotSupportedException('You have to install PHP intl extension to use this feature.');
      }

      // replace named arguments
      if (($tokens = self::tokenizePattern($pattern)) === false) {
          $this->_errorCode = -1;
          $this->_errorMessage = 'Message pattern is invalid.';

          return false;
      }
      $map = [];
      foreach ($tokens as $i => $token) {
          if (is_array($token)) {
              $param = trim($token[0]);
              if (!isset($map[$param])) {
                  $map[$param] = count($map);
              }
              $token[0] = $map[$param];
              $tokens[$i] = '{' . implode(',', $token) . '}';
          }
      }
      $pattern = implode('', $tokens);
      // index => name, used to translate the numeric result keys back to the original names
      $map = array_flip($map);

      try {
          $formatter = new \MessageFormatter($language, $pattern);
      } catch (\Exception $e) {
          // The constructor throws on an invalid pattern since PHP 7 (IntlException) and on HHVM;
          // report it through the error properties like format() does instead of letting it escape.
          $this->_errorCode = $e->getCode();
          $this->_errorMessage = 'Message pattern is invalid: ' . $e->getMessage();

          return false;
      }
      if ($formatter === null) {
          // formatter may be null in PHP 5.x
          $this->_errorCode = -1;
          $this->_errorMessage = 'Message pattern is invalid.';

          return false;
      }

      $result = $formatter->parse($message);
      if ($result === false) {
          $this->_errorCode = $formatter->getErrorCode();
          $this->_errorMessage = $formatter->getErrorMessage();

          return false;
      }

      $values = [];
      foreach ($result as $key => $value) {
          $values[$map[$key]] = $value;
      }

      return $values;
  }
  /**
   * Rewrites a pattern so that every placeholder with a supplied value uses a numeric index,
   * and wraps the braces of placeholders without a supplied value in apostrophes.
   *
   * The sub-messages of `plural` and `select` placeholders are processed recursively, sharing
   * the same `$resultingParams` and `$map` so indexes stay consistent across nesting levels.
   *
   * @param string $pattern The pattern string to replace things into.
   * @param array $givenParams The array of values to insert into the format string.
   * @param array $resultingParams Receives the values that are actually used, keyed by their numeric index.
   * @param array $map Receives the mapping of placeholder name to numeric index.
   * @return string|boolean The pattern string with placeholders replaced, or `false` when the pattern
   * cannot be tokenized or a `plural`/`select` placeholder is malformed.
   */
  private function replaceNamedArguments($pattern, $givenParams, &$resultingParams = [], &$map = [])
  {
      $tokens = self::tokenizePattern($pattern);
      if ($tokens === false) {
          return false;
      }

      foreach ($tokens as $index => $token) {
          if (is_array($token)) {
              $name = trim($token[0]);
              $isGiven = isset($givenParams[$name]);

              if ($isGiven) {
                  if (!isset($map[$name])) {
                      // first use of this name: assign the next index and
                      // pass only used values on to the format method
                      $position = count($map);
                      $map[$name] = $position;
                      $resultingParams[$position] = $givenParams[$name];
                  }
                  $token[0] = $map[$name];
              }

              // braces of unused placeholders are quoted so they are emitted literally
              $quote = $isGiven ? '' : "'";
              $openBrace = $quote . '{' . $quote;
              $closeBrace = $quote . '}' . $quote;

              $type = isset($token[1]) ? trim($token[1]) : 'none';
              if ($type === 'plural' || $type === 'select') {
                  if (!isset($token[2])) {
                      return false;
                  }
                  $parts = self::tokenizePattern($token[2]);
                  if ($parts === false) {
                      return false;
                  }

                  // parts alternate between a selector (string) and its message (array)
                  $total = count($parts);
                  for ($k = 0; $k + 1 < $total; $k += 2) {
                      $messageIndex = $k + 1;
                      if (is_array($parts[$k]) || !is_array($parts[$messageIndex])) {
                          return false;
                      }
                      $nested = $this->replaceNamedArguments(
                          implode(',', $parts[$messageIndex]),
                          $givenParams,
                          $resultingParams,
                          $map
                      );
                      $parts[$messageIndex] = $openBrace . $nested . $closeBrace;
                  }
                  $token[2] = implode('', $parts);
              }

              $tokens[$index] = $openBrace . implode(',', $token) . $closeBrace;
          }
      }

      return implode('', $tokens);
  }
  /**
   * Formats a message without the intl extension, as a replacement for MessageFormatter::formatMessage.
   *
   * The pattern is tokenized and every placeholder token is substituted by the result of parseToken().
   * On failure the error code is set to `-1` and the error message to a generic description.
   *
   * @param string $pattern The pattern string to insert things into.
   * @param array $args The array of values to insert into the format string
   * @param string $locale The locale to use for formatting locale-dependent parts
   * @return string|boolean The formatted pattern string or `FALSE` if an error occurred
   */
  protected function fallbackFormat($pattern, $args, $locale)
  {
      $tokens = self::tokenizePattern($pattern);
      $valid = $tokens !== false;

      if ($valid) {
          foreach ($tokens as $index => $token) {
              if (!is_array($token)) {
                  // plain text between placeholders is kept as is
                  continue;
              }
              $formatted = $this->parseToken($token, $args, $locale);
              if ($formatted === false) {
                  $valid = false;
                  break;
              }
              $tokens[$index] = $formatted;
          }
      }

      if (!$valid) {
          $this->_errorCode = -1;
          $this->_errorMessage = 'Message pattern is invalid.';

          return false;
      }

      return implode('', $tokens);
  }
  /**
   * Tokenizes a pattern by separating normal text from replaceable patterns.
   *
   * The result alternates between plain text (strings) and placeholders (arrays). A placeholder is the
   * text between a top-level `{` and its matching `}`, split on `,` into at most three parts.
   * A pattern without any `{` is returned as a single string token.
   *
   * @param string $pattern pattern to tokenize
   * @return array|boolean array of tokens or false on failure (unbalanced braces)
   */
  private static function tokenizePattern($pattern)
  {
      $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
      $depth = 1;
      if (($start = $pos = mb_strpos($pattern, '{', 0, $charset)) === false) {
          return [$pattern];
      }
      $tokens = [mb_substr($pattern, 0, $pos, $charset)];
      while (true) {
          $open = mb_strpos($pattern, '{', $pos + 1, $charset);
          $close = mb_strpos($pattern, '}', $pos + 1, $charset);
          if ($open === false && $close === false) {
              break;
          }
          if ($close === false) {
              // There is a '{' ahead but no '}' after the current position, so the braces can never
              // balance. Bail out here: continuing would assign `false` to $pos and restart the scan
              // from the beginning of the pattern over and over again.
              return false;
          }
          if ($open === false) {
              // no further '{': treat the end of the pattern as the next opening position
              $open = mb_strlen($pattern, $charset);
          }
          if ($close > $open) {
              $depth++;
              $pos = $open;
          } else {
              $depth--;
              $pos = $close;
          }
          if ($depth === 0) {
              // a top-level placeholder has been closed: store it, followed by the text up to the next '{'
              $tokens[] = explode(',', mb_substr($pattern, $start + 1, $pos - $start - 1, $charset), 3);
              $start = $pos + 1;
              $tokens[] = mb_substr($pattern, $start, $open - $start, $charset);
              $start = $open;
          }
      }
      if ($depth !== 0) {
          return false;
      }

      return $tokens;
  }
  /**
   * Formats a single placeholder token for the fallback implementation.
   *
   * A placeholder whose parameter is not set in `$args` is returned unchanged (re-wrapped in braces).
   * Supported types are plain substitution, `number` (no style or `integer`), `select` and `plural`
   * (`=N` explicit values, `one`, `other` and an optional `offset:`).
   *
   * @param array $token the token to parse, as produced by tokenizePattern(): parameter name,
   * optional type and optional style
   * @param array $args arguments to replace
   * @param string $locale the locale; only passed on when formatting nested messages
   * @return boolean|string parsed token or false on failure
   * @throws \yii\base\NotSupportedException when unsupported formatting is used.
   */
  private function parseToken($token, $args, $locale)
  {
      // parsing pattern based on ICU grammar:
      // http://icu-project.org/apiref/icu4c/classMessageFormat.html#details
      $charset = Yii::$app ? Yii::$app->charset : 'UTF-8';
      $param = trim($token[0]);
      if (!isset($args[$param])) {
          // no value supplied: leave the placeholder in the output
          return '{' . implode(',', $token) . '}';
      }
      $arg = $args[$param];

      $type = isset($token[1]) ? trim($token[1]) : 'none';
      switch ($type) {
          case 'date':
          case 'time':
          case 'spellout':
          case 'ordinal':
          case 'duration':
          case 'choice':
          case 'selectordinal':
              throw new NotSupportedException("Message format '$type' is not supported. You have to install PHP intl extension to use this feature.");
          case 'number':
              $format = isset($token[2]) ? trim($token[2]) : null;
              if (is_numeric($arg) && ($format === null || $format === 'integer')) {
                  $value = (string) $arg;
                  if ($format === null && ($pos = strpos($value, '.')) !== false) {
                      // Keep decimals of unknown length: group only the digits before the decimal
                      // point and re-append the original fraction. Formatting the whole value would
                      // round it first (number_format(1.7) is "2"), producing "2.7" for 1.7.
                      $integerPart = number_format(abs((float) substr($value, 0, $pos)));
                      // the sign is restored separately so that values like -0.5 keep it
                      $sign = strncmp(ltrim($value), '-', 1) === 0 ? '-' : '';

                      return $sign . $integerPart . '.' . substr($value, $pos + 1);
                  }

                  return number_format($arg);
              }
              throw new NotSupportedException("Message format 'number' is only supported for integer values. You have to install PHP intl extension to use this feature.");
          case 'none':
              return $arg;
          case 'select':
              /* http://icu-project.org/apiref/icu4c/classicu_1_1SelectFormat.html
              selectStyle = (selector '{' message '}')+
              */
              if (!isset($token[2])) {
                  return false;
              }
              $select = self::tokenizePattern($token[2]);
              if ($select === false) {
                  // unbalanced braces inside the select style
                  return false;
              }
              $c = count($select);
              $message = false;
              // tokens alternate between a selector (string) and its message (array)
              for ($i = 0; $i + 1 < $c; $i++) {
                  if (is_array($select[$i]) || !is_array($select[$i + 1])) {
                      return false;
                  }
                  $selector = trim($select[$i++]);
                  // 'other' is only a default; a selector equal to the argument always wins
                  if ($message === false && $selector === 'other' || $selector == $arg) {
                      $message = implode(',', $select[$i]);
                  }
              }
              if ($message !== false) {
                  return $this->fallbackFormat($message, $args, $locale);
              }
              break;
          case 'plural':
              /* http://icu-project.org/apiref/icu4c/classicu_1_1PluralFormat.html
              pluralStyle = [offsetValue] (selector '{' message '}')+
              offsetValue = "offset:" number
              selector = explicitValue | keyword
              explicitValue = '=' number  // adjacent, no white space in between
              keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
              message: see MessageFormat
              */
              if (!isset($token[2])) {
                  return false;
              }
              $plural = self::tokenizePattern($token[2]);
              if ($plural === false) {
                  // unbalanced braces inside the plural style
                  return false;
              }
              $c = count($plural);
              $message = false;
              $offset = 0;
              // tokens alternate between a selector (string) and its message (array)
              for ($i = 0; $i + 1 < $c; $i++) {
                  if (is_array($plural[$i]) || !is_array($plural[$i + 1])) {
                      return false;
                  }
                  $selector = trim($plural[$i++]);

                  // an "offset:N" prefix may only precede the first selector
                  if ($i == 1 && strncmp($selector, 'offset:', 7) === 0) {
                      $offset = (int) trim(mb_substr($selector, 7, ($pos = mb_strpos(str_replace(["\n", "\r", "\t"], ' ', $selector), ' ', 7, $charset)) - 7, $charset));
                      $selector = trim(mb_substr($selector, $pos + 1, mb_strlen($selector, $charset), $charset));
                  }
                  // strncmp() is used instead of $selector[0] because the selector may be empty;
                  // the explicit value is compared numerically so that numeric strings and floats
                  // match "=N" the same way they match the 'one' keyword
                  if ($message === false && $selector === 'other' ||
                      strncmp($selector, '=', 1) === 0 && is_numeric($arg) && (int) mb_substr($selector, 1, mb_strlen($selector, $charset), $charset) == $arg ||
                      $selector === 'one' && $arg - $offset == 1
                  ) {
                      // '#' in the chosen message stands for the argument minus the offset
                      $message = implode(',', str_replace('#', $arg - $offset, $plural[$i]));
                  }
              }
              if ($message !== false) {
                  return $this->fallbackFormat($message, $args, $locale);
              }
              break;
      }

      return false;
  }
  402. }