Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
37.50% |
3 / 8 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
LanguageZh | |
37.50% |
3 / 8 |
|
50.00% |
2 / 4 |
7.91 | |
0.00% |
0 / 1 |
segmentForDiff | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
unsegmentForDiff | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSearchIndexVariant | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
convertForSearchResult | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | /** |
22 | * Chinese-specific code. |
23 | * |
24 | * This handles both Traditional and Simplified Chinese. |
25 | * Right now, we distinguish `zh_hans`, `zh_hant`, `zh_cn`, `zh_tw`, `zh_sg`, |
26 | * and `zh_hk`. |
27 | * |
28 | * @ingroup Languages |
29 | */ |
class LanguageZh extends LanguageZh_hans {
	/**
	 * Prepare text for a "word-level" diff by putting a formfeed (FF, 0x0C)
	 * in front of every non-ASCII character, so that each such character is
	 * treated as its own word. unsegmentForDiff() removes the markers again.
	 *
	 * FF is chosen because it is the least used character that PCRE's \s
	 * class matches.
	 *
	 * Any FF already present in the input is first replaced by U+FFFD
	 * (the replacement character), which is how it will then show up in
	 * the diff.
	 *
	 * @param string $text
	 * @return string
	 */
	public function segmentForDiff( $text ) {
		// Get rid of pre-existing formfeeds so they cannot be mistaken for markers.
		$withoutFormfeeds = str_replace( "\x0c", "\u{FFFD}", $text );

		// A byte in C0-FF followed by any run of 80-BF bytes is one multi-byte
		// sequence; prefix each of them with the FF marker.
		$segmented = preg_replace( '/[\xc0-\xff][\x80-\xbf]*/', "\x0c$0", $withoutFormfeeds );

		return $segmented;
	}

	/**
	 * Undo segmentForDiff() by deleting every formfeed (0x0C) from the text.
	 *
	 * @param string $text
	 * @return string
	 */
	public function unsegmentForDiff( $text ) {
		$unsegmented = str_replace( "\x0c", '', $text );

		return $unsegmented;
	}

	/**
	 * Variant code used for the search index; always 'zh-hans' for this class.
	 *
	 * @return string
	 */
	protected function getSearchIndexVariant() {
		$variant = 'zh-hans';

		return $variant;
	}

	/**
	 * Expand a list of search terms into the distinct terms obtained after
	 * converting them to all variants.
	 *
	 * The terms are joined with '|', passed through convertDoubleWidth()
	 * (defined outside this class body; presumably normalises double-width
	 * characters, TODO confirm), handed to the converter's
	 * autoConvertToAllVariants(), then split on '|' again and de-duplicated.
	 *
	 * Note that array_unique() keeps the original keys, so the returned array
	 * is not necessarily indexed consecutively.
	 *
	 * @param string[] $termsArray
	 * @return string[]
	 */
	public function convertForSearchResult( $termsArray ) {
		$joinedTerms = self::convertDoubleWidth( implode( '|', $termsArray ) );

		// One converted string per variant, each still '|'-separated.
		$perVariant = $this->getConverterInternal()->autoConvertToAllVariants( $joinedTerms );

		$allTerms = explode( '|', implode( '|', $perVariant ) );

		return array_unique( $allTerms );
	}
}