Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 9 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 1 |
LanguageJa | |
0.00% |
0 / 9 |
|
0.00% |
0 / 2 |
6 | |
0.00% |
0 / 1 |
segmentByWord | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
emphasize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | /** |
22 | * Japanese (日本語) |
23 | * |
24 | * @ingroup Languages |
25 | */ |
class LanguageJa extends Language {

	/**
	 * Build a pattern matching runs of same-script Japanese characters and
	 * hand it, with the input, to self::insertSpace().
	 *
	 * A "run" is one or more consecutive hiragana, one or more consecutive
	 * katakana, or one or more consecutive kanji. The character classes are
	 * written as raw UTF-8 byte sequences, so the pattern carries no /u
	 * modifier.
	 *
	 * Punctuation in U+3000-U+303F (bytes \xe3\x80[\x80-\xbf]) is not
	 * stripped here; doing so was considered but left undecided.
	 *
	 * NOTE(review): insertSpace() is not defined in this file; presumably it
	 * puts spaces around each match so the text splits into word-like
	 * segments -- confirm against the parent class.
	 *
	 * @param string $string Text to segment
	 * @return string Whatever self::insertSpace() returns for the input
	 */
	public function segmentByWord( $string ) {
		// Hiragana: U+3040-U+309F
		$hiragana = '(?:\xe3(?:\x81[\x80-\xbf]|\x82[\x80-\x9f]))';

		// Katakana: U+30A0-U+30FF
		$katakana = '(?:\xe3(?:\x82[\xa0-\xbf]|\x83[\x80-\xbf]))';

		// Kanji: U+3200-U+9999, i.e. bytes \xe3\x88\x80 through \xe9\xa6\x99,
		// split into four alternatives by lead byte.
		$kanji = '(?:\xe3[\x88-\xbf][\x80-\xbf]'
			. '|[\xe4-\xe8][\x80-\xbf]{2}'
			. '|\xe9[\x80-\xa5][\x80-\xbf]'
			. '|\xe9\xa6[\x80-\x99])';

		// One capture group; each alternative is a maximal run of one script.
		return self::insertSpace(
			$string,
			"/({$hiragana}+|{$katakana}+|{$kanji}+)/"
		);
	}

	/**
	 * Return the text as is, with no emphasis markup added.
	 *
	 * Italic type does not suit Japanese script, yet most browsers ignore
	 * that and render `<em>` in italics.
	 *
	 * @param string $text
	 * @return string The unmodified input
	 */
	public function emphasize( $text ) {
		return $text;
	}
}