Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
42 / 42
100.00% covered (success)
100.00%
5 / 5
CRAP
100.00% covered (success)
100.00%
1 / 1
Validator
100.00% covered (success)
100.00%
42 / 42
100.00% covered (success)
100.00%
5 / 5
14
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 removeDiacritics
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 normalizeIPA
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
5
 stripIPA
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 validate
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
5
1<?php
2/**
3 * IPA Validator class
4 *
5 * @package   IPA Validator
6 * @author    TheresNoTime <sam@theresnotime.co.uk>
7 * @copyright 2022 TheresNoTime
8 * @license   https://opensource.org/licenses/GPL-3.0 GPL-2.0-or-later
9 */
10declare( strict_types=1 );
11namespace Wikimedia\IPAValidator;
12
13use Exception;
14
/**
 * IPA Validator class
 *
 * Checks a string against an allow-list of IPA characters, optionally
 * stripping delimiters and normalizing the string first. All work happens
 * in the constructor; the outcome is exposed through the public
 * $originalIPA, $normalizedIPA and $valid properties.
 *
 * @package  IPA Validator
 * @author   TheresNoTime <sam@theresnotime.co.uk>
 * @license  https://opensource.org/licenses/GPL-3.0 GPL-2.0-or-later
 */
class Validator {

    /** @var string Pattern the entire (processed) string must match to be valid */
    protected $ipaRegex = <<<EOD
/^[().a-z|æçðøħŋœǀ-ǃɐ-ɻɽɾʀ-ʄʈ-ʒʔʕʘʙʛ-ʝʟʡʢʰʲʷʼˀˈˌːˑ˞ˠˡˤ-˩̴̘̙̜̝̞̟̠̤̥̩̪̬̯̰̹̺̻̼̀́̂̃̄̆̈̊̋̌̏̽̚͜͡βθχ᷄᷅᷈‖‿ⁿⱱ]+$/ui
EOD;

    /** @var string Pattern matching the delimiters "/", "[" and "]" */
    protected $stripRegex = "/[\/\[\]]/ui";

    /** @var string Pattern matching code points U+0300 through U+036F */
    protected $diacriticsRegex = "/[\x{0300}-\x{036f}]/ui";

    /** @var bool Whether delimiters are removed before validation */
    protected $strip;

    /** @var bool Whether the common normalizations are applied before validation */
    protected $normalize;

    /** @var bool Whether the additional Google TTS normalizations are applied */
    protected $google;

    /** @var string The string exactly as passed to the constructor */
    public $originalIPA;

    /** @var string The string after any stripping/normalization has been applied */
    public $normalizedIPA;

    /** @var bool Whether $normalizedIPA matched the IPA pattern */
    public $valid;

    /**
     * Constructor
     *
     * Stores the options and immediately validates the string, populating
     * $normalizedIPA and $valid.
     *
     * @param string $ipa IPA to validate
     * @param bool $strip Remove delimiters
     * @param bool $normalize Normalize IPA
     * @param bool $google Normalize IPA for Google TTS
     * @throws Exception If $google is enabled while $normalize is disabled
     */
    public function __construct( string $ipa, bool $strip = true, bool $normalize = false, bool $google = false ) {
        $this->originalIPA = $ipa;
        $this->normalizedIPA = $ipa;
        $this->strip = $strip;
        $this->normalize = $normalize;
        $this->google = $google;
        $this->valid = $this->validate();
    }

    /**
     * Delete every match of a pattern from the working string
     *
     * preg_replace() returns null on failure (for example when the subject
     * is not well-formed UTF-8 and the pattern uses the "u" modifier). In
     * that case the working string is left untouched instead of being
     * overwritten with null, so the final preg_match() in validate() can
     * reject it.
     *
     * @param string $pattern PCRE pattern whose matches are removed
     * @return string The working string after the attempted removal
     */
    private function removeMatches( string $pattern ): string {
        $result = preg_replace( $pattern, '', $this->normalizedIPA );
        if ( $result !== null ) {
            $this->normalizedIPA = $result;
        }

        return $this->normalizedIPA;
    }

    /**
     * Remove diacritics from the IPA string
     *
     * @return string
     */
    private function removeDiacritics(): string {
        return $this->removeMatches( $this->diacriticsRegex );
    }

    /**
     * Normalize the IPA string
     *
     * Applies the common ASCII-to-IPA substitutions and, when Google
     * normalization is enabled, the extra substitutions followed by
     * diacritic removal. Delimiter stripping is not repeated here because
     * validate() has already performed it.
     *
     * @return string
     */
    private function normalizeIPA(): string {
        // Common normalizations; str_replace() applies the pairs in order.
        $commonMap = [
            "'" => 'ˈ',
            ':' => 'ː',
            ',' => 'ˌ',
        ];
        $this->normalizedIPA = str_replace(
            array_keys( $commonMap ),
            array_values( $commonMap ),
            $this->normalizedIPA
        );

        /*
         * I'm going to guess Google's normalization is weird
         * and different from what anyone else will want.
         */
        if ( $this->google ) {
            $googleMap = [
                '(' => '',
                ')' => '',
                // 207F
                'ⁿ' => 'n',
                // 02B0
                'ʰ' => 'h',
                // 026B
                'ɫ' => 'l',
                // 02E1
                'ˡ' => 'l',
                // 02B2
                'ʲ' => 'j',
            ];
            $this->normalizedIPA = str_replace(
                array_keys( $googleMap ),
                array_values( $googleMap ),
                $this->normalizedIPA
            );
            $this->removeDiacritics();
        }

        return $this->normalizedIPA;
    }

    /**
     * Strip delimiters from the IPA string
     *
     * @return string
     */
    private function stripIPA(): string {
        return $this->removeMatches( $this->stripRegex );
    }

    /**
     * Validate the IPA string
     *
     * @return bool True when the processed string matches the IPA pattern
     * @throws Exception If $google is enabled while $normalize is disabled
     */
    private function validate(): bool {
        // Reject the unsupported option combination before touching any state.
        if ( $this->google && !$this->normalize ) {
            throw new Exception( 'Google normalization being enabled also requires normalization to also be enabled' );
        }

        if ( $this->strip ) {
            $this->stripIPA();
        }

        if ( $this->normalize ) {
            $this->normalizeIPA();
        }

        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only an actual match counts as valid.
        return preg_match( $this->ipaRegex, $this->normalizedIPA ) === 1;
    }
}