Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.77% |
60 / 62 |
|
84.62% |
11 / 13 |
CRAP | |
0.00% |
0 / 1 |
WikitextConversions | |
96.77% |
60 / 62 |
|
84.62% |
11 / 13 |
28 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
9 | |||
swapHeading | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isTemplateGood | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
hasGoodTemplates | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isTemplateBad | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isCategoryBad | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isObsoleteTemplate | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
swapTemplate | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getTemplateParameters | |
92.86% |
13 / 14 |
|
0.00% |
0 / 1 |
5.01 | |||
getRequiredTemplateParameters | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
4.02 | |||
lowercasePageName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
normalizePageName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
removeNamespace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace FileImporter\Data; |
4 | |
/**
 * Class holding validation and cleanup rules for the file description wikitext. This class is not
 * aware of the source of these rules. They can be extracted from CommonsHelper2-compatible
 * configuration files or other, yet to be defined sources.
 *
 * Page names are matched case-insensitively, with underscores treated as spaces and surrounding
 * whitespace ignored.
 *
 * @license GPL-2.0-or-later
 */
class WikitextConversions {

	public const REQUIRED_TEMPLATES = 'requiredTemplates';
	public const FORBIDDEN_TEMPLATES = 'forbiddenTemplates';
	public const OBSOLETE_TEMPLATES = 'obsoleteTemplates';
	public const TEMPLATE_TRANSFORMATIONS = 'templateTransformations';
	public const FORBIDDEN_CATEGORIES = 'forbiddenCategories';
	public const HEADING_REPLACEMENTS = 'headingReplacements';

	/** @var string[] Map of source heading => target heading */
	private array $headingReplacements = [];
	/** @var true[] A string => true map for performance reasons */
	private array $goodTemplates = [];
	/** @var true[] A string => true map for performance reasons */
	private array $badTemplates = [];
	/** @var true[] A string => true map for performance reasons */
	private array $badCategories = [];
	/** @var true[] A string => true map for performance reasons */
	private array $obsoleteTemplates = [];
	/** @var array[] Transfer rules, keyed by the lowercased source template name */
	private array $transferTemplates = [];

	/**
	 * @param array[] $conversions A nested array structure in the following format:
	 * [
	 *     self::REQUIRED_TEMPLATES => string[] List of case-insensitive page names without
	 *         namespace prefix
	 *     self::FORBIDDEN_TEMPLATES => string[] List of case-insensitive page names without
	 *         namespace prefix
	 *     self::OBSOLETE_TEMPLATES => string[] List of case-insensitive page names without
	 *         namespace prefix
	 *     self::TEMPLATE_TRANSFORMATIONS => array[] List mapping source template names without
	 *         namespace prefix to replacement rules in the following format:
	 *         string $sourceTemplate => [
	 *             'targetTemplate' => string
	 *             'parameters' => [
	 *                 string $targetParameter => [
	 *                     'addIfMissing' => bool
	 *                     'addLanguageTemplate' => bool
	 *                     'sourceParameters' => string[]|string
	 *                 ],
	 *                 …
	 *             ]
	 *         ],
	 *         …
	 *     self::FORBIDDEN_CATEGORIES => string[] List of case-insensitive page names without
	 *         namespace prefix
	 *     self::HEADING_REPLACEMENTS => string[] Straight 1:1 mapping of source to target headings
	 *         without any `==` syntax
	 * ]
	 * Every top-level key is optional; a missing key is treated as an empty list.
	 *
	 * @throws \InvalidArgumentException if the input format misses expected fields. This should be
	 *  unreachable, as the only provider is the CommonsHelperConfigParser.
	 */
	public function __construct( array $conversions ) {
		$this->headingReplacements = $conversions[self::HEADING_REPLACEMENTS] ?? [];
		$this->goodTemplates = $this->buildLookupMap( $conversions[self::REQUIRED_TEMPLATES] ?? [] );
		$this->badTemplates = $this->buildLookupMap( $conversions[self::FORBIDDEN_TEMPLATES] ?? [] );
		$this->badCategories = $this->buildLookupMap( $conversions[self::FORBIDDEN_CATEGORIES] ?? [] );
		$this->obsoleteTemplates = $this->buildLookupMap( $conversions[self::OBSOLETE_TEMPLATES] ?? [] );

		foreach ( $conversions[self::TEMPLATE_TRANSFORMATIONS] ?? [] as $from => $to ) {
			// TODO: Accepts strings for backwards-compatibility; remove if not needed any more
			if ( is_string( $to ) ) {
				$to = [ 'targetTemplate' => $to, 'parameters' => [] ];
			}

			// Validate the normalized name against '' rather than using empty(): empty() treats
			// the string "0" as missing, and lets whitespace-only names slip through even though
			// they normalize to an empty page name.
			$target = $to['targetTemplate'] ?? null;
			$target = is_scalar( $target ) ? $this->normalizePageName( (string)$target ) : '';
			if ( $target === '' ) {
				throw new \InvalidArgumentException( "$from transfer rule misses targetTemplate" );
			}
			if ( !isset( $to['parameters'] ) ) {
				throw new \InvalidArgumentException( "$from transfer rule misses parameters" );
			}

			$to['targetTemplate'] = $target;
			$this->transferTemplates[$this->lowercasePageName( $from )] = $to;
		}
	}

	/**
	 * @param string $heading Heading text without any `==` syntax. Matched exactly as given.
	 *
	 * @return string The configured replacement, or the unchanged input when there is none
	 */
	public function swapHeading( string $heading ): string {
		return $this->headingReplacements[$heading] ?? $heading;
	}

	/**
	 * @param string $pageName Case-insensitive page name. The namespace is ignored. Titles like
	 *  "Template:A" and "User:A" are considered equal.
	 *
	 * @return bool True if the template is on the list of required templates
	 */
	public function isTemplateGood( string $pageName ): bool {
		return $this->isListed( $this->removeNamespace( $pageName ), $this->goodTemplates );
	}

	/**
	 * @return bool True if at least one required template is configured
	 */
	public function hasGoodTemplates(): bool {
		return $this->goodTemplates !== [];
	}

	/**
	 * @param string $pageName Case-insensitive page name. The namespace is ignored. Titles like
	 *  "Template:A" and "User:A" are considered equal.
	 *
	 * @return bool True if the template is on the list of forbidden templates
	 */
	public function isTemplateBad( string $pageName ): bool {
		return $this->isListed( $this->removeNamespace( $pageName ), $this->badTemplates );
	}

	/**
	 * @param string $pageName Case-insensitive page name. The namespace is ignored. Titles like
	 *  "Category:A" and "User:A" are considered equal.
	 *
	 * @return bool True if the category is on the list of forbidden categories
	 */
	public function isCategoryBad( string $pageName ): bool {
		return $this->isListed( $this->removeNamespace( $pageName ), $this->badCategories );
	}

	/**
	 * @param string $pageName Case-insensitive page name. Prefixes are significant.
	 *
	 * @return bool True if the template is on the list of obsolete templates
	 */
	public function isObsoleteTemplate( string $pageName ): bool {
		return $this->isListed( $pageName, $this->obsoleteTemplates );
	}

	/**
	 * @param string $templateName Case-insensitive page name. Prefixes are significant.
	 *
	 * @return string|null Normalized target template name, or null if there is no transfer rule
	 */
	public function swapTemplate( string $templateName ): ?string {
		$rule = $this->transferTemplates[$this->lowercasePageName( $templateName )] ?? null;
		return $rule['targetTemplate'] ?? null;
	}

	/**
	 * @param string $templateName Case-insensitive page name. Prefixes are significant.
	 *
	 * @return array[] Array mapping source to target parameters:
	 * [
	 *     string $source => [
	 *         'target' => string Target parameter name
	 *         'addLanguageTemplate' => bool Whether or not to add a template like {{de|…}}
	 *     ],
	 *     …
	 * ]
	 * Empty if there is no transfer rule for the template. Empty source parameter names are
	 * skipped. When the same source parameter is listed for several targets, the last one wins.
	 */
	public function getTemplateParameters( string $templateName ): array {
		$rule = $this->transferTemplates[$this->lowercasePageName( $templateName )] ?? null;
		if ( $rule === null ) {
			return [];
		}

		$replacements = [];
		foreach ( $rule['parameters'] as $targetParameter => $opt ) {
			$addLanguageTemplate = (bool)( $opt['addLanguageTemplate'] ?? false );

			// The cast allows a single source parameter to be given as a plain string
			foreach ( (array)( $opt['sourceParameters'] ?? [] ) as $sourceParameter ) {
				if ( $sourceParameter === '' ) {
					continue;
				}

				$replacements[$sourceParameter] = [
					'target' => $targetParameter,
					'addLanguageTemplate' => $addLanguageTemplate
				];
			}
		}
		return $replacements;
	}

	/**
	 * @param string $templateName Case-insensitive page name. Prefixes are significant.
	 *
	 * @return string[] Array mapping required target parameter names to static string values.
	 *  Only parameters flagged with 'addIfMissing' are included; the value defaults to an empty
	 *  string when the rule has no 'value'.
	 */
	public function getRequiredTemplateParameters( string $templateName ): array {
		$rule = $this->transferTemplates[$this->lowercasePageName( $templateName )] ?? null;
		if ( $rule === null ) {
			return [];
		}

		$additions = [];
		foreach ( $rule['parameters'] as $targetParameter => $opt ) {
			if ( $opt['addIfMissing'] ?? false ) {
				$additions[$targetParameter] = $opt['value'] ?? '';
			}
		}
		return $additions;
	}

	/**
	 * Turns a list of page names into a map keyed by the lowercased, normalized page name.
	 *
	 * @param string[] $pageNames
	 *
	 * @return true[]
	 */
	private function buildLookupMap( array $pageNames ): array {
		$map = [];
		foreach ( $pageNames as $pageName ) {
			$map[$this->lowercasePageName( $pageName )] = true;
		}
		return $map;
	}

	/**
	 * @param string $pageName Page name in any capitalization
	 * @param true[] $lookupMap Map as created by buildLookupMap()
	 */
	private function isListed( string $pageName, array $lookupMap ): bool {
		// All values are true, so isset() is equivalent to array_key_exists() here
		return isset( $lookupMap[$this->lowercasePageName( $pageName )] );
	}

	/**
	 * Normalizes a page name and converts it to lowercase, for case-insensitive comparisons.
	 */
	private function lowercasePageName( string $pageName ): string {
		return mb_convert_case( $this->normalizePageName( $pageName ), MB_CASE_LOWER );
	}

	/**
	 * Replaces underscores with spaces and trims surrounding whitespace.
	 */
	private function normalizePageName( string $pageName ): string {
		return trim( str_replace( '_', ' ', $pageName ) );
	}

	/**
	 * Strips everything up to and including the first colon. Titles without a colon are returned
	 * unchanged.
	 */
	private function removeNamespace( string $title ): string {
		$parts = explode( ':', $title, 2 );
		return $parts[1] ?? $parts[0];
	}

}