Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.75% |
79 / 80 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
CommonsHelperConfigParser | |
98.75% |
79 / 80 |
|
80.00% |
4 / 5 |
17 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getWikitextConversions | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
1 | |||
grepSection | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
getItemList | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parseTransferList | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
11 |
1 | <?php |
2 | |
3 | namespace FileImporter\Services\Wikitext; |
4 | |
5 | use FileImporter\Data\WikitextConversions; |
6 | use FileImporter\Exceptions\ImportException; |
7 | use FileImporter\Exceptions\LocalizedImportException; |
8 | |
/**
 * Turns the wikitext of a CommonsHelper configuration page into a WikitextConversions object.
 *
 * The wikitext is expected to contain the level-2 sections "Categories", "Templates" and
 * "Information", each holding the level-3 subsections requested in getWikitextConversions().
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

	/** @var string Passed along with the exception that is thrown when a section is missing */
	private $commonsHelperConfigUrl;
	/** @var string Unprocessed wikitext as handed to the constructor */
	private $wikitext;

	/**
	 * @param string $commonsHelperConfigUrl
	 * @param string $wikitext
	 */
	public function __construct( $commonsHelperConfigUrl, string $wikitext ) {
		$this->commonsHelperConfigUrl = $commonsHelperConfigUrl;
		$this->wikitext = $wikitext;
	}

	/**
	 * Extracts all known sections from the wikitext and wraps the parsed lists in a
	 * WikitextConversions object.
	 *
	 * @throws ImportException e.g. when the provided wikitext is incomplete
	 */
	public function getWikitextConversions(): WikitextConversions {
		// HTML comments must be removed first
		// NOTE(review): preg_replace() returns null on a PCRE error, which would surface as a
		// TypeError in grepSection() below. Confirm whether a dedicated ImportException is wanted.
		$wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );

		// Scan for all level-2 headings first, relevant for properly prioritized error reporting
		$categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
		$templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
		$informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

		// Only then descend into the level-3 subsections of each level-2 section
		$badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
			'Categories/Bad' );
		$goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
			'Templates/Good' );
		$badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
			'Templates/Bad' );
		$obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
			'Templates/Remove' );
		$transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
			'Templates/Transfer' );
		$descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
			'Information/Description' );
		$licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
			'Information/Licensing' );

		return new WikitextConversions( [
			WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
			WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
			WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
			WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
				$this->parseTransferList( $transferTemplateSection ),
			WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
			// The "+" operator keeps the first entry, i.e. a heading listed in both sections is
			// treated as a description heading
			WikitextConversions::HEADING_REPLACEMENTS =>
				array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
				array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' )
		] );
	}

	/**
	 * Returns the body of a single section, i.e. everything between the given header line and
	 * the next header of the same or a higher level.
	 *
	 * @param string $wikitext Text to search in
	 * @param string $header Header line to look for, e.g. "== Templates =="
	 * @param string $sectionName Passed along with the exception when the section is missing
	 *
	 * @return string Section body without the header line
	 * @throws \InvalidArgumentException if $header does not contain "= "
	 * @throws ImportException if the section could not be found
	 */
	private function grepSection( string $wikitext, string $header, string $sectionName ): string {
		// The 0-based position of the last leading equal sign, plus one, is the heading level
		$level = strpos( $header, '= ' );
		if ( $level === false ) {
			throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
		}
		$level++;

		// NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
		// "== Foo bar ==" is requested.
		$headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

		// Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
		// header. Stop at the same or a higher level (less equal signs), or at the end of the text.
		$regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

		if ( !preg_match( $regex, $wikitext, $matches ) ) {
			throw new LocalizedImportException( [
				'fileimporter-commonshelper-parsing-failed',
				$this->commonsHelperConfigUrl,
				$sectionName
			] );
		}

		return $matches[1];
	}

	/**
	 * Collects the trimmed text of all "*" list items found in the given wikitext.
	 *
	 * @param string $wikitext
	 *
	 * @return string[]
	 */
	private function getItemList( string $wikitext ): array {
		// Extract non-empty first-level list elements, exclude 2nd and deeper levels
		preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
		return $matches[1];
	}

	/**
	 * Parses a definition list of the form "; Source template : Target template | rule | …",
	 * where the ":" part may also start on the line after the ";" part.
	 *
	 * @param string $wikitext
	 *
	 * @return array[] Keyed by source template name, each element being an array with the
	 *  keys "targetTemplate" (string) and "parameters" (array, see parseParameterTransfers())
	 */
	private function parseTransferList( string $wikitext ): array {
		$transfers = [];

		preg_match_all(
			'/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
			$wikitext,
			$definitions,
			PREG_SET_ORDER
		);
		foreach ( $definitions as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
			// A later definition for the same source template replaces an earlier one
			$transfers[$sourceTemplate] = [
				'targetTemplate' => $targetTemplate,
				'parameters' => $this->parseParameterTransfers( $paramPatternsString ),
			];
		}

		return $transfers;
	}

	/**
	 * Parses the pipe-separated "target = source" rules that follow a template transfer.
	 *
	 * A "+" in front of the target parameter marks the right-hand side as a value to use
	 * ("addIfMissing"), a "@" sets the "addLanguageTemplate" flag. Rules without an equal sign
	 * as well as rules with a "%" on the right-hand side are skipped.
	 *
	 * @param string $paramPatternsString Remainder of a transfer line, starting at the first pipe
	 *
	 * @return array[] Keyed by target parameter name, each element being an array with the keys
	 *  "addIfMissing" (bool), "addLanguageTemplate" (bool), and optionally "value" (string)
	 *  and/or "sourceParameters" (string[])
	 */
	private function parseParameterTransfers( string $paramPatternsString ): array {
		$parameterTransfers = [];

		$paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
		foreach ( $paramRules as $paramRule ) {
			$parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
			if ( count( $parts ) !== 2 ) {
				continue;
			}

			[ $targetParam, $sourceParam ] = $parts;

			// We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
			// show up as text values. Investigation and decision are documented in T198609.
			if ( str_contains( $sourceParam, '%' ) ) {
				continue;
			}

			// Split an optional "+" or "@" prefix from the actual parameter name. The pattern
			// can not fail, all three groups are always present (possibly as empty strings).
			preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
			[ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

			// Work on a plain copy and write it back below, instead of keeping a reference
			// into the array alive across iterations
			$opt = $parameterTransfers[$targetParam] ?? [];

			// Both flags are sticky: once enabled by one rule, later rules for the same target
			// parameter can not disable them again
			if ( !isset( $opt['addIfMissing'] ) || $addIfMissing ) {
				$opt['addIfMissing'] = (bool)$addIfMissing;
			}
			if ( !isset( $opt['addLanguageTemplate'] ) || $addLanguageTemplate ) {
				$opt['addLanguageTemplate'] = (bool)$addLanguageTemplate;
			}

			if ( $addIfMissing ) {
				// It doesn't make sense to have multiple default values, only keep the last
				$opt['value'] = $sourceParam;
			} else {
				$opt['sourceParameters'][] = $sourceParam;
			}

			$parameterTransfers[$targetParam] = $opt;
		}

		return $parameterTransfers;
	}

}