Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.75% |
79 / 80 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
CommonsHelperConfigParser | |
98.75% |
79 / 80 |
|
80.00% |
4 / 5 |
17 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getWikitextConversions | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
1 | |||
grepSection | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
getItemList | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parseTransferList | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
11 |
1 | <?php |
2 | |
3 | namespace FileImporter\Services\Wikitext; |
4 | |
5 | use FileImporter\Data\WikitextConversions; |
6 | use FileImporter\Exceptions\ImportException; |
7 | use FileImporter\Exceptions\LocalizedImportException; |
8 | |
/**
 * Parses the wikitext of a CommonsHelper configuration page into a WikitextConversions object.
 *
 * The wikitext must contain the level-2 sections "Categories", "Templates" and "Information"
 * together with the level-3 subsections requested in getWikitextConversions(). A missing
 * section aborts parsing with an exception.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

	/** @var string Only used as a message parameter when a section cannot be found */
	private $commonsHelperConfigUrl;
	/** Raw wikitext of the configuration page, possibly still containing HTML comments */
	private string $wikitext;

	/**
	 * @param string $commonsHelperConfigUrl
	 * @param string $wikitext
	 */
	public function __construct( $commonsHelperConfigUrl, string $wikitext ) {
		$this->commonsHelperConfigUrl = $commonsHelperConfigUrl;
		$this->wikitext = $wikitext;
	}

	/**
	 * Extracts all known sections from the wikitext and converts them into the option array
	 * the WikitextConversions constructor is called with.
	 *
	 * @throws ImportException e.g. when the provided wikitext is incomplete
	 */
	public function getWikitextConversions(): WikitextConversions {
		// HTML comments must be removed first
		$wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );

		// Scan for all level-2 headings first, relevant for properly prioritized error reporting
		$categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
		$templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
		$informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

		$badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
			'Categories/Bad' );
		$goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
			'Templates/Good' );
		$badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
			'Templates/Bad' );
		$obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
			'Templates/Remove' );
		$transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
			'Templates/Transfer' );
		$descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
			'Information/Description' );
		$licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
			'Information/Licensing' );

		return new WikitextConversions( [
			WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
			WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
			WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
			WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
				$this->parseTransferList( $transferTemplateSection ),
			WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
			// The "+" operator keeps the left-hand entry when a heading is listed in both
			// sections, i.e. "Description" wins over "Licensing".
			WikitextConversions::HEADING_REPLACEMENTS =>
				array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
				array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' )
		] );
	}

	/**
	 * Returns the text between the requested heading line and the next heading of the same or
	 * a higher level (or the end of the text). The heading line itself is not included.
	 *
	 * @param string $wikitext Text to search in
	 * @param string $header Heading to look for, e.g. "== Templates ==" or "=== Bad ==="
	 * @param string $sectionName Human-readable section path, only used in the error message
	 *
	 * @throws ImportException if the section could not be found
	 * @throws \InvalidArgumentException if $header does not contain "= "
	 */
	private function grepSection( string $wikitext, string $header, string $sectionName ): string {
		// The 0-based position of the last leading equal sign, plus one, is the heading level
		// (assuming $header starts with nothing but equal signs).
		$level = strpos( $header, '= ' );
		if ( $level === false ) {
			throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
		}
		$level++;

		// NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
		// "== Foo bar ==" is requested.
		$headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

		// Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
		// header. Stop at the same or a higher level (less equal signs), or at the end of the text.
		$regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

		if ( !preg_match( $regex, $wikitext, $matches ) ) {
			throw new LocalizedImportException( [
				'fileimporter-commonshelper-parsing-failed',
				$this->commonsHelperConfigUrl,
				$sectionName
			] );
		}

		return $matches[1];
	}

	/**
	 * Collects the trimmed text of all "* …" list items in the given wikitext.
	 *
	 * @return string[]
	 */
	private function getItemList( string $wikitext ): array {
		// Extract non-empty first-level list elements, exclude 2nd and deeper levels
		preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
		return $matches[1];
	}

	/**
	 * Parses definition-list style transfer rules of the form
	 * ";Source template:Target template|targetParam=sourceParam|…". The ":" part may also
	 * start on the line following the ";" part.
	 *
	 * When the same source template is listed more than once, the last rule wins.
	 *
	 * @return array[] Keyed by source template name, each element being
	 *  [ 'targetTemplate' => string, 'parameters' => array[] ]
	 */
	private function parseTransferList( string $wikitext ): array {
		$transfers = [];

		preg_match_all(
			'/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
			$wikitext,
			$transferMatches,
			PREG_SET_ORDER
		);
		foreach ( $transferMatches as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
			$transfers[$sourceTemplate] = [
				'targetTemplate' => $targetTemplate,
				'parameters' => $this->parseParameterTransfers( $paramPatternsString ),
			];
		}

		return $transfers;
	}

	/**
	 * Parses the "|"-separated parameter rules that follow a target template name.
	 *
	 * Each rule reads "targetParam=sourceParam". The target parameter may be prefixed with "+"
	 * (the right-hand side is stored as 'value' and 'addIfMissing' is set) or with "@"
	 * ('addLanguageTemplate' is set). Rules without "=" and rules whose right-hand side
	 * contains "%" are skipped.
	 *
	 * @param string $paramPatternsString Everything after the target template name, e.g.
	 *  "|+a=default|@b=x|b=y"
	 *
	 * @return array[] Keyed by target parameter name, each element containing the boolean
	 *  'addIfMissing' and 'addLanguageTemplate' flags plus 'value' and/or 'sourceParameters'
	 */
	private function parseParameterTransfers( string $paramPatternsString ): array {
		$parameterTransfers = [];

		$paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
		foreach ( $paramRules as $paramRule ) {
			$parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
			if ( count( $parts ) !== 2 ) {
				continue;
			}

			[ $targetParam, $sourceParam ] = $parts;

			// We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
			// show up as text values. Investigation and decision are documented in T198609.
			if ( str_contains( $sourceParam, '%' ) ) {
				continue;
			}

			// Split an optional "+" or "@" prefix off the parameter name. This pattern always
			// matches; a flag group is an empty string when its prefix is absent.
			preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
			[ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

			// Work on a plain copy and write it back below. A reference into the array would
			// outlive this loop and silently turn the array element into a reference slot.
			$opt = $parameterTransfers[$targetParam] ?? [];

			// A flag that was set by an earlier rule for the same parameter is never cleared
			if ( !isset( $opt['addIfMissing'] ) || $addIfMissing ) {
				$opt['addIfMissing'] = (bool)$addIfMissing;
			}
			if ( !isset( $opt['addLanguageTemplate'] ) || $addLanguageTemplate ) {
				$opt['addLanguageTemplate'] = (bool)$addLanguageTemplate;
			}

			if ( $addIfMissing ) {
				// It doesn't make sense to have multiple default values, only keep the last
				$opt['value'] = $sourceParam;
			} else {
				$opt['sourceParameters'][] = $sourceParam;
			}

			$parameterTransfers[$targetParam] = $opt;
		}

		return $parameterTransfers;
	}

}