Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.73% |
78 / 79 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
| CommonsHelperConfigParser | |
98.73% |
78 / 79 |
|
80.00% |
4 / 5 |
17 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getWikitextConversions | |
100.00% |
29 / 29 |
|
100.00% |
1 / 1 |
1 | |||
| grepSection | |
92.31% |
12 / 13 |
|
0.00% |
0 / 1 |
3.00 | |||
| getItemList | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| parseTransferList | |
100.00% |
34 / 34 |
|
100.00% |
1 / 1 |
11 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace FileImporter\Services\Wikitext; |
| 4 | |
| 5 | use FileImporter\Data\WikitextConversions; |
| 6 | use FileImporter\Exceptions\ImportException; |
| 7 | use FileImporter\Exceptions\LocalizedImportException; |
| 8 | |
/**
 * Parser for a CommonsHelper-style configuration given as wikitext. The wikitext is cut into
 * its "== Categories ==", "== Templates ==" and "== Information ==" sections (plus their
 * level-3 subsections), and the list items found in there are turned into a
 * WikitextConversions object.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

	/**
	 * @param string $commonsHelperConfigUrl Only used as a parameter of the error message that
	 *  is raised when a section cannot be found
	 * @param string $wikitext The configuration wikitext to parse (presumably the content of
	 *  the page behind $commonsHelperConfigUrl; verify against the caller)
	 */
	public function __construct(
		private readonly string $commonsHelperConfigUrl,
		private readonly string $wikitext,
	) {
	}

	/**
	 * Extracts all sections this parser knows about and bundles their content. All of the
	 * following sections must be present, even if they are empty:
	 * Categories/Bad, Templates/Good, Templates/Bad, Templates/Remove, Templates/Transfer,
	 * Information/Description, and Information/Licensing.
	 *
	 * @throws ImportException e.g. when the provided wikitext is incomplete
	 */
	public function getWikitextConversions(): WikitextConversions {
		// HTML comments must be removed first
		$wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );

		// Scan for all level-2 headings first, relevant for properly prioritized error reporting
		$categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
		$templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
		$informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

		// The order of these lookups decides which missing subsection is reported first
		$badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
			'Categories/Bad' );
		$goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
			'Templates/Good' );
		$badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
			'Templates/Bad' );
		$obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
			'Templates/Remove' );
		$transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
			'Templates/Transfer' );
		$descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
			'Information/Description' );
		$licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
			'Information/Licensing' );

		// Array union keeps the left-hand entry, i.e. a heading that is listed in both
		// subsections is treated as a description heading.
		$headingReplacements =
			array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
			array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' );

		return new WikitextConversions( [
			WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
			WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
			WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
			WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
				$this->parseTransferList( $transferTemplateSection ),
			WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
			WikitextConversions::HEADING_REPLACEMENTS => $headingReplacements,
		] );
	}

	/**
	 * Returns everything between the given heading and the next heading of the same or a
	 * higher level (or the end of the text). The heading line itself is not included.
	 *
	 * @param string $wikitext Text to search in
	 * @param string $header Heading to look for, including the equal signs, e.g. "== Foo =="
	 * @param string $sectionName Name of the section as reported in the error message
	 *
	 * @return string
	 * @throws ImportException if the section could not be found
	 * @throws \InvalidArgumentException if $header does not contain "= ", i.e. is not
	 *  formatted as expected. This is a programming error, not a problem with the wikitext.
	 */
	private function grepSection( string $wikitext, string $header, string $sectionName ): string {
		// The position of the last leading equal sign tells how many equal signs there are
		$level = strpos( $header, '= ' );
		if ( $level === false ) {
			throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
		}
		$level++;

		// NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
		// "== Foo bar ==" is requested.
		$headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

		// Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
		// header. Stop at the same or a higher level (less equal signs), or at the end of the text.
		$regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

		if ( !preg_match( $regex, $wikitext, $matches ) ) {
			throw new LocalizedImportException( [
				'fileimporter-commonshelper-parsing-failed',
				$this->commonsHelperConfigUrl,
				$sectionName
			] );
		}

		return $matches[1];
	}

	/**
	 * Collects the trimmed text of all "* …" list items in the given wikitext.
	 *
	 * @param string $wikitext
	 *
	 * @return string[]
	 */
	private function getItemList( string $wikitext ): array {
		// Extract non-empty first-level list elements, exclude 2nd and deeper levels
		preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
		return $matches[1];
	}

	/**
	 * Parses a definition list in the format "; source template : target template | rules",
	 * where the ":" may also start the line following the ";" line.
	 *
	 * @param string $wikitext
	 *
	 * @return array[] Keyed by source template name, each element being an array with the two
	 *  elements "targetTemplate" (string) and "parameters" (array, see
	 *  {@see parseParameterTransfers})
	 */
	private function parseTransferList( string $wikitext ): array {
		preg_match_all(
			'/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
			$wikitext,
			$definitions,
			PREG_SET_ORDER
		);

		$transfers = [];
		foreach ( $definitions as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
			// A later definition for the same source template replaces an earlier one
			$transfers[$sourceTemplate] = [
				'targetTemplate' => $targetTemplate,
				'parameters' => $this->parseParameterTransfers( $paramPatternsString ),
			];
		}

		return $transfers;
	}

	/**
	 * Parses the pipe-separated "target=source" rules that follow the target template name in
	 * a single transfer definition. A target can be prefixed with "+" (the right-hand side is
	 * a value to add if the parameter is missing) or "@" (sets the "addLanguageTemplate"
	 * flag). Multiple rules for the same target are merged.
	 *
	 * @param string $paramPatternsString E.g. "|description=Description|+source=Own work"
	 *
	 * @return array[] Keyed by target parameter name, each element being an array with the
	 *  elements "addIfMissing" (bool), "addLanguageTemplate" (bool), and either or both of
	 *  "value" (string) and "sourceParameters" (string[])
	 */
	private function parseParameterTransfers( string $paramPatternsString ): array {
		$parameterTransfers = [];

		$paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
		foreach ( $paramRules as $paramRule ) {
			// Rules without an equal sign are meaningless and skipped
			$parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
			if ( count( $parts ) !== 2 ) {
				continue;
			}

			[ $targetParam, $sourceParam ] = $parts;

			// We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
			// show up as text values. Investigation and decision are documented in T198609.
			if ( str_contains( $sourceParam, '%' ) ) {
				continue;
			}

			// Split the optional "+" or "@" prefix from the parameter name. The pattern matches
			// every string, and all three groups are always present in the result.
			preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
			[ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

			// Work on a copy and write it back below, instead of a by-reference alias that
			// would stay bound to the array element after the loop
			$opt = $parameterTransfers[$targetParam] ?? [];

			// Both flags are sticky: Once a rule turned them on, later rules can't turn them off
			if ( !isset( $opt['addIfMissing'] ) || $addIfMissing ) {
				$opt['addIfMissing'] = (bool)$addIfMissing;
			}
			if ( !isset( $opt['addLanguageTemplate'] ) || $addLanguageTemplate ) {
				$opt['addLanguageTemplate'] = (bool)$addLanguageTemplate;
			}

			if ( $addIfMissing ) {
				// It doesn't make sense to have multiple default values, only keep the last
				$opt['value'] = $sourceParam;
			} else {
				$opt['sourceParameters'][] = $sourceParam;
			}

			$parameterTransfers[$targetParam] = $opt;
		}

		return $parameterTransfers;
	}

}