Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.73% covered (success)
98.73%
78 / 79
80.00% covered (warning)
80.00%
4 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigParser
98.73% covered (success)
98.73%
78 / 79
80.00% covered (warning)
80.00%
4 / 5
17
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getWikitextConversions
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
1
 grepSection
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
3.00
 getItemList
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parseTransferList
100.00% covered (success)
100.00%
34 / 34
100.00% covered (success)
100.00%
1 / 1
11
1<?php
2
3namespace FileImporter\Services\Wikitext;
4
5use FileImporter\Data\WikitextConversions;
6use FileImporter\Exceptions\ImportException;
7use FileImporter\Exceptions\LocalizedImportException;
8
/**
 * Parses the wikitext of a CommonsHelper configuration page and turns the lists found in its
 * "Categories", "Templates" and "Information" sections into a WikitextConversions object.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

    /**
     * @param string $commonsHelperConfigUrl Only used as a message parameter when reporting a
     *  section that could not be found
     * @param string $wikitext Raw wikitext of the configuration page
     */
    public function __construct(
        private readonly string $commonsHelperConfigUrl,
        private readonly string $wikitext,
    ) {
    }

    /**
     * Extracts all required sections from the configuration wikitext and assembles the
     * conversion rules from the lists they contain.
     *
     * @throws ImportException e.g. when the provided wikitext is incomplete
     */
    public function getWikitextConversions(): WikitextConversions {
        // HTML comments must be removed first
        $wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );

        // Scan for all level-2 headings first, relevant for properly prioritized error reporting
        $categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
        $templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
        $informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

        // Level-3 headings are only searched within their parent level-2 section
        $badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
            'Categories/Bad' );
        $goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
            'Templates/Good' );
        $badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
            'Templates/Bad' );
        $obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
            'Templates/Remove' );
        $transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
            'Templates/Transfer' );
        $descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
            'Information/Description' );
        $licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
            'Information/Licensing' );

        return new WikitextConversions( [
            WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
            WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
            WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
            WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
                $this->parseTransferList( $transferTemplateSection ),
            WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
            // The array union keeps the "Description" mapping when a heading is listed in both
            WikitextConversions::HEADING_REPLACEMENTS =>
                array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
                array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' )
        ] );
    }

    /**
     * Returns the body of the first section introduced by the given heading, without the
     * heading line itself.
     *
     * @param string $wikitext Text to search in
     * @param string $header Heading to look for, e.g. "== Templates ==" or "=== Good ==="
     * @param string $sectionName Human-readable section path for the error message
     *
     * @throws \InvalidArgumentException if $header contains no "= " sequence
     * @throws ImportException if the section could not be found
     */
    private function grepSection( string $wikitext, string $header, string $sectionName ): string {
        // The offset of the last leading "=" is the heading level minus one
        $level = strpos( $header, '= ' );
        if ( $level === false ) {
            throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
        }
        $level++;

        // NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
        // "== Foo bar ==" is requested.
        $headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

        // Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
        // header. Stop at the same or a higher level (fewer equal signs), or at the end of the
        // text.
        $regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

        if ( !preg_match( $regex, $wikitext, $matches ) ) {
            throw new LocalizedImportException( [
                'fileimporter-commonshelper-parsing-failed',
                $this->commonsHelperConfigUrl,
                $sectionName
            ] );
        }

        return $matches[1];
    }

    /**
     * Collects the trimmed text of all "* …" list items in the given wikitext.
     *
     * @return string[]
     */
    private function getItemList( string $wikitext ): array {
        // Extract non-empty first-level list elements, exclude 2nd and deeper levels
        preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
        return $matches[1];
    }

    /**
     * Parses definition-list entries of the form "; Source : Target | param = value | …".
     * The ":" part may also start on the line following the ";" part.
     *
     * @return array[] Keyed by source template name, each element being an array with the keys
     *  "targetTemplate" (string) and "parameters" (array, see parseParameterRules())
     */
    private function parseTransferList( string $wikitext ): array {
        $transfers = [];

        preg_match_all(
            '/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
            $wikitext,
            $matches,
            PREG_SET_ORDER
        );
        foreach ( $matches as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
            // A later definition for the same source template replaces an earlier one
            $transfers[$sourceTemplate] = [
                'targetTemplate' => $targetTemplate,
                'parameters' => $this->parseParameterRules( $paramPatternsString ),
            ];
        }

        return $transfers;
    }

    /**
     * Parses the pipe-separated "target = source" rules that follow a target template name.
     *
     * A "+" prefix on the target parameter marks a default value to add if missing, a "@"
     * prefix requests a language template. Rules without "=" and rules whose right-hand side
     * contains "%" are skipped.
     *
     * @param string $paramPatternsString E.g. "|+license=cc|@desc=Description"
     *
     * @return array[] Keyed by target parameter name, each element being an array with the keys
     *  "addIfMissing" (bool), "addLanguageTemplate" (bool) and either "value" (string) or
     *  "sourceParameters" (string[])
     */
    private function parseParameterRules( string $paramPatternsString ): array {
        $parameterTransfers = [];

        $paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
        foreach ( $paramRules as $paramRule ) {
            $parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
            if ( count( $parts ) !== 2 ) {
                continue;
            }

            [ $targetParam, $sourceParam ] = $parts;

            // We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
            // show up as text values. Investigation and decision are documented in T198609.
            if ( str_contains( $sourceParam, '%' ) ) {
                continue;
            }

            // Always matches; unmatched prefix groups are reported as empty strings
            preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
            [ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

            // Plain read-modify-write instead of a reference, so nothing stays aliased
            $opt = $parameterTransfers[$targetParam] ?? [];

            // A flag that was set by an earlier rule is never reset by a later one
            if ( !isset( $opt['addIfMissing'] ) || $addIfMissing ) {
                $opt['addIfMissing'] = (bool)$addIfMissing;
            }
            if ( !isset( $opt['addLanguageTemplate'] ) || $addLanguageTemplate ) {
                $opt['addLanguageTemplate'] = (bool)$addLanguageTemplate;
            }

            if ( $addIfMissing ) {
                // It doesn't make sense to have multiple default values, only keep the last
                $opt['value'] = $sourceParam;
            } else {
                $opt['sourceParameters'][] = $sourceParam;
            }

            $parameterTransfers[$targetParam] = $opt;
        }

        return $parameterTransfers;
    }

}