Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.75% covered (success)
98.75%
79 / 80
80.00% covered (warning)
80.00%
4 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigParser
98.75% covered (success)
98.75%
79 / 80
80.00% covered (warning)
80.00%
4 / 5
17
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getWikitextConversions
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
1
 grepSection
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
3.00
 getItemList
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parseTransferList
100.00% covered (success)
100.00%
34 / 34
100.00% covered (success)
100.00%
1 / 1
11
1<?php
2
3namespace FileImporter\Services\Wikitext;
4
5use FileImporter\Data\WikitextConversions;
6use FileImporter\Exceptions\ImportException;
7use FileImporter\Exceptions\LocalizedImportException;
8
/**
 * Parses the wikitext of a CommonsHelper configuration page into a WikitextConversions object.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

    /** @var string Location of the configuration page, only used when reporting parse errors */
    private $commonsHelperConfigUrl;
    /** @var string Unprocessed wikitext of the configuration page */
    private $wikitext;

    /**
     * @param string $commonsHelperConfigUrl
     * @param string $wikitext
     */
    public function __construct( $commonsHelperConfigUrl, string $wikitext ) {
        $this->commonsHelperConfigUrl = $commonsHelperConfigUrl;
        $this->wikitext = $wikitext;
    }

    /**
     * Extracts all expected sections from the wikitext and bundles the rules found in them.
     *
     * @throws ImportException e.g. when the provided wikitext is incomplete
     * @throws \RuntimeException when the HTML comments could not be stripped from the wikitext
     */
    public function getWikitextConversions(): WikitextConversions {
        // HTML comments must be removed first
        $wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );
        if ( $wikitext === null ) {
            // preg_replace() signals an internal PCRE failure with null. Report it right here
            // instead of letting it surface as an unrelated TypeError further down.
            throw new \RuntimeException(
                'Failed to strip HTML comments (PCRE error ' . preg_last_error() . ')'
            );
        }

        // Scan for all level-2 headings first, relevant for properly prioritized error reporting
        $categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
        $templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
        $informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

        $badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
            'Categories/Bad' );
        $goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
            'Templates/Good' );
        $badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
            'Templates/Bad' );
        $obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
            'Templates/Remove' );
        $transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
            'Templates/Transfer' );
        $descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
            'Information/Description' );
        $licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
            'Information/Licensing' );

        return new WikitextConversions( [
            WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
            WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
            WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
            WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
                $this->parseTransferList( $transferTemplateSection ),
            WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
            // The array union keeps the first entry when a heading is listed in both sections
            WikitextConversions::HEADING_REPLACEMENTS =>
                array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
                array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' )
        ] );
    }

    /**
     * Returns the body of the section that follows the given heading, without the heading line.
     *
     * @param string $wikitext Text to search in
     * @param string $header Heading to look for, including the equal signs, e.g. "== Foo =="
     * @param string $sectionName Name of the section as reported in the error message
     *
     * @return string
     * @throws \InvalidArgumentException if $header does not contain an equal sign followed by a
     *  space
     * @throws ImportException if the section could not be found
     */
    private function grepSection( string $wikitext, string $header, string $sectionName ): string {
        // The offset of the last leading equal sign is one less than the heading level
        $level = strpos( $header, '= ' );
        if ( $level === false ) {
            throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
        }
        $level++;

        // NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
        // "== Foo bar ==" is requested.
        $headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

        // Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
        // header. Stop at the same or a higher level (less equal signs), or at the end of the text.
        $regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

        if ( !preg_match( $regex, $wikitext, $matches ) ) {
            throw new LocalizedImportException( [
                'fileimporter-commonshelper-parsing-failed',
                $this->commonsHelperConfigUrl,
                $sectionName
            ] );
        }

        return $matches[1];
    }

    /**
     * Collects the trimmed text of all "*" list items in the given wikitext.
     *
     * @param string $wikitext
     *
     * @return string[]
     */
    private function getItemList( string $wikitext ): array {
        // Extract non-empty first-level list elements, exclude 2nd and deeper levels
        preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
        return $matches[1];
    }

    /**
     * Parses a definition list of the form ";Source template:Target template|rule|rule…", where
     * the ":" may also start the next line.
     *
     * @param string $wikitext
     *
     * @return array[] Keyed by source template name, each element being an array with the
     *  elements "targetTemplate" (string) and "parameters" (array)
     */
    private function parseTransferList( string $wikitext ): array {
        $transfers = [];

        preg_match_all(
            '/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
            $wikitext,
            $definitions,
            PREG_SET_ORDER
        );
        foreach ( $definitions as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
            // A later definition for the same source template replaces an earlier one
            $transfers[$sourceTemplate] = [
                'targetTemplate' => $targetTemplate,
                'parameters' => $this->parseParameterRules( $paramPatternsString ),
            ];
        }

        return $transfers;
    }

    /**
     * Parses the pipe-separated "target=source" parameter rules of a single transfer definition.
     *
     * A target parameter prefixed with "+" marks a default value that is stored as "value". A
     * target parameter prefixed with "@" sets the "addLanguageTemplate" flag.
     *
     * @param string $paramPatternsString
     *
     * @return array[] Keyed by target parameter name, each element being an array with the
     *  elements "addIfMissing" (bool), "addLanguageTemplate" (bool), and either "value" (string),
     *  "sourceParameters" (string[]), or both
     */
    private function parseParameterRules( string $paramPatternsString ): array {
        $parameterTransfers = [];

        $paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
        foreach ( $paramRules as $paramRule ) {
            $parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
            if ( count( $parts ) !== 2 ) {
                // Not a "target=source" pair
                continue;
            }

            [ $targetParam, $sourceParam ] = $parts;

            // We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
            // show up as text values. Investigation and decision are documented in T198609.
            if ( str_contains( $sourceParam, '%' ) ) {
                continue;
            }

            // Split an optional "+" or "@" prefix from the parameter name. The last group always
            // participates in the match, so all three groups are guaranteed to be present.
            preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
            [ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

            // Work on a plain copy and write it back below, rather than holding a reference
            // into the array
            $opt = $parameterTransfers[$targetParam] ?? [];

            // Both flags are sticky: once set by one rule, later rules can't reset them
            if ( !isset( $opt['addIfMissing'] ) || $addIfMissing ) {
                $opt['addIfMissing'] = (bool)$addIfMissing;
            }
            if ( !isset( $opt['addLanguageTemplate'] ) || $addLanguageTemplate ) {
                $opt['addLanguageTemplate'] = (bool)$addLanguageTemplate;
            }

            if ( $addIfMissing ) {
                // It doesn't make sense to have multiple default values, only keep the last
                $opt['value'] = $sourceParam;
            } else {
                $opt['sourceParameters'][] = $sourceParam;
            }

            $parameterTransfers[$targetParam] = $opt;
        }

        return $parameterTransfers;
    }

}