Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.75% covered (success)
98.75%
79 / 80
80.00% covered (warning)
80.00%
4 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
CommonsHelperConfigParser
98.75% covered (success)
98.75%
79 / 80
80.00% covered (warning)
80.00%
4 / 5
17
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getWikitextConversions
100.00% covered (success)
100.00%
29 / 29
100.00% covered (success)
100.00%
1 / 1
1
 grepSection
92.31% covered (success)
92.31%
12 / 13
0.00% covered (danger)
0.00%
0 / 1
3.00
 getItemList
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parseTransferList
100.00% covered (success)
100.00%
34 / 34
100.00% covered (success)
100.00%
1 / 1
11
1<?php
2
3namespace FileImporter\Services\Wikitext;
4
5use FileImporter\Data\WikitextConversions;
6use FileImporter\Exceptions\ImportException;
7use FileImporter\Exceptions\LocalizedImportException;
8
/**
 * Parses the wikitext of a CommonsHelper configuration page into a WikitextConversions
 * rule set.
 *
 * The wikitext must contain the level-2 sections "Categories", "Templates" and
 * "Information", and within them the level-3 subsections requested by
 * getWikitextConversions(). A missing section aborts parsing with an exception.
 *
 * @license GPL-2.0-or-later
 */
class CommonsHelperConfigParser {

    /** @var string Only used as a parameter of the "parsing failed" error message */
    private $commonsHelperConfigUrl;
    /** Raw wikitext of the configuration page, exactly as given to the constructor */
    private string $wikitext;

    /**
     * @param string $commonsHelperConfigUrl Reported back in the error when a section is missing
     * @param string $wikitext Raw wikitext of the configuration page
     */
    public function __construct( $commonsHelperConfigUrl, string $wikitext ) {
        $this->commonsHelperConfigUrl = $commonsHelperConfigUrl;
        $this->wikitext = $wikitext;
    }

    /**
     * Extracts all known sections from the configuration wikitext and converts them into a
     * WikitextConversions object.
     *
     * @throws ImportException e.g. when the provided wikitext is incomplete
     */
    public function getWikitextConversions(): WikitextConversions {
        // HTML comments must be removed first
        $wikitext = preg_replace( '/<!--.*?-->/s', '', $this->wikitext );

        // Scan for all level-2 headings first, relevant for properly prioritized error reporting
        $categorySection = $this->grepSection( $wikitext, '== Categories ==', 'Categories' );
        $templateSection = $this->grepSection( $wikitext, '== Templates ==', 'Templates' );
        $informationSection = $this->grepSection( $wikitext, '== Information ==', 'Information' );

        // Only then descend into the level-3 subsections. The order of these calls decides
        // which missing subsection gets reported first, so it must not be changed.
        $badCategorySection = $this->grepSection( $categorySection, '=== Bad ===',
            'Categories/Bad' );
        $goodTemplateSection = $this->grepSection( $templateSection, '=== Good ===',
            'Templates/Good' );
        $badTemplateSection = $this->grepSection( $templateSection, '=== Bad ===',
            'Templates/Bad' );
        $obsoleteTemplates = $this->grepSection( $templateSection, '=== Remove ===',
            'Templates/Remove' );
        $transferTemplateSection = $this->grepSection( $templateSection, '=== Transfer ===',
            'Templates/Transfer' );
        $descriptionSection = $this->grepSection( $informationSection, '=== Description ===',
            'Information/Description' );
        $licensingSection = $this->grepSection( $informationSection, '=== Licensing ===',
            'Information/Licensing' );

        return new WikitextConversions( [
            WikitextConversions::REQUIRED_TEMPLATES => $this->getItemList( $goodTemplateSection ),
            WikitextConversions::FORBIDDEN_TEMPLATES => $this->getItemList( $badTemplateSection ),
            WikitextConversions::OBSOLETE_TEMPLATES => $this->getItemList( $obsoleteTemplates ),
            WikitextConversions::TEMPLATE_TRANSFORMATIONS =>
                $this->parseTransferList( $transferTemplateSection ),
            WikitextConversions::FORBIDDEN_CATEGORIES => $this->getItemList( $badCategorySection ),
            // Array union: a heading listed in both subsections keeps the "Description"
            // replacement, because the left operand wins on duplicate keys.
            WikitextConversions::HEADING_REPLACEMENTS =>
                array_fill_keys( $this->getItemList( $descriptionSection ), '{{int:filedesc}}' ) +
                array_fill_keys( $this->getItemList( $licensingSection ), '{{int:license-header}}' )
        ] );
    }

    /**
     * Returns the body of the first section introduced by the given heading, i.e. everything
     * after the heading line up to the next heading of the same or a higher level, or up to
     * the end of the text.
     *
     * @param string $wikitext Text to search in
     * @param string $header Heading to look for, written like "== Foo ==" or "=== Foo ==="
     * @param string $sectionName Reported back in the error when the heading is not found
     *
     * @return string Section body without the heading line
     * @throws ImportException if the section could not be found
     * @throws \InvalidArgumentException if $header contains no "= " sequence
     */
    private function grepSection( string $wikitext, string $header, string $sectionName ): string {
        // The 0-based offset of the last leading "=" plus one equals the heading level
        $level = strpos( $header, '= ' );
        if ( $level === false ) {
            throw new \InvalidArgumentException( '$header must follow this format: "== … =="' );
        }
        $level++;

        // NOTE: This relaxes the parser to a degree that accepts "== Foobar ==" when
        // "== Foo bar ==" is requested.
        $headerRegex = str_replace( ' ', '\h*', preg_quote( $header, '/' ) );

        // Extract a section from the given wikitext blob. Start from the given 2nd- or 3rd-level
        // header. Stop at the same or a higher level (less equal signs), or at the end of the text.
        $regex = '/^' . $headerRegex . '\h*$(.*?)(?=^={1,' . $level . '}[^=]|\Z)/ms';

        if ( !preg_match( $regex, $wikitext, $matches ) ) {
            throw new LocalizedImportException( [
                'fileimporter-commonshelper-parsing-failed',
                $this->commonsHelperConfigUrl,
                $sectionName
            ] );
        }

        return $matches[1];
    }

    /**
     * Collects the texts of all first-level "*" list items, with surrounding horizontal
     * whitespace removed. Empty items and nested items (e.g. "**", "*#", "*:") are skipped.
     *
     * @return string[]
     */
    private function getItemList( string $wikitext ): array {
        // Extract non-empty first-level list elements, exclude 2nd and deeper levels
        preg_match_all( '/^\*\h*([^\s*#:;].*?)\h*$/mu', $wikitext, $matches );
        return $matches[1];
    }

    /**
     * Parses definition-list style transfer rules of the form
     * ";Source:Target|targetParam=sourceParam|+targetParam=default|@targetParam=sourceParam",
     * where the ":Target…" part may also start on the line following ";Source".
     *
     * Each rule yields one entry, keyed by the source template name:
     * - "targetTemplate": name of the template to convert to
     * - "parameters": array keyed by target parameter name, each element holding
     *   - "addIfMissing" (bool): true as soon as one rule for this parameter used the "+" prefix
     *   - "addLanguageTemplate" (bool): true as soon as one rule used the "@" prefix
     *   - "value" (string): right-hand side of the last "+" rule, if any
     *   - "sourceParameters" (string[]): right-hand sides of all rules without "+", if any
     *
     * A later rule for the same source template replaces an earlier one.
     *
     * @return array[]
     */
    private function parseTransferList( string $wikitext ): array {
        $transfers = [];

        preg_match_all(
            '/^;\h*+([^:|\n]+)\n?:\h*+([^|\n]+)(.*)/mu',
            $wikitext,
            $matches,
            PREG_SET_ORDER
        );
        foreach ( $matches as [ , $sourceTemplate, $targetTemplate, $paramPatternsString ] ) {
            $parameterTransfers = [];

            $paramRules = preg_split( '/\s*\|+\s*/', $paramPatternsString, -1, PREG_SPLIT_NO_EMPTY );
            foreach ( $paramRules as $paramRule ) {
                // Rules without an equal sign carry no mapping and are ignored
                $parts = preg_split( '/\s*=\s*/', $paramRule, 2 );
                if ( count( $parts ) !== 2 ) {
                    continue;
                }

                [ $targetParam, $sourceParam ] = $parts;

                // We don't want CommonsHelper's placeholders "%AUTHOR%" and "%TRANSFERUSER%" to
                // show up as text values. Investigation and decision are documented in T198609.
                if ( str_contains( $sourceParam, '%' ) ) {
                    continue;
                }

                // Split an optional "+" or "@" prefix from the target parameter name. A separate
                // variable is used so the array iterated by the outer loop is never touched.
                preg_match( '/^(?:(\+)|(@))?(.*)/', $targetParam, $prefixMatch );
                [ , $addIfMissing, $addLanguageTemplate, $targetParam ] = $prefixMatch;

                // Plain read-modify-write instead of a reference into $parameterTransfers, so no
                // reference slot can leak into the array that is stored in $transfers below.
                $opt = $parameterTransfers[$targetParam] ?? [];

                // Both flags are sticky: once a rule has set one to true it stays true
                $opt['addIfMissing'] = ( $opt['addIfMissing'] ?? false ) || $addIfMissing;
                $opt['addLanguageTemplate'] = ( $opt['addLanguageTemplate'] ?? false ) ||
                    $addLanguageTemplate;

                if ( $addIfMissing ) {
                    // It doesn't make sense to have multiple default values, only keep the last
                    $opt['value'] = $sourceParam;
                } else {
                    $opt['sourceParameters'][] = $sourceParam;
                }

                $parameterTransfers[$targetParam] = $opt;
            }

            $transfers[$sourceTemplate] = [
                'targetTemplate' => $targetTemplate,
                'parameters' => $parameterTransfers,
            ];
        }

        return $transfers;
    }

}