Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.57% |
69 / 70 |
|
80.00% |
4 / 5 |
CRAP | |
0.00% |
0 / 1 |
LicenseParser | |
98.57% |
69 / 70 |
|
80.00% |
4 / 5 |
28 | |
0.00% |
0 / 1 |
parseLicenseString | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
sortDataByLicensePriority | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
parseCreativeCommonsLicenseString | |
97.30% |
36 / 37 |
|
0.00% |
0 / 1 |
13 | |||
parsePublicDomainLicenseString | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
getLicensePriority | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
7 |
1 | <?php |
2 | |
3 | namespace CommonsMetadata; |
4 | |
/**
 * Takes a license name string, and splits it up into various license elements (version, etc).
 * The string is typically a Commons category name, or template name, or license shortname
 * (see {@link https://commons.wikimedia.org/wiki/Commons:Machine-readable_data})
 */
class LicenseParser {

	/**
	 * @var string[] Nonstandard license name patterns used in categories/templates/shortnames,
	 *   mapped to the standard license name they are parsed as. Keys must be lowercase, since
	 *   the lookup happens after the input has been lowercased.
	 */
	public static $licenseAliases = [
		'cc-by-sa-3.0-migrated' => 'cc-by-sa-3.0',
		'cc-by-sa-3.0-migrated-with-disclaimers' => 'cc-by-sa-3.0',
		'cc-by-sa-3.0-2.5-2.0-1.0' => 'cc-by-sa-3.0',
		'cc-by-sa-2.5-2.0-1.0' => 'cc-by-sa-2.5',
		'cc-by-2.0-stma' => 'cc-by-2.0',
		'cc-by-sa-1.0+' => 'cc-by-sa-3.0',
	];

	/**
	 * Takes a license string (could be a category name, template name etc)
	 * and returns template information (or null if the license was not recognized).
	 * Creative Commons names are tried first, then public domain names.
	 * The returned array can have the following keys:
	 * - family: e.g. cc, pd
	 * - type: e.g. cc-by-sa
	 * - version: e.g. 2.5
	 * - region: e.g. nl
	 * - name: all the above put together, e.g. cc-by-sa-2.5-nl
	 * Only name is required.
	 * @param string $str
	 * @return array|null
	 */
	public function parseLicenseString( $str ) {
		if ( !is_string( $str ) ) {
			// Nothing to parse (e.g. a callback that found no license name). Returning early
			// avoids handing null to trim()/strtolower(), which is deprecated since PHP 8.1.
			return null;
		}

		return $this->parseCreativeCommonsLicenseString( $str )
			?: $this->parsePublicDomainLicenseString( $str );
	}

	/**
	 * Takes an array with license data and sorts it by license priority (highest first).
	 * The sort is stable, keys are preserved, and the input array is not changed.
	 * The method will call $getLicenseStringCallback( $data[$key], $key ) and expect a license name
	 * @param array $data
	 * @param callable $getLicenseStringCallback
	 * @return array
	 */
	public function sortDataByLicensePriority( array $data, $getLicenseStringCallback ) {
		// key => [ priority, original position ]; the position is the tie-breaker that
		// keeps the sort stable.
		$licensePriorities = [];
		$i = 0;
		foreach ( $data as $key => $value ) {
			$license = $getLicenseStringCallback( $value, $key );
			$licenseDetails = $this->parseLicenseString( $license );
			$priority = $this->getLicensePriority( $licenseDetails );
			$licensePriorities[$key] = [ $priority, $i ];
			$i++;
		}

		// $data was passed by value, so sorting it here does not affect the caller's array.
		uksort( $data, static function ( $a, $b ) use ( $licensePriorities ) {
			// higher priority means smaller wrt sorting; on equal priority keep original order
			return $licensePriorities[$b][0] <=> $licensePriorities[$a][0]
				?: $licensePriorities[$a][1] <=> $licensePriorities[$b][1];
		} );

		return $data;
	}

	/**
	 * Takes a CC license string and returns template information.
	 * Returns null for strings that are not CC license names, for non-free (NC/ND) licenses,
	 * and for names without a version number (other than the special cases cc0 and cc-pd).
	 * @see parseLicenseString()
	 * @param string $str
	 * @return array|null
	 */
	protected function parseCreativeCommonsLicenseString( $str ) {
		$data = [
			'family' => 'cc',
			'type' => null,
			'version' => null,
			'region' => null,
			'name' => null,
		];

		$str = strtolower( trim( $str ) );
		if ( isset( self::$licenseAliases[$str] ) ) {
			$str = self::$licenseAliases[$str];
		}

		// some special cases first
		if ( in_array( $str, [ 'cc0', 'cc-pd' ], true ) ) {
			$data['type'] = $data['name'] = $str;
			return $data;
		}

		$parts = preg_split( '/[- ]/', $str );
		if ( $parts[0] !== 'cc' ) {
			return null;
		}

		// Consume the license element tokens following "cc"; $i ends up pointing at the
		// first token that is not one of them (or at the end of the list).
		$countParts = count( $parts );
		for ( $i = 1; $i < $countParts; $i++ ) {
			if ( !in_array( $parts[$i], [ 'by', 'sa', 'nc', 'nd' ], true ) ) {
				break;
			}
			if ( in_array( $parts[$i], [ 'nc', 'nd' ], true ) ) {
				// ignore non-free licenses
				return null;
			}
		}
		$data['type'] = implode( '-', array_slice( $parts, 0, $i ) );

		// A version number is mandatory.
		if ( $i < $countParts && is_numeric( $parts[$i] ) ) {
			$data['version'] = $parts[$i];
			$i++;
		} else {
			return null;
		}

		// Optional region: a two-character code, or the one known long-form region name.
		if ( $i < $countParts && (
				preg_match( '/^\w\w$/', $parts[$i] )
				|| $parts[$i] === 'scotland'
			)
		) {
			$data['region'] = $parts[$i];
			$i++;
		}

		// Anything left over means this is not a name we understand.
		if ( $i !== $countParts ) {
			return null;
		}

		// Only skip elements that are really missing. A callback-less array_filter() would
		// also drop the falsy version string '0' and make the name disagree with 'version'.
		$nameParts = array_filter(
			[ $data['type'], $data['version'], $data['region'] ],
			static function ( $part ) {
				return $part !== null;
			}
		);
		$data['name'] = implode( '-', $nameParts );
		return $data;
	}

	/**
	 * Takes a PD license string and returns template information.
	 * Matching is case-insensitive and ignores surrounding whitespace, like the CC parser.
	 * @see parseLicenseString()
	 * @param string $str
	 * @return array|null
	 */
	protected function parsePublicDomainLicenseString( $str ) {
		// A very simple approach, but should work most of the time with licence shortnames.
		$str = strtolower( trim( $str ) );
		if ( $str === 'public domain' || $str === 'pd' ) {
			return [
				'family' => 'pd',
				'name' => 'pd',
			];
		}
		return null;
	}

	/**
	 * Returns a priority value for this license. The license with the highest priority will be
	 * returned in the GetExtendedMetadata hook.
	 * Unrecognized licenses (null or no family) get 0, CC licenses 1000 plus bonuses, PD 2000.
	 * @param array|null $licenseData data from LicenseParser::parseLicenseString()
	 * @return int
	 */
	protected function getLicensePriority( $licenseData ) {
		$priority = 0;
		if ( isset( $licenseData['family'] ) ) {
			if ( $licenseData['family'] === 'pd' ) {
				$priority = 2000;
			} elseif ( $licenseData['family'] === 'cc' ) {
				// ignore non-free CC licenses for now; an image with such a license probably
				// won't have a better license anyway
				$priority += 1000;
				if ( isset( $licenseData['type'] ) && $licenseData['type'] === 'cc-by' ) {
					// prefer the less restrictive CC-BY over CC-BY-SA
					$priority += 100;
				}
				if ( isset( $licenseData['version'] ) ) {
					// prefer newer licenses
					$priority += (int)( 10 * (float)$licenseData['version'] );
				}
			}
		}
		return $priority;
	}

}