Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
86.36% |
57 / 66 |
|
60.00% |
3 / 5 |
CRAP | |
0.00% |
0 / 1 |
| SectionPositionCalculator | |
86.36% |
57 / 66 |
|
60.00% |
3 / 5 |
22.12 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| calculateSectionPosition | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
9 | |||
| calculatePositionFromSourceOrder | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
| calculateInsertionPosition | |
93.75% |
15 / 16 |
|
0.00% |
0 / 1 |
6.01 | |||
| fetchAppendixTitles | |
20.00% |
2 / 10 |
|
0.00% |
0 / 1 |
7.61 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace ContentTranslation\Service; |
| 5 | |
| 6 | use MediaWiki\Json\FormatJson; |
| 7 | use MediaWiki\Title\Title; |
| 8 | |
| 9 | class SectionPositionCalculator { |
| 10 | private const APPENDIX_TITLES = [ |
| 11 | "en" => [ |
| 12 | "Works", |
| 13 | "Publications", |
| 14 | "Bibliography", |
| 15 | "Discography", |
| 16 | "Filmography", |
| 17 | "See also", |
| 18 | "Notes", |
| 19 | "Citations", |
| 20 | "References", |
| 21 | "Further reading", |
| 22 | "External links" |
| 23 | ], |
| 24 | "es" => [ |
| 25 | "Bibliografía", |
| 26 | "Referencias", |
| 27 | "Citas", |
| 28 | "Discografía", |
| 29 | "Filmografía", |
| 30 | "Notas", |
| 31 | "Publicaciones", |
| 32 | "Obra", |
| 33 | "Enlaces externos", |
| 34 | "Otras lecturas", |
| 35 | "Lecturas relacionadas", |
| 36 | "Véase también" |
| 37 | ], |
| 38 | "bn" => [ |
| 39 | "গ্রন্থপঞ্জী", |
| 40 | "গ্রন্থপঞ্জি", |
| 41 | "তথ্যাবলি", |
| 42 | "উদ্ধৃতিসমূহ", |
| 43 | "বর্ণনসমূহ", |
| 44 | "উদ্ধৃতি", |
| 45 | "উদ্ধ্বৃতি", |
| 46 | "তথ্যসূত্র", |
| 47 | "ডিস্কোগ্রাফি", |
| 48 | "বহিঃসংযোগ", |
| 49 | "চলচ্চিত্রের তালিকা", |
| 50 | "আরও পড়ুন", |
| 51 | "আরও পড়ুন", |
| 52 | "আরো পড়ুন", |
| 53 | "টীকা", |
| 54 | "নোট", |
| 55 | "প্রকাশনা", |
| 56 | "প্রকাশিত গ্রন্থ", |
| 57 | "আরও দেখুন", |
| 58 | "আরো দেখুন", |
| 59 | "কাজ", |
| 60 | "কর্মজীবন" |
| 61 | ], |
| 62 | "fr" => [ |
| 63 | "Bibliographie", |
| 64 | "Références", |
| 65 | "Discographie", |
| 66 | "Filmographie", |
| 67 | "Travaux", |
| 68 | "Liens externes", |
| 69 | "Principales publications", |
| 70 | "Voir aussi" |
| 71 | ], |
| 72 | "de" => [ |
| 73 | "Literatur", |
| 74 | "Bibliographie", |
| 75 | "Anmerkungen", |
| 76 | "Zitate", |
| 77 | "Belege", |
| 78 | "Diskografie", |
| 79 | "Diskographie", |
| 80 | "Weblinks", |
| 81 | "Filmografie", |
| 82 | "Literatur", |
| 83 | "Einzelnachweise", |
| 84 | "Veröffentlichungen", |
| 85 | "Einzelnachweise", |
| 86 | "Arbeit", |
| 87 | "Siehe auch" |
| 88 | ] |
| 89 | ]; |
| 90 | |
/**
 * @param CxServerClient $cxServerClient Client used by fetchAppendixTitles() to request
 *  appendix titles for languages that have no hard-coded list
 * @param SectionTitleFetcher $sectionTitleFetcher Supplies the section titles of the target page
 * @param SectionMappingFetcher $sectionMappingFetcher Supplies the source/target section mappings
 */
public function __construct(
	private readonly CxServerClient $cxServerClient,
	private readonly SectionTitleFetcher $sectionTitleFetcher,
	private readonly SectionMappingFetcher $sectionMappingFetcher,
) {
}
| 97 | |
/**
 * Determine the position at which a translated section should be published
 * inside the target page.
 *
 * The checks below run in order and the first one that applies wins:
 *
 * 1. The section is being published to the user's sandbox: "new".
 * 2. The section titles of the target page could not be fetched (null, treated as
 *    "page does not exist") or the lead section is being published: 0.
 * 3. $existingTargetSectionTitle is given and found among the target sections
 *    (expanding/updating an existing section): the key of that section.
 * 4. A position can be derived from the source article's section order and the
 *    sections already present in the target page: that position.
 * 5. At least one appendix section exists in the target page: the key of the
 *    first appendix section (in order of appearance).
 * 6. Otherwise: "new".
 *
 * Steps 3 to 5 are skipped when the target page has no sections at all.
 *
 * @param Title $targetTitle
 * @param string $targetLanguage
 * @param bool $isSandbox
 * @param string $sourceLanguage Source article language for position calculation
 * @param string $sourceTitle Source article title for position calculation
 * @param string $sourceSectionTitle Title of source section being translated
 * @param string|null $existingTargetSectionTitle Title of existing target section being expanded
 * @return int|string
 */
public function calculateSectionPosition(
	Title $targetTitle,
	string $targetLanguage,
	bool $isSandbox,
	string $sourceLanguage,
	string $sourceTitle,
	string $sourceSectionTitle,
	?string $existingTargetSectionTitle = null
): int|string {
	if ( $isSandbox ) {
		return "new";
	}

	$targetSectionTitles = $this->sectionTitleFetcher->fetchSectionTitles( $targetLanguage, $targetTitle );

	// if target sections are null, this page doesn't exist, and this is a lead section
	if (
		$targetSectionTitles === null ||
		$existingTargetSectionTitle === SectionContentEvaluator::LEAD_SECTION_DUMMY_TITLE
	) {
		return 0;
	}

	// The page exists but has no sections: there is nothing to position against
	if ( !$targetSectionTitles ) {
		return "new";
	}

	// If expanding or replacing existing section, use its current position
	if ( $existingTargetSectionTitle !== null ) {
		// Strict search: a loose comparison would treat numeric-looking titles
		// such as "2" and "2.0" as the same section.
		// NOTE(review): assumes fetchSectionTitles() yields string titles - confirm
		$existingSectionPosition = array_search( $existingTargetSectionTitle, $targetSectionTitles, true );
		if ( $existingSectionPosition !== false ) {
			return $existingSectionPosition;
		}
	}

	// Try to calculate position based on source article order
	$sourceBasedPosition = $this->calculatePositionFromSourceOrder(
		$sourceLanguage,
		$sourceTitle,
		$targetLanguage,
		$sourceSectionTitle,
		$targetSectionTitles
	);
	if ( $sourceBasedPosition !== null ) {
		return $sourceBasedPosition;
	}

	// Fall back to appendix-based logic. array_intersect() preserves the keys of its
	// first argument, so the first key is the key of the first appendix section.
	$targetAppendixTitles = array_intersect(
		$targetSectionTitles,
		$this->fetchAppendixTitles( $targetLanguage )
	);

	return $targetAppendixTitles ? array_key_first( $targetAppendixTitles ) : "new";
}
| 177 | |
/**
 * Derive the section position from the order of sections in the source article.
 *
 * The section mappings for the given source page and language pair are requested
 * from the section mapping fetcher. When no mappings come back, no position can be
 * derived and null is returned; otherwise the 'sourceSections' and 'present' entries
 * of the mappings are handed to calculateInsertionPosition() and its result is returned.
 *
 * @param string $sourceLanguage
 * @param string $sourceTitle
 * @param string $targetLanguage
 * @param string $sourceSectionTitle
 * @param array $targetSectionTitles Existing sections in target article (indexed by position)
 * @return int|null Position index or null if unable to determine
 */
private function calculatePositionFromSourceOrder(
	string $sourceLanguage,
	string $sourceTitle,
	string $targetLanguage,
	string $sourceSectionTitle,
	array $targetSectionTitles
): int|null {
	$mappings = $this->sectionMappingFetcher->fetchSectionMapping(
		$sourceLanguage,
		$sourceTitle,
		$targetLanguage
	);

	// An empty/falsy result means there is nothing to order against
	return $mappings
		? $this->calculateInsertionPosition(
			$sourceSectionTitle,
			$mappings['sourceSections'],
			$mappings['present'],
			$targetSectionTitles
		)
		: null;
}
| 222 | |
/**
 * Calculate the insertion position for a section based on source article order.
 *
 * - If the source section is not part of $sourceSections, null is returned.
 * - If the source section is already mapped to a target section that is found in
 *   $targetSectionTitles, the position right after that target section is returned.
 * - Otherwise the source sections following the given one are scanned in order, and
 *   the position of the first one whose mapped target section is found in
 *   $targetSectionTitles is returned, so that the new section is inserted before it.
 * - If none is found, null is returned.
 *
 * @param string $sourceSectionTitle The source section being positioned
 * @param array $sourceSections All source sections in order
 * @param array $presentMappings Mapping of existing source->target sections
 * @param array $targetSectionTitles Existing target sections (indexed by position)
 * @return int|null Position index or null if no insertion point found
 */
private function calculateInsertionPosition(
	string $sourceSectionTitle,
	array $sourceSections,
	array $presentMappings,
	array $targetSectionTitles
): ?int {
	// Find the position of our source section in the source article
	$sourceSectionIndex = array_search( $sourceSectionTitle, $sourceSections, true );
	if ( $sourceSectionIndex === false ) {
		// This shouldn't happen if data is consistent, but handle gracefully
		return null;
	}

	// If the corresponding target section exists, the user translated an existing section
	// and wants to publish it as a new section (in case of section expansion, the positioning
	// should be controlled by the "existingsectiontitle" endpoint parameter).
	// Compared against null rather than by truthiness, so that a section titled "0" counts.
	$correspondingTargetSection = $presentMappings[$sourceSectionTitle] ?? null;
	if ( $correspondingTargetSection !== null ) {
		// Find where the existing target section appears in the actual target article
		$targetPosition = array_search( $correspondingTargetSection, $targetSectionTitles, true );
		if ( $targetPosition !== false ) {
			// the new section should be published right after the existing one
			return $targetPosition + 1;
		}
		// The mapping points to a section that is not in the target article. Do not
		// compute "false + 1" (which evaluates to 1); fall through to the source-order
		// lookup below instead.
	}

	// Look for the next section in source order that already exists in target
	// Insert new section before that existing section to maintain order
	$sourceSectionCount = count( $sourceSections );
	for ( $i = $sourceSectionIndex + 1; $i < $sourceSectionCount; $i++ ) {
		$laterSourceSection = $sourceSections[$i];

		// Skip later source sections that have no corresponding target section
		if ( !isset( $presentMappings[$laterSourceSection] ) ) {
			continue;
		}

		// Find where this target section appears in the actual target article
		$targetPosition = array_search( $presentMappings[$laterSourceSection], $targetSectionTitles, true );
		if ( $targetPosition !== false ) {
			// Found a section to insert before - return its position
			return $targetPosition;
		}
	}

	// No later sections found in target - no insertion point found
	return null;
}
| 282 | |
/**
 * Given a target language code, this method returns an array of
 * strings containing the appendix section titles for this language.
 *
 * Languages listed in APPENDIX_TITLES are answered from that hard-coded list.
 * For any other language the English appendix titles are sent to the
 * "/v2/suggest/sections/titles/en/<targetLanguage>" endpoint through the
 * CX server client, and the arrays found in the decoded response are merged
 * into a single array.
 *
 * An empty array is returned when the request yields no response or when the
 * response cannot be decoded into an array.
 *
 * @param string $targetLanguage
 * @return string[]
 */
public function fetchAppendixTitles( string $targetLanguage ): array {
	if ( isset( self::APPENDIX_TITLES[$targetLanguage] ) ) {
		return self::APPENDIX_TITLES[$targetLanguage];
	}

	$path = wfAppendQuery(
		"/v2/suggest/sections/titles/en/$targetLanguage",
		[ 'titles' => implode( '|', self::APPENDIX_TITLES['en'] ) ]
	);

	$response = $this->cxServerClient->get( $path );
	if ( !$response ) {
		return [];
	}

	$json = FormatJson::decode( $response, true );
	if ( !is_array( $json ) ) {
		// Malformed or non-object payload: array_values() would throw a TypeError
		return [];
	}

	// NOTE(review): presumably each value is the list of target-language titles
	// suggested for one English title - confirm against the endpoint's response.
	// Non-array values are dropped because array_merge() only accepts arrays.
	$titleGroups = array_filter(
		array_values( $json ),
		static fn ( $group ): bool => is_array( $group )
	);

	return array_merge( [], ...$titleGroups );
}
| 308 | } |