Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
CrossLanguageTranslationSearchQuery.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\TtmServer;
5
6use Elastica\Document;
7use Elastica\ResultSet;
11use MediaWiki\Title\Title;
12
// Server used to build the search in getDocuments() and to check whether a hit is a local suggestion.
20 private SearchableTtmServer $server;
// Search parameters; this class reads the keys 'offset', 'limit', 'sourcelanguage', 'language', 'query' and 'filter'.
21 private array $params;
// First result set of the most recent scroll; stays null until getDocuments() has run.
22 private ?ResultSet $resultSet = null;
// Total hit count reported by the result set stored above.
23 private int $total = 0;
// Passed as the third argument to createSearch(); presumably the highlight pre/post markers — TODO confirm.
24 private array $hl = [ '', '' ];
25
/**
 * @param array $params Search parameters. The visible code reads the keys
 *   'query', 'sourcelanguage', 'language', 'filter', 'offset' and 'limit'.
 * @param SearchableTtmServer $server Server used to create and run the search.
 */
public function __construct( array $params, SearchableTtmServer $server ) {
	$this->server = $server;
	$this->params = $params;
}
30
/**
 * Search in the source language and return one page of messages that
 * survive the local filtering done in extractMessages().
 *
 * As a side effect the first scroll response and its total hit count are
 * stored, so that getResultSet() and getTotalHits() can return them.
 *
 * @return array[] At most 'limit' entries, starting at 'offset' within the
 *   locally filtered results.
 */
public function getDocuments(): array {
	$limit = $this->params['limit'];
	$offset = $this->params['offset'];

	$searchOptions = $this->params;
	$searchOptions['language'] = $this->params['sourcelanguage'];
	// Request more than was asked for: the local filtering step in
	// extractMessages() is likely to discard many of the hits.
	$searchOptions['limit'] = $limit * 10;
	// TODO: the real offset should be communicated to the frontend. It currently assumes
	// that the next offset is current offset + limit and the previous one is current
	// offset - limit. Scrolling results backwards might be difficult to fix, so for now
	// the offset is applied locally.
	$searchOptions['offset'] = 0;

	// @phan-suppress-next-line PhanUndeclaredMethod
	$query = $this->server->createSearch( $this->params['query'], $searchOptions, $this->hl );
	$scroll = $query->scroll( '5s' );

	// Needed for aggregations, which only the first scroll response carries.
	$this->resultSet = null;

	$collected = [];
	foreach ( $scroll as $page ) {
		if ( $this->resultSet === null ) {
			$this->resultSet = $page;
			$this->total = $page->getTotalHits();
		}

		foreach ( $this->extractMessages( $page->getDocuments() ) as $message ) {
			$collected[] = $message;
		}

		// Stop scrolling once the requested page can be filled.
		if ( count( $collected ) >= $offset + $limit ) {
			break;
		}
	}

	if ( $this->resultSet === null ) {
		// The loop body never ran (no hits); still expose the result set.
		$firstPage = $scroll->current();
		$this->resultSet = $firstPage;
		$this->total = $firstPage->getTotalHits();
	}

	// clear() was introduced in Elastica 5.3.1, but the Elastica extension uses 5.3.0
	if ( is_callable( [ $scroll, 'clear' ] ) ) {
		$scroll->clear();
	}

	return array_slice( $collected, $offset, $limit );
}
82
/**
 * Reduce raw search hits to the local messages that match the requested
 * translation-state filter in the target language.
 *
 * Hits are dropped when the server does not report them as local
 * suggestions, when their 'localid' is not a valid title, or when that
 * title is not a valid message handle.
 *
 * @param array $documents Hits of one scroll response; each element must
 *   provide getData() returning an array with 'localid' and 'content'.
 * @return array[] List of arrays with the keys 'content', 'localid' and
 *   'language'. For the 'translated' and 'fuzzy' filters 'content' is the
 *   translation, otherwise it is the content stored in the search hit.
 */
private function extractMessages( array $documents ): array {
	$language = $this->params['language'];
	$filter = $this->params['filter'];

	$messages = [];
	foreach ( $documents as $document ) {
		$data = $document->getData();

		// @phan-suppress-next-line PhanUndeclaredMethod
		if ( !$this->server->isLocalSuggestion( $data ) ) {
			continue;
		}

		$title = Title::newFromText( $data['localid'] );
		if ( !$title ) {
			continue;
		}

		$handle = new MessageHandle( $title );
		if ( !$handle->isValid() ) {
			continue;
		}

		$key = $title->getNamespace() . ':' . $title->getDBkey();
		$messages[$key] = $data['content'];
	}

	if ( $messages === [] ) {
		// Nothing survived the checks above, so there is nothing to filter.
		return [];
	}

	$definitions = new MessageDefinitions( $messages );
	$collection = MessageCollection::newFromDefinitions( $definitions, $language );

	if ( $filter === 'untranslated' ) {
		$collection->filter( MessageCollection::FILTER_HAS_TRANSLATION, MessageCollection::EXCLUDE_MATCHING );
	} elseif ( in_array( $filter, $this->getAvailableFilters(), true ) ) {
		// Strict comparison: a non-string value such as true must not
		// loosely match a filter name and be handed to the collection.
		$collection->filter( $filter, MessageCollection::INCLUDE_MATCHING );
	}

	// For these filters the result shows the translation instead of the indexed content.
	$useTranslation = $filter === 'translated' || $filter === 'fuzzy';
	if ( $useTranslation ) {
		$collection->loadTranslations();
	}

	$ret = [];
	foreach ( $collection->keys() as $messageKey => $titleValue ) {
		$handle = new MessageHandle( Title::newFromLinkTarget( $titleValue ) );

		$ret[] = [
			'content' => $useTranslation
				? $collection[$messageKey]->translation()
				: $messages[$messageKey],
			'localid' => $handle->getTitleForBase()->getPrefixedText(),
			'language' => $language,
		];
	}

	return $ret;
}
147
/**
 * List the filter names that extractMessages() recognises.
 *
 * @return string[]
 */
public function getAvailableFilters(): array {
	return [ 'translated', 'fuzzy', 'untranslated' ];
}
155
/**
 * Total hit count recorded by the last getDocuments() call; 0 before
 * any search has been run.
 *
 * @return int
 */
public function getTotalHits(): int {
	return $this->total;
}
159
/**
 * Result set stored by the last getDocuments() call.
 *
 * The backing property is null until getDocuments() has run, so call
 * that method first; returning null would violate the declared return type.
 *
 * @return ResultSet
 */
public function getResultSet(): ResultSet {
	return $this->resultSet;
}
163}
This file contains the class for core message collections implementation.
Wrapper for message definitions, just to beautify the code.
Class for pointing to messages, like Title class is for titles.
Interface for TtmServer that can act as backend for translation search.