Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 61 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
EditPageShowEditFormInitialHandler | |
0.00% |
0 / 61 |
|
0.00% |
0 / 4 |
182 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
onEditPage__showEditForm_initial | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 | |||
getLangs | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getLangsForEngine | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
30 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Wikisource\HookHandler; |
4 | |
5 | use MediaWiki\Config\Config; |
6 | use MediaWiki\Config\ConfigException; |
7 | use MediaWiki\EditPage\EditPage; |
8 | use MediaWiki\Hook\EditPage__showEditForm_initialHook; |
9 | use MediaWiki\Logger\LoggerFactory; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\Output\OutputPage; |
12 | use MediaWiki\ResourceLoader\Context; |
13 | use MediaWiki\User\Options\UserOptionsLookup; |
14 | |
15 | // phpcs:disable MediaWiki.NamingConventions.LowerCamelFunctionsName.FunctionName |
/**
 * Hook handler that adds the Wikisource OCR tool to the edit form of
 * ProofreadPage pages, and supplies the list of OCR languages/models
 * offered by each OCR engine.
 */
class EditPageShowEditFormInitialHandler implements EditPage__showEditForm_initialHook {

	/** Whether the OCR tool is switched on ($wgWikisourceEnableOcr). */
	private bool $enabled;

	/** Base URL of the OCR tool, without trailing slash; '' when not configured. */
	private string $toolUrl;

	/** @var array Value of $wgWikisourceTranskribusModels, passed through to JS unchanged. */
	private $transkribusModels;

	private UserOptionsLookup $userOptionsLookup;

	/**
	 * @param Config $config Source of the WikisourceEnableOcr, WikisourceOcrUrl and
	 *  WikisourceTranskribusModels settings.
	 * @param UserOptionsLookup $userOptionsLookup Used to read the 'usebetatoolbar' preference.
	 */
	public function __construct(
		Config $config,
		UserOptionsLookup $userOptionsLookup
	) {
		$this->enabled = (bool)$config->get( 'WikisourceEnableOcr' );
		// Cast before trimming: an unset (null) URL must end up as '' so that the
		// "no tool URL" check in the hook can report it, rather than rtrim()
		// emitting a null-argument deprecation on PHP 8.1+.
		$this->toolUrl = rtrim( (string)$config->get( 'WikisourceOcrUrl' ), '/' );
		$this->transkribusModels = $config->get( 'WikisourceTranskribusModels' );
		$this->userOptionsLookup = $userOptionsLookup;
	}

	/**
	 * Load the OCR module and its configuration on the edit form.
	 *
	 * Does nothing unless OCR is enabled, the page being edited has the
	 * ProofreadPage "page" content model, and the user has the WikiEditor
	 * toolbar ('usebetatoolbar') enabled.
	 *
	 * @param EditPage $editor
	 * @param OutputPage $out OutputPage instance to write to
	 * @return bool|void True or no return value to continue or false to abort
	 * @throws ConfigException If OCR is enabled for this page but no tool URL is configured.
	 */
	public function onEditPage__showEditForm_initial( $editor, $out ) {
		if ( !$this->enabled ) {
			return;
		}
		// Make sure we're editing a page of the right content type (and that PRP is available).
		if ( !defined( 'CONTENT_MODEL_PROOFREAD_PAGE' )
			|| $editor->contentModel !== CONTENT_MODEL_PROOFREAD_PAGE
		) {
			return;
		}
		// Make sure there's a tool URL defined.
		if ( !$this->toolUrl ) {
			throw new ConfigException( 'Please set tool URL with $wgWikisourceOcrUrl' );
		}
		// Require the WikiEditor toolbar to be enabled.
		$useBetaToolbar = $this->userOptionsLookup
			->getOption( $out->getUser(), 'usebetatoolbar' );
		if ( !$useBetaToolbar ) {
			return;
		}

		// Add tool's URL to Content Security Policy.
		$out->getCSP()->addDefaultSrc( $this->toolUrl );
		// Add OCR modules.
		$out->addModules( 'ext.wikisource.OCR' );
		$out->addJsConfigVars( [
			'WikisourceOcrUrl' => $this->toolUrl,
			'WikisourceTranskribusModels' => $this->transkribusModels,
		] );
	}

	/**
	 * Get all languages/models, keyed by OCR engine name
	 * ('google', 'tesseract' and 'transkribus').
	 *
	 * @param Context $context Not used by this method.
	 * @param Config $config
	 * @return array
	 */
	public static function getLangs( Context $context, Config $config ) {
		return [
			'google' => self::getLangsForEngine( 'google', $config ),
			'tesseract' => self::getLangsForEngine( 'tesseract', $config ),
			'transkribus' => self::getLangsForEngine( 'transkribus', $config ),
		];
	}

	/**
	 * Get available languages/models for a selected engine.
	 *
	 * The list is requested from the OCR tool's `api/available_langs` endpoint and
	 * kept in the main WAN object cache for a day. Any failure (no tool URL, no
	 * response, or a response without `available_langs`) yields an empty array.
	 *
	 * @param string $engine
	 * @param Config $config
	 * @return array The decoded `available_langs` value of the tool's response, or
	 *  an empty array on failure or when running under PHPUnit.
	 */
	protected static function getLangsForEngine( $engine, $config ) {
		// Never hit the network from unit tests.
		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			return [];
		}
		$logger = LoggerFactory::getInstance( 'Wikisource' );
		// Cast before trimming so that an unset (null) URL becomes '' without a
		// null-argument deprecation on PHP 8.1+.
		$toolUrl = rtrim( (string)$config->get( 'WikisourceOcrUrl' ), '/' );
		if ( $toolUrl === '' ) {
			// Without a base URL the request below could only fail; skip it.
			$logger->warning( 'OCR tool URL is not configured, cannot fetch languages for {engine}',
				[ 'engine' => $engine ] );
			return [];
		}
		$services = MediaWikiServices::getInstance();
		$http = $services->getHttpRequestFactory();
		$cache = $services->getMainWANObjectCache();
		$proxy = $config->get( 'WikisourceHttpProxy' );
		$url = $toolUrl . '/api/available_langs?engine=' . rawurlencode( $engine );
		$fname = __METHOD__;
		$langs = $cache->getWithSetCallback(
			$cache->makeGlobalKey( 'wikisource-ocr-langs', $engine ),
			$cache::TTL_DAY,
			// Returning false from this callback signals failure; see
			// WANObjectCache::getWithSetCallback for how such values are treated.
			static function () use ( $url, $http, $proxy, $logger, $engine, $fname ) {
				$logger->debug( 'Language list not cached for {engine}, fetching now', [ 'engine' => $engine ] );
				$options = [];
				if ( $proxy ) {
					$options[ 'proxy' ] = $proxy;
				}
				$startTime = microtime( true );
				$response = $http->get( $url, $options, $fname );
				$logger->info(
					'OCR tool responded with {response_size} bytes after {response_time}ms',
					[
						'response_size' => strlen( (string)$response ),
						'response_time' => ( microtime( true ) - $startTime ) * 1000,
					]
				);
				if ( $response === null ) {
					$logger->warning( 'OCR empty response from tool', [ 'url' => $url ] );
					return false;
				}
				$contents = json_decode( $response );
				// isset() is false for invalid JSON (null), non-object JSON, and a
				// missing or null 'available_langs' member alike.
				if ( !isset( $contents->available_langs ) ) {
					$logger->warning( 'OCR tool response has no available_langs', [ 'url' => $url ] );
					return false;
				}
				return $contents->available_langs;
			},
			[ 'staleTTL' => $cache::TTL_WEEK ]
		);
		return $langs === false ? [] : $langs;
	}
}