Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 58 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 1 |
EditPageShowEditFormInitialHandler | |
0.00% |
0 / 58 |
|
0.00% |
0 / 4 |
156 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
onEditPage__showEditForm_initial | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
42 | |||
getLangs | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
getLangsForEngine | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
20 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\Wikisource\HookHandler; |
4 | |
5 | use MediaWiki\Config\Config; |
6 | use MediaWiki\Config\ConfigException; |
7 | use MediaWiki\EditPage\EditPage; |
8 | use MediaWiki\Hook\EditPage__showEditForm_initialHook; |
9 | use MediaWiki\Logger\LoggerFactory; |
10 | use MediaWiki\MediaWikiServices; |
11 | use MediaWiki\Output\OutputPage; |
12 | use MediaWiki\ResourceLoader\Context; |
13 | |
14 | // phpcs:disable MediaWiki.NamingConventions.LowerCamelFunctionsName.FunctionName |
/**
 * Hook handler that attaches the OCR tool to the edit form, plus static helpers
 * that fetch the list of languages/models the OCR tool offers for each engine.
 */
class EditPageShowEditFormInitialHandler implements EditPage__showEditForm_initialHook {

	/** @var bool Value of the WikisourceEnableOcr setting, cast to bool. */
	private $enabled;

	/** @var string Value of WikisourceOcrUrl without trailing slashes; empty string if unset. */
	private $toolUrl;

	/** @var array Value of the WikisourceTranskribusModels setting, passed through to JS unchanged. */
	private $WikisourceTranskribusModels;

	/**
	 * @param Config $config Source of the WikisourceEnableOcr, WikisourceOcrUrl and
	 *  WikisourceTranskribusModels settings.
	 */
	public function __construct( Config $config ) {
		$this->enabled = (bool)$config->get( 'WikisourceEnableOcr' );
		$this->toolUrl = self::normalizeToolUrl( $config->get( 'WikisourceOcrUrl' ) );
		$this->WikisourceTranskribusModels = $config->get( 'WikisourceTranskribusModels' );
	}

	/**
	 * Load the OCR module and its JS configuration on the edit form, provided that
	 * OCR is enabled, the page uses the proofread-page content model and the user
	 * has the 'usebetatoolbar' option set.
	 *
	 * @param EditPage $editor
	 * @param OutputPage $out OutputPage instance to write to
	 * @return bool|void True or no return value to continue or false to abort
	 * @throws ConfigException If OCR is enabled for a matching page but no tool URL is set.
	 */
	public function onEditPage__showEditForm_initial( $editor, $out ) {
		if ( !$this->enabled ) {
			return;
		}
		// Make sure we're editing a page of the right content type (and that PRP is available).
		if ( !defined( 'CONTENT_MODEL_PROOFREAD_PAGE' )
			|| $editor->contentModel !== CONTENT_MODEL_PROOFREAD_PAGE
		) {
			return;
		}
		// Make sure there's a tool URL defined.
		if ( !$this->toolUrl ) {
			throw new ConfigException( 'Please set tool URL with $wgWikisourceOcrUrl' );
		}
		// Require the WikiEditor toolbar to be enabled.
		$useBetaToolbar = MediaWikiServices::getInstance()
			->getUserOptionsLookup()
			->getOption( $out->getUser(), 'usebetatoolbar' );
		if ( !$useBetaToolbar ) {
			return;
		}

		// Add tool's URL to Content Security Policy.
		$out->getCSP()->addDefaultSrc( $this->toolUrl );
		// Add OCR modules.
		$out->addModules( 'ext.wikisource.OCR' );
		$out->addJsConfigVars( [
			'WikisourceOcrUrl' => $this->toolUrl,
			'WikisourceTranskribusModels' => $this->WikisourceTranskribusModels,
		] );
	}

	/**
	 * Get all languages/models, keyed by engine name ('google', 'tesseract', 'transkribus').
	 *
	 * @param Context $context Not used by this method.
	 * @param Config $config
	 * @return array Map of engine name to the result of getLangsForEngine() for that engine.
	 */
	public static function getLangs( Context $context, Config $config ) {
		return [
			'google' => self::getLangsForEngine( 'google', $config ),
			'tesseract' => self::getLangsForEngine( 'tesseract', $config ),
			'transkribus' => self::getLangsForEngine( 'transkribus', $config ),
		];
	}

	/**
	 * Get available languages/models for a selected engine.
	 *
	 * The list is requested from the tool's /api/available_langs endpoint and kept in
	 * the main WAN object cache under a global key per engine (TTL_DAY, with a
	 * staleTTL of TTL_WEEK). Any failure yields an empty array.
	 *
	 * @param string $engine
	 * @param Config $config
	 * @return array The decoded `available_langs` value, or an empty array on failure.
	 *  NOTE(review): json_decode() is called without the associative flag, so a JSON
	 *  object in the response arrives as stdClass rather than an array - confirm
	 *  against the tool's response format.
	 */
	protected static function getLangsForEngine( $engine, $config ) {
		$logger = LoggerFactory::getInstance( 'Wikisource' );
		$toolUrl = self::normalizeToolUrl( $config->get( 'WikisourceOcrUrl' ) );
		if ( $toolUrl === '' ) {
			// Without a base URL the request below would target a relative URL and
			// could never succeed, so skip the HTTP round trip entirely.
			$logger->warning(
				'OCR tool URL is not configured, cannot fetch languages for {engine}',
				[ 'engine' => $engine ]
			);
			return [];
		}

		$services = MediaWikiServices::getInstance();
		$http = $services->getHttpRequestFactory();
		$cache = $services->getMainWANObjectCache();
		$proxy = $config->get( 'WikisourceHttpProxy' );
		// Encode the engine so an unusual name cannot corrupt the query string.
		$url = $toolUrl . '/api/available_langs?engine=' . urlencode( (string)$engine );

		$langs = $cache->getWithSetCallback(
			$cache->makeGlobalKey( 'wikisource-ocr-langs', $engine ),
			$cache::TTL_DAY,
			static function () use ( $url, $http, $proxy, $logger, $engine ) {
				$logger->debug( 'Language list not cached for {engine}, fetching now', [ 'engine' => $engine ] );
				$options = [];
				if ( $proxy ) {
					$options[ 'proxy' ] = $proxy;
				}
				$startTime = microtime( true );
				$response = $http->get( $url, $options );
				$logger->info(
					'OCR tool responded with {response_size} bytes after {response_time}ms',
					[
						'response_size' => strlen( (string)$response ),
						'response_time' => ( microtime( true ) - $startTime ) * 1000,
					]
				);
				// false is the failure marker; it is mapped to [] after the cache call.
				// (Per the WANObjectCache documentation a false result is not stored.)
				if ( $response === null ) {
					$logger->warning( 'OCR empty response from tool', [ 'url' => $url ] );
					return false;
				}
				$contents = json_decode( $response );
				// isset() is safe even when decoding failed and $contents is null.
				if ( !isset( $contents->available_langs ) ) {
					$logger->warning(
						'OCR tool response did not contain available_langs',
						[ 'url' => $url ]
					);
					return false;
				}
				return $contents->available_langs;
			},
			[ 'staleTTL' => $cache::TTL_WEEK ]
		);
		return $langs === false ? [] : $langs;
	}

	/**
	 * Turn the raw WikisourceOcrUrl setting into a base URL without trailing slashes.
	 *
	 * @param mixed $url Raw configuration value.
	 * @return string The trimmed URL, or an empty string if the value is not a string.
	 */
	private static function normalizeToolUrl( $url ) {
		return is_string( $url ) ? rtrim( $url, '/' ) : '';
	}
}