-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathtika_config_tester.php
116 lines (99 loc) · 3.49 KB
/
tika_config_tester.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* CLI tester for the Tika configuration of the search_elastic plugin.
*
* @package search
* @copyright 2023 David Castro <[email protected]>
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
// Mark this as a command-line script before config.php is loaded.
define('CLI_SCRIPT', true);

require(__DIR__ . '/../../../../config.php');
require_once($CFG->libdir . '/clilib.php'); // Cli only functions.

// First array: long options with their defaults. Second array: short aliases.
[$options, $unrecognized] = cli_get_params(
    [
        'help' => false,
        'testfileid' => '',
    ],
    [
        'h' => 'help',
        't' => 'testfileid',
    ]
);

// Any argument that is not a known option aborts the script with an error.
if ($unrecognized) {
    $unknownlist = implode("\n ", $unrecognized);
    cli_error(get_string('cliunknowoption', 'admin', $unknownlist));
}

// Print the usage text and stop when help was requested.
if ($options['help']) {
    echo "
Run Tika diagnostics.
Options:
-h, --help Print out this help
-t, --testfileid (Optional) PDF or accepted file id to send to tika for analysis
Examples:
\$ sudo -u www-data /usr/bin/php search/engine/elastic/cli/tika_config_tester.php -t=<file id>
";
    exit;
}
/**
 * Checks whether the Tika server configured for search_elastic answers requests.
 *
 * Modelled on \search_elastic\enrich\text\tika::tika_server_ready, with the
 * difference that problems are reported on the command line.
 *
 * @return bool True when a GET request to the Tika URL returns HTTP 200;
 *              false when the hostname or port setting is empty.
 */
function tika_server_ready() {
    $host = get_config('search_elastic', 'tikahostname');
    $portsetting = get_config('search_elastic', 'tikaport');
    $request = new \search_elastic\esrequest();

    // Both settings are needed to build the URL; report and give up otherwise.
    if (empty($host) || empty($portsetting)) {
        cli_writeln('tikahostname or tikaport are not set in elasticsearch config');
        return false;
    }

    // Drop trailing slashes from the hostname before appending the port.
    $url = rtrim($host, "/") . ':' . $portsetting;

    // Check we can reach Tika server.
    $reply = $request->get($url);
    $responsecode = $reply->getStatusCode();
    if ($responsecode == 200) {
        return true;
    }

    // The reply might have been transformed into a guzzleexception, so only
    // read the body when the object still offers one.
    $error = method_exists($reply, 'getBody') ? $reply->getBody() : 'Undetermined';
    cli_error("Making a GET request to $url resulted in error:\nHTTP Code: $responsecode\nResponse: $error");

    return false;
}
// Stop right away when the Tika server cannot be used with the stored settings.
$canusetika = tika_server_ready();
if (!$canusetika) {
    cli_error("Tika cannot be used. Please verify plugin configuration.");
}
cli_writeln('Connection to tika was successful!');

// The file analysis is optional: without a file id only the connection is tested.
$fileid = $options['testfileid'];
if (empty($fileid)) {
    cli_writeln('No file id specified, exiting.');
    exit(0);
}

// An option passed without a value (a bare "-t") is reported as boolean true by
// cli_get_params(), so require a purely numeric id before querying the file storage.
if (!is_string($fileid) || !ctype_digit($fileid)) {
    cli_error('The file id must be a positive integer, for example: -t=123');
}

$tika = new \search_elastic\enrich\text\tika(get_config('search_elastic'));
$fs = get_file_storage();
$file = $fs->get_file_by_id($fileid);

// get_file_by_id() returns false when no file has this id; fail with a clear
// message instead of passing false on to Tika.
if (!$file) {
    cli_error("No file found with id $fileid.");
}

$text = $tika->analyze_file($file);
cli_writeln('Text found in file ' . $file->get_filename() . ': ' . $text);