Skip to content

Commit

Permalink
db-tabulator: expose more APIs in JS preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
siddharthvp committed Jun 1, 2024
1 parent ec506ee commit 02f59f0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 10 deletions.
2 changes: 1 addition & 1 deletion db-tabulator/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ export class Query extends EventEmitter {
let excerpts: Record<string, string> = {};
for (let pageSet of arrayChunk(pages, 100)) {
for await (let pg of bot.readGen(pageSet, {
rvsection: 0,
rvsection: '0',
redirects: false
})) {
if (pg.invalid || pg.missing) {
Expand Down
8 changes: 8 additions & 0 deletions db-tabulator/database-report.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
/**
 * Wrap a message in a bold red span so that it stands out as an error.
 * The message is inserted as-is, without HTML escaping.
 */
function error(msg) {
    const style = 'color: red; font-weight: bold';
    return `<span style="${style}">${msg}</span>`;
}
function safeStringify(obj) {
try {
return JSON.stringify(obj, undefined, 2);
} catch (e) {
return '<Circular object>';
}
}
let params = new Map(new URLSearchParams(location.search));
let page = params.get('page');
Expand Down Expand Up @@ -46,6 +53,7 @@
'query-executing': data => `Query (<code>${shorten(data.args[0], 80)}</code>) submitted to database.`,
'query-executed': data => `Query finished running in ${data.args[0]} seconds.`,
'preprocessing': _ => `Started JS preprocessing on query result.`,
'js-logging': data => `Logging output: <pre>${safeStringify(data.args[0])}</pre>`,
'js-no-array': _ => error(`JS preprocess() must return an array. `) + 'Saving result without preprocessing.',
'js-invalid-return': _ => error(`JS preprocess() returned a value which is not transferable. `) +
'Saving result without preprocessing.',
Expand Down
20 changes: 14 additions & 6 deletions db-tabulator/isolate.vm.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
/* eslint-disable no-unused-vars */
/* global __mwApiGet, __dbQueryResult, preprocess */
/* global __mwApiGet, __rawReq, __dbQueryResult, preprocess */
(async function() {
"${JS_CODE}";

async function mwApiGet(params) {
const response = await __mwApiGet.applySyncPromise(undefined, [JSON.stringify(params)]);
return JSON.parse(response);
const bot = {
async request(url) {
if (typeof url !== 'string') throw new Error('bot.request() needs a string url');
const response = await __rawReq.applySyncPromise(undefined, [url]);
return JSON.parse(response);
},
async api(params) {
if (typeof params !== 'object') throw new Error('bot.api() parameters need to be an object');
const response = await __mwApiGet.applySyncPromise(undefined, [JSON.stringify(params)]);
return JSON.parse(response);
}
}

"${JS_CODE}";

return JSON.stringify(await preprocess(JSON.parse(__dbQueryResult)));
})
48 changes: 45 additions & 3 deletions db-tabulator/preprocess.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {argv, fs, log, Mwn} from "../botbase";
import {argv, AuthManager, fs, log, Mwn} from "../botbase";
import {fork} from "child_process";
import EventEmitter from "events";
import type {Query} from "./app";
Expand Down Expand Up @@ -65,7 +65,8 @@ const apiClient = new Mwn({
apiUrl: 'https://en.wikipedia.org/w/api.php',
maxRetries: 0,
silent: true,
userAgent: '[[w:en:Template:Database report]], [[w:en:SDZeroBot]], node.js isolated-vm',
userAgent: '[[w:en:Template:Database report]] via [[w:en:SDZeroBot]], node.js isolated-vm',
OAuth2AccessToken: AuthManager.get('sdzerobot-dbreports').OAuth2AccessToken,
defaultParams: {
maxlag: undefined
}
Expand All @@ -82,7 +83,7 @@ export async function applyJsPreprocessing(rows: Record<string, string>[], jsCod
let startTime = process.hrtime.bigint();

// Import dynamically as this has native dependencies
let {Isolate, Reference} = await import('isolated-vm');
let {Isolate, Callback, Reference} = await import('isolated-vm');

const isolate = new Isolate({
memoryLimit: 16,
Expand All @@ -95,11 +96,17 @@ export async function applyJsPreprocessing(rows: Record<string, string>[], jsCod
const jail = context.global;
await jail.set('__dbQueryResult', JSON.stringify(rows));

// Expose log() to the sandboxed code: the value is printed on the host console
// and emitted on the query as a 'js-logging' event
await jail.set('log', new Callback((value) => {
    console.log(value);
    query.emit('js-logging', value);
}));

// Support readonly API access
await jail.set('__mwApiGet', new Reference(async function (rawParams: string) {
let params = JSON.parse(rawParams);
// Disallow write operations
params.action = 'query';
params.format = 'json';
delete params.token;
try {
return JSON.stringify(await apiClient.query(params));
Expand All @@ -108,6 +115,41 @@ export async function applyJsPreprocessing(rows: Record<string, string>[], jsCod
}
}));

// Support GET requests to an allow-list of Wikimedia endpoints.
// The function takes a URL and always resolves to a JSON string: either the serialised
// response body, or an object of the form { error: string }.
await jail.set('__rawReq', new Reference(async function (url: string) {
    // URL prefixes that requests may target; a URL is accepted if it starts with any of these
    const allowedDomains = [
        'https://en.wikipedia.org/api/rest_v1/', // Wikimedia REST API
        'https://wikimedia.org/api/rest_v1/', // Wikimedia REST API
        'https://en.wikipedia.org/w/rest.php/', // MediaWiki REST API
        'https://en.wikipedia.org/w/api.php?', // Action API
        'https://api.wikimedia.org/', // Wikimedia API gateway
    ];

    // The reference is reachable directly from code running in the isolate,
    // so the argument type cannot be taken for granted
    if (typeof url !== 'string') {
        return JSON.stringify({ error: 'URL must be a string' });
    }

    if (!allowedDomains.some(domain => url.startsWith(domain))) {
        return JSON.stringify({ error: `Disallowed domain. Allowed domains are: ${allowedDomains.join(', ')}` });
    }

    try {
        const response = await apiClient.rawRequest({
            method: 'GET',
            url: url,
            timeout: 10000,
            headers: {
                // Bot grant enables apihighlimit (for Action API), and helps avoid throttling for some REST APIs.
                // It has no write access.
                'Authorization': `Bearer ${AuthManager.get('sdzerobot-dbreports').OAuth2AccessToken}`
            }
        });
        try {
            return JSON.stringify(response.data);
        } catch (e) {
            // Body could not be serialised as JSON
            return JSON.stringify({ error: `Non JSON response from ${url}: ${response.data}` });
        }
    } catch (err) {
        // Prefer the HTTP status when the error carries one, else fall back to the error message
        let errMsg = err.statusCode ? (err.statusCode + ': ' + err.statusMessage) : err.message;
        return JSON.stringify({ error: errMsg });
    }
}));

let result = rows;

let doPreprocessing = async () => {
Expand Down

0 comments on commit 02f59f0

Please sign in to comment.