diff --git a/src/background/background-script.js b/src/background/background-script.js index 813e6020..99035953 100644 --- a/src/background/background-script.js +++ b/src/background/background-script.js @@ -1,295 +1,25 @@ import compat from '../shared/compat.js'; -import { product } from '../shared/func.js'; +import { lazy } from '../shared/func.js'; import Recorder from './Recorder.js'; import preferences from '../shared/preferences.js'; +import { detectLanguage } from '../shared/langid.js' +import { MessageHandler, DefaultMap } from '../shared/common.js'; +import { StorageArea } from '../shared/storage.js'; function isSameDomain(url1, url2) { return url1 && url2 && new URL(url1).host === new URL(url2).host; } -// Just a little test to run in the web inspector for debugging -async function test(provider) { - console.log(await Promise.all([ - provider.translate({ - from: 'de', - to: 'en', - text: 'Hallo Welt. Wie geht es dir?' - }), - provider.translate({ - from: 'de', - to: 'en', - text: 'Mein Name ist Jelmer.', - html: true - }) - ])); -} - -/** - * Temporary fix around few models, bad classified, and similar looking languages. - * From https://github.com/bitextor/bicleaner/blob/3df2b2e5e2044a27b4f95b83710be7c751267e5c/bicleaner/bicleaner_hardrules.py#L50 - * @type {Set[]} - */ -const SimilarLanguages = [ - new Set(['es', 'ca', 'gl', 'pt']), - new Set(['no', 'nb', 'nn', 'da']) // no == nb for bicleaner -]; - -/** - * @typedef {Object} TranslationModel - * @property {String} from - * @property {String} to - * @property {Boolean} local - */ - -/** - * @typedef {Object} TranslationProvider - * @property {Promise} registry - * @property {(request:Object) => Promise} translate - */ - -/** - * Language detection function that also provides a sorted list of - * from->to language pairs, based on the detected language, the preferred - * target language, and what models are available. - * @param {{sample:String, suggested:{[lang:String]: Number}}} - * @param {TranslationProvider} provider - * @return {Promise<{from:String|Undefined, to:String|Undefined, models: TranslationModel[]}>} - */ -async function detectLanguage({sample, suggested}, provider, options) { - if (!sample) - throw new Error('Empty sample'); - - const [detected, models] = await Promise.all([ - compat.i18n.detectLanguage(sample), - provider.registry - ]); - - const modelsFromEng = models.filter(({from}) => from === 'en'); - const modelsToEng = models.filter(({to}) => to === 'en'); - - // List of all available from->to translation pairs including ones that we - // achieve by pivoting through English. - const pairs = [ - ...models.map(model => ({from: model.from, to: model.to, pivot: null, models: [model]})), - ...Array.from(product(modelsToEng, modelsFromEng)) - .filter(([{from}, {to}]) => from !== to) - .map(([from, to]) => ({from: from.from, to: to.to, pivot: 'en', models: [from, to]})) - ]; - - // {[lang]: 0.0 .. 1.0} map of likeliness the page is in this language - /** @type {{[lang:String]: Number }} **/ - let confidence = Object.fromEntries(detected.languages.map(({language, percentage}) => [language, percentage / 100])); - - // Take suggestions into account - Object.entries(suggested || {}).forEach(([lang, score]) => { - lang = lang.substr(0, 2); // TODO: not strip everything down to two letters - confidence[lang] = Math.max(score, confidence[lang] || 0.0); - }); - - // Work-around for language pairs that are close together - Object.entries(confidence).forEach(([lang, score]) => { - SimilarLanguages.forEach(group => { - if (group.has(lang)) { - group.forEach(other => { - if (!(other in confidence)) - confidence[other] = score / 2; // little bit lower though - }) - } - }) - }); - - // Fetch the languages that the browser says the user accepts (i.e Accept header) - /** @type {String[]} **/ - let accepted = await compat.i18n.getAcceptLanguages(); - - // TODO: right now all our models are just two-letter codes instead of BCP-47 :( - accepted = accepted.map(language => language.substr(0, 2)) - - // If the user has a preference, put that up front - if (options?.preferred) - accepted.unshift(options.preferred); - - // Remove duplicates - accepted = accepted.filter((val, index, values) => values.indexOf(val, index + 1) === -1) - - // {[lang]: 0.0 .. 1.0} map of likeliness the user wants to translate to this language. - /** @type {{[lang:String]: Number }} */ - const preferred = accepted.reduce((preferred, language, i, languages) => { - return language in preferred - ? preferred - : {...preferred, [language]: 1.0 - (i / languages.length)}; - }, {}); - - // Function to score a translation model. Higher score is better - const score = ({from, to, pivot, models}) => { - return 1.0 * (confidence[from] || 0.0) // from language is good - + 0.5 * (preferred[to] || 0.0) // to language is good - + 0.2 * (pivot ? 0.0 : 1.0) // preferably don't pivot - + 0.1 * (1.0 / models.reduce((acc, model) => acc + model.local ? 0.0 : 1.0, 1.0)) // prefer local models - }; - - // Sort our possible models, best one first - pairs.sort((a, b) => score(b) - score(a)); - - // console.log({ - // accepted, - // preferred, - // confidence, - // pairs: pairs.map(pair => ({...pair, score: score(pair)})) - // }); - - // (Using pairs instead of confidence and preferred because we prefer a pair - // we can actually translate to above nothing every time right now.) - return { - from: pairs.length ? pairs[0].from : undefined, - to: pairs.length ? pairs[0].to : undefined, - models: pairs - } +async function isTranslatedDomain(url) { + const {alwaysTranslateDomains} = await preferences.get({alwaysTranslateDomains: []}); + return url && alwaysTranslateDomains.includes(new URL(url).host); } -const State = { - PAGE_LOADING: 'page-loading', - PAGE_LOADED: 'page-loaded', - PAGE_ERROR: 'page-error', - TRANSLATION_NOT_AVAILABLE: 'translation-not-available', - TRANSLATION_AVAILABLE: 'translation-available', - DOWNLOADING_MODELS: 'downloading-models', - TRANSLATION_IN_PROGRESS: 'translation-in-progress', - TRANSLATION_FINISHED: 'translation-finished', - TRANSLATION_ABORTED: 'translation-aborted', - TRANSLATION_ERROR: 'translation-error' -}; - -// States in which the user has the translation enabled. Used to keep -// translating pages in the same domain. -const activeTranslationStates = [ - State.DOWNLOADING_MODELS, - State.TRANSLATION_IN_PROGRESS, - State.TRANSLATION_FINISHED, - State.TRANSLATION_ABORTED, -]; - -class Tab extends EventTarget { - /** - * @param {Number} id tab id - */ - constructor(id) { - super(); - this.id = id; - this.state = { - state: State.PAGE_LOADING, - active: false, - from: undefined, - to: undefined, - models: [], - debug: false, - error: null, - url: null, - pendingTranslationRequests: 0, - totalTranslationRequests: 0, - modelDownloadRead: undefined, - modelDownloadSize: undefined, - record: false, - recordedPagesCount: undefined, - recordedPagesURL: undefined - }; - - /** @type {Map} */ - this.frames = new Map(); - - /** @type {{diff:Object,callbackId:Number}|null} */ - this._scheduledUpdateEvent = null; - } - - /** - * Begins translation of the tab - */ - translate() { - this.update(state => ({ - state: State.TRANSLATION_IN_PROGRESS - })); - } - - /** - * Aborts translation of the tab - */ - abort() { - this.update(state => ({ - state: State.TRANSLATION_ABORTED - })); - - this.frames.forEach(frame => { - frame.postMessage({ - command: 'TranslateAbort' - }); - }); - } - - /** - * Resets the tab state after navigating away from a page. The disconnect - * of the tab's content scripts will already have triggered abort() - * @param {String} url - */ - reset(url) { - this.update(state => { - if (isSameDomain(url, state.url) && activeTranslationStates.includes(state.state)) { - return { - url, - pendingTranslationRequests: 0, - totalTranslationRequests: 0 - }; - } else { - return { - url, - page: undefined, - from: null, // Only reset from as page could be different - // language. We leave to selected as is - pendingTranslationRequests: 0, - totalTranslationRequests: 0, - state: State.PAGE_LOADING, - error: null - }; - } - }); - } - - /** - * @callback StateUpdatePredicate - * @param {Object} state - * @return {Object} state - */ - - /** - * @param {StateUpdatePredicate} callback - */ - update(callback) { - const diff = callback(this.state); - if (diff === undefined) - throw new Error('state update callback function did not return a value'); - - Object.assign(this.state, diff); - - // Delay the update notification to accumulate multiple changes in one - // notification. - if (!this._scheduledUpdateEvent) { - const callbackId = setTimeout(this._dispatchUpdateEvent.bind(this)); - this._scheduledUpdateEvent = {diff, callbackId}; - } else { - Object.assign(this._scheduledUpdateEvent.diff, diff); - } - } - - _dispatchUpdateEvent() { - const {diff} = this._scheduledUpdateEvent; - this._scheduledUpdateEvent = null; - - const updateEvent = new Event('update'); - updateEvent.data = diff; - this.dispatchEvent(updateEvent); - } -} +// Give content-script access to session storage +// compat.storage.session.setAccessLevel(compat.storage.TRUSTED_AND_UNTRUSTED_CONTEXTS); +/* function updateActionButton(event) { switch (event.target.state.state) { case State.TRANSLATION_AVAILABLE: @@ -327,124 +57,91 @@ function updateMenuItems({data, target: {state}}) { && state.models?.some(({from, to}) => from === state.to && to === state.from) }); } +*/ -// Supported translation providers /** - * @type{[name:String]:Promise>} + * Popup port per tab + *@type {Map} */ -const providers = {}; - -// WASM (shipped) wither in this thread or in an offscreen page -if (globalThis?.Worker) { - providers['wasm'] = async () => (await import('./WASMTranslationHelper.js')).default; -} else if (chrome?.offscreen) { - providers['wasm'] = async () => (await import('./WASMOffscreenTranslationHelper.js')).default; -} +const popups = new Map(); -// Locally installed -if (compat.runtime.connectNative) { - providers['translatelocally'] = async () => (await import('./TLTranslationHelper.js')).default; -} - -// State per tab -const tabs = new Map(); +/** + * Session storage per tab. Used for state. + *@type {Map} + */ +const session = new DefaultMap((tabId) => { + return new StorageArea('session', `tab:${tabId}`); +}); -function getTab(tabId) { - if (!tabs.has(tabId)) { - const tab = new Tab(tabId); - tabs.set(tabId, tab); - - // Update action button - tab.addEventListener('update', updateActionButton); - - // Update context menu items for this tab - tab.addEventListener('update', updateMenuItems) - } +/** + * Runtime storage per tab. Used for progress. + *@type {Map} + */ +const local = new DefaultMap((tabId) => { + return new StorageArea(); +}); - return tabs.get(tabId); -} +/** + * Supported translation providers + * @type{[name:String]:Promise>} + */ +const providers = { + // Chrome-compatible implementation which runs the Worker inside an offscreen page + ...(chrome?.offscreen ? {wasm: async () => (await import('./WASMOffscreenTranslationHelper.js')).default} : {}), + // Qt application running in headless mode on the user's machine + ...(compat.runtime.connectNative ? {translatelocally: async () => (await import('./TLTranslationHelper.js')).default} : {}), + // Normal implementation: uses Worker directly + ...(globalThis?.Worker ? {wasm: async () => (await import('./WASMTranslationHelper.js')).default} : {}), +}; -// Instantiation of a TranslationHelper. Access it through .get(). -let provider = new class { - /** - * @type {Promise} - */ - #provider; +// Instantiation of a TranslationHelper. Access it as if it is a promise. +const provider = lazy(async (self) => { + let {provider:preferred} = await preferences.get({provider: 'wasm'}) - constructor() { - // Reset provider instance if the selected provider is changed by the user. - preferences.listen('provider', this.reset.bind(this)); + if (!(preferred in providers)) { + console.info(`Provider ${preferred} not in list of supported translation providers. Falling back to 'wasm'`); + preferred = 'wasm'; + preferences.set({provider: preferred}, { + silent: true // Don't trigger the `provider.reset()` down below + }); } + + let {options} = await preferences.get({options: { + workers: 1, // be kind to the user's pc + cacheSize: 20000, // remember website boilerplate + useNativeIntGemm: true // faster is better (unless it is buggy: https://github.com/browsermt/marian-dev/issues/81) + }}); - /** - * Get (and if necessary instantiates) a translation helper. - * @returns {Promise} - */ - get() { - if (this.#provider) - return this.#provider; - - return this.#provider = new Promise(async (accept) => { - let preferred = await preferences.get('provider', 'wasm') - - if (!(preferred in providers)) { - console.info(`Provider ${preferred} not in list of supported translation providers. Falling back to 'wasm'`); - preferred = 'wasm'; - preferences.set('provider', preferred, {silent: true}); - } - - let options = await preferences.get('options', { - workers: 1, // be kind to the user's pc - cacheSize: 20000, // remember website boilerplate - useNativeIntGemm: true // faster is better (unless it is buggy: https://github.com/browsermt/marian-dev/issues/81) - }); - - const implementation = await providers[preferred](); - - const provider = new implementation(options); - - provider.onerror = err => { - console.error('Translation provider error:', err); + const implementation = await providers[preferred](); - tabs.forEach(tab => tab.update(() => ({ - state: State.PAGE_ERROR, - error: `Translation provider error: ${err.message}`, - }))); + const provider = new implementation(options); - // Try falling back to WASM is the current provider doesn't work - // out. Might lose some translations the process but - // InPageTranslation should be able to deal with that. - if (preferred !== 'wasm') { - console.info(`Provider ${preferred} encountered irrecoverable errors. Falling back to 'wasm'`); - preferences.delete('provider', preferred); - this.reset(); - } - }; + provider.onerror = err => { + console.error('Translation provider error:', err); - accept(provider); + compat.runtime.sendMessage({ + command: 'Error', + data: err }); - } - /** - * Useful to get access to the provider but only if it was instantiated. - * @returns {Promise|Null} - */ - has() { - return this.#provider - } + // Try falling back to WASM is the current provider doesn't work + // out. Might lose some translations the process but + // InPageTranslation should be able to deal with that. + if (preferred !== 'wasm') { + console.info(`Provider ${preferred} encountered irrecoverable errors. Falling back to 'wasm'`); + preferences.delete('provider'); + self.reset(); + } + }; - /** - * Releases the current translation provider. - */ - reset() { - // TODO: Why are we doing this again? - tabs.forEach(tab => tab.reset(tab.state.url)); + self.onReset(() => provider.delete()); - this.has()?.then(provider => provider.delete()); + return provider; +}); - this.#provider = null; - } -}; +// When the provider preference is changed in the options page, reload the +// translation engine. +preferences.listen(['provider'], () => provider.reset()); const recorder = new Recorder(); @@ -453,271 +150,134 @@ const recorder = new Recorder(); * mechanism of the tab. This allows the content-script to make UpdateRequest * calls to update the state, and receive state updates through Update messages. */ -function connectTab(tab, port) { - const updateListener = (event) => { - port.postMessage({ - command: 'Update', - data: event.data - }); - }; - - // Listen for state updates locally - tab.addEventListener('update', updateListener); - - // If the port disconnect, stop listening - port.onDisconnect.addListener(event => { - tab.removeEventListener('update', updateListener); - }); - - // Allow the port to update the tab state with update requests - port.onMessage.addListener(({command, data}) => { - if (command === 'UpdateRequest') { - tab.update(state => data); - } - }); - - // Send an initial update to the port - port.postMessage({ - command: 'Update', - data: tab.state - }); -} - function connectContentScript(contentScript) { - const tab = getTab(contentScript.sender.tab.id); - - // Register this content script with the tab - tab.frames.set(contentScript.sender.frameId, contentScript); + let abortSignal = {aborted: false}; - let _abortSignal = {aborted: false}; const abort = () => { // Use the signal we stored for this tab to signal all pending // translation promises to not resolve. - _abortSignal.aborted = true; + abortSignal.aborted = true; // Also prune any pending translation requests that have this same // signal from the queue. No need to put any work into it. - provider.has()?.then(provider => { - if (provider) + if (provider.instantiated) + provider.then(provider => { provider.remove((request) => request._abortSignal.aborted); - }) + }); // Create a new signal in case we want to start translating again. - _abortSignal = {aborted: false}; + abortSignal = {aborted: false}; }; - // Make the content-script receive state updates. Also sends the initial - // state update. - connectTab(tab, contentScript); + const tabId = contentScript.sender.tab.id; - // If the content-script stops (i.e. user navigates away) - contentScript.onDisconnect.addListener(event => { - // Disconnect it from this tab - tab.frames.delete(contentScript.sender.frameId); - - // Abort all in-progress translations that belonged to this page - abort(); - }); - - // Automatically start translating preferred domains. - tab.addEventListener('update', async ({target, data: {state}}) => { - if (state === State.TRANSLATION_AVAILABLE) { - const domains = await preferences.get('alwaysTranslateDomains', []); - if (target.state.from && target.state.to && target.state.url - && domains.includes(new URL(target.state.url).host)) - tab.translate(); - } + const handler = new MessageHandler(callback => { + contentScript.onMessage.addListener(callback) }); - // Respond to certain messages from the content script. Mainly individual - // translation requests, and detect language requests which then change the - // state of the tab to reflect whether translations are available or not. - contentScript.onMessage.addListener(async (message) => { - switch (message.command) { - // Send by the content-scripts inside this tab - case "DetectLanguage": - // TODO: When we support multiple frames inside a tab, we - // should integrate the results from each frame somehow. - // For now we ignore it, because 90% of the time it will be - // an ad that's in English and mess up our estimate. - if (contentScript.sender.frameId !== 0) - return; - - try { - const preferred = await preferences.get('preferredLanguageForPage') - - const summary = await detectLanguage(message.data, await provider.get(), {preferred}) - - tab.update(state => ({ - from: state.from || summary.from, // default to keeping chosen from/to languages - to: state.to || summary.to, - models: summary.models, - state: summary.models.length > 0 // TODO this is always true (?) - ? State.TRANSLATION_AVAILABLE - : State.TRANSLATION_NOT_AVAILABLE - })); - } catch (error) { - tab.update(state => ({ - state: State.PAGE_ERROR, - error - })); - } - break; - - // Send by the content-scripts inside this tab - case "TranslateRequest": - tab.update(state => ({ + // If the content-script stops (i.e. user navigates away) + contentScript.onDisconnect.addListener(() => abort()); + + handler.on("TranslateRequest", async (data) => { + local.get(tabId).get({ + pendingTranslationRequests: 0, + totalTranslationRequests: 0, + }).then((state) => { + local.get(tabId).set({ pendingTranslationRequests: state.pendingTranslationRequests + 1, totalTranslationRequests: state.totalTranslationRequests + 1 - })); - - // If we're recording requests from this tab, add the translation - // request. Also disabled when developer setting is false since - // then there are no controls to turn it on/off. - preferences.get('developer').then(developer => { - if (developer && tab.state.record) { - recorder.record(message.data); - tab.update(state => ({ - recordedPagesCount: recorder.size - })); - } }); + }); - try { - const translator = await provider.get(); - const response = await translator.translate({...message.data, _abortSignal}); - if (!response.request._abortSignal.aborted) { - contentScript.postMessage({ - command: "TranslateResponse", - data: response - }); - } - } catch(e) { - // Catch error messages caused by abort() - if (e?.message === 'removed by filter' && e?.request?._abortSignal?.aborted) - return; - - // Tell the requester that their request failed. - contentScript.postMessage({ - command: "TranslateResponse", - data: { - request: message.data, - error: e.message - } - }); - - // TODO: Do we want the popup to shout on every error? - // Because this can also be triggered by failing Outbound - // Translation! - tab.update(state => ({ - state: State.TRANSLATION_ERROR, - error: e.message - })); - } finally { - tab.update(state => ({ - // TODO what if we just navigated away and all the - // cancelled translations from the previous page come - // in and decrement the pending count of the current - // page? - pendingTranslationRequests: state.pendingTranslationRequests - 1 - })); - } - break; + // If we're recording requests from this tab, add the translation + // request. Also disabled when developer setting is false since + // then there are no controls to turn it on/off. + Promise.all([ + preferences.get({developer:false}), + session.get(tabId).get({record: false}) + ]).then(([{developer}, {record}]) => { + if (developer && record) + recorder.record(data); + }); - // Send by this script's Tab.abort() but handled per content-script - // since each content-script handler (connectContentScript) has the - // ability to abort all of the content-script's translation - // requests. Same code is called when content-script disconnects. - case "TranslateAbort": - abort(); - break; + try { + const translator = await provider + const response = await translator.translate({...data, abortSignal}); + if (!response.request.abortSignal.aborted) { + contentScript.postMessage({ + command: "TranslateResponse", + data: response + }); + } + } catch(e) { + // Catch error messages caused by abort() + if (e?.message === 'removed by filter' && e?.request?.abortSignal?.aborted) + return; + + console.error('Error during translation', e); + + // Tell the requester that their request failed. + contentScript.postMessage({ + command: "TranslateResponse", + data: { + request: data, + error: e.message + } + }); + + // TODO: Do we want the popup to shout on every error? + // Because this can also be triggered by failing Outbound + // Translation! + compat.runtime.sendMessage({ + command: 'Error', + data: e + }); + } finally { + local.get(tabId).get({ + pendingTranslationRequests: 0 + }).then((state) => { + local.get(tabId).set({ + // TODO what if we just navigated away and all the + // cancelled translations from the previous page come + // in and decrement the pending count of the current + // page? + pendingTranslationRequests: state.pendingTranslationRequests - 1 + }); + }); } }); } -function connectPopup(popup) { - const tabId = parseInt(popup.name.substr('popup-'.length)); - - const tab = getTab(tabId); - - // Make the popup receive state updates - connectTab(tab, popup); - - popup.onMessage.addListener(async message => { - switch (message.command) { - case "DownloadModels": - // Tell the tab we're downloading models - tab.update(state => ({ - state: State.DOWNLOADING_MODELS - })); - - const translator = await provider.get(); - - // Start the downloads and put them in a {[download:promise]: {read:int,size:int}} - const downloads = new Map(message.data.models.map(model => [translator.downloadModel(model), {read:0.0, size:0.0}])); - - // For each download promise, add a progress listener that updates the tab state - // with how far all our downloads have progressed so far. - downloads.forEach((_, promise) => { - // (not supported by the Chrome offscreen proxy implementation right now) - if (promise.addProgressListener) { - promise.addProgressListener(({read, size}) => { - // Update download we got a notification about - downloads.set(promise, {read, size}); - // Update tab state about all downloads combined (i.e. model, optionally pivot) - tab.update(state => ({ - modelDownloadRead: Array.from(downloads.values()).reduce((sum, {read}) => sum + read, 0), - modelDownloadSize: Array.from(downloads.values()).reduce((sum, {size}) => sum + size, 0) - })); - }); - } - - promise.then(() => { - // Trigger update of state.models because the `local` - // property this model has changed. We don't support - // any nested key updates so let's just push the whole - // damn thing. - tab.update(state => ({ - models: state.models - })); - }) - }); +async function connectPopup(port) { + const tabId = parseInt(port.name.slice('popup-'.length)); - // Finally, when all downloads have finished, start translating the page. - try { - await Promise.all(downloads.keys()); - tab.translate(); - } catch (e) { - tab.update(state => ({ - state: State.TRANSLATION_ERROR, - error: e.toString() - })); - } - break; - case "TranslateStart": - tab.translate(); - break; - - case 'TranslateAbort': - tab.abort(); - break; + popups.set(tabId, port); - case 'ExportRecordedPages': - popup.postMessage({ - command: 'DownloadRecordedPages', - data: { - name: 'recorded-pages.xml', - url: URL.createObjectURL(recorder.exportAXML()) - } - }); - recorder.clear(); - tab.update(state => ({recordedPagesCount: 0})); - break; - } + port.onDisconnect.addListener(() => popups.delete(tabId)); + + provider.then(async (translator) => { + port.postMessage({ + command: 'Models', + data: await translator.registry + }) + }); + + local.get(tabId).get().then(data => { + port.postMessage({ + command: 'Progress', + data + }); }); } -// Receive incoming connection requests from content-script and popup +// Receive incoming connection requests from content-script and popup. +// The content script connection is used only for translation. If the +// connection is dropped (page unload, tab closed, etc) then that is used +// as a signal to cancel those translations. +// The popup connection is only used for state updates, such as model +// downloads and translation state. Since these are tab-specific but very +// frequent sending them over the connection instead of compat.runtime should +// keep some event loops a little less busy. compat.runtime.onConnect.addListener((port) => { if (port.name == 'content-script') connectContentScript(port); @@ -725,48 +285,208 @@ compat.runtime.onConnect.addListener((port) => { connectPopup(port); }); -// Initialize or update the state of a tab when navigating -compat.tabs.onUpdated.addListener((tabId, diff) => { - if (diff.url) - getTab(tabId).reset(diff.url); - // Todo: treat reload and link different? Reload -> disable translation? -}); - // When a new tab is created start, track its active state -compat.tabs.onCreated.addListener(({id: tabId, active, openerTabId}) => { +compat.tabs.onCreated.addListener(async ({id: tabId, openerTabId}) => { let inheritedState = {}; // If the tab was opened from another tab that was already translating, // this tab will inherit that state and also automatically continue // translating. if (openerTabId) { - const {state, url, from, to, models} = getTab(openerTabId).state; - inheritedState = {state, url, from, to, models}; + inheritedState = await session.get(openerTabId).get({ + translate: false, + url: undefined, + from: undefined, + to: undefined, + }); } - getTab(tabId).update(() => ({...inheritedState, active})); + console.log('Setting', tabId, inheritedState); + + session.get(tabId).set(inheritedState); }); -// Remove the tab state if a tab is removed -compat.tabs.onRemoved.addListener(({tabId}) => { - tabs.delete(tabId); +// Initialize or update the state of a tab when navigating +compat.tabs.onUpdated.addListener(async (tabId, diff, tab) => { + if (diff.url) { + const state = await session.get(tabId).get({ + translate: false, + url: undefined + }); + + // If we changed domain, reset from, to and domain. + if (!isSameDomain(diff.url, state.url)) { + console.log('different domain', tabId, diff.url, 'was', state.url); + Object.assign(state, { + translate: await isTranslatedDomain(diff.url), + from: undefined, + to: undefined + }); + } + + session.get(tabId).set({ + ...state, + url: diff.url + }); + } + + if (diff.status && diff.status === 'complete') { + const {translate, from, to} = await session.get(tabId).get({ + translate: false, + from: undefined, + to: undefined + }); + + console.log('tabState status=complete', translate, from, to); + + if (translate && from && to) { + compat.tabs.sendMessage(tabId, { + command: 'TranslatePage', + data: {from, to} + }); + } + } + + // Todo: treat reload and link different? Reload -> disable translation? +}); + +const handler = new MessageHandler(callback => { + compat.runtime.onMessage.addListener(callback); }); -// Let each tab know whether its the active one. We use this state change -// event to keep the menu items in sync. -compat.tabs.onActivated.addListener(({tabId}) => { - for (let [id, tab] of tabs) { - // If the tab's active state doesn't match the activated tab, fix that. - if (tab.active != (tab.id === tabId)) - tab.update(() => ({active: Boolean(tab.id === tabId)})); +// Sent from content script once it has enough content to detect the language +handler.on('DetectLanguage', async (data, sender) => { + // TODO: When we support multiple frames inside a tab, we + // should integrate the results from each frame somehow. + // For now we ignore it, because 90% of the time it will be + // an ad that's in English and mess up our estimate. + if (sender.frameId !== 0) + return; + + try { + const {preferredLanguageForPage:preferred} = await preferences.get({preferredLanguageForPage:undefined}) + const {from, to, models} = await detectLanguage(data, (await provider).registry, {preferred}) + session.get(sender.tab.id).set({from, to, models}); + + const {translate} = await session.get(sender.tab.id).get({translate: false}); + + console.log('detectLanguage', translate, from, to); + + if (translate) + compat.tabs.sendMessage(sender.tab.id, { + command: 'TranslatePage', + data: {from, to} + }); + + } catch (error) { + console.error('Error during language detection', error); + compat.runtime.sendMessage({ + command: 'Error', + data: error + }); } }); -// On start-up init all (important) tabs we missed onCreated for -compat.tabs.query({active:true}).then(allTabs => { - for (const tab of allTabs) - getTab(tab.id).reset(tab.url); -}) +// Sent from the popup when the download button is clicked. +handler.on("DownloadModels", async ({tabId, from, to, models}) => { + // Tell the tab we're downloading models + /* + tab.update(state => ({ + state: State.DOWNLOADING_MODELS + })); + */ + + const translator = await provider; + + // Start the downloads and put them in a {[download:promise]: {read:int,size:int}} + const downloads = new Map(models.map(model => [translator.downloadModel(model), {read:0.0, size:0.0}])); + + // For each download promise, add a progress listener that updates the tab state + // with how far all our downloads have progressed so far. + downloads.forEach((_, promise) => { + // (not supported by the Chrome offscreen proxy implementation right now) + if (promise.addProgressListener) { + promise.addProgressListener(async ({read, size}) => { + // Update download we got a notification about + downloads.set(promise, {read, size}); + + // Update tab state about all downloads combined (i.e. model, optionally pivot) + const data = await local.get(tabId).set({ + modelDownloadRead: Array.from(downloads.values()).reduce((sum, {read}) => sum + read, 0), + modelDownloadSize: Array.from(downloads.values()).reduce((sum, {size}) => sum + size, 0) + }) + + // Tell the popup (if there is one) about the progress :D + popups.get(tabId)?.postMessage({ + command: 'Progress', + data + }); + }); + } + + promise.then(async () => { + // Trigger update of state.models because the `local` + // property this model has changed. We don't support + // any nested key updates so let's just push the whole + // damn thing. + compat.runtime.sendMessage('Models', await translator.registry); + }) + }); + + // Finally, when all downloads have finished, start translating the page. + try { + await Promise.all(downloads.keys()); + session.get(tabId).set({ + translate: true, + from, + to + }); + compat.tabs.sendMessage(tabId, { + command: 'TranslatePage', + data: {from, to} + }); + } catch (e) { + compat.runtime.sendMessage({ + command: 'Error', + data: e + }); + } +}); + +// Sent from Popup when translate button is pressed +handler.on("TranslateStart", ({tabId, from, to}) => { + session.get(tabId).set({ + translate: true, + from, + to + }); + + compat.tabs.sendMessage(tabId, { + command: 'TranslatePage', + data: {from, to} + }); +}); + +// Sent from Popup if "restore original page" button is pressed +handler.on('TranslateAbort', ({tabId}) => { + session.get(tabId).set({translate: false}); + + compat.tabs.sendMessage(tabId, { + command: 'RestorePage', + data: {} + }); +}); + +// Sent from popup when recorded pages download link is clicked +handler.on('ExportRecordedPages', ({}, sender, respond) => { + respond({ + name: 'recorded-pages.xml', + url: URL.createObjectURL(recorder.exportAXML()) + }); + recorder.clear(); + updateTab(state => ({recordedPagesCount: 0})); + return true; +}); compat.runtime.onInstalled.addListener(() => { // Add "translate selection" menu item to selections @@ -784,36 +504,35 @@ compat.runtime.onInstalled.addListener(() => { }); }); -chrome.contextMenus.onClicked.addListener((info, tab) => { +compat.contextMenus.onClicked.addListener(async ({menuItemId, frameId}, tab) => { // First sanity check whether we know from and to languages // (and it isn't the same by accident) - const {from, to} = getTab(tab.id).state; - if (from === undefined || to === undefined || from === to) { - compat.action.openPopup(); - return; - } + const {from, to} = session.get(tab.id).get({from, to}); // Send the appropriate message down to the content script of the // tab we just clicked inside of. - switch (info.menuItemId) { + switch (menuItemId) { case 'translate-selection': - getTab(tab.id).frames.get(info.frameId).postMessage({ - command: 'TranslateSelection' - }); + if (from === undefined || to === undefined || from === to) { + compat.action.openPopup(); + break; + } + + compat.tabs.sendMessage(tab.id, { + command: 'TranslateSelection', + data: {from, to}, + }, {frameId}); break; case 'show-outbound-translation': - getTab(tab.id).frames.get(info.frameId).postMessage({ - command: 'ShowOutboundTranslation' - }); + const translator = await provider; + compat.tabs.sendMessage(tab.id, { + command: 'ShowOutboundTranslation', + data: { + from, + to, + models: await translator.registry + }, + }, {frameId}); break; } }) - -// Makes debugging easier -Object.assign(self, { - tabs, - providers, - provider, - preferences, - test -}) diff --git a/src/content/content-script.js b/src/content/content-script.js index 7476d0d0..ed574245 100644 --- a/src/content/content-script.js +++ b/src/content/content-script.js @@ -1,102 +1,91 @@ import compat from '../shared/compat.js'; +import { MessageHandler } from '../shared/common.js'; import LanguageDetection from './LanguageDetection.js'; import InPageTranslation from './InPageTranslation.js'; import SelectionTranslation from './SelectionTranslation.js'; import OutboundTranslation from './OutboundTranslation.js'; import { LatencyOptimisedTranslator } from '@browsermt/bergamot-translator'; import preferences from '../shared/preferences.js'; +import { lazy } from '../shared/func.js'; const listeners = new Map(); -const state = { - state: 'page-loaded' -}; - // Loading indicator for html element translation preferences.bind('progressIndicator', progressIndicator => { document.body.setAttribute('x-bergamot-indicator', progressIndicator); }, {default: ''}) -function on(command, callback) { - if (!listeners.has(command)) - listeners.set(command, []); +preferences.bind('debug', debug => { + if (debug) + document.querySelector('html').setAttribute('x-bergamot-debug', true); + else + document.querySelector('html').removeAttribute('x-bergamot-debug'); +}, {default: false}); + +const sessionID = new Date().getTime(); + +async function detectPageLanguage() { + // request the language detection class to extract a page's snippet + const languageDetection = new LanguageDetection(); + const sample = await languageDetection.extractPageContent(); + const suggested = languageDetection.extractSuggestedLanguages(); - listeners.get(command).push(callback); + // Once we have the snippet, send it to background script for analysis + // and possibly further action (like showing the popup) + compat.runtime.sendMessage({ + command: "DetectLanguage", + data: { + url: document.location.href, + sample, + suggested + } + }); } -on('Update', diff => { - Object.assign(state, diff); - // document.body.dataset.xBergamotState = JSON.stringify(state); -}); +// Changed by translation start requests. +const state = { + from: null, + to: null +}; + +// background-script connection is only used for translation +let connection = lazy(async (self) => { + const port = compat.runtime.connect({name: 'content-script'}); + + // Reset lazy connection instance if port gets disconnected + port.onDisconnect.addListener(() => self.reset()); + + // Likewise, if the connection is reset from outside, disconnect port. + self.onReset(() => port.disconnect()); -on('Update', diff => { - if ('state' in diff) { - switch (diff.state) { - // Not sure why we have the page-loading event here, like, as soon - // as frame 0 connects we know we're in page-loaded territory. - case 'page-loading': - postBackgroundScriptMessage({ - command: 'UpdateRequest', - data: {state: 'page-loaded'} - }); + const handler = new MessageHandler(callback => { + port.onMessage.addListener(callback); + }) + + handler.on('TranslateResponse', data => { + switch (data.request.user?.source) { + case 'InPageTranslation': + inPageTranslation.enqueueTranslationResponse(data); break; - - case 'translation-in-progress': - inPageTranslation.addElement(document.querySelector("head > title")); - inPageTranslation.addElement(document.body); - inPageTranslation.start(state.from); + case 'SelectionTranslation': + selectionTranslation.enqueueTranslationResponse(data); break; - - default: - inPageTranslation.restore(); + case 'OutboundTranslation': + outboundTranslationWorker.enqueueTranslationResponse(data); break; } - } -}); - -on('Update', async diff => { - if ('state' in diff && diff.state === 'page-loaded') { - // request the language detection class to extract a page's snippet - const languageDetection = new LanguageDetection(); - const sample = await languageDetection.extractPageContent(); - const suggested = languageDetection.extractSuggestedLanguages(); - - // Once we have the snippet, send it to background script for analysis - // and possibly further action (like showing the popup) - postBackgroundScriptMessage({ - command: "DetectLanguage", - data: { - url: document.location.href, - sample, - suggested - } - }); - } -}); + }); -on('Update', diff => { - if ('debug' in diff) { - if (diff.debug) - document.querySelector('html').setAttribute('x-bergamot-debug', JSON.stringify(state)); - else - document.querySelector('html').removeAttribute('x-bergamot-debug'); - } + return port; }); -const sessionID = new Date().getTime(); - -// Used to track the last text selection translation request, so we don't show -// the response to an old request by accident. -let selectionTranslationId = null; - -function translate(text, user) { - console.assert(state.from !== undefined && state.to !== undefined, "state.from or state.to is not set"); - postBackgroundScriptMessage({ +async function translate(text, user) { + (await connection).postMessage({ command: "TranslateRequest", data: { // translation request - from: state.from, - to: state.to, + from: user.from || state.from, + to: user.to || state.to, html: user.html, text, @@ -185,33 +174,14 @@ class BackgroundScriptWorkerProxy { throw new TypeError('Only batches of 1 are expected'); return new Promise((accept, reject) => { - const request = { - // translation request - from: models[0].from, - to: models[0].to, - html: texts[0].html, - text: texts[0].text, - - // data useful for the response - user: { - id: ++this.#serial, - source: 'OutboundTranslation' - }, - - // data useful for the scheduling - priority: 3, - - // data useful for recording - session: { - id: sessionID, - url: document.location.href - } - }; - this.#pending.set(request.user.id, {request, accept, reject}); - postBackgroundScriptMessage({ - command: "TranslateRequest", - data: request + translate(texts[0].text, { + id: ++this.#serial, + source: 'OutboundTranslation', + from: texts[0].from, + to: texts[0].to, + html: texts[0].html, + priority: 3 }); }) } @@ -270,116 +240,71 @@ const outboundTranslation = new OutboundTranslation(new class { } }()); -// This one is mainly for the TRANSLATION_AVAILABLE event -on('Update', async (diff) => { - if ('from' in diff) - outboundTranslation.setPageLanguage(diff.from); +const handler = new MessageHandler(callback => { + compat.runtime.onMessage.addListener(callback); +}) - const preferredLanguage = await preferences.get('preferredLanguageForOutboundTranslation'); +handler.on('TranslatePage', ({from,to}) => { + // Save for the translate() function + Object.assign(state, {from,to}); - if ('to' in diff) - outboundTranslation.setUserLanguage(preferredLanguage || diff.to); + inPageTranslation.addElement(document.querySelector("head > title")); + inPageTranslation.addElement(document.body); + inPageTranslation.start(from); +}) - if ('from' in diff || 'models' in diff) { - outboundTranslation.setUserLanguageOptions(state.models.reduce((options, entry) => { - // `state` has already been updated at this point as well and we know - // that is complete. `diff` might not contain all the keys we need. - if (entry.to === state.from && !options.has(entry.from)) - options.add(entry.from) - return options - }, new Set())); - } -}); - -on('TranslateResponse', data => { - switch (data.request.user?.source) { - case 'InPageTranslation': - inPageTranslation.enqueueTranslationResponse(data); - break; - case 'SelectionTranslation': - selectionTranslation.enqueueTranslationResponse(data); - break; - case 'OutboundTranslation': - outboundTranslationWorker.enqueueTranslationResponse(data); - break; - } -}); - -// Timeout of retrying connectToBackgroundScript() -let retryTimeout = 100; - -let backgroundScript; - -function postBackgroundScriptMessage(message) { - if (!backgroundScript) - connectToBackgroundScript(); - - return backgroundScript.postMessage(message); -} - -function connectToBackgroundScript() { - // If we're already connected (e.g. when this function was called directly - // but then also through 'pageshow' event caused by 'onload') ignore it. - if (backgroundScript) - return; - - // Connect to our background script, telling it we're the content-script. - backgroundScript = compat.runtime.connect({name: 'content-script'}); - - // Connect all message listeners (the "on()" calls above) - backgroundScript.onMessage.addListener(({command, data}) => { - if (listeners.has(command)) - listeners.get(command).forEach(callback => callback(data)); - - // (We're connected, reset the timeout) - retryTimeout = 100; - }); - - // When the background script disconnects, also pause in-page translation - backgroundScript.onDisconnect.addListener(() => { - inPageTranslation.stop(); +handler.on('RestorePage', () => { + inPageTranslation.restore(); +}) - // If we cannot connect because the backgroundScript is not (yet?) - // available, try again in a bit. - if (backgroundScript.error && backgroundScript.error.toString().includes('Receiving end does not exist')) { - // Exponential back-off sounds like a safe thing, right? - retryTimeout *= 2; - - // Fallback fallback: if we keep retrying, stop. We're just wasting CPU at this point. - if (retryTimeout < 5000) - setTimeout(connectToBackgroundScript, retryTimeout); - } - - // Mark as disconnected - backgroundScript = null; - }); -} - -connectToBackgroundScript(); +detectPageLanguage(); // When this page shows up (either through onload or through history navigation) -window.addEventListener('pageshow', connectToBackgroundScript); +window.addEventListener('pageshow', () => { + // TODO: inPageTranslation.resume()??? +}); // When this page disappears (either onunload, or through history navigation) window.addEventListener('pagehide', e => { - if (backgroundScript) { - backgroundScript.disconnect(); - backgroundScript = null; - } + // Ditch the inPageTranslation state for pending translation requests. + inPageTranslation.stop(); + + // Disconnect from the background page, which will trigger it to prune + // our outstanding translation requests. + connection.reset(); }); let lastClickedElement = null; window.addEventListener('contextmenu', e => { + Object.assign(state, {from, to}); // TODO: HACK! lastClickedElement = e.target; }, {capture: true}); -on('TranslateSelection', () => { +handler.on('TranslateSelection', ({from, to}) => { + Object.assign(state, {from, to}); // TODO: HACK! const selection = document.getSelection(); selectionTranslation.start(selection); }); -on('ShowOutboundTranslation', () => { +handler.on('ShowOutboundTranslation', async ({from, to, models}) => { + if (from) + outboundTranslation.setPageLanguage(from); + + const {preferredLanguageForOutboundTranslation} = await preferences.get({preferredLanguageForOutboundTranslation:undefined}); + if (to) + outboundTranslation.setUserLanguage(preferredLanguageForOutboundTranslation || to); + + if (from || models) { + outboundTranslation.setUserLanguageOptions(models.reduce((options, entry) => { + // `state` has already been updated at this point as well and we know + // that is complete. `diff` might not contain all the keys we need. + if (entry.to === from && !options.has(entry.from)) + options.add(entry.from) + return options + }, new Set())); + } + outboundTranslation.target = lastClickedElement; outboundTranslation.start(); }); diff --git a/src/popup/popup.html b/src/popup/popup.html index f7b22363..be10009a 100644 --- a/src/popup/popup.html +++ b/src/popup/popup.html @@ -67,48 +67,44 @@
-
+

Wanna translate this page?

- +
-
+

Downloading language model…

-
+

Translating page from to

-
+

Translated page from to .

-
+

Error during translation:

-
+

Error:

-
+

Translations not available for this page.

-
+

Downloading list of available language models…

-
- -

Translations not available for this page.

-
-