Skip to content

Commit

Permalink
core/remote: Optimize directory content fetching (#2196)
Browse files Browse the repository at this point in the history
Directories content retrieval as part of the selective synchronization
can be quite costly and is particularly useless during the initial
content retrieval.
As such, we optimize both use cases to limit the number of requests and
thus memory consumption and retrieval time.
  • Loading branch information
taratatach authored Feb 11, 2022
2 parents 369b3d8 + 56c59e2 commit eefe7b9
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 75 deletions.
3 changes: 2 additions & 1 deletion core/pouch/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const {
migrate,
migrationLog
} = require('./migrations')
const remoteConstants = require('../remote/constants')

/*::
import type { Config } from '../config'
Expand Down Expand Up @@ -621,7 +622,7 @@ class Pouch {
async getRemoteSeq() /*: Promise<string> */ {
const doc = await this.byIdMaybe('_local/remoteSeq')
if (doc) return doc.seq
else return '0'
else return remoteConstants.INITIAL_SEQ
}

// Set last remote replication sequence
Expand Down
5 changes: 4 additions & 1 deletion core/remote/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,8 @@ module.exports = {

// Maximum file size allowed by Swift thus the remote Cozy.
// See https://docs.openstack.org/kilo/config-reference/content/object-storage-constraints.html
MAX_FILE_SIZE: FIVE_GIGABYTES
MAX_FILE_SIZE: FIVE_GIGABYTES,

// Initial CouchDB sequence
INITIAL_SEQ: '0'
}
98 changes: 65 additions & 33 deletions core/remote/cozy.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const {
FILES_DOCTYPE,
FILE_TYPE,
DIR_TYPE,
INITIAL_SEQ,
MAX_FILE_SIZE,
OAUTH_CLIENTS_DOCTYPE
} = require('./constants')
Expand Down Expand Up @@ -317,28 +318,28 @@ class RemoteCozy {
}

async changes(
since /*: string */ = '0',
since /*: string */ = INITIAL_SEQ,
batchSize /*: number */ = 3000
) /*: Promise<{last_seq: string, docs: Array<MetadataRemoteInfo|RemoteDeletion>}> */ {
) /*: Promise<{last_seq: string, docs: Array<MetadataRemoteInfo|RemoteDeletion>, isInitialFetch: boolean}> */ {
const client = await this.newClient()
const { last_seq, remoteDocs } =
since === '0'
? await fetchInitialChanges(since, client, batchSize)
: await fetchChangesFromFeed(since, this.client, batchSize)
const isInitialFetch = since === INITIAL_SEQ
const { last_seq, remoteDocs } = isInitialFetch
? await fetchInitialChanges(since, client, batchSize)
: await fetchChangesFromFeed(since, this.client, batchSize)

const docs = (await this.completeRemoteDocs(
dropSpecialDocs(remoteDocs)
)).sort(byPath)

return { last_seq, docs }
return { last_seq, docs, isInitialFetch }
}

async fetchLastSeq() {
const client = await this.newClient()
const { last_seq } = await client
.collection(FILES_DOCTYPE)
.fetchChangesRaw({
since: '0',
since: INITIAL_SEQ,
descending: true,
limit: 1,
includeDocs: false
Expand Down Expand Up @@ -450,34 +451,65 @@ class RemoteCozy {
client = client || (await this.newClient())

let dirContent = []
let resp /*: { next: boolean, bookmark?: string, data: Object[] } */ = {
next: true,
data: []
let nextDirs = [dir]
while (nextDirs.length) {
const nestedContent = await this.getDirectoriesContent(nextDirs, {
client
})
dirContent = dirContent.concat(nestedContent.docs)
nextDirs = nestedContent.nextDirs
}
while (resp && resp.next) {
const queryDef = Q(FILES_DOCTYPE)
.where({
dir_id: dir._id
})
.indexFields(['dir_id', 'name'])
.sortBy([{ dir_id: 'asc' }, { name: 'asc' }])
.limitBy(3000)
.offsetBookmark(resp.bookmark)
resp = await client.query(queryDef)
for (const j of resp.data) {
const remoteJson = jsonApiToRemoteJsonDoc(j)
if (remoteJson._deleted) continue

const remoteDoc = await this.toRemoteDoc(remoteJson, dir)
dirContent.push(remoteDoc)
if (remoteDoc.type === DIR_TYPE) {
// Fetch subdir content
dirContent.push(this.getDirectoryContent(remoteDoc, { client }))
}

return dirContent.sort((a, b) => {
if (a.path < b.path) return -1
if (a.path > b.path) return 1
return 0
})
}

async getDirectoriesContent(
dirs /*: $ReadOnlyArray<RemoteDir> */,
{ client } /*: { client: CozyClient } */
) /*: Promise<{ nextDirs: $ReadOnlyArray<MetadataRemoteDir>, docs: $ReadOnlyArray<MetadataRemoteInfo> }> */ {
const queryDef = Q(FILES_DOCTYPE)
.where({
dir_id: { $in: dirs.map(dir => dir._id) }
})
.indexFields(['dir_id', 'name'])
.sortBy([{ dir_id: 'asc' }, { name: 'asc' }])
.limitBy(3000)

const resp = await this.queryAll(queryDef, { client })

const nextDirs = []
const docs = []
for (const j of resp.data) {
const remoteJson = jsonApiToRemoteJsonDoc(j)
if (remoteJson._deleted) continue

const parentDir = dirs.find(
dir => dir._id === remoteJson.attributes.dir_id
)
const remoteDoc = await this.toRemoteDoc(remoteJson, parentDir)
docs.push(remoteDoc)

if (remoteDoc.type === DIR_TYPE) nextDirs.push(remoteDoc)
}
return { nextDirs, docs }
}

async queryAll(queryDef /*: Q */, { client } /*: { client: CozyClient } */) {
const { data, next, bookmark } = await client.query(queryDef)

if (next) {
return {
data: data.concat(
await this.queryAll(queryDef.offsetBookmark(bookmark), { client })
)
}
} else {
return { data }
}
// $FlowFixMe Array.prototype.flat is available in NodeJS v12
return (await Promise.all(dirContent)).flat()
}

async isEmpty(id /*: string */) /*: Promise<boolean> */ {
Expand Down
31 changes: 25 additions & 6 deletions core/remote/watcher/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ import type EventEmitter from 'events'
import type { Pouch } from '../../pouch'
import type Prep from '../../prep'
import type { RemoteCozy } from '../cozy'
import type { Metadata, MetadataRemoteInfo, SavedMetadata, RemoteRevisionsByID } from '../../metadata'
import type {
Metadata,
MetadataRemoteInfo,
MetadataRemoteDir,
SavedMetadata,
RemoteRevisionsByID
} from '../../metadata'
import type { RemoteChange, RemoteFileMove, RemoteDirMove, RemoteDescendantChange } from '../change'
import type { RemoteDeletion } from '../document'
import type { RemoteError } from '../errors'
Expand All @@ -42,6 +48,15 @@ const log = logger({

const sideName = 'remote'

const folderMightHaveBeenExcluded = (
remoteDir /*: MetadataRemoteDir */
) /*: boolean %checks */ => {
// A folder newly created has a rev number of 1.
// Once exluded, its rev number is at least 2.
// Once re-included, its rev number is at least 3.
return metadata.extractRevNumber(remoteDir) > 2
}

/** Get changes from the remote Cozy and prepare them for merge */
class RemoteWatcher {
/*::
Expand Down Expand Up @@ -146,7 +161,9 @@ class RemoteWatcher {
try {
this.events.emit('buffering-start')
const seq = await this.pouch.getRemoteSeq()
const { last_seq, docs } = await this.remoteCozy.changes(seq)
const { last_seq, docs, isInitialFetch } = await this.remoteCozy.changes(
seq
)
this.events.emit('online')

if (docs.length === 0) {
Expand All @@ -156,7 +173,7 @@ class RemoteWatcher {

this.events.emit('remote-start')
this.events.emit('buffering-end')
await this.pullMany(docs)
await this.pullMany(docs, { isInitialFetch })

let target = -1
target = (await this.pouch.db.changes({ limit: 1, descending: true }))
Expand Down Expand Up @@ -191,18 +208,20 @@ class RemoteWatcher {
* FIXME: Misleading method name?
*/
async pullMany(
docs /*: $ReadOnlyArray<MetadataRemoteInfo|RemoteDeletion> */
docs /*: $ReadOnlyArray<MetadataRemoteInfo|RemoteDeletion> */,
{ isInitialFetch } /*: { isInitialFetch: boolean } */
) /*: Promise<void> */ {
let changes = await this.analyse(docs, await this.olds(docs))

for (const change of changes) {
if (
!isInitialFetch &&
change.type === 'DirAddition' &&
metadata.extractRevNumber(change.doc.remote) > 1
folderMightHaveBeenExcluded(change.doc.remote)
) {
log.trace(
{ path: change.doc.path },
'Fetching content of unknwon folder...'
'Fetching content of unknown folder...'
)
const children = (await this.remoteCozy.getDirectoryContent(
change.doc.remote
Expand Down
4 changes: 3 additions & 1 deletion test/integration/sync_state.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ describe('Sync state', () => {

it('1 sync error (missing remote file)', async () => {
const remoteFile = builders.remoteFile().build()
await helpers._remote.watcher.pullMany([remoteFile])
await helpers._remote.watcher.pullMany([remoteFile], {
isInitialFetch: true // XXX: avoid unnecessary remote requests
})
await helpers.syncAll()
should(events.emit.args).containDeepOrdered([
['sync-start'],
Expand Down
2 changes: 1 addition & 1 deletion test/support/helpers/remote.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class RemoteTestHelpers {
}

async simulateChanges(docs /*: * */) {
await this.side.watcher.pullMany(docs)
await this.side.watcher.pullMany(docs, { isInitialFetch: false })
}

async readFile(path /*: string */) {
Expand Down
37 changes: 36 additions & 1 deletion test/unit/remote/cozy.js
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,10 @@ describe('RemoteCozy', function() {
'dir/subdir/subsubdir/',
'dir/subdir/file',
'dir/file',
'dir/other-subdir/',
'dir/other-subdir/next-level/',
'dir/other-subdir/next-level/last-level/',
'dir/other-subdir/next-level/last-level/content',
'dir/subdir/subsubdir/last',
'hello.txt',
'other-dir/',
Expand All @@ -887,13 +891,44 @@ describe('RemoteCozy', function() {
remoteCozy.getDirectoryContent(tree['dir/'])
).be.fulfilledWith([
tree['dir/file'],
tree['dir/other-subdir/'],
tree['dir/other-subdir/next-level/'],
tree['dir/other-subdir/next-level/last-level/'],
tree['dir/other-subdir/next-level/last-level/content'],
tree['dir/subdir/'],
tree['dir/subdir/file'],
tree['dir/subdir/subsubdir/'],
tree['dir/subdir/subsubdir/last']
])
})

it('requests content level by level and not directory by directory', async () => {
const tree = await builders.createRemoteTree([
'dir/',
'dir/file',
'dir/subdir/',
'dir/subdir/file',
'dir/subdir/subsubdir/',
'dir/subdir/subsubdir/last',
'dir/other-subdir/',
'dir/other-subdir/next-level/',
'dir/other-subdir/next-level/last-level/',
'dir/other-subdir/next-level/last-level/content',
'other-dir/',
'other-dir/content'
])

const client = await remoteCozy.newClient()
const querySpy = sinon.spy(client, 'query')
try {
await remoteCozy.getDirectoryContent(tree['dir/'], { client })

should(querySpy).have.callCount(4)
} finally {
querySpy.restore()
}
})

it('does not fail on an empty directory', async () => {
const dir = await builders
.remoteDir()
Expand All @@ -918,7 +953,7 @@ describe('RemoteCozy', function() {
const stubbedClient = await remoteCozy.newClient()
const originalQuery = stubbedClient.query.bind(stubbedClient)
sinon.stub(stubbedClient, 'query').callsFake(async queryDef => {
if (queryDef.selector.dir_id === tree['dir/subdir/']._id) {
if (queryDef.selector.dir_id.$in.includes(tree['dir/subdir/']._id)) {
throw new Error('test error')
} else {
return originalQuery(queryDef)
Expand Down
Loading

0 comments on commit eefe7b9

Please sign in to comment.