Skip to content

Commit

Permalink
chore: add retention policy with GB or MB limitation #1885
Browse files Browse the repository at this point in the history
  • Loading branch information
ABresting committed Sep 30, 2023
1 parent 56dbe2a commit 07217f3
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 2 deletions.
Binary file added tests/waku_archive/test_retention_policy
Binary file not shown.
33 changes: 33 additions & 0 deletions tests/waku_archive/test_retention_policy.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import
../../../waku/waku_archive/driver/sqlite_driver,
../../../waku/waku_archive/retention_policy,
../../../waku/waku_archive/retention_policy/retention_policy_capacity,
../../../waku/waku_archive/retention_policy/retention_policy_size,
../testlib/common,
../testlib/wakucore

Expand Down Expand Up @@ -53,6 +54,38 @@ suite "Waku Archive - Retention policy":

## Cleanup
(waitFor driver.close()).expect("driver to close")

test "size retention policy - windowed message deletion":
# Writes `excess` messages to a test archive driver while applying a
# SizeRetentionPolicy, then verifies that the database size derived from the
# driver's page count and page size does not exceed `sizeLimit`.
# NOTE(review): indentation is lost in this view; the retention policy call is
# presumably executed inside the loop after every insert - confirm.
## Given
let
# size limit handed to the retention policy, in megabytes
sizeLimit:float = 0.05
# number of messages written to the driver
excess = 123

let driver = newTestArchiveDriver()

let retentionPolicy: RetentionPolicy = SizeRetentionPolicy.init(size=sizeLimit)

## When
for i in 1..excess:
let msg = fakeWakuMessage(payload= @[byte i], contentTopic=DefaultContentTopic, ts=Timestamp(i))

require (waitFor driver.put(DefaultPubsubTopic, msg, computeDigest(msg), msg.timestamp)).isOk()
require (waitFor retentionPolicy.execute(driver)).isOk()
## Then
# calculate the current database size in megabytes: pages * bytes-per-page / (1024 * 1024)
let pageSize = (waitFor driver.getPagesSize()).tryGet()
let pageCount = (waitFor driver.getPagesCount()).tryGet()
let sizeDB = float(pageCount * pageSize) / (1024.0 * 1024.0)

check:
# the size of the database is used to check that the storage limit has been preserved:
# the current database size must not exceed the sizeLimit given to the policy
sizeDB <= sizeLimit

## Cleanup
(waitFor driver.close()).expect("driver to close")

test "store capacity should be limited":
## Given
Expand Down
3 changes: 2 additions & 1 deletion waku/common/databases/db_sqlite.nim
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,5 @@ proc performSqliteVacuum*(db: SqliteDatabase): DatabaseResult[void] =
if resVacuum.isErr():
return err("failed to execute vacuum: " & resVacuum.error)

debug "finished sqlite database vacuuming"
debug "finished sqlite database vacuuming"
ok()
9 changes: 9 additions & 0 deletions waku/waku_archive/driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ method getMessages*(driver: ArchiveDriver,
# Base declaration for the ArchiveDriver hierarchy: asynchronously yields an
# int64 result. The body is `discard`, so concrete drivers are expected to
# provide the real implementation.
method getMessagesCount*(driver: ArchiveDriver):
Future[ArchiveDriverResult[int64]] {.base, async.} = discard

# Base declaration: asynchronously yields an int64 page count. The body is
# `discard`, so concrete drivers are expected to override it.
# NOTE(review): presumably the number of pages of the backing database - confirm.
method getPagesCount*(driver: ArchiveDriver):
Future[ArchiveDriverResult[int64]] {.base, async.} = discard

# Base declaration: asynchronously yields an int64 page size. The body is
# `discard`, so concrete drivers are expected to override it.
# NOTE(review): presumably the size of a single database page in bytes - confirm.
method getPagesSize*(driver: ArchiveDriver):
Future[ArchiveDriverResult[int64]] {.base, async.} = discard

# Base declaration: asynchronously runs a vacuum on the driver's storage and
# yields a void result. The body is `discard`, so concrete drivers are expected
# to override it.
method performsSqliteVacuum*(driver: ArchiveDriver):
Future[ArchiveDriverResult[void]] {.base, async.} = discard

# Base declaration: asynchronously yields a Timestamp result. The body is
# `discard`, so concrete drivers are expected to override it.
method getOldestMessageTimestamp*(driver: ArchiveDriver):
Future[ArchiveDriverResult[Timestamp]] {.base, async.} = discard

Expand Down
12 changes: 12 additions & 0 deletions waku/waku_archive/driver/queue_driver/queue_driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,18 @@ method getMessagesCount*(driver: QueueDriver):
Future[ArchiveDriverResult[int64]] {.async} =
return ok(int64(driver.len()))

method getPagesCount*(driver: QueueDriver):
                      Future[ArchiveDriverResult[int64]] {.async.} =
  ## The in-memory queue has no database pages; the current number of queued
  ## entries (`driver.len()`) is reported instead.
  let queued = int64(driver.len())
  return ok(queued)

method getPagesSize*(driver: QueueDriver):
                     Future[ArchiveDriverResult[int64]] {.async.} =
  ## Reports the current number of queued entries (`driver.len()`).
  # NOTE(review): this is an item count, not a byte size, and it is the same
  # value getPagesCount returns; SizeRetentionPolicy.execute multiplies the
  # two - confirm this is the intended stand-in for the queue driver.
  let queued = int64(driver.len())
  return ok(queued)

# Vacuum request for the in-memory queue driver: nothing is done and success
# is returned immediately.
method performsSqliteVacuum*(driver: QueueDriver):
Future[ArchiveDriverResult[void]] {.async.} =
return ok()

method getOldestMessageTimestamp*(driver: QueueDriver):
                                  Future[ArchiveDriverResult[Timestamp]] {.async.} =
  ## Maps the result of `driver.first()` to the `receiverTime` stored in that
  ## entry's index; an error from `first()` is passed through unchanged.
  let firstEntry = driver.first()
  return firstEntry.map(proc(msg: IndexedWakuMessage): Timestamp = msg.index.receiverTime)
Expand Down
12 changes: 12 additions & 0 deletions waku/waku_archive/driver/sqlite_driver/sqlite_driver.nim
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ method getMessagesCount*(s: SqliteDriver):
Future[ArchiveDriverResult[int64]] {.async.} =
return s.db.getMessageCount()

method getPagesCount*(s: SqliteDriver):
                      Future[ArchiveDriverResult[int64]] {.async.} =
  ## Forwards to `getPageCount()` on the driver's database handle.
  let pageCountRes = s.db.getPageCount()
  return pageCountRes

method getPagesSize*(s: SqliteDriver):
                     Future[ArchiveDriverResult[int64]] {.async.} =
  ## Forwards to `getPageSize()` on the driver's database handle.
  let pageSizeRes = s.db.getPageSize()
  return pageSizeRes

method performsSqliteVacuum*(s: SqliteDriver):
                             Future[ArchiveDriverResult[void]] {.async.} =
  ## Forwards to `performSqliteVacuum()` on the driver's database handle and
  ## returns its result.
  let vacuumRes = s.db.performSqliteVacuum()
  return vacuumRes

method getOldestMessageTimestamp*(s: SqliteDriver):
                                  Future[ArchiveDriverResult[Timestamp]] {.async.} =
  ## Forwards to `selectOldestReceiverTimestamp()` on the driver's database
  ## handle and returns its result.
  let oldestRes = s.db.selectOldestReceiverTimestamp()
  return oldestRes
Expand Down
36 changes: 35 additions & 1 deletion waku/waku_archive/retention_policy/builder.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import
import
../retention_policy,
./retention_policy_time,
./retention_policy_capacity
./retention_policy_capacity,
./retention_policy_size

proc new*(T: type RetentionPolicy,
retPolicy: string):
Expand Down Expand Up @@ -51,5 +52,38 @@ proc new*(T: type RetentionPolicy,
let retPolicy: RetentionPolicy = CapacityRetentionPolicy.init(retentionCapacity)
return ok(some(retPolicy))

elif policy == "size":
var retentionSize: string
retentionSize = policyArgs

# captures the size unit such as Gb or Mb
let sizeUnit = retentionSize.substr(retentionSize.len-2)
# captures the string type number data of the size provided
let sizeQuantityStr = retentionSize.substr(0,retentionSize.len-3)
# to hold the numeric value data of size
var sizeQuantity: float

if sizeUnit in ["gb", "Gb", "GB", "gB"]:
# parse the numeric part of the argument as a float
try:
sizeQuantity = parseFloat(sizeQuantityStr)
except ValueError:
return err("invalid size retention policy argument")
# Gb data is converted into Mb for uniform processing
sizeQuantity = sizeQuantity * 1024
elif sizeUnit in ["mb", "Mb", "MB", "mB"]:
try:
sizeQuantity = parseFloat(sizeQuantityStr)
except ValueError:
return err("invalid size retention policy argument")
else:
return err ("""invalid size retention value unit: expected "Mb" or "Gb" but got """ & sizeUnit )

if sizeQuantity <= 0:
return err("invalid size retention policy argument: a non-zero value is required")

let retPolicy: RetentionPolicy = SizeRetentionPolicy.init(sizeQuantity)
return ok(some(retPolicy))

else:
return err("unknown retention policy")
87 changes: 87 additions & 0 deletions waku/waku_archive/retention_policy/retention_policy_size.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Module-wide exception tracking: on Nim older than 1.4 the pushed raises list
# is [Defect], on newer compilers it is empty.
when (NimMajor, NimMinor) < (1, 4):
{.push raises: [Defect].}
else:
{.push raises: [].}

import
std/times,
stew/results,
chronicles,
chronos
import
../driver,
../retention_policy

# Log scope for this module: sets the `topics` property to the value below.
logScope:
topics = "waku archive retention_policy"

# default size limit, expressed in megabytes: 30_720 Mb = 30 Gb
const DefaultRetentionSize*: float = 30_720

# fraction of the stored messages that is kept once the size limit is exceeded;
# the remaining 20% is removed via deleteOldestMessagesNotWithinLimit() in execute()
const DeleteLimit = 0.80

type
# SizeRetentionPolicy implements auto delete as follows:
# - sizeLimit is the size in megabytes (Mb) the database can grow up to.
# - When the database size exceeds sizeLimit, only a fraction (DeleteLimit) of
#   the stored messages is kept; the remaining messages are removed using
#   deleteOldestMessagesNotWithinLimit().
# - After the deletion, a vacuum is performed so that the space freed by the
#   deleted rows is reclaimed.
SizeRetentionPolicy* = ref object of RetentionPolicy
# maximum database size, in megabytes (Mb)
sizeLimit: float

proc init*(T: type SizeRetentionPolicy, size=DefaultRetentionSize): T =
  ## Creates a size-based retention policy whose limit is `size` megabytes;
  ## `DefaultRetentionSize` is used when no size is given.
  result = SizeRetentionPolicy(sizeLimit: size)

method execute*(p: SizeRetentionPolicy,
                driver: ArchiveDriver):
                Future[RetentionPolicyResult[void]] {.async.} =
  ## Enforces the size limit of the policy on `driver`.
  ##
  ## The database size is computed as page count * page size. While it does
  ## not exceed `p.sizeLimit` (megabytes) nothing happens. Otherwise the driver
  ## is asked to delete the oldest messages that do not fit within
  ## `DeleteLimit` (80%) of the current message count, and a vacuum is run
  ## afterwards so the freed space is reclaimed.
  ##
  ## Returns `ok()` on success (including the "nothing to do" case) and an
  ## `err` with a description as soon as any driver call fails.

  # to get the size of the database, the page count and the page size are required
  let pageCountRes = await driver.getPagesCount()
  if pageCountRes.isErr():
    return err("failed to get Pages count: " & pageCountRes.error)

  let pageSizeRes = await driver.getPagesSize()
  if pageSizeRes.isErr():
    return err("failed to get Page size: " & pageSizeRes.error)

  # Database size in megabytes (Mb). The operands are converted to float
  # *before* dividing: an integer `div 1024` on the page size truncates it
  # (down to 0 for pages smaller than 1 Kb), which under-reports the size and
  # can keep the policy from ever triggering. Multiplying as floats also
  # avoids an int64 overflow of the intermediate product.
  let totalSizeOfDB =
    float(pageSizeRes.value) * float(pageCountRes.value) / (1024.0 * 1024.0)

  # nothing to do while the database stays within the configured limit
  if totalSizeOfDB <= p.sizeLimit:
    return ok()

  # to delete messages, the total row/message count is needed
  let numMessagesRes = await driver.getMessagesCount()
  if numMessagesRes.isErr():
    return err("failed to get messages count: " & numMessagesRes.error)

  # 80% of the stored messages are kept, the others are deleted
  let messagesToKeep = int(float(numMessagesRes.value) * DeleteLimit)

  let res = await driver.deleteOldestMessagesNotWithinLimit(limit=messagesToKeep)
  if res.isErr():
    return err("deleting oldest messages failed: " & res.error)

  # vacuum so that the pages freed by the deletion are released and the
  # database file actually shrinks
  let resVaccum = await driver.performsSqliteVacuum()
  if resVaccum.isErr():
    return err("vacuumming failed: " & resVaccum.error)

  return ok()

0 comments on commit 07217f3

Please sign in to comment.