
Add log rotation with "logrotate" (#409)
## Issue
[ISSUE-398](#398)
Charmed MongoDB logs can currently grow very fast, especially the audit log,
for example when a user imports data sets.

## Solution
Support log rotation to avoid consuming too much disk space with log files.
A log is rotated once it grows beyond `Config.LogRotate.MAX_LOG_SIZE` (50M in this PR);
a cron job checks log file sizes every minute. `mongod` and `mongos` are started with
`--logRotate reopen` and `--logappend`, so the logrotate `postrotate` step only needs to
send `SIGUSR1` for the server to reopen its log file after rotation.
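
For reference, this is a sketch of the rendered `/etc/logrotate.d/mongodb` that the charm writes on each unit, assuming the defaults added in `Config.LogRotate` and the snap's common log directory used by the integration tests; the owner placeholder stands in for `machine_helpers.MONGO_USER`, whose value is not shown in this diff. The minutely cron schedule that triggers it is annotated after `setup_logrotate_and_cron` below.

```
# /etc/logrotate.d/mongodb (rendered sketch)
/var/snap/charmed-mongodb/common/var/log/mongodb/*.log
{
    rotate 10
    size 50M
    missingok
    notifempty
    create 0600 <mongo_user> <mongo_user>
    compress
    nomail
    nocopytruncate
    sharedscripts
    postrotate
        /bin/kill -SIGUSR1 $(systemctl show -p MainPID --value snap.charmed-mongodb.mongod.service)
    endscript
}
```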

## Result of log rotation
<img width="635" alt="Screenshot 2024-04-23 at 18 59 47" src="https://github.com/canonical/mongodb-operator/assets/132273757/c1ccbd81-ca9d-4678-9423-0f2374f7226d">
dmitry-ratushnyy authored Apr 29, 2024
1 parent 89c933f commit 34dba61
Showing 11 changed files with 155 additions and 113 deletions.
4 changes: 4 additions & 0 deletions lib/charms/mongodb/v1/helpers.py
@@ -139,6 +139,8 @@ def get_mongos_args(
f"--configdb {config_server_db}",
# config server is already using 27017
f"--port {Config.MONGOS_PORT}",
"--logRotate reopen",
"--logappend",
]

# TODO : generalise these into functions to be re-used
@@ -202,6 +204,8 @@ def get_mongod_args(
# port
f"--port={Config.MONGODB_PORT}",
"--setParameter processUmask=037", # required for log files perminission (g+r)
"--logRotate reopen",
"--logappend",
logging_options,
]
cmd.extend(audit_log_settings)
1 change: 1 addition & 0 deletions requirements.txt
@@ -20,3 +20,4 @@ parameterized==0.9.0
pydantic==1.10.7
# Future PR - use poetry in MongoDB Charm
poetry==1.8.2
jinja2==3.1.3
9 changes: 7 additions & 2 deletions src/charm.py
@@ -78,7 +78,12 @@
from config import Config, Package
from events.upgrade import MongoDBDependencyModel, MongoDBUpgrade
from exceptions import AdminUserCreationError, ApplicationHostNotFoundError
from machine_helpers import MONGO_USER, ROOT_USER_GID, update_mongod_service
from machine_helpers import (
MONGO_USER,
ROOT_USER_GID,
setup_logrotate_and_cron,
update_mongod_service,
)

AUTH_FAILED_CODE = 18
UNAUTHORISED_CODE = 13
@@ -357,7 +362,7 @@ def _on_install(self, event: InstallEvent) -> None:
config=self.mongodb_config,
role=self.role,
)

setup_logrotate_and_cron()
# add licenses
copy_licenses_to_unit()

6 changes: 6 additions & 0 deletions src/config.py
@@ -50,6 +50,12 @@ class Backup:
SERVICE_NAME = "pbm-agent"
URI_PARAM_NAME = "pbm-uri"

class LogRotate:
"""Log rotate related constants."""

MAX_LOG_SIZE = "50M"
MAX_ROTATIONS_TO_KEEP = 10

class Monitoring:
"""Monitoring related config for MongoDB Charm."""

43 changes: 42 additions & 1 deletion src/machine_helpers.py
@@ -4,8 +4,15 @@
# See LICENSE file for licensing details.
import logging

import jinja2
from charms.mongodb.v0.mongodb import MongoDBConfiguration
from charms.mongodb.v1.helpers import add_args_to_env, get_mongod_args, get_mongos_args
from charms.mongodb.v1.helpers import (
LOG_DIR,
MONGODB_COMMON_DIR,
add_args_to_env,
get_mongod_args,
get_mongos_args,
)

from config import Config

@@ -28,3 +35,37 @@ def update_mongod_service(
if role == Config.Role.CONFIG_SERVER:
mongos_start_args = get_mongos_args(config, snap_install=True)
add_args_to_env("MONGOS_ARGS", mongos_start_args)


def setup_logrotate_and_cron() -> None:
"""Create and write the logrotate config file.
Logs will be rotated if they are bigger than Config.LogRotate.MAX_LOG_SIZE,
Cron job to for starting log rotation will be run every minute.
Note: we split cron into 2 jobs to avoid error with logrotate
running 2 times on same minute at midnight due to default system
log rotation being run daily at 00:00 (and we want to keep it).
"""
logger.debug("Creating logrotate config file")

with open("templates/logrotate.j2", "r") as file:
template = jinja2.Template(file.read())

rendered = template.render(
logs_directory=f"{MONGODB_COMMON_DIR}/{LOG_DIR}",
mongo_user=MONGO_USER,
max_log_size=Config.LogRotate.MAX_LOG_SIZE,
max_rotations=Config.LogRotate.MAX_ROTATIONS_TO_KEEP,
)

with open("/etc/logrotate.d/mongodb", "w") as file:
file.write(rendered)

cron = (
"* 1-23 * * * root logrotate /etc/logrotate.d/mongodb\n"
"1-59 0 * * * root logrotate /etc/logrotate.d/mongodb\n"
)
with open("/etc/cron.d/mongodb", "w") as file:
file.write(cron)
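
For clarity, the two cron entries above together cover every minute of the day except 00:00, which is deliberately left to the system's default daily logrotate run; an annotated sketch of the resulting `/etc/cron.d/mongodb`:

```
# /etc/cron.d/mongodb (annotated sketch)
# every minute from 01:00 through 23:59
* 1-23 * * * root logrotate /etc/logrotate.d/mongodb
# every minute from 00:01 through 00:59; minute 00:00 is skipped so this job
# does not collide with the default system logrotate run at midnight
1-59 0 * * * root logrotate /etc/logrotate.d/mongodb
```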
16 changes: 16 additions & 0 deletions templates/logrotate.j2
@@ -0,0 +1,16 @@
{{logs_directory}}/*.log
{
rotate {{max_rotations}}
size {{max_log_size}}
missingok
notifempty
create 0600 {{mongo_user}} {{mongo_user}}
compress
nomail
nocopytruncate
sharedscripts
postrotate
/bin/kill -SIGUSR1 $(systemctl show -p MainPID --value snap.charmed-mongodb.mongod.service)
endscript
}

46 changes: 1 addition & 45 deletions tests/integration/ha_tests/helpers.py
@@ -37,7 +37,7 @@
MONGODB_LOG_PATH = f"{MONGO_COMMON_DIR}/var/log/mongodb/mongodb.log"
MONGOD_SERVICE_DEFAULT_PATH = "/etc/systemd/system/snap.charmed-mongodb.mongod.service"
TMP_SERVICE_PATH = "tests/integration/ha_tests/tmp.service"
LOGGING_OPTIONS = "--logappend"
LOGGING_OPTIONS = f"--logpath={MONGO_COMMON_DIR}/var/log/mongodb/mongodb.log --logappend"
EXPORTER_PROC = "/usr/bin/mongodb_exporter"
GREP_PROC = "grep"

@@ -138,7 +138,6 @@ async def fetch_primary(
return None
finally:
client.close()

primary = None
# loop through all members in the replica set
for member in status["members"]:
@@ -600,49 +599,6 @@ async def update_restart_delay(ops_test: OpsTest, unit, delay: int):
raise ProcessError(f"Command: {reload_cmd} failed on unit: {unit.name}.")


async def update_service_logging(ops_test: OpsTest, unit, logging: bool):
"""Turns on/off logging in for the mongo daemon."""
# load the service file from the unit and update it with the new delay
await unit.scp_from(source=MONGOD_SERVICE_DEFAULT_PATH, destination=TMP_SERVICE_PATH)
with open(TMP_SERVICE_PATH, "r") as mongodb_service_file:
mongodb_service = mongodb_service_file.readlines()

for index, line in enumerate(mongodb_service):
if "ExecStart" not in line:
continue
line = line.replace("\n", "")

if logging:
if LOGGING_OPTIONS not in line:
mongodb_service[index] = line + " " + LOGGING_OPTIONS + "\n"
else:
if LOGGING_OPTIONS in line:
mongodb_service[index] = line.replace(LOGGING_OPTIONS, "") + "\n"

with open(TMP_SERVICE_PATH, "w") as service_file:
service_file.writelines(mongodb_service)

# upload the changed file back to the unit, we cannot scp this file directly to
# MONGOD_SERVICE_DEFAULT_PATH since this directory has strict permissions, instead we scp it
# elsewhere and then move it to MONGOD_SERVICE_DEFAULT_PATH.
await unit.scp_to(source=TMP_SERVICE_PATH, destination="mongod.service")
mv_cmd = (
f"exec --unit {unit.name} mv /home/ubuntu/mongod.service {MONGOD_SERVICE_DEFAULT_PATH}"
)
return_code, _, _ = await ops_test.juju(*mv_cmd.split())
if return_code != 0:
raise ProcessError(f"Command: {mv_cmd} failed on unit: {unit.name}.")

# remove tmp file from machine
subprocess.call(["rm", TMP_SERVICE_PATH])

# reload the daemon for systemd otherwise changes are not saved
reload_cmd = f"exec --unit {unit.name} systemctl daemon-reload"
return_code, _, _ = await ops_test.juju(*reload_cmd.split())
if return_code != 0:
raise ProcessError(f"Command: {reload_cmd} failed on unit: {unit.name}.")


async def stop_mongod(ops_test: OpsTest, unit) -> None:
"""Safely stops the mongod process."""
stop_db_process = f"exec --unit {unit.name} snap stop charmed-mongodb.mongod"
43 changes: 1 addition & 42 deletions tests/integration/ha_tests/test_ha.py
@@ -20,7 +20,6 @@
unit_uri,
)
from .helpers import (
MONGODB_LOG_PATH,
add_unit_with_storage,
all_db_processes_down,
clear_db_writes,
@@ -44,13 +43,10 @@
scale_and_verify,
secondary_up_to_date,
start_continous_writes,
start_mongod,
stop_continous_writes,
stop_mongod,
storage_id,
storage_type,
update_restart_delay,
update_service_logging,
verify_replica_set_configuration,
verify_writes,
wait_network_restore,
@@ -447,7 +443,7 @@ async def test_freeze_db_process(ops_test, continuous_writes):


@pytest.mark.group(1)
async def test_restart_db_process(ops_test, continuous_writes, change_logging):
async def test_restart_db_process(ops_test, continuous_writes):
# locate primary unit
app_name = await get_app_name(ops_test)
ip_addresses = [unit.public_address for unit in ops_test.model.applications[app_name].units]
@@ -716,41 +712,4 @@ async def reset_restart_delay(ops_test: OpsTest):
await update_restart_delay(ops_test, unit, ORIGINAL_RESTART_DELAY)


@pytest.fixture()
async def change_logging(ops_test: OpsTest):
"""Enables appending logging for a test and resets the logging at the end of the test."""
app_name = await get_app_name(ops_test)
ip_addresses = [unit.public_address for unit in ops_test.model.applications[app_name].units]
primary = await replica_set_primary(ip_addresses, ops_test)

for unit in ops_test.model.applications[app_name].units:
# tests which use this fixture restart the primary. Therefore the primary should not be
# restarted as to leave the restart testing to the test itself.
if unit.name == primary.name:
continue

# must restart unit to ensure that changes to logging are made
await stop_mongod(ops_test, unit)
await update_service_logging(ops_test, unit, logging=True)
await start_mongod(ops_test, unit)

# sleep long enough for the mongod to start up correctly
time.sleep(15)
yield

app_name = await get_app_name(ops_test)
for unit in ops_test.model.applications[app_name].units:
# must restart unit to ensure that changes to logging are made
await stop_mongod(ops_test, unit)
await update_service_logging(ops_test, unit, logging=False)
await start_mongod(ops_test, unit)

# sleep long enough for the mongod to start up correctly
time.sleep(15)

# remove the log file as to not clog up space on the replicas.
rm_cmd = f"exec --unit {unit.name} rm {MONGODB_LOG_PATH}"
await ops_test.juju(*rm_cmd.split())


# Fixtures end
90 changes: 67 additions & 23 deletions tests/integration/test_charm.py
@@ -19,7 +19,12 @@

from config import Config

from .ha_tests.helpers import kill_unit_process
from .ha_tests.helpers import (
clear_db_writes,
kill_unit_process,
start_continous_writes,
stop_continous_writes,
)
from .helpers import (
PORT,
UNIT_IDS,
@@ -319,6 +324,67 @@ async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None
assert password1 == password2


@pytest.mark.group(1)
async def test_audit_log(ops_test: OpsTest) -> None:
"""Test that audit log was created and contains actual audit data."""
app_name = await get_app_name(ops_test)
audit_log_snap_path = "/var/snap/charmed-mongodb/common/var/log/mongodb/audit.log"
audit_log = check_output(
f"JUJU_MODEL={ops_test.model_full_name} juju ssh {app_name}/leader 'sudo cat {audit_log_snap_path}'",
stderr=subprocess.PIPE,
shell=True,
universal_newlines=True,
)

for line in audit_log.splitlines():
if not len(line):
continue
item = json.loads(line)
# basic sanity check
assert audit_log_line_sanity_check(item), "Audit log line failed sanity check"


@pytest.mark.group(1)
async def test_log_rotate(ops_test: OpsTest) -> None:
"""Test that log are being rotated."""
# Note: this timeout out depends on max log size
# which is defined in "src/config.py::Config.MAX_LOG_SIZE"
time_to_write_50m_of_data = 60 * 10
logrotate_timeout = 60
audit_log_snap_path = "/var/snap/charmed-mongodb/common/var/log/mongodb/"

app_name = await get_app_name(ops_test)

log_files = check_output(
f"JUJU_MODEL={ops_test.model_full_name} juju ssh {app_name}/leader 'sudo ls {audit_log_snap_path}'",
stderr=subprocess.PIPE,
shell=True,
universal_newlines=True,
)

log_not_rotated = "audit.log.1.gz" not in log_files
assert log_not_rotated, f"Found rotated log in {log_files}"

await start_continous_writes(ops_test, 1)
time.sleep(time_to_write_50m_of_data)
await stop_continous_writes(ops_test, app_name=app_name)
time.sleep(logrotate_timeout) # Just to make sure that logrotate will run
await clear_db_writes(ops_test)

log_files = check_output(
f"JUJU_MODEL={ops_test.model_full_name} juju ssh {app_name}/leader 'sudo ls {audit_log_snap_path}'",
stderr=subprocess.PIPE,
shell=True,
universal_newlines=True,
)

log_rotated = "audit.log.1.gz" in log_files
assert log_rotated, f"Could not find rotated log in {log_files}"

audit_log_exists = "audit.log" in log_files
assert audit_log_exists, f"Could not find audit.log in {log_files}"


@pytest.mark.group(1)
async def test_exactly_one_primary_reported_by_juju(ops_test: OpsTest) -> None:
"""Tests that there is exactly one replica set primary unit reported by juju."""
@@ -369,25 +435,3 @@ def juju_reports_one_primary(unit_messages):

# cleanup, remove killed unit
await ops_test.model.destroy_unit(target_unit)


@pytest.mark.group(1)
@pytest.mark.skip("Skipping until write to log files enabled")
async def test_audit_log(ops_test: OpsTest) -> None:
"""Test that audit log was created and contains actual audit data."""
app_name = await get_app_name(ops_test)
leader_unit = await find_unit(ops_test, leader=True, app_name=app_name)
audit_log_snap_path = "/var/snap/charmed-mongodb/common/var/log/mongodb/audit.log"
audit_log = check_output(
f"JUJU_MODEL={ops_test.model_full_name} juju ssh {leader_unit.name} 'sudo cat {audit_log_snap_path}'",
stderr=subprocess.PIPE,
shell=True,
universal_newlines=True,
)

for line in audit_log.splitlines():
if not len(line):
continue
item = json.loads(line)
# basic sanity check
assert audit_log_line_sanity_check(item), "Audit sanity log check failed for first line"