Skip to content

Commit

Permalink
Introduce TrackedFilesInfoSource message and new actor
Browse files Browse the repository at this point in the history
We have already hit, several times, a situation where an actor needed
information about a specific file (whether it exists, has been changed, ...).
For that purpose an extra scanner actor had to be created, together with
an associated message and model.

To cover such cases, we are introducing the new model
TrackedFilesInfoSource and the actor scansourcefiles. In the future, when
an actor needs such a piece of information and has to act based
on it, a developer can just update the lists in the introduced actor's
library, and the information about the particular file will be provided.

Another benefit is saving time on writing new unit tests and code
for the scan, as updating the list of files to be tracked does not
affect the algorithm.
  • Loading branch information
pirat89 committed Jan 10, 2024
1 parent c627a0b commit cda8ebe
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 0 deletions.
32 changes: 32 additions & 0 deletions repos/system_upgrade/common/actors/scansourcefiles/actor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from leapp.actors import Actor
from leapp.libraries.actor import scansourcefiles
from leapp.models import TrackedFilesInfoSource
from leapp.tags import FactsPhaseTag, IPUWorkflowTag


class ScanSourceFiles(Actor):
    """
    Scan explicitly listed files of the source system.

    When an actor requires information about a file (whether it is installed,
    modified, etc.), the file can be added to the list of tracked files, so
    no extra actor has to be created just to provide that one piece of
    information.

    Scanning all changed files tracked by RPMs is very expensive, so this
    possibility is provided instead to simplify the work for others.

    See the lists defined in the private library.
    """
    # TODO(pstodulk): in some cases it could be valuable to specify an rpm
    # name and provide information about all of its changed files instead.
    # Both approaches have slightly different use-cases and expectations.
    # The second one would be a good solution for tracking changed files of
    # leapp-repository.

    name = 'scan_source_files'
    consumes = ()
    produces = (TrackedFilesInfoSource,)
    tags = (IPUWorkflowTag, FactsPhaseTag)

    def process(self):
        # All the work is delegated to the actor's private library.
        scansourcefiles.process()
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os

from leapp.libraries.common.config.version import get_source_major_version
from leapp.libraries.stdlib import api, CalledProcessError, run
from leapp.models import FileInfo, TrackedFilesInfoSource

# TODO(pstodulk): make linter happy about this
# Paths of the files to be scanned, grouped by the key of the list:
#   'common'      -> scanned on all system versions
#   '8' (etc..)   -> scanned only when the particular major version of OS
#                    is used
TRACKED_FILES = {
    'common': [
    ],
    '8': [
    ],
    '9': [
    ],
}

# TODO(pstodulk)?: introduce possibility to discover files under a dir that
# are not tracked by any rpm or a specified rpm? Currently I have only one
# use case for that in my head, so possibly it will be better to skip a generic
# solution and just introduce a new actor and msg for that (check whether
# actors not owned by our package(s) are present).


def _get_rpm_name(input_file):
    """
    Return the name of the rpm that owns the given file.

    The owner is queried via ``rpm -qf``. An empty string is returned when
    the query fails (the file is not owned by any rpm). When several rpms
    are reported, a warning is logged and the first reported name is
    returned.
    """
    query_cmd = ['rpm', '-qf', '--queryformat', r'%{NAME}\n', input_file]
    try:
        owners = run(query_cmd, split=True)['stdout']
    except CalledProcessError:
        # is not owned by any rpm
        return ''

    if len(owners) > 1:
        # this is very seatbelt; could happen for directories, but we do
        # not expect here directories specified at all. if so, we should
        # provide list instead of string
        msg = 'The {} file is owned by multiple rpms: {}.'.format(input_file, ', '.join(owners))
        api.current_logger().warning(msg)
    return owners[0]


def is_modified(input_file):
    """
    Return True if checksum of the file has been changed (or the file is removed).

    Ignores mode, user, type, ...

    ``rpm -Vf`` verifies the whole package owning the file, so the output may
    report other files of that package as well. Only the line that reports
    ``input_file`` itself is evaluated; when no such line is present, the
    file is considered unmodified.

    :param input_file: Path to the file; it is compared literally with the
        paths printed by rpm, so it is expected in the form recorded in the
        rpm database.
    :return: True if rpm reports the file as missing or with a changed
        digest, False otherwise.
    :rtype: bool
    """
    result = run(['rpm', '-Vf', '--nomtime', input_file], checked=False)
    if not result['exit_code']:
        # The whole owning package has been verified without any finding.
        return False

    for line in result['stdout'].splitlines():
        # A verify line is: <status> [<attribute marker>] <path>
        # e.g. "S.5....T.  c /etc/foo.conf" or "missing     /usr/bin/foo"
        status, _, remainder = line.strip().partition(' ')
        path_start = remainder.find('/')
        if path_start < 0:
            # Not a file report (no absolute path on the line).
            continue
        reported = remainder[path_start:]
        # For a missing file rpm may append the reason in parentheses,
        # e.g. "/etc/foo (Permission denied)".
        if reported == input_file or reported.startswith(input_file + ' ('):
            return status == 'missing' or '5' in status

    # Verification failed only for other files of the package (or nothing
    # parsable has been printed), so the tracked file itself is intact.
    return False


def scan_file(input_file):
    """
    Collect information about the given file and return it as FileInfo.

    The result carries the path, whether the path exists, the name of the
    owning rpm (empty string if there is none) and the modification flag.
    """
    present = os.path.exists(input_file)
    owner = _get_rpm_name(input_file)
    # it's not tracked by any rpm at all -> always False
    modified = is_modified(input_file) if owner else False

    return FileInfo(
        path=input_file,
        exists=present,
        rpm_name=owner,
        is_modified=modified,
    )


def scan_files(files):
    """
    Scan every file from the given iterable and return the list of results.

    The order of the results follows the order of the input.
    """
    scanned = []
    for path in files:
        scanned.append(scan_file(path))
    return scanned


def process():
    """
    Scan the tracked files and produce the TrackedFilesInfoSource message.

    The scanned set consists of the 'common' list extended by the list
    registered for the major version of the source system (if any).
    """
    common = TRACKED_FILES['common']
    version_specific = TRACKED_FILES.get(get_source_major_version(), [])
    scanned = scan_files(common + version_specific)
    api.produce(TrackedFilesInfoSource(files=scanned))
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
def test_scansourcefiles():
    """Placeholder for the unit tests of the scansourcefiles library."""
    # TODO(pstodulk): keeping unit tests for later after I check the idea
    # of this actor with the team.
    return None
60 changes: 60 additions & 0 deletions repos/system_upgrade/common/models/trackedfiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from leapp.models import fields, Model
from leapp.topics import SystemInfoTopic


class FileInfo(Model):
    """
    Various data about a file.

    This model is not supposed to be used as a message directly.
    See e.g. :class:`TrackedFilesInfoSource` instead.
    """
    topic = SystemInfoTopic

    path = fields.String()
    """
    Canonical path to the file.
    """

    exists = fields.Boolean()
    """
    True if the file is present on the system.
    """

    rpm_name = fields.String(default="")
    """
    Name of the rpm that owns the file. Otherwise empty string if not owned
    by any rpm.
    """

    # NOTE(pstodulk): I have been thinking about the "state"/"modified" field
    # instead. Which could contain enum list, where could be specified what has
    # been changed (checksum, type, owner, ...). But currently we do not have
    # use cases for that and do not want to implement it now. So starting simply
    # with this one.
    is_modified = fields.Boolean()
    """
    True if the checksum of the file has been changed (includes the missing state).

    The field is meaningful only for a file tracked by rpm (excluding ghost
    files). For a file that is not tracked by any rpm the value is always
    False.
    """


class TrackedFilesInfoSource(Model):
    """
    Information about files on the source system that are explicitly
    defined in the actor to be tracked.

    To get another file tracked, search for the actor producing this message
    to discover the list into which the file should be added.

    This particular message is expected to be produced only once, by that
    specific actor. Do not produce multiple messages of this model.
    """
    topic = SystemInfoTopic

    files = fields.List(fields.Model(FileInfo), default=[])
    """
    List of :class:`FileInfo`.
    """

0 comments on commit cda8ebe

Please sign in to comment.