Skip to content

Commit

Permalink
Add blacklist functionality while ingesting stats (#37)
Browse files Browse the repository at this point in the history
* add blacklist functionality while ingesting stats

* minor fix
  • Loading branch information
lionick authored Nov 3, 2023
1 parent ca823d9 commit 92c89f0
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 16 deletions.
37 changes: 32 additions & 5 deletions app/ingester/loginsIngester.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from app.logger import log
from app.utils import configParser
from app.utils.ipDatabase import geoip2Database
from sqlalchemy.exc import NoResultFound
from .utilsIngester import utilsIngester
Expand All @@ -7,6 +8,7 @@
class LoginDataIngester:
logger = log.get_logger("LoginDataIngester")


@classmethod
def getIdpId(cls, entityid, idpName, tenenvId, session):
# Check if IdP exists
Expand Down Expand Up @@ -152,8 +154,20 @@ def getCountryFromIP(cls, ipAddress, session):
return countryId

@classmethod
def ingestLoginDataPerTenenv(cls, tenenvId, session):
def ingestLoginDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_id_blacklist', config_file) is not False and
'user_ids' in configParser.getConfig('user_id_blacklist', config_file)):
user_ids = configParser.getConfig('user_id_blacklist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for logins data
datesNotMapped = utilsIngester.getDatesNotMapped(
"statistics_country_hashed",
Expand All @@ -175,7 +189,14 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):
AND tenenv_id={0} {1}
""".format(tenenvId, between)).all()
loginMappedItems = 0

for login in loginsNotMapped:

if (login[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(login[0]['voPersonId']))
continue

if (not login[0]['failedLogin']
and utilsIngester.validateTenenv(login[0]['tenenvId'], session)
and 'voPersonId' in login[0]
Expand All @@ -197,7 +218,7 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):
login[0]['spName'],
login[0]['tenenvId'],
session)

if ('countryCode' in login[0] and 'countryName' in login[0]):
# find countryId
countryId = LoginDataIngester.getCountryFromCountryCode([login[0]['countryCode'], login[0]['countryName']], session)
Expand All @@ -222,6 +243,12 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestLoginData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
for tenenvId in tenenvIds:
LoginDataIngester.ingestLoginDataPerTenenv(tenenvId[0], session)
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
for tenenv in tenenvIds:
LoginDataIngester.ingestLoginDataPerTenenv(tenenv, session)
33 changes: 29 additions & 4 deletions app/ingester/membeshipIngester.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from app.logger import log
from app.utils import configParser
from sqlalchemy.exc import NoResultFound
from .utilsIngester import utilsIngester
import hashlib


class MembershipDataIngester:
Expand All @@ -27,7 +29,20 @@ def getCommunityId(cls, communityName, tenenvId, session):
return communityId

@classmethod
def ingestMembershipDataPerTenenv(cls, tenenvId, session):
def ingestMembershipDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_id_blacklist', config_file) is not False and
'user_ids' in configParser.getConfig('user_id_blacklist', config_file)):
user_ids = configParser.getConfig('user_id_blacklist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for users data
datesNotMapped = utilsIngester.getDatesNotMapped(
"members",
Expand All @@ -54,6 +69,10 @@ def ingestMembershipDataPerTenenv(cls, tenenvId, session):
cls.logger.error("""
VO name '{0}' not found """.format(membership[0]['voName']))
continue
if (membership[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(membership[0]['voPersonId']))
continue
session.exec("""INSERT INTO members(community_id,
hasheduserid, status, tenenv_id, created, updated)
VALUES ('{0}','{1}','{2}', {3}, '{4}', '{4}')
Expand All @@ -69,9 +88,15 @@ def ingestMembershipDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestMembershipData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
# for each tenenv on database try to ingest UserData
# from statistics_raw table
for tenenvId in tenenvIds:
for tenenv in tenenvIds:
MembershipDataIngester.ingestMembershipDataPerTenenv(
tenenvId[0], session)
tenenv, session)
34 changes: 29 additions & 5 deletions app/ingester/usersIngester.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
from app.logger import log
from app.utils import configParser
from .utilsIngester import utilsIngester

import hashlib

class UserDataIngester:
logger = log.get_logger("UserDataIngester")

@classmethod
def ingestUserDataPerTenenv(cls, tenenvId, session):
def ingestUserDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_id_blacklist', config_file) is not False and
'user_ids' in configParser.getConfig('user_id_blacklist', config_file)):
user_ids = configParser.getConfig('user_id_blacklist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for users data
datesNotMapped = utilsIngester.getDatesNotMapped(
"users",
Expand Down Expand Up @@ -34,6 +48,10 @@ def ingestUserDataPerTenenv(cls, tenenvId, session):
cls.logger.error("""
user status '{0}' is not valid """.format(user[0]['status']))
continue
if (user[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(user[0]['voPersonId']))
continue
session.exec("""INSERT INTO users(hasheduserid, created, updated, status, tenenv_id)
VALUES ('{0}','{1}','{1}', '{2}', {3})
ON CONFLICT(hasheduserid, tenenv_id)
Expand All @@ -48,8 +66,14 @@ def ingestUserDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestUserData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
# for each tenenv on database try to ingest UserData
# from statistics_raw table
for tenenvId in tenenvIds:
UserDataIngester.ingestUserDataPerTenenv(tenenvId[0], session)
for tenenv in tenenvIds:
UserDataIngester.ingestUserDataPerTenenv(tenenv, session)
3 changes: 1 addition & 2 deletions app/utils/configParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def getConfig(section='source_database', config_file='config.py'):
config[param[0]] = param[1]

else:
raise Exception(
'Section {0} not found in the {1} file'.format(section, config_file))
return False

return config

0 comments on commit 92c89f0

Please sign in to comment.