Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #39

Merged
merged 7 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions app/ingester/loginsIngester.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from app.logger import log
from app.utils import configParser
from app.utils.ipDatabase import geoip2Database
from sqlalchemy.exc import NoResultFound
from .utilsIngester import utilsIngester
Expand All @@ -7,6 +8,7 @@
class LoginDataIngester:
logger = log.get_logger("LoginDataIngester")


@classmethod
def getIdpId(cls, entityid, idpName, tenenvId, session):
# Check if IdP exists
Expand Down Expand Up @@ -152,8 +154,20 @@ def getCountryFromIP(cls, ipAddress, session):
return countryId

@classmethod
def ingestLoginDataPerTenenv(cls, tenenvId, session):
def ingestLoginDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_excludelist', config_file) is not False and
'user_ids' in configParser.getConfig('user_excludelist', config_file)):
user_ids = configParser.getConfig('user_excludelist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for logins data
datesNotMapped = utilsIngester.getDatesNotMapped(
"statistics_country_hashed",
Expand All @@ -175,7 +189,14 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):
AND tenenv_id={0} {1}
""".format(tenenvId, between)).all()
loginMappedItems = 0

for login in loginsNotMapped:

if (login[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(login[0]['voPersonId']))
continue

if (not login[0]['failedLogin']
and utilsIngester.validateTenenv(login[0]['tenenvId'], session)
and 'voPersonId' in login[0]
Expand All @@ -197,7 +218,7 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):
login[0]['spName'],
login[0]['tenenvId'],
session)

if ('countryCode' in login[0] and 'countryName' in login[0]):
# find countryId
countryId = LoginDataIngester.getCountryFromCountryCode([login[0]['countryCode'], login[0]['countryName']], session)
Expand All @@ -222,6 +243,12 @@ def ingestLoginDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestLoginData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
for tenenvId in tenenvIds:
LoginDataIngester.ingestLoginDataPerTenenv(tenenvId[0], session)
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
for tenenv in tenenvIds:
LoginDataIngester.ingestLoginDataPerTenenv(tenenv, session)
33 changes: 29 additions & 4 deletions app/ingester/membeshipIngester.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from app.logger import log
from app.utils import configParser
from sqlalchemy.exc import NoResultFound
from .utilsIngester import utilsIngester
import hashlib


class MembershipDataIngester:
Expand All @@ -27,7 +29,20 @@ def getCommunityId(cls, communityName, tenenvId, session):
return communityId

@classmethod
def ingestMembershipDataPerTenenv(cls, tenenvId, session):
def ingestMembershipDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_excludelist', config_file) is not False and
'user_ids' in configParser.getConfig('user_excludelist', config_file)):
user_ids = configParser.getConfig('user_excludelist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for users data
datesNotMapped = utilsIngester.getDatesNotMapped(
"members",
Expand All @@ -54,6 +69,10 @@ def ingestMembershipDataPerTenenv(cls, tenenvId, session):
cls.logger.error("""
VO name '{0}' not found """.format(membership[0]['voName']))
continue
if (membership[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(membership[0]['voPersonId']))
continue
session.exec("""INSERT INTO members(community_id,
hasheduserid, status, tenenv_id, created, updated)
VALUES ('{0}','{1}','{2}', {3}, '{4}', '{4}')
Expand All @@ -69,9 +88,15 @@ def ingestMembershipDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestMembershipData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
# for each tenenv on database try to ingest UserData
# from statistics_raw table
for tenenvId in tenenvIds:
for tenenv in tenenvIds:
MembershipDataIngester.ingestMembershipDataPerTenenv(
tenenvId[0], session)
tenenv, session)
34 changes: 29 additions & 5 deletions app/ingester/usersIngester.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
from app.logger import log
from app.utils import configParser
from .utilsIngester import utilsIngester

import hashlib

class UserDataIngester:
logger = log.get_logger("UserDataIngester")

@classmethod
def ingestUserDataPerTenenv(cls, tenenvId, session):
def ingestUserDataPerTenenv(cls, tenenv, session):
tenenvId = tenenv['id']
tenant_name = tenenv['tenant_name']
environment_name = tenenv['environment_name']
hashed_user_ids = []

config_file = f'config.{tenant_name.lower()}.{environment_name.lower()}.py'

if (configParser.getConfig('user_excludelist', config_file) is not False and
'user_ids' in configParser.getConfig('user_excludelist', config_file)):
user_ids = configParser.getConfig('user_excludelist', config_file)['user_ids'].split('\n')
# Hash each value using MD5
hashed_user_ids = [hashlib.md5(value.strip().encode()).hexdigest() for value in user_ids]

# get dates not mapped for users data
datesNotMapped = utilsIngester.getDatesNotMapped(
"users",
Expand Down Expand Up @@ -34,6 +48,10 @@ def ingestUserDataPerTenenv(cls, tenenvId, session):
cls.logger.error("""
user status '{0}' is not valid """.format(user[0]['status']))
continue
if (user[0]['voPersonId'] in hashed_user_ids):
cls.logger.info("""Ignore this user with
hash {0} as he is at the blacklist""". format(user[0]['voPersonId']))
continue
session.exec("""INSERT INTO users(hasheduserid, created, updated, status, tenenv_id)
VALUES ('{0}','{1}','{1}', '{2}', {3})
ON CONFLICT(hasheduserid, tenenv_id)
Expand All @@ -48,8 +66,14 @@ def ingestUserDataPerTenenv(cls, tenenvId, session):

@classmethod
def ingestUserData(cls, session):
tenenvIds = session.exec("""SELECT id FROM tenenv_info""").all()
tenenvIds = session.exec("""SELECT tenenv_info.id,
tenant_info.name AS tenant_name,
environment_info.name AS environment_name
FROM tenenv_info
JOIN tenant_info ON tenant_id=tenant_info.id
JOIN environment_info ON env_id=environment_info.id
""").all()
# for each tenenv on database try to ingest UserData
# from statistics_raw table
for tenenvId in tenenvIds:
UserDataIngester.ingestUserDataPerTenenv(tenenvId[0], session)
for tenenv in tenenvIds:
UserDataIngester.ingestUserDataPerTenenv(tenenv, session)
2 changes: 2 additions & 0 deletions app/tests/locust.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ def metrics_egi_devel_tenenv(self):
}

self.client.get("/api/v1/tenenv/egi/devel", headers=headers)
self.client.get("/api/v1/tenenv/einfra/devel", headers=headers)

3 changes: 1 addition & 2 deletions app/utils/configParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def getConfig(section='source_database', config_file='config.py'):
config[param[0]] = param[1]

else:
raise Exception(
'Section {0} not found in the {1} file'.format(section, config_file))
return False

return config
Loading