diff --git a/src/baseline_modules/temporal_by_day/temporal_by_day.R b/src/baseline_modules/temporal_by_day/temporal_by_day.R index 98cbc298d..7317e3792 100644 --- a/src/baseline_modules/temporal_by_day/temporal_by_day.R +++ b/src/baseline_modules/temporal_by_day/temporal_by_day.R @@ -2,9 +2,11 @@ require('dplyr') require(tidyr) require(purrr) #input_table -#EVENT_TIME +#EVENT_TIME OR DAY +#NUM_EVENTS -> OPTIONAL #PIVOT #ID + get_percentiles <- function(dataframe, column_name, exit_name, column_names_summarize_by ){ p <- c(.1, .25,.75,.95,.5) p_names <- map_chr(p, ~paste0(.x*100, exit_name)) @@ -19,23 +21,43 @@ get_percentiles <- function(dataframe, column_name, exit_name, column_names_summ return(dataframe %>% group_by_(column_name) %>% summarize_at(vars(column_names_summarize_by), full_funs)) } -results <- input_table -results$EVENT_TIME <- as.POSIXct(results$EVENT_TIME) -results$DAY <- as.Date(results$EVENT_TIME, na.rm=TRUE) -earliest_time <- min(results$DAY, na.rm=TRUE) -latest_time <- max(results$DAY, na.rm=TRUE) -num_days <- latest_time - earliest_time +results <- input_table +print(colnames(results)) +if('EVENT_TIME' %in% colnames(results)) { + print('Event time case triggered') + results$EVENT_TIME <- as.POSIXct(results$EVENT_TIME) + results$DAY <- as.Date(results$EVENT_TIME, na.rm=TRUE) +}else{ + print('No event time going to day') + results$EVENT_TIME <- NA + results$DAY <- as.Date(results$DAY, "%Y-%m-%d", tz="GMT") +} +if(!('ID' %in% colnames(results))){ + print('No id in input') + results$ID <- NA +} +if('NUM_EVENTS' %in% colnames(results)){ + print('Num events found') + by_day_when_present <- results %>% + group_by(PIVOT, DAY)%>% + summarise(num_events=sum(NUM_EVENTS), num_ids=sum(ID)) +}else{ + print('num events not found') by_day_when_present <- results %>% group_by(PIVOT, DAY) %>% summarise(num_events=n(), num_ids=length(unique(ID)) ) - -expand_days <- by_day_when_present %>% tidyr::complete(DAY=seq.Date(min(DAY, na.rm=TRUE), max(DAY, na.rm=TRUE), by="day"), PIVOT, fill = list(num_events = 0, num_ids=0)) +} +print(by_day_when_present) +earliest_time <- min(results$DAY, na.rm=TRUE) +latest_time <- max(results$DAY, na.rm=TRUE) +num_days <- latest_time - earliest_time +expand_days <- by_day_when_present %>% tidyr::complete(DAY=seq.Date(min(DAY, na.rm=TRUE), max(DAY, na.rm=TRUE), by="day"), PIVOT, fill = list(num_events = 0, num_ids = if('ID' %in% colnames(results)) 0 else NA)) when_present_numeric<- get_percentiles(by_day_when_present, 'PIVOT', 'when_present', c('num_ids', 'num_events')) expand_days_numeric <- get_percentiles(expand_days, 'PIVOT', 'overall', c('num_ids', 'num_events')) diff --git a/src/baseline_modules/violations_closeout_date/violations_closeout_date.R b/src/baseline_modules/violations_closeout_date/violations_closeout_date.R index de21ba312..d1cb2aaeb 100644 --- a/src/baseline_modules/violations_closeout_date/violations_closeout_date.R +++ b/src/baseline_modules/violations_closeout_date/violations_closeout_date.R @@ -28,18 +28,17 @@ require(MASS) require(tidyr) require(purrr) - -print('a') - a <- input_table rm(input_table) + a$CURRENT_DAY <- a$CURRENT_DAY <- as.Date(as.POSIXct(a$CURRENT_DAY), format='%Y-%m-%d') a$FINAL <- as.logical(a$FINAL) a$NEW <- as.logical(a$NEW) a$PROD <- as.logical(a$PROD) colnames(a) <- make.unique(names(a)) + #Group for counts b <- a %>% group_by(QUERY_ID, CURRENT_DAY) %>% dplyr::summarize(counts=n_distinct(UNIQUE_KEYS)) namessss <- a %>% group_by(QUERY_ID) %>% dplyr::summarize(TITLE=first(TITLE)) @@ -51,13 +50,6 @@ c <- b
%>% c$age = as.integer(Sys.Date() - c$CURRENT_DAY+2) rm(b) -print(c) -#Group for name -c <- base::merge(c, namessss, by = "QUERY_ID", all.x=TRUE) -print(unique(c$QUERY_ID)) -print(unique(c$CURRENT_DAY)) -print(unique(c$age)) -print(unique(c$counts)) #Group for name c <- base::merge(c, namessss, by = "QUERY_ID", all.x=TRUE) @@ -66,22 +58,25 @@ c <- base::merge(c, namessss, by = "QUERY_ID", all.x=TRUE) model <- c %>% tidyr::nest(-QUERY_ID) %>% mutate( fit=map(data, ~ rlm(counts ~ CURRENT_DAY, weights=1/age^2, data = ., na.action = 'na.omit', maxit=100)) ) -print('model_complete') + e <- c %>% tidyr::complete(CURRENT_DAY=seq.Date(min(c$CURRENT_DAY), max(c$CURRENT_DAY)+100, by="day"),QUERY_ID) e$age = as.integer(max(e$CURRENT_DAY) - e$CURRENT_DAY+1) nested <- e %>% tidyr::nest(-QUERY_ID) + prediction <- model %>% inner_join(nested, by = "QUERY_ID") %>% mutate(results=map2(.x = model$fit, .y = nested$data, .f = ~augment(.x, newdata = .y), .id=.x), model2=model$fit) %>% unnest(c(results)) + prediction <- base::merge(prediction, namessss, by = "QUERY_ID", all.x=TRUE) prediction <- base::merge(prediction, dplyr::select(model, QUERY_ID, fit), by = "QUERY_ID", all.x=TRUE) prediction$near_zero <- abs(prediction$.fitted) - + +prediction<- prediction %>% replace(., is.na(.), "") return_value <- prediction %>% group_by(QUERY_ID) %>% summarise(last_day=max(CURRENT_DAY), x_intercept=as.character(CURRENT_DAY[which.min(near_zero)]) , unknown=as.character(x_intercept==last_day), value=min(near_zero), TITLE=first(TITLE.y)) %>% dplyr::select(QUERY_ID, TITLE, unknown, x_intercept) diff --git a/src/baseline_modules/violations_linear_prediction/violations_linear_prediction.R b/src/baseline_modules/violations_linear_prediction/violations_linear_prediction.R index 205a19e81..5c5ce3d1b 100644 --- a/src/baseline_modules/violations_linear_prediction/violations_linear_prediction.R +++ b/src/baseline_modules/violations_linear_prediction/violations_linear_prediction.R @@ -23,43 +23,41 @@ # ' require(dplyr) - require(tidyverse) - require(broom) - require(MASS) +require(broom) +require(MASS) +require(tidyr) +require(purrr) a <- input_table rm(input_table) -#Cleaning a$CURRENT_DAY <- a$CURRENT_DAY <- as.Date(as.POSIXct(a$CURRENT_DAY), format='%Y-%m-%d') a$FINAL <- as.logical(a$FINAL) a$NEW <- as.logical(a$NEW) a$PROD <- as.logical(a$PROD) colnames(a) <- make.unique(names(a)) - - #Group for counts -b <- a %>% group_by(QUERY_ID, CURRENT_DAY) %>% - dplyr::summarize(counts=n_distinct(UNIQUE_KEYS)) -namessss <- a %>% group_by(QUERY_ID) %>% dplyr::summarise(TITLE=first(TITLE)) +b <- a %>% group_by(QUERY_ID, CURRENT_DAY) %>% dplyr::summarize(counts=n_distinct(UNIQUE_KEYS)) +namessss <- a %>% group_by(QUERY_ID) %>% dplyr::summarize(TITLE=first(TITLE)) + rm(a) #Complete the missing values with zero -> no violations c <- b %>% tidyr::complete(CURRENT_DAY=seq.Date(min(b$CURRENT_DAY), max(b$CURRENT_DAY), by="day"),QUERY_ID, fill=list(counts = 0)) c$age = as.integer(Sys.Date() - c$CURRENT_DAY+2) rm(b) + #Group for name c <- base::merge(c, namessss, by = "QUERY_ID", all.x=TRUE) #Do the prediction analysis -model <- c %>% nest(-QUERY_ID) %>% +model <- c %>% tidyr::nest(-QUERY_ID) %>% mutate( - fit=map(data, ~ rlm(counts ~ CURRENT_DAY, weights=1/age^2, data = ., na.action = 'na.omit', maxit=100)) ) - + fit=map(data, ~ rlm(counts ~ CURRENT_DAY, weights=1/age^2, data = ., na.action = 'na.omit', maxit=100)) ) + e <- c %>% tidyr::complete(CURRENT_DAY=seq.Date(min(c$CURRENT_DAY), max(c$CURRENT_DAY)+100, by="day"),QUERY_ID) e$age = 
as.integer(max(e$CURRENT_DAY) - e$CURRENT_DAY+1) -nested <- e %>% nest(-QUERY_ID) - +nested <- e %>% tidyr::nest(-QUERY_ID) prediction <- model %>% @@ -69,8 +67,15 @@ prediction <- prediction <- base::merge(prediction, namessss, by = "QUERY_ID", all.x=TRUE) prediction <- base::merge(prediction, dplyr::select(model, QUERY_ID, fit), by = "QUERY_ID", all.x=TRUE) -prediction$fit <- toString(prediction$fit) + +prediction$fit <- as.character(prediction$fit) + + return_value <- dplyr::select(prediction, QUERY_ID, TITLE.y, CURRENT_DAY, counts, .fitted, .se.fit, fit) +return_value$CURRENT_DAY <- as.character(return_value$CURRENT_DAY) +#return_value$fit <- 'This is my fit, iti s a random string for now' +return_value <- return_value %>% replace(., is.na(.), "") +colnames(return_value) <- c('QUERY_ID', 'TITLE', 'CURRENT_DAY', 'COUNTS', 'FITTED', 'SEFIT', 'FIT') return_value #END diff --git a/src/connectors/__init__.py b/src/connectors/__init__.py index a86e3044d..841c2e7b4 100644 --- a/src/connectors/__init__.py +++ b/src/connectors/__init__.py @@ -5,6 +5,7 @@ from . import azure_vm from . import aws_config from . import aws_inventory +from . import tenable_settings __all__ = [ 'aws_inventory', @@ -14,6 +15,7 @@ 'azure_subscription', 'azure_vm', 'okta', + 'tenable_settings', ] connectors = { @@ -24,6 +26,7 @@ 'azure_subscription': azure_subscription, 'azure_vm': azure_vm, 'okta': okta, + 'tenable_settings': tenable_settings, } CONNECTION_OPTIONS = [ diff --git a/src/connectors/aws_cloudtrail.py b/src/connectors/aws_cloudtrail.py index 18b47b3d1..75a3da79a 100644 --- a/src/connectors/aws_cloudtrail.py +++ b/src/connectors/aws_cloudtrail.py @@ -1,5 +1,5 @@ """AWS CloudTrail -Collects AWS logs from an S3 bucket using AssumeRole +Collect AWS CloudTrail logs from S3 using a privileged Role """ from json import dumps diff --git a/src/connectors/aws_config.py b/src/connectors/aws_config.py index 2e744d06b..393cc571d 100644 --- a/src/connectors/aws_config.py +++ b/src/connectors/aws_config.py @@ -1,5 +1,5 @@ """AWS Config -Collects Config logs from S3 using AssumeRole +Collect Config logs from S3 using a privileged Role """ from json import dumps from time import sleep diff --git a/src/connectors/aws_inventory.py b/src/connectors/aws_inventory.py index 69852da82..2a309e6ce 100644 --- a/src/connectors/aws_inventory.py +++ b/src/connectors/aws_inventory.py @@ -1,5 +1,5 @@ """AWS Asset Inventory -Collects AWS EC2, SG, ELB details using an Access Key +Collect AWS EC2, SG, ELB details using an Access Key """ from datetime import datetime import json diff --git a/src/connectors/azure_subscription.py b/src/connectors/azure_subscription.py index 3cd0aa5bd..2cd6b2a9a 100644 --- a/src/connectors/azure_subscription.py +++ b/src/connectors/azure_subscription.py @@ -1,5 +1,5 @@ """Azure Subscription Inventory -Collects Azure Subscriptions using a Service Principal (SP) +Collect Azure Subscriptions using a Service Principal (SP) """ from dateutil.parser import parse @@ -115,7 +115,6 @@ def ingest(table_name, options): 'PARSE_JSON(column3)', 'column4', 'column5', - 'column5', 'column6', 'column7', 'PARSE_JSON(column8)', diff --git a/src/connectors/azure_vm.py b/src/connectors/azure_vm.py index 82ebd11b2..268902cdf 100644 --- a/src/connectors/azure_vm.py +++ b/src/connectors/azure_vm.py @@ -50,7 +50,6 @@ 'required': True, 'default': 'default' } - ] LANDING_TABLE_COLUMNS = [ @@ -121,7 +120,7 @@ def connect(connection_name, options): return { 'newStage': 'finalized', - 'newMessage': 'Landing and subscription data tables 
created for collectors to populate.' + 'newMessage': 'Landing and metadata tables created for collectors to populate.' } diff --git a/src/connectors/tenable_settings.py b/src/connectors/tenable_settings.py new file mode 100644 index 000000000..fb5db019e --- /dev/null +++ b/src/connectors/tenable_settings.py @@ -0,0 +1,137 @@ +"""Tenable Settings +Collect Tenable Settings using a Service User’s API Key +""" + +from tenable.io import TenableIO +from datetime import datetime + +from runners.helpers import db +from runners.helpers.dbconfig import ROLE as SA_ROLE + +CONNECTION_OPTIONS = [ + { + 'type': 'select', + 'options': [ + {'value': 'user', 'label': "Tenable Users"}, + ], + 'default': 'user', + 'name': 'connection_type', + 'title': "Settings Type", + 'prompt': "The type of Tenable Settings information you are ingesting to Snowflake.", + 'required': True + }, + { + 'type': 'str', + 'name': 'token', + 'title': "Tenable API Token", + 'prompt': "The Tenable API Token", + 'placeholder': 'f1234764cd987654we543nt1x456b65a098a1df1233c2986c07efa700f9d2187', + 'required': True, + }, + { + 'type': 'str', + 'name': 'secret', + 'title': "Tenable API Secret", + 'prompt': "The Secret Token for the Tenable API.", + 'required': True, + 'secret': True, + }, +] + +USER_LANDING_TABLE = [ + ('USERNAME', 'STRING(250)'), + ('ROLE', 'STRING(100)'), + ('RAW', 'VARIANT'), + ('SNAPSHOT_AT', 'TIMESTAMP_LTZ'), + ('UUID', 'STRING(100)'), + ('ID', 'STRING(100)'), + ('USER_NAME', 'STRING(250)'), + ('EMAIL', 'STRING(250)'), + ('TYPE', 'STRING(100)'), + ('PERMISSION', 'NUMBER'), + ('LAST_LOGIN_ATTEMPT', 'TIMESTAMP_LTZ'), + ('LOGIN_FAIL_COUNT', 'NUMBER'), + ('LOGIN_FAIL_TOTAL', 'NUMBER'), + ('ENABLED', 'BOOLEAN'), + ('TWO_FACTOR', 'VARIANT'), + ('LAST_LOGIN', 'TIMESTAMP_LTZ'), + ('UUID_ID', 'STRING(100)'), +] + + +def ingest_users(tio, table_name): + users = tio.users.list() + timestamp = datetime.utcnow() + + for user in users: + user['role'] = { + 16: 'Basic', + 24: 'Scan Operator', + 32: 'Standard', + 40: 'Scan Manager', + 64: 'Administrator', + }.get( + user['permissions'], + 'unknown permissions {permissions}' + ) + + db.insert( + table=f'data.{table_name}', + values=[( + user.get('username', None), + user.get('role', None), + user, + timestamp, + user.get('uuid', None), + user.get('id', None), + user.get('user_name', None), + user.get('email', None), + user.get('type', None), + user.get('permissions', None), + user.get('last_login_attempt', None), + user.get('login_fail_count', None), + user.get('login_fail_total', None), + user.get('enabled', None), + user.get('two_factor', None), + user.get('lastlogin', None), + user.get('uuid_id', None) + ) for user in users], + select=""" + column1, column2, PARSE_JSON(column3), column4, column5, column6, + column7, column8, column9, column10, + to_timestamp(column11, 3)::timestamp_ltz, column12, column13, + column14, PARSE_JSON(column15), + to_timestamp(column16, 3)::timestamp_ltz, column17 + """ + ) + + +def create_user_table(connection_name, options): + table_name = f'data.TENABLE_SETTINGS_{connection_name}_USER_CONNECTION' + token = options['token'] + secret = options['secret'] + comment = f""" +--- +module: tenable_settings +token: {token} +secret: {secret} +""" + + db.create_table(table_name, cols=USER_LANDING_TABLE, comment=comment) + db.execute(f'GRANT INSERT, SELECT ON {table_name} TO ROLE {SA_ROLE}') + + +def connect(connection_name, options): + if options['connection_type'] == 'user': + create_user_table(connection_name, options) + + return { + 'newStage': 'finalized', 
+ 'newMessage': 'Landing table created for collectors to populate.' + } + + +def ingest(table_name, options): + tio = TenableIO(options['token'], options['secret']) + if table_name.endswith('USER_CONNECTION'): + ingest_users(tio, table_name) diff --git a/src/ingestion/aad_auditlogs.py b/src/ingestion/aad_auditlogs.py deleted file mode 100644 index a7d095a87..000000000 --- a/src/ingestion/aad_auditlogs.py +++ /dev/null @@ -1,199 +0,0 @@ -import time -import requests -import json -from datetime import datetime, timedelta - -from adal import AuthenticationContext -from azure.storage.blob import BlockBlobService -from azure.storage.blob import ContentSettings -from azure.storage.common import TokenCredential - -from runners.helpers import vault - -# Azure Gov Cloud Endpoints -AAD_ENDPOINT_URI = "https://login.microsoftonline.us/" -GRAPH_ENDPOINT_URI = "https://graph.microsoft.us/" -STORAGE_ENDPOINT_SUFFIX = "core.usgovcloudapi.net" - - -def log(msg): - print(str(datetime.now()) + " " + msg) - - -def save_aad_auditlogs(auditlog_type, tenant_id, client_id, client_secret, - storage_account, storage_container): - METADATA_LAST_DATETIME = "last_datetime" - METADATA_LAST_EXECUTION = "last_execution" - - log("Save " + auditlog_type + " to " + storage_account + "/" + storage_container) - - # Create AAD authentication context to use for obtaining access tokens - auth_context = AuthenticationContext(AAD_ENDPOINT_URI + tenant_id) - - # Get access token for storage.azure.com - storage_token_response = auth_context.acquire_token_with_client_credentials( - "https://storage.azure.com/", - client_id, - client_secret - ) - - # Create Azure Blob service client - blob_service = BlockBlobService( - storage_account, - endpoint_suffix=STORAGE_ENDPOINT_SUFFIX, - token_credential=TokenCredential(storage_token_response['accessToken']) - ) - - # Create container if it does not yet exist - blob_service.create_container(storage_container, fail_on_exist=False) - - # Get datetime of last record from container metadata - # NOTE: Date strings have nanosecond precision so would require numpy.datetime64 for parsing - container_metadata = blob_service.get_container_metadata(storage_container) - last_datetime = "" - if METADATA_LAST_DATETIME in container_metadata: - last_datetime = container_metadata[METADATA_LAST_DATETIME] - else: - last_datetime = datetime.strftime(datetime.now() - timedelta(days=90), "%Y-%m-%dT%H:%M:%S.%fZ") - - log("Previous value container last_datetime=" + last_datetime + "") - - # Get access token for graph.microsoft.com - graph_token_response = auth_context.acquire_token_with_client_credentials( - GRAPH_ENDPOINT_URI, client_id, client_secret) - - # Initial request filtered by latest date time with a batch of 500 - if auditlog_type == "directoryAudits": - datetime_record_name = "activityDateTime" - graph_uri = ( - GRAPH_ENDPOINT_URI - + 'v1.0/auditLogs/directoryAudits?$top=500&$filter=' + datetime_record_name - + ' gt ' + last_datetime - ) - elif auditlog_type == "signIns": - datetime_record_name = "createdDateTime" - graph_uri = ( - GRAPH_ENDPOINT_URI - + 'v1.0/auditLogs/signIns?$top=500&$filter=' + datetime_record_name - + ' gt ' + last_datetime - ) - else: - log("Unknown auditlog_type = " + auditlog_type) - return - - max_record_datetime = last_datetime - - # May need to loop multiple times to get all of the data and retry throttled requestes with status code 429 - request_count = 0 - error_count = 0 - max_requests = 100 - max_errors = 50 - while request_count < max_requests and error_count < 
max_errors: - request_count += 1 - - # Issue Graph API request - session = requests.Session() - session.headers.update({'Authorization': "Bearer " + graph_token_response['accessToken']}) - response = session.get(graph_uri) - content_length = len(response.content) - response_json = response.json() - - log( - "Get " + graph_uri + " returned status_code=" + str(response.status_code) - + "; content_length=" + str(content_length) - + "; requests=" + str(request_count) + "/" + str(max_requests) - + "; errors=" + str(error_count) + "/" + str(max_errors) - ) - - if response.status_code != 200: - error_count += 1 - log("*** ERROR ***") - log("Headers: " + str(response.headers)) - log("Content: " + response.text) - - if response.status_code == 403: - # Exit immediately - log("Permission denied, existing.") - return - elif response.status_code == 429: - # Pause for longer when throttled - log("Request was throttled, waiting 10 seconds...") - time.sleep(10.0) - continue - else: - # Pause before retry - log("Waiting 5 seconds...") - time.sleep(5.0) - continue - - # Check if received valid response - if 'value' in response_json: - count = len(response_json['value']) - - # Records are ordered in descending order by activityDateTime/createdDateTime, so first record is the - # newest and last is the oldest - if count > 0: - last_record_datetime = response_json['value'][0][datetime_record_name] - first_record_datetime = response_json['value'][count - 1][datetime_record_name] - - # Upload logs to blob storage - blob_name = ( - "logs_" + first_record_datetime.replace(":", "") - + "_" + last_record_datetime.replace(":", "") - + "_" + str(count) - + ".json" - ) - blob_service.create_blob_from_text( - storage_container, - blob_name, - json.dumps(response_json), - encoding='utf-8', content_settings=ContentSettings(content_type='application/json') - ) - - log("Uploaded " + blob_name + " to " + storage_account + "/" + storage_container) - - if last_record_datetime > max_record_datetime: - max_record_datetime = last_record_datetime - else: - log("No new data") - - # If there is next page, go to next page. Otherwise, break out of the loop. - if "@odata.nextLink" in response_json: - graph_uri = response_json['@odata.nextLink'] - log("Next page found " + graph_uri) - else: - break - - # Record the last activityDateTime to filter next set of logs - blob_service.set_container_metadata( - storage_container, - metadata={ - METADATA_LAST_DATETIME: max_record_datetime, - METADATA_LAST_EXECUTION: str(datetime.now()) - } - ) - log("Recorded new container last_datetime=" + max_record_datetime) - - -def main(): - # Set your Azure Active Directory application credentials. - # Application must have permission for Microsoft.Graph AuditLog.Read.All - # and RBAC role "Storage Blob Contributor" to the storage account. - tenant_id = vault.decrypt_if_encrypted(envar='AAD_TENANT_ID') - client_id = vault.decrypt_if_encrypted(envar='AAD_CLIENT_ID') - client_secret = vault.decrypt_if_encrypted(envar='AAD_CLIENT_SECRET') - storage_account = vault.decrypt_if_encrypted(envar='AAD_STORAGE_ACCOUNT') - - if not (tenant_id and client_id and client_secret and storage_account): - print('[aad_auditlogs] missing required env var') - return - - save_aad_auditlogs("directoryAudits", tenant_id, client_id, client_secret, storage_account, "logs-audit") - - # AAD signIns report is only available for Azure AD Premium P1 or higher and will return an error for non-premium - # AAD tenants. 
- save_aad_auditlogs("signIns", tenant_id, client_id, client_secret, storage_account, "logs-signin") - - -if __name__ == '__main__': - main() diff --git a/src/mypy.ini b/src/mypy.ini index 63ac9464e..69822e9e3 100644 --- a/src/mypy.ini +++ b/src/mypy.ini @@ -48,3 +48,6 @@ ignore_missing_imports = True [mypy-azure.*] ignore_missing_imports = True + +[mypy-tenable.*] +ignore_missing_imports = True diff --git a/src/runners/alert_queries_runner.py b/src/runners/alert_queries_runner.py index df55d3b33..facb9781f 100755 --- a/src/runners/alert_queries_runner.py +++ b/src/runners/alert_queries_runner.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -import json -import uuid import datetime from multiprocessing import Pool from typing import Any, Dict @@ -15,59 +13,10 @@ CLOUDWATCH_METRICS, ) from runners.helpers import db, log -from runners.utils import groups_of GROUPING_CUTOFF = f"DATEADD(minute, -90, CURRENT_TIMESTAMP())" - -def log_alerts(alerts): - if len(alerts): - print("Recording alerts.") - try: - VALUES_INSERT_LIMIT = 16384 - for alert_group in groups_of(VALUES_INSERT_LIMIT, alerts): - db.insert_alerts(list(alert_group)) - - except Exception as e: - log.error("Failed to log alert", e) - - else: - print("No alerts to log.") - - -def log_failure(query_name, e, event_data=None, description=None): - if event_data is None: - event_data = f"The query '{query_name}' failed to execute with error: {e!r}" - - if description is None: - description = f"The query '{query_name}' failed to execute with error: {e!r}" - - alerts = [json.dumps({ - 'ALERT_ID': uuid.uuid4().hex, - 'QUERY_ID': '3a3d173a64ca4fcab2d13ac3e6d08522', - 'QUERY_NAME': 'Failure caught in AQR', - 'ENVIRONMENT': 'Queries', - 'SOURCES': ['Query Runner'], - 'ACTOR': 'Query Runner', - 'OBJECT': query_name, - 'ACTION': 'Query Execution', - 'TITLE': f'Error in {query_name}', - 'ALERT_TIME': str(datetime.datetime.utcnow()), - 'EVENT_TIME': str(datetime.datetime.utcnow()), - 'EVENT_DATA': event_data, - 'DESCRIPTION': description, - 'DETECTOR': 'Query Runner', - 'SEVERITY': 'High' - })] - try: - log_alerts(alerts) - log.info("Query failure logged.", e) - - except Exception as e: - log.error("Failed to log query failure", e) - - RUN_ALERT_QUERY = f""" CREATE TRANSIENT TABLE results.RUN_{RUN_ID}_{{query_name}} AS SELECT OBJECT_CONSTRUCT( @@ -169,7 +118,6 @@ def create_alerts(rule_name: str) -> Dict[str, Any]: db.execute(f"DROP TABLE results.RUN_{RUN_ID}_{rule_name}") except Exception as e: - log_failure(rule_name, e) db.record_metadata(metadata, table=QUERY_METADATA_TABLE, e=e) return metadata diff --git a/src/runners/alert_suppressions_runner.py b/src/runners/alert_suppressions_runner.py index 2ac5baed6..2cd7b0b15 100755 --- a/src/runners/alert_suppressions_runner.py +++ b/src/runners/alert_suppressions_runner.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -import json -import uuid import datetime from typing import List @@ -41,64 +39,6 @@ METADATA_HISTORY: List = [] -def log_alerts(ctx, alerts): - """We don't usually log alerts in the suppression runner, but we want the runner to create an alert if a - suppression fails to execute. 
- """ - if len(alerts): - print("Recording alerts.") - format_string = ", ".join(["(%s)"] * len(alerts)) - try: - ctx.cursor().execute( - f''' - INSERT INTO results.alerts (alert_time, alert) - SELECT PARSE_JSON(column1):ALERT_TIME, - PARSE_JSON(column1) - FROM VALUES {format_string}; - ''', - alerts - ) - except Exception as e: - log.error("Failed to log alert", e) - else: - print("No alerts to log.") - - -def log_failure(suppression_name, e, event_data=None, description=None): - if event_data is None: - event_data = f"The suppression '{suppression_name}' failed to execute with error: {e}" - - if description is None: - description = f"The suppression '{suppression_name}' failed to execute with error: {e}" - - ctx = db.connect() - - alerts = [json.dumps({ - 'ALERT_ID': uuid.uuid4().hex, - 'QUERY_ID': 'b1d02051dd2c4d62bb75274f2ee5996a', - 'QUERY_NAME': 'Suppression Runner Failure', - 'ENVIRONMENT': 'Suppressions', - 'SOURCES': ['Suppression Runner'], - 'ACTOR': 'Suppression Runner', - 'OBJECT': suppression_name, - 'ACTION': 'Suppression Execution', - 'TITLE': 'Suppression Runner Failure', - 'EVENT_TIME': str(datetime.datetime.utcnow()), - 'ALERT_TIME': str(datetime.datetime.utcnow()), - 'DESCRIPTION': description, - 'DETECTOR': 'Suppression Runner', - 'EVENT_DATA': event_data, - 'SEVERITY': 'High', - })] - - try: - log_alerts(ctx, alerts) - log.error(f"{suppression_name} failure successfully logged", e) - - except Exception as e: - log.error("Failed to log suppression failure", e) - - def run_suppression_query(squelch_name): try: query = SUPPRESSION_QUERY.format(suppression_name=squelch_name) @@ -125,7 +65,6 @@ def run_suppressions(squelch_name): db.record_metadata(metadata, table=QUERY_METADATA_TABLE) except Exception as e: - log_failure(squelch_name, e) metadata['ROW_COUNT'] = {'SUPPRESSED': 0} db.record_metadata(metadata, table=QUERY_METADATA_TABLE, e=e) diff --git a/src/runners/baseline_runner.py b/src/runners/baseline_runner.py index 0c57a7577..a9c80c000 100644 --- a/src/runners/baseline_runner.py +++ b/src/runners/baseline_runner.py @@ -1,4 +1,5 @@ from typing import List, Dict, Any +import subprocess from runners.config import ( DATA_SCHEMA, @@ -9,6 +10,7 @@ from rpy2 import robjects as ro from rpy2.robjects import pandas2ri +import math import pandas import yaml @@ -24,8 +26,14 @@ def pack(data: List[Dict[Any, Any]]) -> Dict[Any, List[Any]]: return {k: [d.get(k) for d in data] for k in keys} +def nanToNone(x): + if type(x) is float and math.isnan(x): + return None + return x + + def unpack(data): - b = [[data[k][i] for i in data[k]] for k in data] + b = [[nanToNone(x) for x in v.values()] for v in data.values()] return list(zip(*b)) @@ -36,7 +44,6 @@ def query_log_source(source, time_filter, time_column): data = list(db.fetch(query)) except Exception as e: log.error("Failed to query log source: ", e) - f = pack(data) frame = pandas.DataFrame(f) pandas2ri.activate() @@ -61,11 +68,9 @@ def run_baseline(name, comment): with open(f"../baseline_modules/{code_location}/{code_location}.R") as f: r_code = f.read() - r_code = format_code(r_code, required_values) frame = query_log_source(source, time_filter, time_column) ro.globalenv['input_table'] = frame - output = ro.r(r_code) output = output.to_dict() @@ -77,11 +82,15 @@ def run_baseline(name, comment): log.error("Failed to insert the results into the target table", e) -def main(): +def main(baseline='%_BASELINE'): db.connect() - for table in db.fetch(f"show tables like '%_BASELINE' in {DATA_SCHEMA}"): + baseline_tables = 
list(db.fetch(f"show tables like '{baseline}' in {DATA_SCHEMA}")) + for table in baseline_tables: name = table['name'] comment = table['comment'] log.info(f'{name} started...') - run_baseline(name, comment) + if len(baseline_tables) > 1: + subprocess.call(f"python ./run.py baseline {name}", shell=True) + else: + run_baseline(name, comment) log.info(f'{name} done.') diff --git a/src/runners/handlers/jira.py b/src/runners/handlers/jira.py index 12335c535..9a0251ccb 100644 --- a/src/runners/handlers/jira.py +++ b/src/runners/handlers/jira.py @@ -91,7 +91,7 @@ def link_search_todos(description=None): return f'{URL}/issues/?jql={quote(q)}' -def create_jira_ticket(alert): +def create_jira_ticket(alert, assignee=None, custom_field=None): if not user: return @@ -106,7 +106,12 @@ def create_jira_ticket(alert): new_issue = jira.create_issue(project=PROJECT, issuetype={'name': 'Story'}, summary=alert['TITLE'], - description=body) + description=body,) + if custom_field: + new_issue.update(fields={custom_field['id']: {'value': custom_field['value']}}) + if assignee: + jira.assign_issue(new_issue, assignee) + return new_issue @@ -144,14 +149,12 @@ def bail_out(alert_id): log.error(e, f"Failed to update alert {alert_id} with handler status") -def handle(alert, correlation_id, project=PROJECT): +def handle(alert, correlation_id, project=PROJECT, assignee=None, custom_field=None): global PROJECT PROJECT = project if PROJECT == '': - log.error("No Jira project defined") return "No Jira Project defined" if URL == '': - log.error("No Jira URL defined.") return "No Jira URL defined." CORRELATION_QUERY = f""" @@ -191,7 +194,7 @@ def handle(alert, correlation_id, project=PROJECT): # There is no correlation with a ticket that exists # Create a new ticket in JIRA for the alert try: - ticket_id = create_jira_ticket(alert) + ticket_id = create_jira_ticket(alert, assignee, custom_field) except Exception as e: log.error(e, f"Failed to create ticket for alert {alert_id}") return e diff --git a/src/runners/handlers/slack.py b/src/runners/handlers/slack.py index 9f2f1f0cc..a71d262ff 100644 --- a/src/runners/handlers/slack.py +++ b/src/runners/handlers/slack.py @@ -20,7 +20,7 @@ def message_template(vars): try: # retrieve Slack message structure from javascript UDF rows = db.connect_and_fetchall( - "select " + vars['template'] + "(parse_json('" + json.dumps(params) + "'))") + "select " + vars['template'] + "(" + db.value_to_sql(params) + ")") row = rows[1] if len(row) > 0: diff --git a/src/runners/helpers/db.py b/src/runners/helpers/db.py index 6470c17d9..90287a2ff 100644 --- a/src/runners/helpers/db.py +++ b/src/runners/helpers/db.py @@ -309,6 +309,17 @@ def insert_violations_query_run(query_name, ctx=None) -> Tuple[int, int]: def value_to_sql(v): if type(v) is str: return f"'{v}'" + + if type(v) is dict: + v = json.dumps(v) + obj = ( + v.replace("'", "\\'") + .replace("\\n", "\\\\n") + .replace("\\t", "\\\\t") + .replace("\\\"", "\\\\\"") + ) + return f"parse_json('{obj}')" + return str(v) @@ -343,13 +354,12 @@ def record_metadata(metadata, table, e=None): record_type = metadata.get('QUERY_NAME', 'RUN') - metadata_json_sql = "'" + json.dumps(metadata).replace('\\', '\\\\').replace("'", "\\'") + "'" + metadata_json_sql = value_to_sql(metadata) sql = f''' INSERT INTO {table}(event_time, v) SELECT '{metadata['START_TIME']}' - , PARSE_JSON(column1) - FROM VALUES({metadata_json_sql}) + , {metadata_json_sql} ''' try: diff --git a/src/runners/run.py b/src/runners/run.py index 7cb152b07..60fa0d2d6 100644 --- 
a/src/runners/run.py +++ b/src/runners/run.py @@ -46,6 +46,9 @@ def main(target="all", rule_name=None): if rule_name.endswith("_CONNECTION"): connectors_runner.main(rule_name.upper()) + if rule_name.upper().endswith("_BASELINE"): + baseline_runner.main(rule_name.upper()) + else: log.info(f"STARTING RUN WITH ID {RUN_ID}") log.info(f"got command {target}") diff --git a/src/runners/tests/regression/SP1288_snowflake_url_parsing.py b/src/runners/tests/regression/SP1288_snowflake_url_parsing.py new file mode 100644 index 000000000..493a7d8ab --- /dev/null +++ b/src/runners/tests/regression/SP1288_snowflake_url_parsing.py @@ -0,0 +1,26 @@ +""" +The installer didn't handle snowflake URLs with a region containing '.', +such as azure_account.azure_region.azure.snowflakecomputing.com +""" + +from scripts.install import parse_snowflake_url + + +TEST_URLS = { + "account": ['account', None], + "account.snowflakecomputing.com": ['account', 'us-west-2'], + "account.region": ['account', 'region'], + "account.region.snowflakecomputing.com": ['account', 'region'], + "azure_account.azure_region.azure": ['azure_account', 'azure_region.azure'], + "azure_account.azure_region.azure.snowflakecomputing.com": ['azure_account', 'azure_region.azure'], + "https://account.snowflakecomputing.com": ['account', 'us-west-2'], + "https://account.region.snowflakecomputing.com": ['account', 'region'], + "https://azure_account.azure_region.azure.snowflakecomputing.com/console": ['azure_account', 'azure_region.azure'], +} + + +def test_url_parsing(): + for key, value in TEST_URLS.items(): + a, b = parse_snowflake_url(key) + assert a == value[0] + assert b == value[1] diff --git a/src/runners/tests/run_alerts.py b/src/runners/tests/run_alerts.py index 615a311f1..5a421affd 100644 --- a/src/runners/tests/run_alerts.py +++ b/src/runners/tests/run_alerts.py @@ -212,7 +212,7 @@ def test_alert_runners_processor_and_dispatcher(sample_alert_rules, update_jira_ assert len(query_rule_run_record) == 7 # 3 from samples + 4 test alert queries assert query_rule_run_record[0]['QUERY_NAME'] == 'ACTIVITY_BY_ADMIN_ALERT_QUERY' - queries_by_admin = 50 + queries_by_admin = 55 assert query_rule_run_record[0]['NUM_ALERTS_CREATED'] == queries_by_admin assert query_rule_run_record[1]['QUERY_NAME'] == 'SNOWFLAKE_LOGIN_WITHOUT_MFA_ALERT_QUERY' diff --git a/src/scripts/install.py b/src/scripts/install.py index e443978ad..236e942ec 100755 --- a/src/scripts/install.py +++ b/src/scripts/install.py @@ -155,9 +155,12 @@ def parse_snowflake_url(url): if len(c) == 1: account = c[0] else: - if path.endswith("snowflakecomputing.com"): - account = c[0] - region = c[1] if len(c) == 4 else 'us-west-2' + if c[-2:] == ['snowflakecomputing', 'com']: + c.pop(-1) + c.pop(-1) + + account = c.pop(0) + region = '.'.join(c) if len(c) > 0 else 'us-west-2' return account, region @@ -198,6 +201,9 @@ def login(configuration=None): else: print(f"Loaded account: '{account}'") + if not region: + region = input("Region of your Snowflake account [blank for us-west-2]: ") + if not username: print("Next, authenticate installer --") username = input("Snowflake username: ") @@ -209,9 +215,6 @@ def login(configuration=None): else: print(f"Loaded password: {'*' * len(password)}") - if not region: - region = input("Region of your Snowflake account [blank for us-west-2]: ") - connect_kwargs = {'user': username, 'account': account} if password == '': connect_kwargs['authenticator'] = 'externalbrowser' diff --git a/src/scripts/installer-queries/data-views.sql.fmt 
b/src/scripts/installer-queries/data-views.sql.fmt index 0914ca5e1..fd6d6678c 100644 --- a/src/scripts/installer-queries/data-views.sql.fmt +++ b/src/scripts/installer-queries/data-views.sql.fmt @@ -139,6 +139,7 @@ SELECT V:RUN_ID::VARCHAR AS run_id , V:START_TIME::TIMESTAMP AS start_time , V:END_TIME::TIMESTAMP AS end_time , V:ROW_COUNT.SUPPRESSED::INTEGER AS num_alerts_suppressed + , V:ERROR AS error FROM results.query_metadata WHERE V:QUERY_NAME ILIKE '%_ALERT_SUPPRESSION' ; @@ -190,6 +191,81 @@ SELECT V:RUN_ID::VARCHAR AS run_id , V:START_TIME::TIMESTAMP AS start_time , V:END_TIME::TIMESTAMP AS end_time , V:ROW_COUNT.SUPPRESSED::INTEGER AS num_violations_suppressed + , V:ERROR AS error FROM results.query_metadata WHERE V:QUERY_NAME ILIKE '%_VIOLATION_SUPPRESSION' ; + +CREATE OR REPLACE VIEW data.rule_views_to_titles_map COPY GRANTS + COMMENT='Maps rules views to their titles for easy joining' +AS +SELECT table_name AS view_name + , REGEXP_SUBSTR(comment, '^[^\\n]*', 1, 1, 'e') AS title_from_comment + , REGEXP_SUBSTR(view_definition, ', \'(.*\)\' AS title', 1, 1, 'e') AS title_field +FROM information_schema.views +WHERE table_schema='RULES' +; + +CREATE OR REPLACE VIEW data.alert_query_rule_run_errors COPY GRANTS + COMMENT='Alert Query rule runs joined on errors' +AS +SELECT start_time + , run_id + , title_field + , title_from_comment + , query_name + , REGEXP_REPLACE(error:EXCEPTION_ONLY::STRING, '\\n', ' ') AS exception +FROM data.alert_query_rule_runs runs +LEFT JOIN data.rule_views_to_titles_map map +ON runs.query_name=map.view_name +WHERE error IS NOT NULL +ORDER BY start_time DESC +; + +CREATE OR REPLACE VIEW data.alert_suppression_rule_run_errors COPY GRANTS + COMMENT='Alert Query rule runs joined on errors' +AS +SELECT start_time + , run_id + , title_field + , title_from_comment + , rule_name + , REGEXP_REPLACE(error:EXCEPTION_ONLY::STRING, '\\n', ' ') AS exception +FROM data.alert_suppression_rule_runs runs +LEFT JOIN data.rule_views_to_titles_map map +ON runs.rule_name=map.view_name +WHERE error IS NOT NULL +ORDER BY start_time DESC +; + +CREATE OR REPLACE VIEW data.violation_query_rule_run_errors COPY GRANTS + COMMENT='Violation Query rule runs joined on errors' +AS +SELECT start_time + , run_id + , title_field + , title_from_comment + , query_name + , REGEXP_REPLACE(error:EXCEPTION_ONLY::STRING, '\\n', ' ') AS exception +FROM data.violation_query_rule_runs runs +LEFT JOIN data.rule_views_to_titles_map map +ON runs.query_name=map.view_name +WHERE error IS NOT NULL +ORDER BY start_time DESC +; + +CREATE OR REPLACE VIEW data.violation_suppression_rule_run_errors COPY GRANTS + COMMENT='Violation Query rule runs joined on errors' +AS +SELECT start_time + , run_id + , title_field + , title_from_comment + , rule_name + , REGEXP_REPLACE(error:EXCEPTION_ONLY::STRING, '\\n', ' ') AS exception +FROM data.violation_suppression_rule_runs runs +LEFT JOIN data.rule_views_to_titles_map map +ON runs.rule_name=map.view_name +WHERE error IS NOT NULL +ORDER BY start_time DESC +; diff --git a/src/setup.py b/src/setup.py index 3ad0862a4..f3c96b0d0 100644 --- a/src/setup.py +++ b/src/setup.py @@ -24,6 +24,7 @@ 'azure-mgmt-subscription==0.4.1', 'azure-storage-blob==1.5.0', 'azure-storage-common==1.4.0', + 'pyTenable==0.3.22', 'boto3' ], ) diff --git a/src/webui/frontend/public/icons/connectors/tenable_settings.png b/src/webui/frontend/public/icons/connectors/tenable_settings.png new file mode 100644 index 000000000..f81439710 Binary files /dev/null and 
b/src/webui/frontend/public/icons/connectors/tenable_settings.png differ diff --git a/src/webui/frontend/src/components/GlobalHeader/GlobalHeader.tsx b/src/webui/frontend/src/components/GlobalHeader/GlobalHeader.tsx index 425e263ec..1793b213e 100644 --- a/src/webui/frontend/src/components/GlobalHeader/GlobalHeader.tsx +++ b/src/webui/frontend/src/components/GlobalHeader/GlobalHeader.tsx @@ -102,7 +102,7 @@ class GlobalHeader extends React.PureComponent { - SnowAlert v1.8.2 + SnowAlert v1.8.3
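
For reference, a minimal sketch of the permission-code-to-role mapping used by ingest_users() in src/connectors/tenable_settings.py above. The codes and labels are copied from the connector; role_for() is a hypothetical helper added here for illustration only. Note that the connector's fallback string is a plain literal, so '{permissions}' is stored verbatim there; this sketch interpolates it with an f-string.

# Sketch only: mirrors the mapping in ingest_users(); role_for() is not part of the module.
ROLE_BY_PERMISSION = {
    16: 'Basic',
    24: 'Scan Operator',
    32: 'Standard',
    40: 'Scan Manager',
    64: 'Administrator',
}

def role_for(permissions):
    # Unknown codes fall back to a descriptive string, as in the connector.
    return ROLE_BY_PERMISSION.get(permissions, f'unknown permissions {permissions}')

assert role_for(64) == 'Administrator'
assert role_for(99) == 'unknown permissions 99'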
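
The unpack()/nanToNone() pair added to src/runners/baseline_runner.py converts the dict-of-dicts that rpy2's output.to_dict() returns back into row tuples, replacing NaN with None before the rows are inserted. A self-contained sketch follows; the sample dict is invented to show the behaviour and is not real baseline output.

# Sketch: same functions as in the diff, exercised on an invented sample.
import math

def nanToNone(x):
    # Only plain Python floats are checked, matching the runner's behaviour.
    if type(x) is float and math.isnan(x):
        return None
    return x

def unpack(data):
    # data maps column name -> {row index -> value}; rebuild row tuples.
    b = [[nanToNone(x) for x in v.values()] for v in data.values()]
    return list(zip(*b))

sample = {
    'PIVOT': {0: 'a', 1: 'b'},
    'num_events': {0: 3.0, 1: float('nan')},
}
print(unpack(sample))  # [('a', 3.0), ('b', None)]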
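
The new dict branch of value_to_sql() in src/runners/helpers/db.py JSON-serializes a dict, escapes quotes and escape sequences, and wraps the result in parse_json(...) so record_metadata() and the Slack handler can inline it directly in SQL. The sketch below is a standalone copy for illustration with an invented sample value; the real helper in db.py is authoritative.

# Sketch: standalone copy of the escaping logic, not the real helper.
import json

def value_to_sql_sketch(v):
    if type(v) is str:
        return f"'{v}'"
    if type(v) is dict:
        obj = (
            json.dumps(v)
            .replace("'", "\\'")
            .replace("\\n", "\\\\n")
            .replace("\\t", "\\\\t")
            .replace("\\\"", "\\\\\"")
        )
        return f"parse_json('{obj}')"
    return str(v)

print(value_to_sql_sketch({'QUERY_NAME': "weekend admin's query"}))
# parse_json('{"QUERY_NAME": "weekend admin\'s query"}')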
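
Finally, a standalone sketch of the URL-parsing behaviour the revised parse_snowflake_url() in src/scripts/install.py is expected to have, matching the cases in the SP1288 regression test above. This re-implementation is an assumption for illustration (the scheme/path stripping is not shown in the hunk); the installer's own function is the source of truth.

# Sketch: hypothetical re-implementation consistent with the SP1288 test cases.
def parse_snowflake_url_sketch(url):
    # Keep only the host: drop an optional scheme and any path such as /console.
    host = url.split('://', 1)[-1].split('/', 1)[0]
    parts = host.split('.')
    if len(parts) == 1:
        return parts[0], None          # bare account, region unknown
    if parts[-2:] == ['snowflakecomputing', 'com']:
        parts = parts[:-2]             # drop the trailing snowflakecomputing.com
    account = parts.pop(0)
    region = '.'.join(parts) if parts else 'us-west-2'
    return account, region

assert parse_snowflake_url_sketch('account') == ('account', None)
assert parse_snowflake_url_sketch('account.snowflakecomputing.com') == ('account', 'us-west-2')
assert parse_snowflake_url_sketch(
    'https://azure_account.azure_region.azure.snowflakecomputing.com/console'
) == ('azure_account', 'azure_region.azure')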