import os
import requests
import json
import zipfile
import io
import glob
import re
from datetime import datetime

def main():
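    # GitHub Actions exposes action inputs as INPUT_<NAME> environment variables,
    # so the values below come straight from the action's `with:` block.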
    GITHUB_REF = os.environ["GITHUB_REF"]
    GITHUB_REPOSITORY = os.environ["GITHUB_REPOSITORY"]
    GITHUB_RUN_ID = os.environ["GITHUB_RUN_ID"]
    GITHUB_API_URL = os.environ["GITHUB_API_URL"]
    GITHUB_WORKFLOWID = os.environ["INPUT_WORKFLOW_ID"]
    GITHUB_TOKEN = os.environ.get("INPUT_GITHUB_TOKEN")

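    # Splunk HEC expects JSON events at /services/collector/event; normalize the
    # configured base URL so a missing trailing slash does not break the path.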
    SPLUNK_HEC_URL = os.environ["INPUT_SPLUNK_URL"].rstrip("/") + "/services/collector/event"
    SPLUNK_HEC_TOKEN = os.environ["INPUT_HEC_TOKEN"]
    SPLUNK_SOURCE = os.environ["INPUT_SOURCE"]
    SPLUNK_SOURCETYPE = os.environ["INPUT_SOURCETYPE"]

    batch = count = 0
    eventBatch = ""
    headers = {"Authorization": "Splunk " + SPLUNK_HEC_TOKEN}
    host = os.uname()[1]

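    # GET /repos/{owner}/{repo}/actions/runs/{run_id} returns the run summary.
    # Note: the WORKFLOW_ID input is used here as a run id.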
    summary_url = f"{GITHUB_API_URL}/repos/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_WORKFLOWID}"

    # Debug output; leave disabled, since it would print the GitHub and HEC tokens.
    # print("######################")
    # print(f"GITHUB_REF: {GITHUB_REF}")
    # print(f"GITHUB_REPOSITORY: {GITHUB_REPOSITORY}")
    # print(f"GITHUB_RUN_ID: {GITHUB_RUN_ID}")
    # print(f"GITHUB_API_URL: {GITHUB_API_URL}")
    # print(f"GITHUB_WORKFLOWID: {GITHUB_WORKFLOWID}")
    # print(f"GITHUB_TOKEN: {GITHUB_TOKEN}")
    # print(f"SPLUNK_HEC_URL: {SPLUNK_HEC_URL}")
    # print(f"SPLUNK_HEC_TOKEN: {SPLUNK_HEC_TOKEN}")
    # print(f"SPLUNK_SOURCE: {SPLUNK_SOURCE}")
    # print(f"SPLUNK_SOURCETYPE: {SPLUNK_SOURCETYPE}")
    # print(f"host: {host}")
    # print(f"headers: {headers}")
    # print(f"summary_url: {summary_url}")
    # print("######################")
    # for key, value in os.environ.items():
    #     print(f'{key}={value}')
    # print("######################")

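    # Fetch the run summary; on any request failure, report the error through
    # the action output and bail out.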
    try:
        x = requests.get(summary_url, stream=True, auth=('token', GITHUB_TOKEN))
        x.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        output = "GITHUB API Http Error: " + str(errh)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return x.status_code
    except requests.exceptions.ConnectionError as errc:
        # x is unbound when the request itself fails, so return a generic error code
        output = "GITHUB API Error Connecting: " + str(errc)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return 1
    except requests.exceptions.Timeout as errt:
        output = "GITHUB API Timeout Error: " + str(errt)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return 1
    except requests.exceptions.RequestException as err:
        output = "GITHUB API uncaught error connecting: " + str(err)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return 1
    except Exception as e:
        print("Internal error", e)
        return 1

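    # Flatten the nested head_repository object into two scalar fields so the
    # summary event indexes cleanly in Splunk.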
    summary = x.json()

    summary.pop('repository')

    summary["repository"] = summary["head_repository"]["name"]
    summary["repository_full"] = summary["head_repository"]["full_name"]

    summary.pop('head_repository')

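    # Use the run's updated_at timestamp (UTC) as the event time, converted to
    # epoch seconds as required by HEC's 'time' field.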
    utc_time = datetime.strptime(summary["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
    epoch_time = (utc_time - datetime(1970, 1, 1)).total_seconds()

    event = {'event': json.dumps(summary), 'sourcetype': SPLUNK_SOURCETYPE, 'source': 'workflow_summary', 'host': host, 'time': epoch_time}
    event = json.dumps(event)
    print(event)

    # x = requests.post(SPLUNK_HEC_URL, data=event, headers=headers)

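    # GET /repos/{owner}/{repo}/actions/runs/{run_id}/logs redirects to a zip
    # archive containing every job log for the run.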
    url = f"{GITHUB_API_URL}/repos/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_WORKFLOWID}/logs"
    print(url)

    try:
        x = requests.get(url, stream=True, auth=('token', GITHUB_TOKEN))
        x.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        output = "GITHUB API Http Error: " + str(errh)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return
    except requests.exceptions.ConnectionError as errc:
        output = "GITHUB API Error Connecting: " + str(errc)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return
    except requests.exceptions.Timeout as errt:
        output = "GITHUB API Timeout Error: " + str(errt)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return
    except requests.exceptions.RequestException as err:
        output = "GITHUB API uncaught error connecting: " + str(err)
        print(f"Error: {output}")
        print(f"::set-output name=result::{output}")
        return

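    # The downloaded body is a zip archive; unpack the job logs into /tmp.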
    z = zipfile.ZipFile(io.BytesIO(x.content))
    log_folder = '/tmp'
    z.extractall(log_folder)

    timestamp = batch = count = 0
    t2 = 0  # carries the last seen timestamp forward for lines without one

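    # Top-level log files in the archive are expected to be named
    # <number>_<job name>.txt; anything whose basename starts with '-' is skipped.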
    for name in glob.glob(f'{log_folder}/*.txt'):
        if os.path.basename(name).startswith('-'):
            continue
        with open(name, 'r') as logfile:
            lines = logfile.readlines()
        count = 0
        batch_number = 1
        for line in lines:
            if line:
                count += 1
                if timestamp:
                    t2 = timestamp
                timestamp = re.search(r"\d{4}-\d{2}-\d{2}T\d+:\d+:\d+\.\d+Z", line.strip())

                if timestamp:
                    # drop the seventh fractional digit and the 'Z' so %f (max 6 digits) can parse it
                    timestamp = re.sub(r"\dZ", "", timestamp.group())
                    timestamp = datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f")
                    timestamp = (timestamp - datetime(1970, 1, 1)).total_seconds()
                else:
                    # no timestamp on this line: reuse the previous one
                    timestamp = t2

                # strip the leading timestamp; empty remainders are skipped below
                x = re.sub(r"\d{4}-\d{2}-\d{2}T\d+:\d+:\d+\.\d+Z", "", line.strip())
                x = x.strip()
                job_name = re.search(r"/\d+_(?P<job>.*)\.txt", name)
                job_name = job_name.group('job')
                fields = {'github_run_id': GITHUB_RUN_ID, 'github_workflow_id': GITHUB_WORKFLOWID, 'github_job_name': job_name, 'line_number': count}
                if x:
                    batch += 1
                    event = {'event': x, 'sourcetype': SPLUNK_SOURCETYPE, 'source': SPLUNK_SOURCE, 'host': host, 'time': timestamp, 'fields': fields}
                    eventBatch = eventBatch + json.dumps(event)

                    # push every 1000 log lines to splunk as a batch
                    if batch >= 1000:
                        x = requests.post(SPLUNK_HEC_URL, data=eventBatch, headers=headers)
                        print(f'log_file={name}, batch_number={batch_number}, line_number={count}, request_status_code:{x.status_code}')
                        batch = 0
                        eventBatch = ""
                        batch_number += 1

        # push the last batch
        if batch > 0:
            x = requests.post(SPLUNK_HEC_URL, data=eventBatch, headers=headers)
            print(f'log_file={name}, batch_number={batch_number}, line_number={count}, request_status_code:{x.status_code}')
            eventBatch = ""
            batch = 0

if __name__ == '__main__':
    main()