88from datetime import datetime
99
1010def main ():
11- print ("######################" )
12-
1311 GITHUB_REF = os .environ ["GITHUB_REF" ]
1412 GITHUB_REPOSITORY = os .environ ["GITHUB_REPOSITORY" ]
1513 GITHUB_RUN_ID = os .environ ["GITHUB_RUN_ID" ]
@@ -29,20 +27,21 @@ def main():
2927
3028 summary_url = "{url}/repos/{repo}/actions/runs/{run_id}" .format (url = GITHUB_API_URL ,repo = GITHUB_REPOSITORY ,run_id = GITHUB_WORKFLOWID )
3129
32- print (f"GITHUB_REF: { GITHUB_REF } " )
33- print (f"GITHUB_REPOSITORY: { GITHUB_REPOSITORY } " )
34- print (f"GITHUB_RUN_ID: { GITHUB_RUN_ID } " )
35- print (f"GITHUB_API_URL: { GITHUB_API_URL } " )
36- print (f"GITHUB_WORKFLOWID: { GITHUB_WORKFLOWID } " )
37- print (f"GITHUB_TOKEN: { GITHUB_TOKEN } " )
38- print (f"SPLUNK_HEC_URL: { SPLUNK_HEC_URL } " )
39- print (f"SPLUNK_HEC_TOKEN: { SPLUNK_HEC_TOKEN } " )
40- print (f"SPLUNK_SOURCE: { SPLUNK_SOURCE } " )
41- print (f"SPLUNK_SOURCETYPE: { SPLUNK_SOURCETYPE } " )
42- print (f"host: { host } " )
43- print (f"headers: { headers } " )
44- print (f"summary_url: { summary_url } " )
45- print ("######################" )
30+ # print("######################")
31+ # print(f"GITHUB_REF: {GITHUB_REF}")
32+ # print(f"GITHUB_REPOSITORY: {GITHUB_REPOSITORY}")
33+ # print(f"GITHUB_RUN_ID: {GITHUB_RUN_ID}")
34+ # print(f"GITHUB_API_URL: {GITHUB_API_URL}")
35+ # print(f"GITHUB_WORKFLOWID: {GITHUB_WORKFLOWID}")
36+ # print(f"GITHUB_TOKEN: {GITHUB_TOKEN}")
37+ # print(f"SPLUNK_HEC_URL: {SPLUNK_HEC_URL}")
38+ # print(f"SPLUNK_HEC_TOKEN: {SPLUNK_HEC_TOKEN}")
39+ # print(f"SPLUNK_SOURCE: {SPLUNK_SOURCE}")
40+ # print(f"SPLUNK_SOURCETYPE: {SPLUNK_SOURCETYPE}")
41+ # print(f"host: {host}")
42+ # print(f"headers: {headers}")
43+ # print(f"summary_url: {summary_url}")
44+ # print("######################")
4645
4746 try :
4847 x = requests .get (summary_url , stream = True , auth = ('token' ,GITHUB_TOKEN ))
@@ -89,74 +88,90 @@ def main():
8988 x = requests .post (SPLUNK_HEC_URL , data = event , headers = headers )
9089
9190
92- url = "{url }/repos/{repo }/actions/runs/{run_id }/logs". format ( url = GITHUB_API_URL , repo = GITHUB_REPOSITORY , run_id = GITHUB_WORKFLOWID )
91+ url = f" { GITHUB_API_URL } /repos/{ GITHUB_REPOSITORY } /actions/runs/{ GITHUB_WORKFLOWID } /logs"
9392 print (url )
9493
95- # try:
96- # x = requests.get(url, stream=True, auth=('token',GITHUB_TOKEN))
97-
98- # except requests.exceptions.HTTPError as errh:
99- # output = "GITHUB API Http Error:" + str(errh)
100- # print(f"Error: {output}")
101- # print(f"::set-output name=result::{output}")
102- # return
103- # except requests.exceptions.ConnectionError as errc:
104- # output = "GITHUB API Error Connecting:" + str(errc)
105- # print(f"Error: {output}")
106- # print(f"::set-output name=result::{output}")
107- # return
108- # except requests.exceptions.Timeout as errt:
109- # output = "Timeout Error:" + str(errt)
110- # print(f"Error: {output}")
111- # print(f"::set-output name=result::{output}")
112- # return
113- # except requests.exceptions.RequestException as err:
114- # output = "GITHUB API Non catched error conecting:" + str(err)
115- # print(f"Error: {output}")
116- # print(f"::set-output name=result::{output}")
117- # return
118-
119- # z = zipfile.ZipFile(io.BytesIO(x.content))
120- # z.extractall('/app')
94+ try :
95+ x = requests .get (url , stream = True , auth = ('token' ,GITHUB_TOKEN ))
12196
122- # timestamp = batch = count = 0
123-
124- # for name in glob.glob('/app/*.txt'):
125- # logfile = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), name.replace('./','')),'r')
126- # Lines = logfile.readlines()
127- # for line in Lines:
128-
129- # if line:
130- # count+=1
131- # if timestamp:
132- # t2=timestamp
133- # timestamp = re.search("\d{4}-\d{2}-\d{2}T\d+:\d+:\d+.\d+Z",line.strip())
134-
135- # if timestamp:
136- # timestamp = re.sub("\dZ","",timestamp.group())
137- # timestamp = datetime.strptime(timestamp,"%Y-%m-%dT%H:%M:%S.%f")
138- # timestamp = (timestamp - datetime(1970,1,1)).total_seconds()
139- # else:
140- # timestamp=t2
141-
142- # x = re.sub("\d{4}-\d{2}-\d{2}T\d+:\d+:\d+.\d+Z","",line.strip())
143- # x=x.strip()
144- # job_name=re.search("\/\d+\_(?P<job>.*)\.txt",name)
145- # job_name=job_name.group('job')
146- # fields = {'lineNumber':count,'workflowID':GITHUB_WORKFLOWID,'job':job_name}
147- # if x:
148- # batch+=1
149- # event={'event':x,'sourcetype':SPLUNK_SOURCETYPE,'source':SPLUNK_SOURCE,'host':host,'time':timestamp,'fields':fields}
150- # eventBatch=eventBatch+json.dumps(event)
151- # else:
152- # print("skipped line "+str(count))
153-
154- # if batch>=1000:
155- # batch=0
156- # x=requests.post(SPLUNK_HEC_URL, data=eventBatch, headers=headers)
157- # eventBatch=""
158-
159- # x=requests.post(SPLUNK_HEC_URL, data=eventBatch, headers=headers)
97+ except requests .exceptions .HTTPError as errh :
98+ output = "GITHUB API Http Error:" + str (errh )
99+ print (f"Error: { output } " )
100+ print (f"::set-output name=result::{ output } " )
101+ return
102+ except requests .exceptions .ConnectionError as errc :
103+ output = "GITHUB API Error Connecting:" + str (errc )
104+ print (f"Error: { output } " )
105+ print (f"::set-output name=result::{ output } " )
106+ return
107+ except requests .exceptions .Timeout as errt :
108+ output = "Timeout Error:" + str (errt )
109+ print (f"Error: { output } " )
110+ print (f"::set-output name=result::{ output } " )
111+ return
112+ except requests .exceptions .RequestException as err :
113+ output = "GITHUB API Non catched error conecting:" + str (err )
114+ print (f"Error: { output } " )
115+ print (f"::set-output name=result::{ output } " )
116+ return
117+
118+ z = zipfile .ZipFile (io .BytesIO (x .content ))
119+ # z.extractall('/app')
120+ log_folder = '/Users/ykoer/Workspace/ykoer/github-actions-example-workflows/.github/actions/log_to_splunk/tmp'
121+ z .extractall (log_folder )
122+
123+ timestamp = batch = count = 0
124+
125+ for name in glob .glob (f'{ log_folder } /*.txt' ):
126+ if os .path .basename (name ).startswith ('-' ):
127+ continue
128+ logfile = open (os .path .join (os .path .dirname (os .path .abspath (__file__ )), name .replace ('./' ,'' )),'r' )
129+ lines = logfile .readlines ()
130+ count = 0
131+ batch_number = 1
132+ for line in lines :
133+ if line :
134+ count += 1
135+ if timestamp :
136+ t2 = timestamp
137+ timestamp = re .search ("\d{4}-\d{2}-\d{2}T\d+:\d+:\d+.\d+Z" ,line .strip ())
138+
139+ if timestamp :
140+ timestamp = re .sub ("\dZ" ,"" ,timestamp .group ())
141+ timestamp = datetime .strptime (timestamp ,"%Y-%m-%dT%H:%M:%S.%f" )
142+ timestamp = (timestamp - datetime (1970 ,1 ,1 )).total_seconds ()
143+ else :
144+ timestamp = t2
145+
146+ # find empty lines and skip them
147+ x = re .sub ("\d{4}-\d{2}-\d{2}T\d+:\d+:\d+.\d+Z" ,"" ,line .strip ())
148+ x = x .strip ()
149+ job_name = re .search ("\/\d+\_(?P<job>.*)\.txt" ,name )
150+ job_name = job_name .group ('job' )
151+ fields = {'github_run_id' :GITHUB_RUN_ID ,'github_workflow_id' :GITHUB_WORKFLOWID ,'github_job_name' :job_name ,'line_number' :count }
152+ if x :
153+ batch += 1
154+ event = {'event' :x ,'sourcetype' :SPLUNK_SOURCETYPE ,'source' :SPLUNK_SOURCE ,'host' :host ,'time' :timestamp ,'fields' :fields }
155+ eventBatch = eventBatch + json .dumps (event )
156+ # else:
157+ # print("skipped line "+str(count))
158+
159+ # push every 1000 log lines to splunk as a batch
160+ if batch >= 1000 :
161+ batch = 0
162+
163+ x = requests .post (SPLUNK_HEC_URL , data = eventBatch , headers = headers )
164+ print (f'log_file={ name } , batch_number={ batch_number } , line_number={ count } , request_status_code:{ x .status_code } ' )
165+ eventBatch = ""
166+ batch_number += 1
167+ break
168+
169+ # push the last batch
170+ if batch > 0 :
171+ x = requests .post (SPLUNK_HEC_URL , data = eventBatch , headers = headers )
172+ print (f'log_file={ name } , batch_number={ batch_number } , line_number={ count } , request_status_code:{ x .status_code } ' )
173+ eventBatch = ""
174+ batch_number += 1
160175
# Run main() only when this file is executed directly as a script; importing
# it as a module leaves main() uncalled.
if __name__ == '__main__':
    main()
0 commit comments