-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_cmorph2.py
184 lines (165 loc) · 6.06 KB
/
get_cmorph2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
"""Get and set attributes on a satellite netcdf file."""
import argparse
import os
import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse
import datetime
from html.parser import HTMLParser
from time import strftime
from time import sleep
from datetime import datetime, timedelta
##############################################################
class MyHTMLParser(HTMLParser):
    """Collect CMORPH2 file names from the text of an HTML page.

    Every text line whose first eight characters contain ``CMORPH2`` is
    appended to ``satfile`` in document order.

    Attributes:
        satfile: matching text lines found so far.
        record: True while the parser is inside an ``<a>`` element;
            ``handle_data`` does not consult it.
        fcnt: counter initialised to 0 and not modified by this class.
        verbose: explicit verbosity setting, or None to defer to the
            module-level ``verbose`` flag.
    """

    def __init__(self, verbose=None):
        """Create an empty parser.

        Args:
            verbose: when true, echo every text chunk handed to
                ``handle_data``.  When None (the default) the module-level
                ``verbose`` global is consulted instead, and a missing
                global is treated as False.
        """
        super().__init__()
        self.satfile = []
        self.record = False
        self.fcnt = 0
        self.verbose = verbose

    def _is_verbose(self):
        """Return the effective verbosity without requiring the global."""
        if self.verbose is not None:
            return bool(self.verbose)
        # A bare reference to the global raised NameError whenever the
        # parser was used before main() had published the flag.
        return bool(globals().get("verbose", False))

    def handle_starttag(self, tag, attrs):
        """Turn recording on when an anchor tag opens."""
        if tag == 'a':
            self.record = True

    def handle_endtag(self, tag):
        """Turn recording off when an anchor tag closes."""
        if tag == 'a':
            self.record = False

    def handle_data(self, data):
        """Store each text line that looks like a CMORPH2 file name."""
        if self._is_verbose():
            print("Found data line: ", data)
        for dline in data.splitlines():
            # "CMORPH2" is 7 characters long, so this accepts the marker at
            # offset 0 or 1 of the line; blank lines never match.
            if "CMORPH2" in dline[:8]:
                self.satfile.append(dline)
##################
def _process_command_line():
    """Parse the script's command-line options.

    Returns the argparse namespace holding ``backhrs`` (int, default 8)
    and ``verbose`` (bool, set by ``-v``/``--verbose``).
    """
    cli = argparse.ArgumentParser()
    # Each entry is (option flags, keyword settings) for add_argument.
    option_table = (
        (('-bh', '--backhrs'),
         dict(type=int, action='store', default=8,
              help='num hrs back to search')),
        (('-v', '--verbose'),
         dict(action='store_true', help='verbose flag')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
##############################################################
def get_filepaths(directory):
    """Return the path of every file found beneath ``directory``.

    The tree is traversed with ``os.walk``; each result is the containing
    directory joined with the file name, in traversal order.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]
##############################################################
def find_files(directory, matchfile):
    """Report whether a file named ``matchfile`` exists under ``directory``.

    Walks the tree with ``os.walk`` and stops at the first directory that
    contains a file whose name equals ``matchfile``.

    Returns 1 when such a file is found, otherwise 0.
    """
    found = any(
        matchfile in names
        for _parent, _subdirs, names in os.walk(directory)
    )
    return 1 if found else 0
##############################################################
def filesecs(filename):
    """Decode the timestamp embedded in a file name.

    The third underscore-separated field of ``filename`` is read as
    ``YYYYMMDDHHMM`` (its first twelve characters) and returned as a
    naive ``datetime``.
    """
    stamp = filename.split('_')[2]
    # Slice bounds for year, month, day, hour and minute, in that order.
    bounds = ((0, 4), (4, 6), (6, 8), (8, 10), (10, 12))
    return datetime(*[int(stamp[lo:hi]) for lo, hi in bounds])
#####################################################################
def main():
    """Download recent CMORPH2 files that are not yet in the archive.

    Reads the directory listing at the CMORPH2 base URL.  For every listed
    name whose embedded timestamp falls within the last ``--backhrs`` hours
    (UTC) and which ``find_files`` does not locate under the archive tree,
    the file is fetched and written to the current working directory.  A
    summary line with the number of files downloaded is printed at the end.

    Errors while fetching the directory listing are not caught and
    propagate to the caller; errors for an individual file are reported
    and that file is skipped.
    """
    # MyHTMLParser.handle_data reads this module-level flag.
    global verbose
    # Local import: the file's import block does not provide timezone.
    from datetime import timezone

    args = _process_command_line()
    verbose = args.verbose

    # Naive UTC, because filesecs() returns naive datetimes to compare with.
    # datetime.utcnow() is deprecated; this is its documented replacement.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    nowdatestr = now.strftime("%Y%m%d%H%M")
    # Oldest acceptable file time, truncated to the whole minute.
    starttime = (now - timedelta(hours=args.backhrs)).replace(
        second=0, microsecond=0)

    # This is the base url for access to CMORPH2.
    baseurl = "https://ftp.cpc.ncep.noaa.gov/precip/PORT/NESDIS/CMORPH2_RT/.GINA/"
    print("URL=", baseurl)
    with urllib.request.urlopen(baseurl) as sock:
        htmlSource = str(sock.read(), 'UTF-8')
    if args.verbose:
        print("BEGIN HTML ==================================================")
        print(htmlSource)
        print("END HTML ====================================================")
    print("HTML String length = {}".format(len(htmlSource)))

    # Instantiate the parser and feed it the HTML page.
    parser = MyHTMLParser()
    parser.feed(htmlSource)

    # NOTE(review): the "already downloaded" check searches archivebase, but
    # files are written to the current working directory -- confirm the
    # script is always run from inside that tree.
    archivebase = "/mnt/noaa-case-study-data/cmorph2"
    download = 0
    for filename in parser.satfile:
        if args.verbose:
            print(filename)

        # Names come from a remote page and are used as local paths below;
        # refuse anything that carries a directory component.
        if os.path.basename(filename) != filename:
            print("Unexpected path in name: {} Skipping...".format(filename))
            continue

        try:
            fsecs = filesecs(filename)
        except (IndexError, ValueError):
            # A listing line can start like a CMORPH2 name without holding
            # a parsable timestamp; skip it instead of aborting the run.
            print("Cannot parse time from: {} Skipping...".format(filename))
            continue

        if fsecs < starttime:
            if args.verbose:
                print("File too old: {} Skipping...".format(filename))
            continue
        if find_files(archivebase, filename):
            if args.verbose:
                print("File already downloaded: {} Skipping...".format(
                    filename))
            continue

        fullurl = "{}{}".format(baseurl, filename)
        print("Requesting: {}".format(fullurl))
        print("fullurl={}".format(fullurl))
        try:
            with urllib.request.urlopen(fullurl) as response:
                print("Connected! Writing to: {}".format(filename))
                # Read everything before creating the output file so a
                # failed transfer does not leave an empty file behind.
                payload = response.read()
        except urllib.error.HTTPError as e:
            print('The server couldn\'t fulfill the request.')
            print('Error code: ', e.code)
            continue
        except urllib.error.URLError as e:
            print('Failed to reach a server.')
            print('Reason: ', e.reason)
            continue

        with open(filename, "wb") as fh:
            fh.write(payload)
        download += 1

    # Merge conflict resolved in favour of the full timestamp: the other
    # side printed hour and minute without zero padding.
    print("{} Files downloaded: {}".format(nowdatestr, download))
# Run the fetcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()