forked from kasperschmidt/idl
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadcol.pro
333 lines (292 loc) · 13.4 KB
/
readcol.pro
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
pro readcol,name,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15, $
            v16,v17,v18,v19,v20,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,$
            v31,v32,v33,v34,v35,v36,v37,v38,v39,v40, COMMENT = comment, $
            FORMAT = fmt, DEBUG=debug, SILENT=silent, SKIPLINE = skipline, $
            NUMLINE = numline, DELIMITER = delimiter, NAN = NaN, $
            PRESERVE_NULL = preserve_null, COUNT=ngood, NLINES=nlines, $
            STRINGSKIP = skipstart
;+
; NAME:
;       READCOL
; PURPOSE:
;       Read a free-format ASCII file with columns of data into IDL vectors
; EXPLANATION:
;       Lines of data not meeting the specified format (e.g. comments) are
;       ignored.  By default, columns may be separated by commas or spaces.
;
;       Use READFMT to read a fixed-format ASCII file.   Use RDFLOAT for
;       much faster I/O (but less flexibility).    Use FORPRINT to write
;       columns of data (inverse of READCOL).
;
; CALLING SEQUENCE:
;       READCOL, name, v1, [ v2, v3, v4, v5, ...  v40 , COMMENT=, /NAN,
;           DELIMITER= ,FORMAT = , /DEBUG ,  /SILENT , SKIPLINE = , NUMLINE = ,
;           /PRESERVE_NULL, COUNT =, NLINES =, STRINGSKIP= ]
;
; INPUTS:
;       NAME - Name of ASCII data file, scalar string.
;
; OPTIONAL INPUT KEYWORDS:
;       FORMAT - scalar string containing a letter specifying an IDL type
;               for each column of data to be read.  Allowed letters are
;               A - string data, B - byte, O - long 64 bit integer,
;               D - double precision, F - floating point, I - integer,
;               L - longword, Z - longword hexadecimal, and X - skip a column.
;
;               Columns without a specified format are assumed to be floating
;               point.  Examples of valid values of FMT are
;
;       'A,B,I'        ;First column to read as a character string, then
;                       1 column of byte data, 1 column integer data
;       'L,L,L,L'       ;Four columns will be read as longword arrays.
;       ' '             ;All columns are floating point
;
;       If a FORMAT keyword string is not supplied, then all columns are
;       assumed to be floating point.
;
;       /SILENT - Normally, READCOL will display each line that it skips over.
;               If SILENT is set and non-zero then these messages will be
;               suppressed.
;       /DEBUG - If this keyword is non-zero, then additional information is
;                printed as READCOL attempts to read and interpret the file.
;       COMMENT - single character specifying comment character.   Any line
;                beginning with this character will be skipped.   Default is
;                no comment lines.
;       DELIMITER - Character(s) specifying delimiter used to separate
;                columns.   Usually a single character but, e.g. delimiter=':,'
;                specifies that either a colon or comma as a delimiter.
;                The default delimiter is either a comma or a blank.
;       /NAN - if set, then an empty field will be read into a floating or
;                double numeric variable as NaN; by default an empty field is
;                converted to 0.0.
;       /PRESERVE_NULL - If set, then spaces are considered to be valid fields,
;                useful if the columns contain missing data.   Note that between
;                April and December 2006, /PRESERVE_NULL was the default.
;       SKIPLINE - Scalar specifying number of lines to skip at the top of file
;               before reading.   Default is to start at the first line.
;       NUMLINE - Scalar specifying number of lines in the file to read.
;               Default is to read the entire file
;       STRINGSKIP - will skip all lines that begin with the specified string.
;               (Unlike COMMENT this can be more than 1 character.) Useful to
;               skip over comment lines.
;
; OUTPUTS:
;       V1,V2,V3,...V40 - IDL vectors to contain columns of data.
;               Up to 40 columns may be read.  The type of the output vectors
;               are as specified by FORMAT.
;
; OPTIONAL OUTPUT KEYWORDS:
;       COUNT - integer giving the number of valid lines actually read.
;               It is set to 0 whenever READCOL returns without reading data.
;       NLINES - integer giving the number of lines READCOL attempted to read,
;               i.e. the value returned by FILE_LINES, reduced by SKIPLINE and
;               limited to NUMLINE when those keywords are supplied.
;
; EXAMPLES:
;       Each row in a file position.dat contains a star name and 6 columns
;       of data giving an RA and Dec in sexagesimal format.   Read into IDL
;       variables.   (NOTE: The star names must not include the delimiter
;       as a part of the name, no spaces or commas as default.)
;
;       IDL> FMT = 'A,I,I,F,I,I,F'
;       IDL> READCOL,'position.dat',F=FMT,name,hr,min,sec,deg,dmin,dsec
;
;       The HR,MIN,DEG, and DMIN variables will be integer vectors.
;
;       Alternatively, all except the first column could be specified as
;       floating point.
;
;       IDL> READCOL,'position.dat',F='A',name,hr,min,sec,deg,dmin,dsec
;
;       To read just the variables HR,MIN,SEC
;       IDL> READCOL,'position.dat',F='X,I,I,F',HR,MIN,SEC
;
; RESTRICTIONS:
;       This procedure is designed for generality and not for speed.
;       If a large ASCII file is to be read repeatedly, it may be worth
;       writing a specialized reader.
;
;       Columns to be read as strings must not contain the delimiter character
;       (i.e. commas or spaces by default).   Either change the default
;       delimiter with the DELIMITER keyword, or use READFMT to read such files.
;
;       Numeric values are converted to specified format.  For example,
;       the value 0.13 read with an 'I' format will be converted to 0.
;
; PROCEDURES CALLED
;       GETTOK(), REMCHAR, STRNUMBER()
;       The version of STRNUMBER() must be after August 2006.
; NOTES:
;       Under V6.1 or later, READCOL uses the SCOPE_VARFETCH function rather
;       than EXECUTE().    This is faster and allows readcol.pro to be used
;       in the IDL Virtual machine.
; REVISION HISTORY:
;       Written         W. Landsman                 November, 1988
;       Modified             J. Bloch                   June, 1991
;       (Fixed problem with over allocation of logical units.)
;       Added SKIPLINE and NUMLINE keywords  W. Landsman    March 92
;       Read a maximum of 25 cols.  Joan Isensee, Hughes STX Corp., 15-SEP-93.
;       Call NUMLINES() function W. Landsman          Feb. 1996
;       Added DELIMITER keyword  W. Landsman          Nov. 1999
;       Fix indexing typos (i for k) that mysteriously appeared W. L. Mar. 2000
;       Hexadecimal support added.  MRG, RITSS, 15 March 2000.
;       Default is comma or space delimiters as advertised   W.L. July 2001
;       Faster algorithm, use STRSPLIT if V5.3 or later  W.L.  May 2002
;       Accept null strings separated by delimiter ,e.g. ',,,'
;       Use SCOPE_VARFETCH instead of EXECUTE() for >V6.1  W.L. Jun 2005
;       Added compile_opt idl2   W. L.  July 2005
;       Added the NaN keyword   W. L      August 2006
;       Added /PRESERVE_NULL keyword  W.L.  January 2007
;       Assume since V5.6 (FILE_LINES available ) W.L. Nov 2007
;       Added COUNT output keyword  W.L.  Aug 2008
;       Added NLINES output keyword W.L.   Nov 2008
;       Added SKIPSTART keyword  Stephane Beland January 2008
;       Renamed SKIPSTART to STRINGSKIP to keep meaning of SKIP W.L. Feb 2008
;       Assume since V6.1, SCOPE_VARFETCH available W.L. July 2009
;       Read up to 40 columns W.L. Aug 2009
;       64 bit integer format added by K. B. Schmidt (MPIA) Sep 2009
;       (for further formats see http://idlastro.gsfc.nasa.gov/idl_html_help/SIZE.html)
;-
  On_error,2                           ;Return to caller
  compile_opt idl2

; Initialise the COUNT output up front so that it is defined (as 0) on every
; early-return path below, not only after the read loop has been reached.
  ngood = 0L

  if not keyword_set(SILENT) then message,'== readcol.pro version 090924 ==',/INFORM

  if N_params() lt 2 then begin
     print,'Syntax - READCOL, name, v1, [ v2, v3,...v40, /NAN'
     print,' FORMAT= ,/SILENT ,SKIPLINE =, NUMLINE = , /DEBUG, COUNT=]'
     return
  endif

; Get number of lines in file
  nlines = FILE_LINES( name )
  if nlines LE 0 then begin
     message,'ERROR - File ' + name+' contains no data',/CON
     return
  endif

  if keyword_set(DEBUG) then $
     message,'File ' + name+' contains ' + strtrim(nlines,2) + ' lines',/INF

  if not keyword_set( SKIPLINE ) then skipline = 0
  nlines = nlines - skipline

; SKIPLINE may consume the whole file.  Stop here with a clear diagnostic
; rather than letting MAKE_ARRAY fail below on a non-positive dimension.
  if nlines LE 0 then begin
     message,'ERROR - File ' + name+' contains no data',/CON
     return
  endif

  if keyword_set( NUMLINE) then nlines = numline < nlines

; STRINGSKIP handling: remember whether a prefix was given and its length
  if not keyword_set( SKIPSTART ) then begin
     skipstart_flg=0
  endif else begin
     skipstart_flg=1
     nskipstart = strlen(skipstart)
  endelse

  ncol = N_params() - 1           ;Number of columns of data expected
  vv = 'v' + strtrim( indgen(ncol)+1, 2)   ;Names of the output parameters
  nskip = 0

  if N_elements(fmt) GT 0 then begin    ;FORMAT string supplied?

     if size(fmt,/tname) NE 'STRING' then $
        message,'ERROR - Supplied FORMAT keyword must be a scalar string'

;    Remove blanks from format string
     frmt = strupcase(strcompress(fmt,/REMOVE))
     remchar, frmt, '('                  ;Remove parenthesis from format
     remchar, frmt, ')'

;    Determine number of columns to skip ('X' format)
     pos = strpos(frmt, 'X', 0)
     while pos NE -1 do begin
        pos = strpos( frmt, 'X', pos+1)
        nskip = nskip + 1
     endwhile

  endif else begin                     ;Read everything as floating point

     frmt = 'F'
     if ncol GT 1 then for i = 1,ncol-1 do frmt = frmt + ',F'
     if not keyword_set( SILENT ) then message, $
        'Format keyword not supplied - All columns assumed floating point',/INF

  endelse

  nfmt = ncol + nskip             ;Number of file columns that are examined
  idltype = intarr(nfmt)

; Create output arrays according to specified formats

  k = 0L                                     ;Loop over output columns
  hex = bytarr(nfmt)                         ;1 where a field uses 'Z' format
  for i = 0L, nfmt-1 do begin

;    GETTOK is expected to strip and return the leading comma-delimited
;    token of FRMT; an empty token means no format was given for this field.
     fmt1 = gettok( frmt, ',' )
     if fmt1 EQ '' then fmt1 = 'F'         ;Default is F format
     case strmid(fmt1,0,1) of
        'A':  idltype[i] = 7    ; string
        'D':  idltype[i] = 5    ; double precision
        'F':  idltype[i] = 4    ; floating point
        'I':  idltype[i] = 2    ; integer
        'B':  idltype[i] = 1    ; byte data
        'O':  idltype[i] = 14   ; long 64 bit integer
        'L':  idltype[i] = 3    ; longword
        'Z':  begin
                 idltype[i] = 3               ;Hexadecimal
                 hex[i] = 1b
              end
        'X':  idltype[i] = 0               ;IDL type of 0 ==> to skip column
        ELSE: message,'Illegal format ' + fmt1 + ' in field ' + strtrim(i,2)
     endcase

;    Define output arrays
     if idltype[i] GT 0 then begin
        (SCOPE_VARFETCH(vv[k], LEVEL=0)) = make_array(nlines,TYPE = idltype[i])
        k = k+1
     endif

  endfor

; Keep only the fields that are actually returned.  IDLTYPE and HEX were
; filled per FORMAT field (skipped 'X' fields included) while the parsing
; loop below walks the kept columns only, so both must be reduced with the
; same index vector.  Leaving HEX unreduced applies the hexadecimal flag of
; the wrong field to every column that follows an 'X'.
  goodcol = where(idltype)
  idltype = idltype[goodcol]
  hex = hex[goodcol]

; Strings and 64 bit integers are stored without a STRNUMBER check; the
; latter are treated as non-numeric to prevent the conversion into a real*8
; by STRNUMBER below.
  check_numeric = (idltype NE 7 AND idltype NE 14)

  openr, lun, name, /get_lun
  temp = ' '
  skip_lun,lun,skipline, /lines

  if not keyword_set(delimiter) then delimiter = ' ,'

; NGOOD is the index of the next output row.  Every rejected line first
; decrements NGOOD and jumps to BADLINE, where the unconditional increment
; restores it, so only accepted lines advance the counter.
  for j = 0L, nlines-1 do begin

     readf, lun, temp

     if skipstart_flg then begin
;       requested to skip lines starting with specific string
        if strmid(temp,0,nskipstart) eq skipstart then begin
           ngood = ngood-1
           goto, BADLINE
        endif
     endif

     if strlen(temp) LT ncol then begin    ;Need at least 1 chr per output line
        ngood = ngood-1
        if not keyword_set(SILENT) then $
           message,'Skipping Line ' + strtrim(skipline+j+1,2),/INF
        goto, BADLINE
     endif

     k = 0
     temp = strtrim(temp,1)                  ;Remove leading spaces
     if keyword_set(comment) then if strmid(temp,0,1) EQ comment then begin
        ngood = ngood-1
        if keyword_set(DEBUG) then $
           message,'Skipping Comment Line ' + strtrim(skipline+j+1,2),/INF
        goto, BADLINE
     endif

     var = strsplit(strcompress(temp),delimiter,/extract, preserve=preserve_null)
     if N_elements(var) LT nfmt then begin
        if not keyword_set(SILENT) then $
           message,'Skipping Line ' + strtrim(skipline+j+1,2),/INF
        ngood = ngood-1
        goto, BADLINE         ;Enough columns?
     endif
     var = var[goodcol]                      ;Drop the 'X' (skipped) fields

     for i = 0L,ncol-1 do begin

        if check_numeric[i] then begin    ;Check for valid numeric data
           tst = strnumber(var[i],val,hex=hex[i],NAN=nan)  ;Valid number?
           if tst EQ 0 then begin            ;If not, skip this line
              if not keyword_set(SILENT) then $
                 message,'Skipping Line ' + strtrim(skipline+j+1,2),/INF
              ngood = ngood-1
              goto, BADLINE
           endif
           (SCOPE_VARFETCH(vv[k], LEVEL=0))[ngood] = val

        endif else $
           (SCOPE_VARFETCH(vv[k], LEVEL=0))[ngood] = var[i]

        k = k+1

     endfor

BADLINE:  ngood = ngood+1

  endfor

  free_lun,lun
  if ngood EQ 0 then begin
     message,'ERROR - No valid lines found for specified format',/INFORM
     return
  endif

  if not keyword_set(SILENT) then $
     message,strtrim(ngood,2) + ' valid lines read', /INFORM

; Compress arrays to match actual number of valid lines
  if ngood lt Nlines then for i=0,ncol-1 do $
     (SCOPE_VARFETCH(vv[i], LEVEL=0)) = $
        (SCOPE_VARFETCH(vv[i], LEVEL=0))[0:ngood-1]

  return
end