-
Notifications
You must be signed in to change notification settings - Fork 96
/
codebook.sas
236 lines (202 loc) · 8.11 KB
/
codebook.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
%macro codebook(
data=,
library=work,
maxfmts=0,
def_othr=yes,
w_label=20,
w_format=20,
w_raw=9,
addvalue=yes,
sortby=name, /* sorby: name or varnum */
out=codebook ) ;
/*------------------------------------------------------------------
This macro creates a codebook for a SAS data set, describing
each of the variables in the data set, its length, type, label,
user-defined format, and the possible values it can have based on
the format assigned to it. It does this by combining the output
of a PROC CONTENTS and a PROC FORMATS and printing out the
results using PROC REPORT.
The macro has the following arguments:
data Identifies the SAS data set to document.
library Identifies the libname that is assigned to the format
library holding the formats for the data.
maxfmts Restricts the number of entries of output for each
variable. If more are to be printed, a note is
displayed indicating as such. Zero means all entries
print.
def_othr Designates which formats do not have a formatted value
for 'other' values. Is not in effect if addvalue=no.
w_label Defines the width of the field holding the variable
label. Labels in excess of this width will wrap.
w_format Defines the width of the formated values for each
variable.
addvalue Determines whether unformatted values of each variable
will display in addition to the formatted values.
w_raw Defines the width of the unformatted values in the
output.
out Names the output data set
Note: By manipulating the PAGESIZE and LINESIZE options of your
SAS session, as well as the W_LABEL, W_FORMAT and W_RAW you
can orient the output to either a landscape or portrait
mode.
Example :
%codebook(data=sas.data,library=library, def_othr=yes,
maxfmts=5, w_label=40,w_format=20, addvalue=yes, w_raw=9)
*-------------------------------------------------------------------
CODEBOOK (was DOCSPGM)
Written By : Steve James [email protected]
Last Updated 1/22/1997
Modified By : Michael Friendly [email protected]
Last Updated 16 Feb 1999 10:11:33
*-------------------------------------------------------------------*/
%let dsn = %scan(&data,2,'.') ;
%if &dsn =
%then %do ;
%let libname = work ;
%let dsn = &data ;
%end ;
%else %let libname = %scan(&data,1,'.') ;
options pageno=1 nonotes;
proc format ;
value vartype 1='Num' 2='Char' ;
run;
*-------------------------------------------------------------------;
* Now create the data set that contains the formats and their ;
* possible values by taking the output from a PROC FORMAT. ;
*-------------------------------------------------------------------;
proc format library=&library
cntlout=formats(rename=(fmtname=format label=fmtlabel type=ftype)
keep=fmtname type label start end sexcl eexcl hlo
where=(ftype in ('N','C'))) ;
run ;
*-------------------------------------------------------------------;
* Create a data set with a list of all the variables and any ;
* user-defined format names. ;
*-------------------------------------------------------------------;
proc contents data=&data noprint
out=vars(keep=name type format label length engine nobs varnum) ;
run ;
*---------------------------------------------------------------------;
* Combine the two data sets by format name. PROC SQL is because MERGE;
* will not handle the case where the same format is used for more than;
* one variable, and there is no need to strip the leading '$' from the;
* name of character formats. ;
*---------------------------------------------------------------------;
proc sql ;
create table &out as
select vars.name, vars.type, vars.format, vars.label, vars.length,
vars.engine, formats.start, vars.varnum,
formats.end, formats.fmtlabel,
vars.nobs, formats.sexcl, formats.eexcl, formats.hlo
from vars left join formats
on scan(vars.format,1,'$') = formats.format
order by &sortby, start
;
data &out ;
length format $10 rawvalue $&w_raw fmtlabel $&w_format
connect $3 typelen $9 ;
set &out end=eof;
by &sortby ;
drop engine nobs start end sexcl eexcl hlo;
*-- determine maximum label width;
retain maxlab maxfmt 0;
maxlab = max(maxlab, length(label));
maxfmt = max(maxfmt, length(fmtlabel));
if eof then do;
call symput('maxlab',left(trim(put(maxlab,8.))));
call symput('maxfmt',left(trim(put(maxfmt,8.))));
end;
if type = 1 then typelen = 'Num:' || left(put(length,3.));
else typelen = 'Char:' || left(put(length,3.));
if format = ' ' then format ='* None *' ;
if start = end then rawvalue = trim(left(start)) ;
else do ;
connect = '-' ;
if sexcl = 'Y' and eexcl='Y' then connect = '<-<' ;
else if sexcl = 'Y' then connect = '<-' ;
else if eexcl = 'Y' then connect = '-<' ;
rawvalue = trim(left(start)) || trim(left(connect))
|| trim(left(end)) ;
end ;
*----------------------------------------------------;
* If MAXFMTS is specified, count the number of ;
* entries for each variable and delete those past the;
* limit. ;
*----------------------------------------------------;
if &maxfmts > 0
then do ;
if first.name then count= 0 ;
count+1 ;
if count = &maxfmts and not last.name
then fmtlabel = '... and more ...' ;
else if count > &maxfmts then delete ;
end ;
output ;
%if %upcase(&def_othr) = YES and %upcase(&addvalue)=YES
%then %do ;
if last.name and start ne '**OTHER**' and format ne '* None *'
then do ;
rawvalue = '**OTHER**' ;
fmtlabel = '*No Format*' ;
output ;
end ;
%end ;
run;
%if %length(w_label)=0 %then %let w_label = &maxlab;
%if &maxlab < &w_label %then %let w_label = &maxlab;
*-------------------------------------------------------------------;
* The number of observations in the data set VARS equals the number ;
* of variables in the data set being documented. Also in that ;
* data set is the number of observations. Get that information ;
* about the data set and assign it and other known data to macro ;
* variables to be used later as a header. ;
*-------------------------------------------------------------------;
%if &sysver > 6.09 %then %do;
data _null_ ;
set sashelp.vslib ;
if libname = upcase("&libname")
then do ;
call symput('path',trim(left(path))) ;
stop ;
end ;
run ;
%end;
%else %let path=&libname;
data header ;
length heading $25 data $80 ;
set vars point=_n_ nobs=numvars ;
%headout(Host Path or Library Name, upcase("&path")) ;
%headout(SAS Data Set Name, upcase("&dsn"));
%headout(SAS Data Engine, engine) ;
%headout(Number of Variables, left(put(numvars,comma5.))) ;
%headout(Number of Observations, left(put(nobs,comma9.)));
stop ;
run ;
proc report data=header nowd headskip ;
column heading data ;
define heading / display ' ' ;
define data / display ' ' width=40 flow ;
run ;
*-------------------------------------------------------------------;
* Now use PROC REPORT to print out the data. ;
*-------------------------------------------------------------------;
%let print = ;
proc report data = &out nowindows headskip headline missing split='/' ;
column name typelen label format rawvalue fmtlabel ;
define name / Group width=8 'Variable/Name' ;
define typelen / Group width=9 'Type:/Length' left ;
define label / Group width=&w_label 'Label'
%if &maxfmts < 40 %then flow ;;
define format / Group width=10 'Format/Name' ;
define rawvalue / group 'Raw/Value' width=&w_raw
%if %upcase(&addvalue) eq NO %then noprint ;
;
define fmtlabel / group width=&w_format 'Possible/Values' ;
break after name / suppress skip ;
run ;
options notes;
%mend ;
%macro headout(heading, data);
heading="&heading"; data=&data;
output;
%mend;