-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSAScode_PJT1.sas
222 lines (186 loc) · 5.71 KB
/
SAScode_PJT1.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/*Load data*/
PROC IMPORT OUT= WORK.kickstarter
DATAFILE= 'C:\Users\exn121330\HW 1 Data\DATA.csv'
DBMS=CSV REPLACE;
GETNAMES=YES;
DATAROW=2;
proc print data=WORK.kickstarter;
title1 'Kickstarter Projects';
PROC CONTENTS;
RUN;
/*Analysis*/
/*General Statistical Analysis and Percents*/
proc sql;
create table scsl_fld_cncl_prjcts as
select *
from work.kickstarter
where state = 'successful' OR state = 'failed' OR state = 'canceled';
proc means data= scsl_fld_cncl_prjcts n mean median stddev min max p25 p75 maxdec= 2;
var USD_pledged_real USD_goal_real;
title 'Summary Statistics';
run;
proc tabulate data= scsl_fld_cncl_prjcts;
class Main_Category State;
var USD_pledged_real USD_goal_real;
table Main_Category,State*(USD_pledged_real USD_goal_real)*(N Mean Median StdDev Min Max p25 p75);
title 'Summary Statistics';
run;
proc sql;
create table categories_state as
select Main_Category, COUNT(Main_Category) as Projects, State
from scsl_fld_cncl_prjcts
group by Main_Category, State;
/*Pledged Real Amounts*/
proc sql;
create table categories_pledged as
select Main_Category, SUM(USD_pledged_real) as Total_Pledged_Real, State
from scsl_fld_cncl_prjcts
group by Main_Category, State;
proc sql;
create table pledged_success as
select Main_Category, Total_Pledged_Real, State
from categories_pledged
where state = 'successful';
data pledged_success_fee;
set pledged_suc_sort;
Fee_on_Funds_Raised = Total_Pledged_Real * .05;
run;
/*Successful projects*/
proc sql;
create table successful_projects as
select *
from work.kickstarter
where state= 'successful';
proc sql;
create table successful_projects_count as
select COUNT(*)
from work.kickstarter
where state= 'successful';
proc means data= successful_projects n mean median stddev min max p25 p75 maxdec= 2;
var USD_pledged_real USD_goal_real;
title 'Summary Statistics of Successful Projects';
run;
/*Failed projects*/
proc sql;
create table failed_projects as
select *
from work.kickstarter
where state= 'failed';
proc sql;
create table failed_projects_count as
select COUNT(*)
from work.kickstarter
where state= 'failed';
proc means data= failed_projects n mean median stddev min max p25 p75 maxdec= 2;
var USD_pledged_real USD_goal_real;
title 'Summary Statistics of Failed Projects';
run;
/*Canceled projects*/
proc sql;
create table canceled_projects as
select *
from work.kickstarter
where state= 'canceled';
proc sql;
create table canceled_projects_count as
select COUNT(*)
from work.kickstarter
where state= 'canceled';
proc means data= canceled_projects n mean median stddev min max p25 p75 maxdec= 2;
var USD_pledged_real USD_goal_real;
title 'Summary Statistics of Canceled Projects';
run;
/*Year Variable and 2014-2018 Data Sets*/
data prjct_yrs;
set scsl_fld_cncl_prjcts;
Deadline_Year = year(deadline);
run;
proc sql;
create table scsl_fld_cncl_prjcts_yrs as
select *
from prjct_yrs
where Deadline_Year = 2014 OR Deadline_Year = 2015 OR Deadline_Year = 2016 OR Deadline_Year = 2017 OR Deadline_Year = 2018;
/*Historgrams and Normality*/
proc sgplot data= scsl_fld_cncl_prjcts_yrs;
histogram usd_pledged_real / binstart = 0 binwidth = 200000 ;
density usd_pledged_real / type = kernel;
density usd_pledged_real;
title 'USD Pledged Real';
run;
proc sgplot data= scsl_fld_cncl_prjcts_yrs;
histogram usd_goal_real / binstart = 0 binwidth = 1000000 ;
density usd_goal_real / type = kernel;
density usd_goal_real;
title 'USD Goal Real';
run;
data usd_project_success;
set scsl_fld_cncl_prjcts_yrs;
Pledged_Goal_Real_Diff = usd_pledged_real - usd_goal_real;
run;
proc sgplot data= usd_project_success;
histogram Pledged_Goal_Real_Diff ;
density Pledged_Goal_Real_Diff / type = kernel;
density Pledged_Goal_Real_Diff;
title 'USD Goal Real';
run;
/*Tests for Normality*/
proc univariate normal plot data= usd_project_success alpha=0.05;
var usd_pledged_real;
title 'USD Pledged Real Normality Test';
run;
proc univariate normal plot data= usd_project_success alpha=0.05;
var usd_goal_real;
title 'USD Pledged Real Normality Test';
run;
proc univariate normal plot data= usd_project_success alpha=0.05;
var Pledged_Goal_Real_Diff;
title 'USD Pledged Real Normality Test';
run;
/*Correlation and Causation*/
data new_launch;
set usd_project_success;
launch_date = datepart(launched);
format launch_date mmddyy10.;
run;
data l_d_duration;
set new_launch;
date1=launch_date;
date2=deadline;
days=intck('day', date1, date2);
put days=;
run;
proc corr data=l_d_duration spearman;
var usd_goal_real usd_pledged_real backers days;
title 'Study Spearman Correlation Test';
run;
/*Duration averages*/
proc sql;
create table prjcts_duration as
select State, COUNT(ID) as Number_of_Projects, SUM(days) as Total_Days_Btwn_Launch_Deadline, AVG(days) as Average_Duration
from l_d_duration
group by State;
/*Failed and Successful Comparison and T-test*/
proc sort data= l_d_duration;
by Deadline_Year state;
run;
proc sql;
create table fail_success_pjcts as
select *
from l_d_duration
where state = 'failed' OR state = 'successful';
proc means data= fail_success_pjcts mean maxdec= 2 noprint;
by Deadline_Year state;
var usd_pledged_real usd_goal_real;
output out=means2
mean= AvgPledgedReal AvgGoalReal;
run;
proc sgpanel data= means2;
panelby state;
series x=Deadline_Year y=AvgPledgedReal;
series x=Deadline_Year y=AvgGoalReal;
title 'Average USD Goal Real and USD Pledged Real for Failed and Successful Projects Over Time';
run;
proc ttest data=means2 sides=L alpha=0.05;
class state;
var AvgGoalReal;
run;