diff --git a/Shayan_Ishaq_karachi_R_Assigment2.R b/Shayan_Ishaq_karachi_R_Assigment2.R
new file mode 100644
index 0000000..d8b2dbc
--- /dev/null
+++ b/Shayan_Ishaq_karachi_R_Assigment2.R
@@ -0,0 +1,155 @@
+library(dplyr)
+library(readr)
+library(tidyr)
+
+# Q1: load the hospital data and make Age / TotalCharges numeric ----
+
+# NOTE(review): machine-specific absolute path - confirm before running elsewhere.
+Datasethosp <- read.csv(
+  "E:\\DIH\\hospitaldata.csv",
+  stringsAsFactors = FALSE,
+  strip.white = TRUE
+)
+# Remove every "." from the column names.
+names(Datasethosp) <- gsub("\\.", "", names(Datasethosp))
+df <- tbl_df(Datasethosp)
+names(Datasethosp)
+# View() opens a data viewer, so it is only called in an interactive session.
+if (interactive()) {
+  View(df)
+}
+
+# Entries that are not numbers become NA and stay NA: recoding them to 0 would
+# count every missing value as a real zero in the statistics below.
+df$Age <- suppressWarnings(as.numeric(df$Age))
+df$TotalCharges <- suppressWarnings(as.numeric(df$TotalCharges))
+
+# Q2: weekday of the date(s) with the most rows ----
+
+class(df$Date)
+df$Date <- as.Date(strptime(df$Date, "%a, %B %d, %Y"))
+# table() has one cell per distinct date, so positions in it cannot be used to
+# index the rows of df$Date; the busiest date(s) are read from the cell names.
+date_counts <- table(df$Date)
+busiest_dates <- as.Date(names(date_counts)[date_counts == max(date_counts)])
+weekdays(busiest_dates)
+
+# Q3: mean age over the rows that have an age ----
+
+v_age <- as.numeric(df$Age)
+v_age
+mean(v_age, na.rm = TRUE)
+
+# Q4: rows with Age <= 12 ----
+
+X <- select(df, Age)
+# Row selection needs a logical vector, so the mask is built from the numeric
+# Age column rather than from the one-column table X.
+is_child <- !is.na(df$Age) & df$Age <= 12
+top_counts <- Datasethosp[is_child, ]
+
+# Q5: most frequent procedure for each sex ----
+
+# Sex is upper-cased so "f" and "F" form one group; other codes are dropped.
+df %>%
+  mutate(Sex = toupper(Sex)) %>%
+  filter(Sex %in% c("F", "M")) %>%
+  count(Sex, Procedure) %>%
+  group_by(Sex) %>%
+  slice(which.max(n)) %>%
+  ungroup()
+
+# Q6: consulting doctor (nursing staff excluded) with the highest receipts ----
+
+x <- df %>%
+  filter(ConsultingDoctor != "Nursing Staff", !is.na(AmountReceived)) %>%
+  group_by(ConsultingDoctor) %>%
+  summarize(tot_con_doc_wise_amt = sum(AmountReceived)) %>%
+  filter(tot_con_doc_wise_amt == max(tot_con_doc_wise_amt))
+
+# Q7: procedure with the highest receipts ----
+
+# na.rm = TRUE sums the amounts that are present; without it one missing
+# amount turns a procedure's total into NA and removes it from the ranking.
+x <- df %>%
+  group_by(Procedure) %>%
+  summarize(tot_pro_wise_amt = sum(AmountReceived, na.rm = TRUE)) %>%
+  filter(tot_pro_wise_amt == max(tot_pro_wise_amt))
+
+# Q8: most frequent visit time ----
+
+x <- df %>%
+  filter(!is.na(Time), !Time %in% c("-", "")) %>%
+  group_by(Time) %>%
+  summarize(time_wise_cnt = n()) %>%
+  filter(time_wise_cnt == max(time_wise_cnt))
+
+# Q9: not answered.
+
+# Q10: number of ids that appear in more than one row ----
+
+x <- df %>%
+  group_by(id) %>%
+  summarize(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt > 1) %>%
+  summarize(tot_rep_vis = n())
+
+# Q11: ids that appear in more than one row, most frequent first ----
+
+x <- df %>%
+  group_by(id) %>%
+  summarize(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt > 1) %>%
+  arrange(desc(pat_wise_cnt))
+
+# Q12: id / procedure pairs that appear more than once ----
+
+x <- df %>%
+  group_by(id, Procedure) %>%
+  summarize(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt > 1) %>%
+  arrange(id)
+
+# Q13: mean age for each sex ----
+
+# Age is already numeric, so it is averaged directly (parse_number() is meant
+# for character input).
+x <- df %>%
+  mutate(Sex = toupper(Sex)) %>%
+  filter(Sex %in% c("F", "M"), !is.na(Age)) %>%
+  group_by(Sex) %>%
+  summarize(M_mean = mean(Age))
+
+# Q14: sum of AmountBalance ----
+
+# Rows holding "-" are removed before parsing so parse_number() does not report
+# a failure for each of them; na.rm covers values that still parse to NA.
+x <- df %>%
+  filter(!is.na(AmountBalance), AmountBalance != "-") %>%
+  summarize(M_mean = sum(parse_number(AmountBalance), na.rm = TRUE))
+
+# Q15: total receipts for consultations ----
+
+x <- df %>%
+  filter(Procedure == "Consultation", !is.na(AmountReceived)) %>%
+  group_by(Procedure) %>%
+  summarize(tot_pro_wise_amt = sum(AmountReceived))
+
+# Q16: correlation between age and total charges, complete pairs only ----
+
+cor(df$Age, df$TotalCharges, use = "complete.obs")
+
+# Q17: most frequent age ----
+
+x <- df %>%
+  filter(!is.na(Age)) %>%
+  group_by(Age) %>%
+  summarize(Age_wise_cnt = n()) %>%
+  filter(Age_wise_cnt == max(Age_wise_cnt))
+
+# Q18: total receipts for the "X Ray" and "Scalling" procedures ----
+
+x <- df %>%
+  filter(Procedure %in% c("X Ray", "Scalling"), !is.na(AmountReceived)) %>%
+  group_by(Procedure) %>%
+  summarize(proc_wise_tot = sum(AmountReceived))
diff --git a/Shayan_Ishaq_karachi_R_Assigment2.html b/Shayan_Ishaq_karachi_R_Assigment2.html
new file mode 100644
index 0000000..38e5229
--- /dev/null
+++ b/Shayan_Ishaq_karachi_R_Assigment2.html
@@ -0,0 +1,338 @@
+
+
+
+
+
+ + + + + + + + + +library(dplyr)
+##
+## Attaching package: 'dplyr'
+## The following objects are masked from 'package:stats':
+##
+## filter, lag
+## The following objects are masked from 'package:base':
+##
+## intersect, setdiff, setequal, union
+library(readr)
+library(tidyr)
+
+
+
+#Q1: read the hospital CSV, strip dots from the column names, and coerce Age and TotalCharges to numeric.
+# NOTE(review): F and T are ordinary variables that can be reassigned; FALSE and TRUE are the safe spellings.
+Datasethosp <- read.csv("E:\\DIH\\hospitaldata.csv", stringsAsFactors = F, strip.white = T)
+names(Datasethosp) <- gsub("\\.", "", names(Datasethosp)) # remove every "." from the column names
+df <- tbl_df(Datasethosp) # table wrapper used by the pipelines in the later questions
+names(Datasethosp)
+## [1] "Date" "id" "Time"
+## [4] "Age" "Sex" "ConsultingDoctor"
+## [7] "Specialty" "Procedure" "TotalCharges"
+## [10] "AmountReceived" "AmountBalance" "AmountReceivedBy"
+## [13] "AmountinHospital" "ReceptionistName" "NextApt"
+View(df)
+# Values that as.numeric() cannot convert become NA (see the warnings) and are then overwritten with 0.
+df$Age <-as.numeric(df$Age)
+## Warning: NAs introduced by coercion
+df[is.na(df$Age),"Age"]<-0 # NOTE(review): from here on 0 also stands for "age missing" in every use of Age
+df$TotalCharges <-as.numeric(df$TotalCharges)
+## Warning: NAs introduced by coercion
+df[is.na(df$TotalCharges),"TotalCharges"]<-0 # same placeholder for unparseable charges
+
+#Q2: parse Date and print the weekday of the entry selected through the per-date row counts.
+# NOTE(review): which() yields positions within table(df$Date) (one cell per distinct date) but they are used to index the rows of df$Date - confirm the intended date is picked.
+class(df$Date)
+## [1] "character"
+df$Date <- as.Date(strptime(df$Date, "%a, %B %d, %Y")) # format: abbreviated weekday, full month name, day, 4-digit year; %a and %B are locale-dependent
+weekdays(df$Date[which(table(df$Date) == max(table(df$Date)))])
+## [1] "Monday"
+#Q3: print the Age vector and its mean.
+# NOTE(review): Q1 already replaced missing ages with 0, so those zeros are part of the mean printed below.
+v_age <- as.numeric(df$Age)
+v_age[which(is.na(as.numeric(as.character(v_age))))]<-0 # sets any remaining NA to 0; Age holds no NA after Q1
+v_age
+## [1] 40 26 30 40 27 40 43 28 2 40 32 28 76 75 36 42 23 48 25 50 60 0 0
+## [24] 26 27 0 57 0 39 6 26 60 26 5 40 9 0 30 40 29 39 50 50 50 60 29
+## [47] 48 45 42 29 26 34 50 45 30 40 40 29 27 23 60 52 21 32 58 0 0 0 26
+## [70] 40 29 33 30 28 10 26 19 53 30 10 0 55 26 30 0 0 0 0 0 9 0 28
+## [93] 47 49 19 31 7 26 8 28 17 54 35 45 30 30 23 27 6 2 30 32 25 80 70
+## [116] 20 23 60 50 30 13 30 13 13 39 28 28 17 27 57 6 18 25 50 0 19 39 6
+## [139] 40 39 20 0 14 0 0 24 3 23 29 55 30 0 30 3 0 13 10 30 45 3 38
+## [162] 20 2 3 0 23 26 35 22 65 45 21 30 64 40 52 55 54 30 53 0 38 30 17
+## [185] 3 17 0 0 30 17 26 0 45 17 22 30 17 38 17 34 28 78 56 53 21 17 0
+## [208] 78 3 76 9 32 0 0 24 3 76 0 39 30 76 45
+mean(v_age)
+## [1] 28.31081
+#Q4: rows of Datasethosp whose (numeric) age is 12 or less.
+
+X <- select(df, Age)
+# filter() expects a logical vector; comparing the one-column table X with 12
+# does not give one, so the mask is built from the numeric Age column itself.
+# NOTE(review): Q1 stores missing ages as 0, so this also keeps rows whose age
+# is unknown - confirm whether they should be excluded.
+top_counts <- filter(Datasethosp, df$Age <= 12)
+
+
+
+#Q5: most frequent Procedure for each Sex value. NOTE(review): the output lists a blank value, "-", "f" and "F" as separate Sex groups.
+df%>%
+ count(Sex, Procedure) %>% # rows per Sex/Procedure pair, stored in column n
+ slice(which.max(n)) # the output reports "Groups: Sex", so this keeps the top pair of every Sex group
+## Source: local data frame [5 x 3]
+## Groups: Sex [5]
+##
+## Sex Procedure n
+## <chr> <chr> <int>
+## 1 Pharmacy 10
+## 2 - Consultation 1
+## 3 f Consultation 1
+## 4 F Consultation 45
+## 5 M Consultation 37
+# Qs : 6 - consulting doctor (nursing staff excluded) with the largest
+# total of AmountReceived; rows without an amount are ignored
+x <- df %>%
+  filter(
+    !is.na(AmountReceived),
+    ConsultingDoctor != "Nursing Staff"
+  ) %>%
+  group_by(ConsultingDoctor) %>%
+  summarise(tot_con_doc_wise_amt = sum(AmountReceived)) %>%
+  filter(tot_con_doc_wise_amt == max(tot_con_doc_wise_amt))
+
+
+# Qs : 7 - procedure with the largest total of AmountReceived
+# na.rm = TRUE sums the amounts that are present; without it a single missing
+# amount turns the procedure's total into NA and removes it from the ranking.
+x <- df %>%
+  group_by(Procedure) %>%
+  summarize(tot_pro_wise_amt = sum(AmountReceived, na.rm = TRUE)) %>%
+  filter(tot_pro_wise_amt == max(tot_pro_wise_amt))
+
+
+# Qs : 8 - visit time(s) that occur most often; missing, "-" and empty
+# times are left out of the count
+x <- df %>%
+  filter(!(is.na(Time) | Time %in% c("-", ""))) %>%
+  group_by(Time) %>%
+  summarise(time_wise_cnt = n()) %>%
+  filter(time_wise_cnt == max(time_wise_cnt))
+
+
+#Qs : 9
+
+# Qs : 10 - how many ids appear in more than one row
+x <- df %>%
+  group_by(id) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2) %>%
+  summarise(tot_rep_vis = n())
+
+
+# Qs : 11 - ids that appear in more than one row, highest row count first
+x <- df %>%
+  group_by(id) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2) %>%
+  arrange(-pat_wise_cnt)
+
+
+# Qs : 12 - id / Procedure pairs that appear in more than one row,
+# ordered by id
+x <- df %>%
+  group_by(id, Procedure) %>%
+  summarise(pat_wise_cnt = n()) %>%
+  filter(pat_wise_cnt >= 2) %>%
+  arrange(id)
+
+
+# Qs : 13 - mean age for each sex
+# Age was made numeric in Q1, so it is averaged directly: parse_number() is
+# meant for character input and the comparison with '-' could never match.
+# Q1 stores missing ages as 0; those rows are left out so they do not pull
+# the mean down. Sex is upper-cased so "f" and "F" fall into one group, and
+# any code other than F or M (blank, "-") is dropped.
+# NOTE(review): Age > 0 also drops any genuine age of 0 - confirm.
+x <- df %>%
+  mutate(Sex = toupper(Sex)) %>%
+  filter(Sex %in% c("F", "M"), !is.na(Age), Age > 0) %>%
+  group_by(Sex) %>%
+  summarize(M_mean = mean(Age))
+
+# Qs : 14 - sum of AmountBalance, computed twice: on the bare vector first, then with a dplyr pipeline
+x <- df$AmountBalance
+x <- as.numeric(parse_number(x)) # entries that are just "-" fail to parse (see the warning below)
+## Warning: 211 parsing failures.
+## row col expected actual
+## 1 -- a number -
+## 2 -- a number -
+## 3 -- a number -
+## 4 -- a number -
+## 5 -- a number -
+## ... ... ........ ......
+## See problems(...) for more details.
+x <- as.numeric(x)
+x[which(is.na(as.numeric(as.character(x))))]<-0 # unparsed entries count as 0 in the sum
+x = sum(x)
+# NOTE(review): the assignment below overwrites the total computed above, so only the pipeline result remains in x.
+x <- df %>%
+ filter(!is.na(AmountBalance), AmountBalance!= '-') %>%
+ summarize(M_mean=sum(parse_number(AmountBalance))) # the column is named M_mean but holds a sum
+
+
+# Qs : 15 - total AmountReceived over the "Consultation" rows
+x <- df %>%
+  filter(
+    !is.na(AmountReceived),
+    AmountReceived != "-",
+    Procedure == "Consultation"
+  ) %>%
+  group_by(Procedure) %>%
+  summarise(tot_pro_wise_amt = sum(AmountReceived))
+
+# Qs : 16 - correlation between Age and TotalCharges (cor() with its default method); NOTE(review): Q1 replaced missing values in both columns with 0, and those zeros are part of this result
+cor(df$Age,df$TotalCharges)
+## [1] 0.07017853
+# Qs : 17 - age(s) that occur most often
+# Q1 recodes missing ages to 0, which makes 0 the largest group; rows with
+# that placeholder are excluded so the result is a real age. The former
+# comparisons with '-' and '' could never match on the numeric column.
+# NOTE(review): this also excludes any genuine age of 0 - confirm.
+x <- df %>%
+  filter(!is.na(Age), Age != 0) %>%
+  group_by(Age) %>%
+  summarize(Age_wise_cnt = n()) %>%
+  filter(Age_wise_cnt == max(Age_wise_cnt))
+
+
+# Qs : 18 - total AmountReceived for each of the "X Ray" and "Scalling"
+# procedures
+x <- df %>%
+  filter(
+    Procedure %in% c("X Ray", "Scalling"),
+    !is.na(AmountReceived),
+    AmountReceived != "-"
+  ) %>%
+  group_by(Procedure) %>%
+  summarise(proc_wise_tot = sum(AmountReceived))
+
+
+
+
+