diff --git a/Saif_KHI_R_Assingnment.R b/Saif_KHI_R_Assingnment.R
new file mode 100644
index 0000000..44947c4
--- /dev/null
+++ b/Saif_KHI_R_Assingnment.R
@@ -0,0 +1,128 @@
+#Question 1)
+library("ggplot2")
+library("lubridate")
+library("dplyr")
+library(tidyr)
+library("DataCombine")
+hospitaldata <- read.csv(file = "hospitaldata.csv")
+# Column names: remove every doubled dot, then turn each remaining dot into an underscore.
+names(hospitaldata) <- gsub(".", "_", gsub("..", "", names(hospitaldata), fixed = TRUE), fixed = TRUE)
+View(hospitaldata)
+
+# Strip every non-digit character from Age (e.g. "28M" becomes "28") and convert to numeric.
+hospitaldata$Age <- as.numeric(gsub("[^0-9]", "", hospitaldata$Age))
+
+# Question 2) Number of visits on each day of the week.
+hospitaldata$Date <- as.Date(strptime(hospitaldata$Date, "%a, %B %d, %Y"))
+# %a and %B match day/month names in the current locale; rows that fail to parse get an NA Date.
+Max_day <- hospitaldata %>%
+  mutate(Day = weekdays(Date)) %>%
+  group_by(Day) %>%
+  summarize(visits = n()) %>%
+  print
+ggplot(Max_day,aes(x=Day,y=visits))+geom_bar(stat="identity",fill="slateblue")+ggtitle("Visits per Weekday")+labs(x="Day",y="Visits")
+
+# Question 3) Average of Age across all rows, ignoring missing values.
+mean(hospitaldata[["Age"]], na.rm = TRUE)
+
+# Question 4) Count the rows whose Age is strictly between 1 and 13.
+# NOTE(review): Age > 1 leaves out one-year-olds; confirm the intended age bracket.
+child <- hospitaldata %>%
+  filter(Age > 1 & Age < 13) %>%
+  count() %>%
+  print
+# Question 5) Bar chart of visits per gender, filled by Procedure.
+# Normalise Sex to "F"/"M"; blanks, "-" and anything else become NA instead of extra categories.
+hospitaldata$Sex <- toupper(trimws(as.character(hospitaldata$Sex)))
+hospitaldata$Sex[!hospitaldata$Sex %in% c("F", "M")] <- NA
+qplot(data=hospitaldata, Sex, fill=Procedure)+ggtitle("Procedure vs Gender")+labs(x='Gender',y='Procedure')
+
+# Question 6) Histogram of TotalCharges filled by ConsultingDoctor. Go through as.character() first: as.numeric() on a factor returns level codes, not the amounts.
+qplot(data=hospitaldata, fill=ConsultingDoctor, as.numeric(as.character(TotalCharges)))+ggtitle("Highest Salary")+labs(x='TotalCharges',y='ConsultingDoctor')
+
+# Question 7) Histogram of TotalCharges filled by Procedure. Go through as.character() first: as.numeric() on a factor returns level codes, not the amounts.
+qplot(data=hospitaldata, as.numeric(as.character(TotalCharges)), fill=Procedure)+ggtitle("Procedure")+labs(x='TotalCharges',y='Procedure')
+
+# Question 8) Visits per hour of the day on the 24-hour clock, busiest hour first.
+hour_and_visits <- hospitaldata %>%
+  mutate(Hour = hour(hm(format(strptime(Time, "%I:%M %p"), "%H:%M")))) %>%
+  filter(!is.na(Hour)) %>%  # a missing or unparseable Time would otherwise be ranked as an NA "hour"
+  group_by(Hour) %>%
+  summarize(visits = n()) %>%
+  arrange(desc(visits)) %>%
+  print     # Hour is 0-23, so 13 means 1 PM
+
+#Question 9)
+
+
+# Question 10) Ids that appear on more than one row, with their visit counts, most frequent first.
+visitor_repeated <- hospitaldata %>%
+  group_by(id) %>%
+  summarise(visits = n()) %>%
+  filter(visits > 1) %>%
+  arrange(desc(visits)) %>%
+  print
+
+# Question 11) Ids that appear on more than one row, with their visit counts, most frequent first.
+# NOTE(review): this produces the same table as Question 10 and overwrites visitor_repeated; confirm what Question 11 should report.
+visitor_repeated <- hospitaldata %>%
+  count(id) %>%
+  rename(visits = n) %>%
+  filter(visits > 1) %>%
+  arrange(desc(visits)) %>%
+  print
+
+# Question 12) id/Procedure pairs that occur on more than one row.
+
+hospitaldata %>%
+  count(id, Procedure) %>%
+  # keep repeated pairs only, and leave out rows with no Procedure recorded
+  filter(n > 1, !is.na(Procedure)) %>%
+  select(id, Procedure)
+
+# Question 13) Median Age for each value of Sex.
+# na.rm = TRUE skips missing ages; a group with no ages at all reports NA.
+Age_median <- hospitaldata %>%
+  group_by(Sex) %>%
+  summarise(median(Age, na.rm = TRUE)) %>%
+  print
+
+# Question 14) Grand total of TotalCharges.
+# Entries that are not numbers turn into NA in the conversion and are skipped by the sum.
+hospitaldata[["TotalCharges"]] <- as.numeric(as.character(hospitaldata[["TotalCharges"]]))
+print(sum_of_Balance <- sum(hospitaldata[["TotalCharges"]], na.rm = TRUE))
+
+# Question 15) Sum of TotalCharges over the rows whose Procedure is 'Consultation'.
+hospitaldata$TotalCharges <- as.numeric(as.character(hospitaldata$TotalCharges))
+# Narrow to the one procedure first, then group so the result keeps a Procedure column.
+consult_amount <- hospitaldata %>%
+  filter(Procedure == 'Consultation') %>%
+  group_by(Procedure) %>%
+  summarise(sum(TotalCharges, na.rm = TRUE)) %>%
+  print
+
+# Question 16) Correlation of Age with TotalCharges over the rows where both are present.
+data <- select(hospitaldata, Age, TotalCharges)
+data <- filter(data, !is.na(Age) & !is.na(TotalCharges))
+# cor() uses the Pearson coefficient unless told otherwise.
+cor(data$Age, data$TotalCharges)
+
+# Question 17) Visits per Age value (rows without an Age left out), most common age first, then a bar chart.
+Number_of_visits_by_age <- hospitaldata %>%
+  filter(!is.na(Age)) %>%
+  group_by(Age) %>%
+  summarize(visits = n()) %>%
+  arrange(desc(visits)) %>%
+  print
+
+ggplot(data = Number_of_visits_by_age, aes(x = as.numeric(Age), y = visits)) +
+  geom_bar(stat = 'identity', fill = 'slate blue') + ggtitle("Number Of Visits By Age") + labs(x = 'Age', y = 'Visits')
+
+
+# Question 18) Combined TotalCharges for the "X Ray" and "Scalling" procedures.
+# "Scalling" is the literal this script matches; presumably that is how the data spells it - confirm before changing.
+sum(hospitaldata$TotalCharges[hospitaldata$Procedure %in% c("X Ray", "Scalling")], na.rm = TRUE)
+
+
+write.csv(hospitaldata, "~/hospitaldata.csv", row.names = FALSE)  # data frame first, then the path; with the path alone the string itself is printed as CSV and nothing is saved
+View(hospitaldata)  # opens the cleaned data in the interactive viewer
diff --git a/Saif_KHI_R_Assingnment.html b/Saif_KHI_R_Assingnment.html
new file mode 100644
index 0000000..0c80511
--- /dev/null
+++ b/Saif_KHI_R_Assingnment.html
@@ -0,0 +1,398 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#Question 1)
+library("ggplot2")
+library("lubridate")
+
## 
+## Attaching package: 'lubridate'
+
## The following object is masked from 'package:base':
+## 
+##     date
+
library("dplyr")
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:lubridate':
+## 
+##     intersect, setdiff, union
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
library(tidyr)
+library("DataCombine")
+hospitaldata <- read.csv("hospitaldata.csv")
+names(hospitaldata)<-gsub("..","",names(hospitaldata),fixed = TRUE)
+names(hospitaldata)<-gsub(".","_",names(hospitaldata),fixed = TRUE)
+View(hospitaldata)
+
+#removing character M from age i.e 28M
+hospitaldata$Age <- as.numeric(gsub("[^0-9]",'',hospitaldata$Age))
+
+#Question 2)
+hospitaldata$Date <- as.Date(strptime(hospitaldata$Date, "%a, %B %d, %Y"))
+
+Max_day <-  hospitaldata %>%
+  mutate(Day=weekdays(hospitaldata$Date),label=TRUE) %>%
+  group_by(Day) %>%
+  summarize(visits=length(Day)) %>%
+  print
+
## # A tibble: 7 × 2
+##         Day visits
+##       <chr>  <int>
+## 1    Friday     26
+## 2    Monday     51
+## 3  Saturday     20
+## 4    Sunday      7
+## 5  Thursday     33
+## 6   Tuesday     42
+## 7 Wednesday     43
+
ggplot(Max_day,aes(x=Day,y=visits))+geom_bar(stat="identity",fill="slateblue")+ggtitle("Visits per Weekday")+labs(x="Day",y="Visits")
+

+
#Question 3)
+mean(hospitaldata$Age, na.rm = TRUE)
+
## [1] 32.57216
+
#Question 4)
+child <- filter(hospitaldata, Age > 1 & Age < 13) %>%
+  select(-(Date:Time)) %>%
+  select(-(Sex:Next_Apt)) %>%
+  count() %>%
+  print
+
## # A tibble: 1 × 1
+##       n
+##   <int>
+## 1    24
+
#Question 5)
+
+hospitaldata$Sex <- gsub("f","F",hospitaldata$Sex)
+hospitaldata$Sex<-gsub("\\s|-",NA,hospitaldata$Sex)
+qplot(data=hospitaldata, Sex, fill=Procedure)+ggtitle("Procedure vs Gender")+labs(x='Gender',y='Procedure')
+

+
#Question 6)
+qplot(data=hospitaldata, fill=ConsultingDoctor, as.numeric(TotalCharges))+ggtitle("Highest Salary")+labs(x='TotalCharges',y='ConsultingDoctor')
+
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+

+
#Question 7)
+qplot(data=hospitaldata, as.numeric(TotalCharges), fill=Procedure)+ggtitle("Procedure")+labs(x='TotalCharges',y='Procedure')
+
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+

+
#Question 8)
+hour_and_visits <-  hospitaldata %>%
+  select(Time) %>%
+  mutate(Hour = hour(hm(format(strptime(hospitaldata$Time, "%I:%M %p"), "%H:%M")))) %>%
+  group_by(Hour) %>%
+  summarize(visits=length(Hour)) %>%
+  arrange(desc(visits)) %>%
+  print     # printing 13 the highest hour, i.e. actaully 1 AM/PM in 12 hour format
+
## # A tibble: 16 × 2
+##     Hour visits
+##    <dbl>  <int>
+## 1     13     29
+## 2     NA     29
+## 3     18     23
+## 4     12     20
+## 5     14     20
+## 6     15     20
+## 7     17     16
+## 8     16     15
+## 9     19     14
+## 10    11      9
+## 11    20      8
+## 12    10      6
+## 13    21      5
+## 14    22      4
+## 15     9      2
+## 16    23      2
+
#Question 9)
+
+
+#Question 10)
+visitor_repeated <- select(hospitaldata,id) %>%
+  group_by(id) %>%
+  summarise(visits=length(id)) %>%
+  arrange(desc(visits)) %>%
+  filter(visits > 1) %>%
+  print 
+
## # A tibble: 37 × 2
+##       id visits
+##    <int>  <int>
+## 1      1     12
+## 2     46      5
+## 3    122      5
+## 4     17      4
+## 5     94      4
+## 6    140      4
+## 7     45      3
+## 8     63      3
+## 9    101      3
+## 10   107      3
+## # ... with 27 more rows
+
#Question 11)
+
+visitor_repeated <- select(hospitaldata,id) %>%
+  group_by(id) %>%
+  summarise(visits=length(id)) %>%
+  arrange(desc(visits)) %>%
+  filter(visits > 1) %>%
+  print 
+
## # A tibble: 37 × 2
+##       id visits
+##    <int>  <int>
+## 1      1     12
+## 2     46      5
+## 3    122      5
+## 4     17      4
+## 5     94      4
+## 6    140      4
+## 7     45      3
+## 8     63      3
+## 9    101      3
+## 10   107      3
+## # ... with 27 more rows
+
#Question 12)
+
+hospitaldata %>% 
+  count(id, Procedure) %>%
+  slice(which(n>1))%>%
+  filter(!is.na(Procedure))%>%
+  select(id,Procedure)
+
## Source: local data frame [24 x 2]
+## Groups: id [23]
+## 
+##       id                      Procedure
+##    <int>                         <fctr>
+## 1      1                       Pharmacy
+## 2     12                 22 Unit Bridge
+## 3     13                   Consultation
+## 4     17                   Consultation
+## 5     17 RCT (4 teeth) Bridge (9 teeth)
+## 6     20                   Consultation
+## 7     25                   Consultation
+## 8     45                  R.P.D + Crown
+## 9     46                       Dressing
+## 10    63                   Consultation
+## # ... with 14 more rows
+
#Question 13)
+Age_median <- hospitaldata %>% 
+  select(Sex,Age) %>%
+  group_by(Sex) %>%
+  summarise(median(Age, na.rm = TRUE)) %>%
+  print
+
## # A tibble: 4 × 2
+##     Sex `median(Age, na.rm = TRUE)`
+##   <chr>                       <dbl>
+## 1                                NA
+## 2     F                          30
+## 3     M                          29
+## 4  <NA>                          NA
+
#Question 14)
+hospitaldata$TotalCharges<-as.numeric(as.character(hospitaldata$TotalCharges))
+
## Warning: NAs introduced by coercion
+
sum_of_Balance <- sum(hospitaldata$TotalCharges, na.rm = TRUE)
+print(sum_of_Balance)
+
## [1] 786350
+
#Question 15)
+hospitaldata$TotalCharges<-as.numeric(as.character(hospitaldata$TotalCharges))
+consult_amount <- hospitaldata%>%
+  select(Procedure,TotalCharges) %>%
+  group_by(Procedure) %>%
+  filter(Procedure == 'Consultation') %>%
+  summarise(sum(TotalCharges,na.rm=TRUE)) %>%
+  print
+
## # A tibble: 1 × 2
+##      Procedure `sum(TotalCharges, na.rm = TRUE)`
+##         <fctr>                             <dbl>
+## 1 Consultation                             83950
+
#Question 16)
+data <- hospitaldata %>%
+  select(Age,TotalCharges) %>%
+  filter(!is.na(Age),!is.na(TotalCharges))
+cor(data$Age,data$TotalCharges)
+
## [1] 0.02952065
+
#Question 17)
+Number_of_visits_by_age <- hospitaldata %>%
+  select(id,Age) %>%
+  group_by(Age) %>%
+  summarize(visits=length(Age)) %>%
+  arrange(desc(visits)) %>%
+  filter(!is.na(Age)) %>%
+  print
+
## # A tibble: 55 × 2
+##      Age visits
+##    <dbl>  <int>
+## 1     30     20
+## 2     26     11
+## 3     40     11
+## 4     17      9
+## 5     28      9
+## 6      3      7
+## 7     45      7
+## 8     50      7
+## 9     23      6
+## 10    29      6
+## # ... with 45 more rows
+
ggplot(data=Number_of_visits_by_age,aes(x=as.numeric(Age),y=visits))+geom_bar(stat='identity',fill='slate blue')+ggtitle("Number Of Visits By Age")+labs(x='Age',y='Visits')
+

+
#Question 18)
+
+sum(hospitaldata$TotalCharges[hospitaldata$Procedure=="X Ray" | hospitaldata$Procedure=="Scalling"],na.rm = TRUE)
+
## [1] 22300
+
write.csv("~/hospitaldata.csv", row.names = FALSE)
+
## "x"
+## "~/hospitaldata.csv"
+
View(hospitaldata)
+
+
+
+
+