# Work on a copy of the built-in iris data set and blank out a few cells so
# that there is something to impute later on.
data(iris)
data <- iris
data[["Petal.Length"]][4:10] <- NA_real_ # column 3, rows 4-10
data[["Petal.Width"]][1:5] <- NA_real_ # column 4, rows 1-5
# For every numeric column, report how many values are missing (`_n`) and
# what share of the column that is (`_prop`, rounded to 3 decimals).
library(dplyr)
summarise(
  data,
  across(
    where(is.numeric),
    list(
      n = function(x) sum(is.na(x)),
      prop = function(x) {
        n_missing <- sum(is.na(x))
        round(n_missing / length(x), 3)
      }
    )
  )
)
# Inspect the rows that contain at least one missing value
data %>% filter(if_any(everything(), is.na))

# Explore the original Petal.Length values.
# NOTE(review): iris itself has no NAs, so na.rm makes no difference between
# the two mean() calls below; use data$Petal.Length instead if the goal is to
# show the effect of the injected NAs -- confirm intent.
iris$Petal.Length
mean(iris$Petal.Length, na.rm = TRUE)
mean(iris$Petal.Length)
summary(iris$Petal.Length)
hist(iris$Petal.Length, labels = TRUE, ylim = c(1, 40))
boxplot(x = iris$Petal.Length)

# Impute the missing values with mice using the "pmm" method
library(mice)
tempData <- mice(
  data,
  m = 5, # number of imputed data sets
  maxit = 50,
  method = "pmm", # spelled out; `meth` relied on partial argument matching
  seed = 500 # fixed seed so the imputations are reproducible
)
summary(tempData)
tempData$method
stripplot(x = tempData)

# Extract one completed data set; the second argument picks which of the
# m imputations to use (1, 2, 3, 4 or 5). Namespaced so that another attached
# package exporting complete() cannot mask it.
completedData <- mice::complete(tempData, 1)

# Original values vs. imputed values for the Petal.Length cells that were
# blanked out. This has to run after completedData exists.
iris[4:10, 3]
completedData[4:10, 3]
# Compare the mean and standard deviation of every numeric column before
# imputation (NAs dropped) and after imputation.

# Mean and sd of each numeric column of `df`, ignoring missing values.
# Columns are selected with where(is.numeric); passing the bare predicate to
# across() is deprecated in tidyselect.
summarise_mean_sd <- function(df) {
  df %>%
    summarise(across(where(is.numeric), list(
      mean = ~ mean(., na.rm = TRUE),
      sd = ~ sd(., na.rm = TRUE)
    )))
}

summarise_mean_sd(data)
summarise_mean_sd(completedData)
#