Smoking and Birth Weight

Let’s first load the libraries that we will need for this assignment:

Relationship between birth weight and mother’s smoking habit

# Read the smoking / birth-weight CSV from the project's data/ folder;
# here::here() builds the path relative to the project root
smoking_birth_weight <- here::here("data", "smoking_birth_weight.csv") %>%
  read_csv()
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   father_age = col_double(),
##   mother_age = col_double(),
##   mature = col_character(),
##   weeks = col_double(),
##   premie = col_character(),
##   visits = col_double(),
##   marital = col_character(),
##   gained = col_double(),
##   lowbirthweight = col_character(),
##   gender = col_character(),
##   habit = col_character(),
##   whitemom = col_character(),
##   weight_kg = col_double()
## )
Data summary
Name smoking_birth_weight
Number of rows 1000
Number of columns 13
_______________________
Column type frequency:
character 7
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
mature 0 1 10 11 0 2 0
premie 2 1 6 9 0 2 0
marital 1 1 7 11 0 2 0
lowbirthweight 0 1 3 7 0 2 0
gender 0 1 4 6 0 2 0
habit 1 1 6 9 0 2 0
whitemom 2 1 5 9 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
father_age 171 0.83 30.26 6.76 14.00 25.0 30.00 35.00 55.00 ▃▇▇▂▁
mother_age 0 1.00 27.00 6.21 13.00 22.0 27.00 32.00 50.00 ▃▇▇▂▁
weeks 2 1.00 38.33 2.93 20.00 37.0 39.00 40.00 45.00 ▁▁▁▇▂
visits 9 0.99 12.10 3.95 0.00 10.0 12.00 15.00 30.00 ▂▇▇▁▁
gained 27 0.97 30.33 14.24 0.00 20.0 30.00 38.00 85.00 ▂▇▅▁▁
weight_kg 0 1.00 3.22 0.69 0.45 2.9 3.32 3.66 5.33 ▁▁▇▇▁
# Tabulate mothers by smoking habit (most frequent first) together with each
# group's share of all rows; a missing habit is counted as its own row
smoking_birth_weight %>%
  count(habit, sort = TRUE) %>%
  mutate(proportion = prop.table(n))
habit n proportion
nonsmoker 873 0.873
smoker 126 0.126
NA 1 0.001
# Summary statistics (min, quartiles, max, mean, sd, n, missing) of birth
# weight in kg, computed separately for each smoking habit
favstats(weight_kg ~ habit, data = smoking_birth_weight)

habit min Q1 median Q3 max mean sd n missing
nonsmoker 0.454 2.92 3.32 3.66 5.33 3.24 0.689 873 0
smoker 0.767 2.76 3.21 3.51 4.17 3.1 0.629 126 0
The mean difference in birth weight is about 143 g (3.2435 kg − 3.1002 kg): babies of nonsmoking mothers are heavier on average.

Write down, by hand, the two confidence intervals.

# Two-sample t-test of mean birth weight (kg) by smoking habit; the printed
# result is a Welch test with a 95% confidence interval for the
# nonsmoker - smoker difference in means
t.test(weight_kg ~ habit, data = smoking_birth_weight)
## 
##  Welch Two Sample t-test
## 
## data:  weight_kg by habit
## t = 2.359, df = 171.32, p-value = 0.01945
## alternative hypothesis: true difference in means between group nonsmoker and group smoker is not equal to 0
## 95 percent confidence interval:
##  0.02338629 0.26312627
## sample estimates:
## mean in group nonsmoker    mean in group smoker 
##                3.243500                3.100243
# Pairwise plot matrix of birth weight against smoking habit; rows with a
# missing habit are removed so NA is not drawn as an extra group
smoking_birth_weight %>%
  select(weight_kg, habit) %>%
  filter(!is.na(habit)) %>%
  ggpairs() +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Model building

# Overall summary statistics of birth weight (kg) across all rows,
# ignoring smoking habit
favstats(~weight_kg, data = smoking_birth_weight)
min Q1 median Q3 max mean sd n missing
0.454 2.9 3.32 3.66 5.33 3.22 0.685 1000 0
# Baseline: intercept-only model, so the fitted intercept is the overall
# mean birth weight
model1 <- lm(
  formula = weight_kg ~ 1,
  data = smoking_birth_weight
)
msummary(model1)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.22385    0.02166   148.8   <2e-16 ***
## 
## Residual standard error: 0.685 on 999 degrees of freedom
# Birth weight explained by length of pregnancy (weeks) only
model2 <- lm(
  formula = weight_kg ~ weeks,
  data = smoking_birth_weight
)
msummary(model2)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.767263   0.210948  -13.12   <2e-16 ***
## weeks        0.156325   0.005487   28.49   <2e-16 ***
## 
## Residual standard error: 0.5079 on 996 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.449,  Adjusted R-squared:  0.4485 
## F-statistic: 811.7 on 1 and 996 DF,  p-value: < 2.2e-16
# Add the mother's marital status to the weeks-only model
model3 <- lm(
  formula = weight_kg ~ weeks + marital,
  data = smoking_birth_weight
)
msummary(model3)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -2.795227   0.208616 -13.399  < 2e-16 ***
## weeks               0.154483   0.005437  28.413  < 2e-16 ***
## maritalnot married  0.160758   0.032712   4.914 1.04e-06 ***
## 
## Residual standard error: 0.5021 on 995 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4621, Adjusted R-squared:  0.461 
## F-statistic: 427.4 on 2 and 995 DF,  p-value: < 2.2e-16
# Add smoking habit, the variable of interest, on top of weeks and marital
model4 <- lm(
  formula = weight_kg ~ weeks + marital + habit,
  data = smoking_birth_weight
)
msummary(model4)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -2.784485   0.207877  -13.39  < 2e-16 ***
## weeks               0.154825   0.005418   28.57  < 2e-16 ***
## maritalnot married  0.150470   0.032783    4.59    5e-06 ***
## habitsmoker        -0.139085   0.047961   -2.90  0.00381 ** 
## 
## Residual standard error: 0.5002 on 994 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.4666, Adjusted R-squared:  0.465 
## F-statistic: 289.8 on 3 and 994 DF,  p-value: < 2.2e-16
# Extend model4 with the mother's race indicator, the baby's gender and the
# weight gained during pregnancy
model5 <- lm(
  formula = weight_kg ~ weeks + marital + habit + whitemom + gender + gained,
  data = smoking_birth_weight
)
msummary(model5)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -2.833996   0.207983 -13.626  < 2e-16 ***
## weeks               0.149385   0.005430  27.509  < 2e-16 ***
## maritalnot married  0.120372   0.034381   3.501 0.000485 ***
## habitsmoker        -0.176535   0.047560  -3.712 0.000218 ***
## whitemomwhite       0.096307   0.036921   2.608 0.009236 ** 
## gendermale          0.171796   0.031313   5.486 5.24e-08 ***
## gained              0.004203   0.001104   3.809 0.000149 ***
## 
## Residual standard error: 0.4861 on 963 degrees of freedom
##   (30 observations deleted due to missingness)
## Multiple R-squared:  0.4869, Adjusted R-squared:  0.4837 
## F-statistic: 152.3 on 6 and 963 DF,  p-value: < 2.2e-16
# "Kitchen sink" model: regress birth weight on every other column
# NOTE(review): `.` also pulls in lowbirthweight, which looks like a
# dichotomised version of the response weight_kg (and premie / mature look
# derived from weeks / mother_age) -- confirm; if so, these should be
# excluded before interpreting the coefficients or the jump in R squared
model6 <- lm(
  formula = weight_kg ~ .,
  data = smoking_birth_weight
)
msummary(model6)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -1.1422671  0.3381352  -3.378 0.000766 ***
## father_age             0.0038255  0.0035006   1.093 0.274812    
## mother_age             0.0001432  0.0045862   0.031 0.975106    
## matureyounger mom     -0.0031005  0.0551883  -0.056 0.955212    
## weeks                  0.0782526  0.0082420   9.494  < 2e-16 ***
## premiepremie          -0.0200670  0.0653017  -0.307 0.758698    
## visits                 0.0004352  0.0040427   0.108 0.914306    
## maritalnot married     0.0577704  0.0381095   1.516 0.129945    
## gained                 0.0031780  0.0010612   2.995 0.002834 ** 
## lowbirthweightnot low  1.0853530  0.0648699  16.731  < 2e-16 ***
## gendermale             0.1726791  0.0296218   5.829 8.11e-09 ***
## habitsmoker           -0.0971133  0.0484068  -2.006 0.045177 *  
## whitemomwhite          0.1108407  0.0364514   3.041 0.002438 ** 
## 
## Residual standard error: 0.4143 on 787 degrees of freedom
##   (200 observations deleted due to missingness)
## Multiple R-squared:  0.6049, Adjusted R-squared:  0.5989 
## F-statistic: 100.4 on 12 and 787 DF,  p-value: < 2.2e-16
# Side-by-side regression table for the six models, reporting sample size,
# R squared, adjusted R squared and residual SE; coefficients significant at
# the 5% level are shown in bold and significance stars are switched off.
# NOTE(review): the models are fitted on different numbers of rows because
# of missing values, so the fit statistics are not strictly comparable
huxreg(
  model1, model2, model3, model4, model5, model6,
  statistics = c(
    "#observations" = "nobs",
    "R squared" = "r.squared",
    "Adj. R Squared" = "adj.r.squared",
    "Residual SE" = "sigma"
  ),
  bold_signif = 0.05,
  stars = NULL
) %>%
  set_caption(value = "Comparison of models")
Comparison of models
(1)(2)(3)(4)(5)(6)
(Intercept)3.224 -2.767 -2.795 -2.784 -2.834 -1.142 
(0.022)(0.211)(0.209)(0.208)(0.208)(0.338)
weeks     0.156 0.154 0.155 0.149 0.078 
     (0.005)(0.005)(0.005)(0.005)(0.008)
maritalnot married          0.161 0.150 0.120 0.058 
          (0.033)(0.033)(0.034)(0.038)
habitsmoker               -0.139 -0.177 -0.097 
               (0.048)(0.048)(0.048)
whitemomwhite                    0.096 0.111 
                    (0.037)(0.036)
gendermale                    0.172 0.173 
                    (0.031)(0.030)
gained                    0.004 0.003 
                    (0.001)(0.001)
father_age                         0.004 
                         (0.004)
mother_age                         0.000 
                         (0.005)
matureyounger mom                         -0.003 
                         (0.055)
premiepremie                         -0.020 
                         (0.065)
visits                         0.000 
                         (0.004)
lowbirthweightnot low                         1.085 
                         (0.065)
#observations1000     998     998     998     970     800     
R squared0.000 0.449 0.462 0.467 0.487 0.605 
Adj. R Squared0.000 0.448 0.461 0.465 0.484 0.599 
Residual SE0.685 0.508 0.502 0.500 0.486 0.414