Happiness

Like always, I’ll start this project too with loading the required libraries:

url <- "https://happiness-report.s3.amazonaws.com/2021/DataPanelWHR2021C2.xls"

# Download data to temporary file
httr::GET(url, write_disk(happiness.temp <- tempfile(fileext = ".xls")))

## Response [https://happiness-report.s3.amazonaws.com/2021/DataPanelWHR2021C2.xls]
##   Date: 2021-09-20 19:48
##   Status: 200
##   Content-Type: application/vnd.ms-excel
##   Size: 435 kB
## <ON DISK>  /var/folders/1k/wft4clgd33qdzxt_l3hlsfqh0000gn/T//Rtmp9OoYov/file276d52deb01f.xls

Before starting with our statistocs and regression models, lets just have a look at our data:

# Using read_excel to read it as dataframe
world_happiness <- read_excel(happiness.temp,
                    sheet = "Sheet1",
                    range = cell_cols("A:K")) %>% 
  janitor::clean_names()

# inspect dataframe
glimpse(world_happiness)

## Rows: 1,949
## Columns: 11
## $ country_name                     <chr> "Afghanistan", "Afghanistan", "Afghan…
## $ year                             <dbl> 2008, 2009, 2010, 2011, 2012, 2013, 2…
## $ life_ladder                      <dbl> 3.723590, 4.401778, 4.758381, 3.83171…
## $ log_gdp_per_capita               <dbl> 7.370100, 7.539972, 7.646709, 7.61953…
## $ social_support                   <dbl> 0.4506623, 0.5523084, 0.5390752, 0.52…
## $ healthy_life_expectancy_at_birth <dbl> 50.80, 51.20, 51.60, 51.92, 52.24, 52…
## $ freedom_to_make_life_choices     <dbl> 0.7181143, 0.6788964, 0.6001272, 0.49…
## $ generosity                       <dbl> 0.167639986, 0.190099195, 0.120589636…
## $ perceptions_of_corruption        <dbl> 0.8816863, 0.8500354, 0.7067661, 0.73…
## $ positive_affect                  <dbl> 0.5176372, 0.5839256, 0.6182655, 0.61…
## $ negative_affect                  <dbl> 0.2581955, 0.2370924, 0.2753238, 0.26…

I have used ggparis to analyze the correlation between dependent and explanatory variables:

# Scatterplot-correlation matrix for 2019 using GGally::ggpairs()
world_happiness_19 <- world_happiness %>%
  filter(year==2019) 

world_happiness_19 %>% 
  select(-c(country_name,year)) %>% 
  ggpairs(alpha = 0.3)

# Summary statistics for life_ladder, a measure of happiness score
world_happiness_19 %>% 
  select(life_ladder) %>% 
  skim()

Table 1: Data summary
Name	Piped data
Number of rows	144
Number of columns	1
_______________________
Column type frequency:
numeric	1
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
life_ladder	0	1	5.57	1.11	2.38	4.93	5.59	6.28	7.78	▁▃▇▇▃

Regression Models

# fit 3 models, with model1 being just the average happiness
model1 <- lm(life_ladder ~ 1, data = world_happiness_19)
mosaic::msummary(model1)

## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2

##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.57089    0.09266   60.12   <2e-16 ***
## 
## Residual standard error: 1.112 on 143 degrees of freedom

model2 <- lm(life_ladder ~ freedom_to_make_life_choices, data = world_happiness_19)
mosaic::msummary(model2)

##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    1.1235     0.5233   2.147   0.0335 *  
## freedom_to_make_life_choices   5.5982     0.6516   8.591 1.46e-14 ***
## 
## Residual standard error: 0.9072 on 141 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3436, Adjusted R-squared:  0.3389 
## F-statistic: 73.81 on 1 and 141 DF,  p-value: 1.456e-14

model3 <- lm(life_ladder ~ log_gdp_per_capita + freedom_to_make_life_choices, data = world_happiness_19)
mosaic::msummary(model3)

##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -2.71600    0.52309  -5.192 7.53e-07 ***
## log_gdp_per_capita            0.61363    0.05478  11.201  < 2e-16 ***
## freedom_to_make_life_choices  3.11083    0.53828   5.779 5.01e-08 ***
## 
## Residual standard error: 0.6643 on 134 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.6565, Adjusted R-squared:  0.6513 
## F-statistic:   128 on 2 and 134 DF,  p-value: < 2.2e-16

model4 <- lm(life_ladder ~ log_gdp_per_capita + social_support + freedom_to_make_life_choices, data = world_happiness_19)
mosaic::msummary(model4)

##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -2.6830     0.4939  -5.432 2.57e-07 ***
## log_gdp_per_capita             0.3909     0.0744   5.254 5.76e-07 ***
## social_support                 2.9973     0.7198   4.164 5.59e-05 ***
## freedom_to_make_life_choices   2.6539     0.5199   5.105 1.12e-06 ***
## 
## Residual standard error: 0.6272 on 133 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.6961, Adjusted R-squared:  0.6892 
## F-statistic: 101.5 on 3 and 133 DF,  p-value: < 2.2e-16

model5 <- lm(life_ladder ~ log_gdp_per_capita + healthy_life_expectancy_at_birth + social_support + freedom_to_make_life_choices, data = world_happiness_19)
mosaic::msummary(model5)

##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -3.41409    0.52970  -6.445 2.06e-09 ***
## log_gdp_per_capita                0.18468    0.10084   1.831 0.069337 .  
## healthy_life_expectancy_at_birth  0.04847    0.01543   3.142 0.002079 ** 
## social_support                    2.80793    0.70049   4.009 0.000102 ***
## freedom_to_make_life_choices      2.26060    0.51739   4.369 2.53e-05 ***
## 
## Residual standard error: 0.6075 on 130 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.7202, Adjusted R-squared:  0.7116 
## F-statistic: 83.65 on 4 and 130 DF,  p-value: < 2.2e-16

# if we want regression summary in tibble format, use broom::tidy() and broom::glance()
# get model summary and R2, etc for both models
model2 %>% broom::tidy(conf.int = TRUE)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	1.12	0.523	2.15	0.0335	0.089	2.16
freedom_to_make_life_choices	5.6	0.652	8.59	1.46e-14	4.31	6.89

model2 %>% broom::glance()

r.squared	adj.r.squared	sigma	statistic	p.value	df	logLik	AIC	BIC	deviance	df.residual	nobs
0.344	0.339	0.907	73.8	1.46e-14	1	-188	382	391	116	141	143

model3 %>% broom::tidy(conf.int = TRUE)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	-2.72	0.523	-5.19	7.53e-07	-3.75	-1.68
log_gdp_per_capita	0.614	0.0548	11.2	5.81e-21	0.505	0.722
freedom_to_make_life_choices	3.11	0.538	5.78	5.01e-08	2.05	4.18

model3 %>% broom::glance()

r.squared	adj.r.squared	sigma	statistic	p.value	df	logLik	AIC	BIC	deviance	df.residual	nobs
0.656	0.651	0.664	128	8.15e-32	2	-137	282	293	59.1	134	137

# produce summary table comparing models using huxtable::huxreg()
huxreg(model1, model2, model3, model4,model5,
       statistics = c('#observations' = 'nobs', 
                      'R squared' = 'r.squared', 
                      'Adj. R Squared' = 'adj.r.squared', 
                      'Residual SE' = 'sigma'), 
       bold_signif = 0.05, 
       stars = NULL
) %>% 
  set_caption('Comparison of models')

Comparison of models
	(1)	(2)	(3)	(4)	(5)
(Intercept)	5.571	1.123	-2.716	-2.683	-3.414
	(0.093)	(0.523)	(0.523)	(0.494)	(0.530)
freedom_to_make_life_choices		5.598	3.111	2.654	2.261
		(0.652)	(0.538)	(0.520)	(0.517)
log_gdp_per_capita			0.614	0.391	0.185
			(0.055)	(0.074)	(0.101)
social_support				2.997	2.808
				(0.720)	(0.700)
healthy_life_expectancy_at_birth					0.048
					(0.015)
#observations	144	143	137	137	135
R squared	0.000	0.344	0.656	0.696	0.720
Adj. R Squared	0.000	0.339	0.651	0.689	0.712
Residual SE	1.112	0.907	0.664	0.627	0.607

# Check whether any model has a VIF (Variance Inflation Factor) greater than 5
car::vif(model3)

##           log_gdp_per_capita freedom_to_make_life_choices 
##                     1.205776                     1.205776

car::vif(model4)

##           log_gdp_per_capita               social_support 
##                     2.495287                     2.568193 
## freedom_to_make_life_choices 
##                     1.261996

car::vif(model5)

##               log_gdp_per_capita healthy_life_expectancy_at_birth 
##                         4.822825                         3.896442 
##                   social_support     freedom_to_make_life_choices 
##                         2.589634                         1.327391