13 HDDS indicator
In this section, we harmonize the variables needed to compute the Household Dietary Diversity Score (HDDS).
library(haven)
library(labelled) # for general functions to work with labelled data
library(tidyverse) # general wrangling
library(dplyr)
library(Hmisc)
library(gtsummary) # to demonstrate automatic use of variable labels in summary tables
library(readxl)
library(foreign)
library(sjPlot)
library(sjmisc)
library(sjlabelled) # for example efc data set with variable labels
library(stringr)
# Folders holding the raw Chad survey files and the harmonized outputs.
dir_input_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/input_data/Chad"
dir_output_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/output_data/Chad"

# Harmonization workbook, sheet "variables_harmonization".
Chad_Harmonization_variables <- read_excel(
  paste0(dir_input_data, "/Chad_Harmonization.xlsx"),
  sheet = "variables_harmonization"
)
#View(Chad_Harmonization_variables)

# Harmonization workbook, sheet "description": its `Data` column is used below
# as the file name to read and its `Name` column as the object name to create.
Chad_Harmonization_description <- read_excel(
  paste0(dir_input_data, "/Chad_Harmonization.xlsx"),
  sheet = "description"
)
#View(Chad_Harmonization_description)

lst_data <- Chad_Harmonization_description$Data
lst_test <- Chad_Harmonization_description$Name

# Read every listed SPSS file and bind it to the matching name in the current
# environment. seq_along() is used instead of 1:length() so that an empty
# description sheet results in zero iterations rather than iterating over
# c(1, 0) and failing on a missing file name.
for (i in seq_along(lst_data)) {
  assign(
    lst_test[i],
    read_sav(paste0(dir_input_data, "/", lst_data[i]))
  )
}
13.1 Data consistency Check
# Give every survey round a common `ID` column.
# Rounds whose identifier is stored in `@_id`:
Chad_baseline_2018$ID <- Chad_baseline_2018$`@_id`
Chad_ea_2019$ID <- Chad_ea_2019$`@_id`
# Rounds whose identifier is stored in `Identifiant`:
Chad_ea_2020$ID <- Chad_ea_2020$Identifiant
Chad_pdm_2020$ID <- Chad_pdm_2020$Identifiant
Chad_ea_2021$ID <- Chad_ea_2021$`@_id`
Chad_pdm_2021$ID <- Chad_pdm_2021$Identifiant
Chad_ea_2022$ID <- Chad_ea_2022$`@_id`
Chad_pdm_2023$ID <- Chad_pdm_2023$`@_id`
Chad_ea_2023$ID <- Chad_ea_2023$`@_id`
# PDM 2022 gets a running row number as its ID (presumably because the file
# has no identifier column -- TODO confirm). seq_len() is used instead of
# 1:nrow() so that a zero-row data set yields an empty ID vector rather than
# the length-2 vector c(1, 0), which cannot be assigned.
Chad_pdm_2022$ID <- seq_len(nrow(Chad_pdm_2022))
# Expose the respondent-consent answer of each round under the common column
# name `RESPConsent`.

# Rounds where the answer is stored under a round-specific name.
Chad_baseline_2018[["RESPConsent"]] <- Chad_baseline_2018$resultat_enquete
Chad_ea_2019[["RESPConsent"]] <- Chad_ea_2019$`@2.1.2Linterviewéavalidélanotedeconsentement`
Chad_ea_2020[["RESPConsent"]] <- Chad_ea_2020$q212_notevld
# No source column has been mapped yet for PDM 2020 and EA 2021:
#Chad_pdm_2020$RESPConsent <- Chad_pdm_2020
#Chad_ea_2021$RESPConsent <- Chad_ea_2021$`@_RESPConsent`
Chad_pdm_2021[["RESPConsent"]] <- Chad_pdm_2021$q212_notevld

# Rounds that already use the harmonized name; the column is assigned to
# itself so that every round has an explicit line here.
Chad_ea_2022[["RESPConsent"]] <- Chad_ea_2022$RESPConsent
Chad_pdm_2022[["RESPConsent"]] <- Chad_pdm_2022$RESPConsent
Chad_pdm_2023[["RESPConsent"]] <- Chad_pdm_2023$RESPConsent
Chad_ea_2023[["RESPConsent"]] <- Chad_ea_2023$RESPConsent
# Expose the first-level administrative unit of each round under the common
# column name `ADMIN1Name`.

# Rounds where it is stored under a round-specific name.
Chad_baseline_2018[["ADMIN1Name"]] <- Chad_baseline_2018$region
Chad_ea_2019[["ADMIN1Name"]] <- Chad_ea_2019$`@1.02NomdelaProvince`
Chad_ea_2020[["ADMIN1Name"]] <- Chad_ea_2020$province
Chad_pdm_2020[["ADMIN1Name"]] <- Chad_pdm_2020$q4_provinces
Chad_ea_2021[["ADMIN1Name"]] <- Chad_ea_2021$ADMIN1Name
Chad_pdm_2021[["ADMIN1Name"]] <- Chad_pdm_2021$province

# Rounds that already use the harmonized name; the column is assigned to
# itself so that every round has an explicit line here.
Chad_ea_2022[["ADMIN1Name"]] <- Chad_ea_2022$ADMIN1Name
Chad_pdm_2022[["ADMIN1Name"]] <- Chad_pdm_2022$ADMIN1Name
Chad_pdm_2023[["ADMIN1Name"]] <- Chad_pdm_2023$ADMIN1Name
Chad_ea_2023[["ADMIN1Name"]] <- Chad_ea_2023$ADMIN1Name
13.5 HDDS Check
# Reduce each round to its ID plus every column whose name starts with "HDDS".
# Only the 2022 and 2023 rounds are extracted; the inspection calls for the
# 2020 and 2021 rounds are kept below, commented out.

# Post-distribution monitoring (PDM) rounds.
df_Chad_pdm_2023 <- dplyr::select(Chad_pdm_2023, "ID", starts_with("HDDS"))
df_Chad_pdm_2022 <- dplyr::select(Chad_pdm_2022, "ID", starts_with("HDDS"))
# names(dplyr::select(Chad_pdm_2021, starts_with("HDDS")))
# names(dplyr::select(Chad_pdm_2020, starts_with("HDDS")))

# EA rounds.
df_Chad_ea_2023 <- dplyr::select(Chad_ea_2023, "ID", starts_with("HDDS"))
# The EA 2022 identifier is re-derived from `@_id` before the extract is taken.
Chad_ea_2022$ID <- Chad_ea_2022$`@_id`
df_Chad_ea_2022 <- dplyr::select(Chad_ea_2022, "ID", starts_with("HDDS"))
#
# names(dplyr::select(Chad_ea_2021, starts_with("HDDS")))
# names(dplyr::select(Chad_ea_2020, starts_with("HDDS")))
# Names of the individual HDDS question columns, taken from the EA 2022
# extract. The three derived columns (HDDS, HDDS_CH, HDDSCat_IPC) are dropped
# from the list so that they are not converted or relabelled below.
hdds_var <- df_Chad_ea_2022 %>%
  dplyr::select(starts_with("HDDS")) %>%
  names()
hdds_var <- setdiff(hdds_var, c("HDDS", "HDDS_CH", "HDDSCat_IPC"))

# For the 2023 rounds the three "...All" columns are left out of the
# conversion (presumably they are absent from the 2023 files -- TODO confirm).
hdds_var_2023 <- setdiff(
  hdds_var,
  c("HDDSVegAll", "HDDSFruitAll", "HDDSPrMeatAll")
)

# Convert the question columns to plain numeric. across(all_of(...)) replaces
# the superseded mutate_at(); like mutate_at(), it raises an error if a listed
# column is missing from the data.
df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(all_of(hdds_var), as.numeric))
df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(all_of(hdds_var), as.numeric))
df_Chad_ea_2023 <- df_Chad_ea_2023 %>%
  mutate(across(all_of(hdds_var_2023), as.numeric))
df_Chad_pdm_2023 <- df_Chad_pdm_2023 %>%
  mutate(across(all_of(hdds_var_2023), as.numeric))
# Recode the 2022 question columns to 0/1: in every column that contains at
# least one value equal to 2, the value 2 becomes 1, any other non-missing
# value becomes 0, and missing values stay missing. ID and the three derived
# columns are never recoded.
#
# na.rm = TRUE is required in the predicate: without it, any() returns NA for
# a column that has missing values and no 2, which is not a valid TRUE/FALSE
# selection result.
#
# NOTE(review): columns that contain no 2 at all are left untouched. If such a
# column uses the same coding as the recoded ones, its 1s will later be counted
# as "Oui" -- confirm this is intended.
not_recoded <- c("ID", "HDDS", "HDDS_CH", "HDDSCat_IPC")

selected_var <- df_Chad_ea_2022 %>%
  dplyr::select(where(~ any(. == 2, na.rm = TRUE))) %>%
  names()
df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(
    all_of(setdiff(selected_var, not_recoded)),
    ~ ifelse(is.na(.), NA, ifelse(. == 2, 1, 0))
  ))

selected_var <- df_Chad_pdm_2022 %>%
  dplyr::select(where(~ any(. == 2, na.rm = TRUE))) %>%
  names()
df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(
    all_of(setdiff(selected_var, not_recoded)),
    ~ ifelse(is.na(.), NA, ifelse(. == 2, 1, 0))
  ))
# Value labels attached to the HDDS question columns: 0 = "Non", 1 = "Oui".
label_map <- c(
  Non = 0,
  Oui = 1
)

# Turn every HDDS question column of the two 2022 extracts into a labelled
# vector carrying `label_map`. The column names come from the character vector
# `hdds_var`, so they are wrapped in all_of(): passing a bare external vector
# to across() is ambiguous (a column named `hdds_var` would take precedence)
# and is deprecated by tidyselect.
df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(
    all_of(hdds_var),
    ~ labelled::labelled(., label_map)
  ))
df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(
    all_of(hdds_var),
    ~ labelled::labelled(., label_map)
  ))
# Stack the four HDDS extracts (EA 2022, EA 2023, PDM 2022, PDM 2023, in that
# order) into a single data frame; rbind.fill() fills columns that are missing
# from a round with NA.
WFP_Chad_HDDS <- do.call(
  plyr::rbind.fill,
  list(
    df_Chad_ea_2022,
    df_Chad_ea_2023,
    df_Chad_pdm_2022,
    df_Chad_pdm_2023
  )
)
# Optional reduction to ID plus the question columns, currently disabled:
# WFP_Chad_HDDS <- dplyr::select(WFP_Chad_HDDS, "ID", hdds_var)

# Store the identifier as character and the score column HDDS as plain numeric.
WFP_Chad_HDDS[["ID"]] <- as.character(WFP_Chad_HDDS[["ID"]])
WFP_Chad_HDDS[["HDDS"]] <- as.numeric(WFP_Chad_HDDS[["HDDS"]])
# Calculate HDDS: first collapse the 16 questions into 12 food-group
# indicators. An indicator is 1 when at least one of its questions equals 1
# and 0 otherwise; because of the `TRUE ~ 0` fallback, a missing answer counts
# as 0, not as NA.
WFP_Chad_HDDS <- WFP_Chad_HDDS %>% mutate(
  HDDSStapCer_calc = case_when(HDDSStapCer == 1 ~ 1, TRUE ~ 0),
  HDDSStapRoot_calc = case_when(HDDSStapRoot == 1 ~ 1, TRUE ~ 0),
  HDDSVeg_calc = case_when(
    HDDSVegOrg == 1 | HDDSVegGre == 1 | HDDSVegOth == 1 ~ 1,
    TRUE ~ 0
  ),
  HDDSFruit_calc = case_when(
    HDDSFruitOrg == 1 | HDDSFruitOth == 1 ~ 1,
    TRUE ~ 0
  ),
  HDDSPrMeat_calc = case_when(
    HDDSPrMeatF == 1 | HDDSPrMeatO == 1 ~ 1,
    TRUE ~ 0
  ),
  HDDSPrEgg_calc = case_when(HDDSPrEgg == 1 ~ 1, TRUE ~ 0),
  HDDSPrFish_calc = case_when(HDDSPrFish == 1 ~ 1, TRUE ~ 0),
  HDDSPulse_calc = case_when(HDDSPulse == 1 ~ 1, TRUE ~ 0),
  HDDSDairy_calc = case_when(HDDSDairy == 1 ~ 1, TRUE ~ 0),
  HDDSFat_calc = case_when(HDDSFat == 1 ~ 1, TRUE ~ 0),
  HDDSSugar_calc = case_when(HDDSSugar == 1 ~ 1, TRUE ~ 0),
  HDDSCond_calc = case_when(HDDSCond == 1 ~ 1, TRUE ~ 0)
)

# The recomputed score HDDS_hema is the sum of the 12 indicators (range 0-12).
# It is stored next to the HDDS column that comes with the data.
WFP_Chad_HDDS <- WFP_Chad_HDDS %>% mutate(
  HDDS_hema = HDDSStapCer_calc +
    HDDSStapRoot_calc +
    HDDSVeg_calc +
    HDDSFruit_calc +
    HDDSPrMeat_calc +
    HDDSPrEgg_calc +
    HDDSPrFish_calc +
    HDDSPulse_calc +
    HDDSDairy_calc +
    HDDSFat_calc +
    HDDSSugar_calc +
    HDDSCond_calc
)
# Variable label (spelling of "Household" corrected).
var_label(WFP_Chad_HDDS$HDDS_hema) <- "Household Dietary Diversity Score"