13 HDDS indicator

In this section, we harmonize the variables needed to compute the Household Dietary Diversity Score (HDDS).

library(haven)
library(labelled) # for general functions to work with labelled data
library(tidyverse) # general wrangling
library(dplyr)
library(Hmisc)
library(gtsummary) # to demonstrate automatic use of variable labels in summary tables
library(readxl)
library(foreign)
library(sjPlot)
library(sjmisc)
library(sjlabelled) # for example efc data set with variable labels
library(stringr)

# Clear every object from the current workspace before loading the data.
# NOTE(review): rm(list = ls()) also deletes objects created by whoever sources
# this script; confirm that this is intended.
rm(list = ls())

# Folders the survey files are read from and the harmonised output is written to.
dir_input_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/input_data/Chad"
dir_output_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/output_data/Chad"

# Harmonisation workbook: built once, read twice (one sheet per call).
harmonization_file <- file.path(dir_input_data, "Chad_Harmonization.xlsx")

Chad_Harmonization_variables <- read_excel(
  harmonization_file,
  sheet = "variables_harmonization"
)
# View(Chad_Harmonization_variables)

Chad_Harmonization_description <- read_excel(
  harmonization_file,
  sheet = "description"
)
# View(Chad_Harmonization_description)

# `Data` holds the file name of each survey round, `Name` the object name the
# round is stored under.
lst_data <- Chad_Harmonization_description$Data
lst_test <- Chad_Harmonization_description$Name

# Read every listed .sav file and bind it to its object name in the current
# environment. seq_along() keeps the loop from running when the description
# sheet is empty (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(lst_data)) {
  assign(
    lst_test[i],
    read_sav(file.path(dir_input_data, lst_data[i]))
  )
}

13.1 Data consistency Check

# Give every survey round a common `ID` column, copied from whichever
# identifier column that round carries (`@_id` or `Identifiant`).
Chad_baseline_2018$ID <- Chad_baseline_2018$`@_id`
Chad_ea_2019$ID <- Chad_ea_2019$`@_id`
Chad_ea_2020$ID <- Chad_ea_2020$Identifiant
Chad_pdm_2020$ID <- Chad_pdm_2020$Identifiant
Chad_ea_2021$ID <- Chad_ea_2021$`@_id`
Chad_pdm_2021$ID <- Chad_pdm_2021$Identifiant
Chad_ea_2022$ID <- Chad_ea_2022$`@_id`
Chad_pdm_2023$ID <- Chad_pdm_2023$`@_id`
Chad_ea_2023$ID <- Chad_ea_2023$`@_id`

# The 2022 PDM round gets a running row number instead of a source column.
# seq_len() stays valid for an empty table, where 1:nrow() would give c(1, 0).
# NOTE(review): these row numbers are only unique within this round and may
# coincide with IDs of other rounds once the rounds are stacked - confirm.
Chad_pdm_2022$ID <- seq_len(nrow(Chad_pdm_2022))

# Copy each round's consent variable into a common `RESPConsent` column.
# Rounds up to 2021 use round-specific source names.
Chad_baseline_2018$RESPConsent <- Chad_baseline_2018$resultat_enquete
Chad_ea_2019$RESPConsent <- Chad_ea_2019$`@2.1.2Linterviewéavalidélanotedeconsentement`
Chad_ea_2020$RESPConsent <- Chad_ea_2020$q212_notevld
# Disabled: no source column was filled in for the 2020 PDM round, and the
# 2021 endline mapping was left commented out.
#Chad_pdm_2020$RESPConsent <- Chad_pdm_2020
#Chad_ea_2021$RESPConsent <- Chad_ea_2021$`@_RESPConsent`
Chad_pdm_2021$RESPConsent <- Chad_pdm_2021$q212_notevld
# The 2022 and 2023 rounds are assigned from a column of the same name, so
# these lines leave the data unchanged when that column exists.
Chad_ea_2022$RESPConsent <- Chad_ea_2022$RESPConsent
Chad_pdm_2022$RESPConsent <- Chad_pdm_2022$RESPConsent
Chad_pdm_2023$RESPConsent <- Chad_pdm_2023$RESPConsent
Chad_ea_2023$RESPConsent <- Chad_ea_2023$RESPConsent

# Copy each round's region/province variable into a common `ADMIN1Name`
# column. Rounds up to the 2021 PDM use round-specific source names.
Chad_baseline_2018$ADMIN1Name <- Chad_baseline_2018$region
Chad_ea_2019$ADMIN1Name <- Chad_ea_2019$`@1.02NomdelaProvince`
Chad_ea_2020$ADMIN1Name <- Chad_ea_2020$province
Chad_pdm_2020$ADMIN1Name <- Chad_pdm_2020$q4_provinces
# From here on (except the 2021 PDM) the source column is already called
# `ADMIN1Name`, so the assignment leaves the data unchanged when it exists.
Chad_ea_2021$ADMIN1Name <- Chad_ea_2021$ADMIN1Name
Chad_pdm_2021$ADMIN1Name <- Chad_pdm_2021$province
Chad_ea_2022$ADMIN1Name <- Chad_ea_2022$ADMIN1Name
Chad_pdm_2022$ADMIN1Name <- Chad_pdm_2022$ADMIN1Name
Chad_pdm_2023$ADMIN1Name <- Chad_pdm_2023$ADMIN1Name
Chad_ea_2023$ADMIN1Name <- Chad_ea_2023$ADMIN1Name

13.2 Drop duplicated observations

13.4 ID Check

13.5 HDDS Check

# Reduce each 2022 and 2023 round to its `ID` column plus every column whose
# name starts with "HDDS". The 2020 and 2021 rounds are not subset here; only
# commented-out column-name inspections existed for them.

# Repeats the ID assignment made earlier in the file for the 2022 endline.
Chad_ea_2022$ID <- Chad_ea_2022$`@_id`

df_Chad_ea_2022 <- dplyr::select(
  Chad_ea_2022,
  all_of("ID"), starts_with("HDDS")
)

df_Chad_ea_2023 <- dplyr::select(
  Chad_ea_2023,
  all_of("ID"), starts_with("HDDS")
)

df_Chad_pdm_2022 <- dplyr::select(
  Chad_pdm_2022,
  all_of("ID"), starts_with("HDDS")
)

df_Chad_pdm_2023 <- dplyr::select(
  Chad_pdm_2023,
  all_of("ID"), starts_with("HDDS")
)
# 
# Chad_ea_2021 %>% 
#   dplyr::select(starts_with("HDDS")) %>% names()

# Chad_ea_2020 %>% 
#   dplyr::select(starts_with("HDDS")) %>% names()

# Names of the HDDS item columns, taken from the 2022 endline subset. The
# pre-computed score columns (HDDS, HDDS_CH, HDDSCat_IPC) are left out.
hdds_var <- df_Chad_ea_2022 %>%
  dplyr::select(starts_with("HDDS")) %>%
  names()
hdds_var <- setdiff(hdds_var, c("HDDS", "HDDS_CH", "HDDSCat_IPC"))

# The 2023 rounds are converted without the three aggregate "...All" columns.
# NOTE(review): presumably those columns are absent from the 2023 files -
# confirm against the data.
hdds_var_2023 <- setdiff(
  hdds_var,
  c("HDDSVegAll", "HDDSFruitAll", "HDDSPrMeatAll")
)

# Convert the item columns to plain numeric. across(all_of()) replaces the
# superseded mutate_at() and still fails loudly if a listed column is missing.
df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(all_of(hdds_var), as.numeric))

df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(all_of(hdds_var), as.numeric))

df_Chad_ea_2023 <- df_Chad_ea_2023 %>%
  mutate(across(all_of(hdds_var_2023), as.numeric))

df_Chad_pdm_2023 <- df_Chad_pdm_2023 %>%
  mutate(across(all_of(hdds_var_2023), as.numeric))

# Recode the 2022 rounds: in every column that contains at least one value 2,
# 2 becomes 1, any other non-missing value becomes 0, and NA stays NA.
# NOTE(review): a column in which no household answered 2 is left untouched,
# so any 1s it holds are kept as they are - confirm this matches the source
# coding of these rounds.

# Identifier and pre-computed score columns are never recoded, even when they
# happen to contain the value 2.
not_recoded <- c("ID", "HDDS", "HDDS_CH", "HDDSCat_IPC")

# na.rm = TRUE makes the predicate return FALSE, not NA, for columns that have
# missing values and no 2.
selected_var <- df_Chad_ea_2022 %>%
  dplyr::select(where(~ any(. == 2, na.rm = TRUE))) %>%
  names()

df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(
    all_of(setdiff(selected_var, not_recoded)),
    ~ ifelse(. == 2, 1, 0)
  ))

selected_var <- df_Chad_pdm_2022 %>%
  dplyr::select(where(~ any(. == 2, na.rm = TRUE))) %>%
  names()

df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(
    all_of(setdiff(selected_var, not_recoded)),
    ~ ifelse(. == 2, 1, 0)
  ))


# Value labels attached to the recoded items: 0 = "Non", 1 = "Oui".
label_map <- c(
  Non = 0,
  Oui = 1
)

# Attach the value labels to every HDDS item column of the two 2022 rounds.
# all_of() makes explicit that `hdds_var` is an external character vector of
# column names rather than a column of the data.
df_Chad_ea_2022 <- df_Chad_ea_2022 %>%
  mutate(across(
    all_of(hdds_var),
    ~ labelled::labelled(., label_map)
  ))

df_Chad_pdm_2022 <- df_Chad_pdm_2022 %>%
  mutate(across(
    all_of(hdds_var),
    ~ labelled::labelled(., label_map)
  ))

# Stack the four 2022/2023 subsets; rbind.fill() pads columns that a round
# lacks with NA.
hdds_rounds <- list(
  df_Chad_ea_2022,
  df_Chad_ea_2023,
  df_Chad_pdm_2022,
  df_Chad_pdm_2023
)
WFP_Chad_HDDS <- plyr::rbind.fill(hdds_rounds)

# Optional narrowing to the item columns, kept disabled:
# WFP_Chad_HDDS <- WFP_Chad_HDDS %>%
#   dplyr::select("ID", hdds_var)

# Store the identifier as text and the pre-computed score as numeric.
WFP_Chad_HDDS <- WFP_Chad_HDDS %>%
  mutate(
    ID = as.character(ID),
    HDDS = as.numeric(HDDS)
  )

# Build the 12 HDDS food groups from the 16 item questions. A group scores 1
# when at least one of its items equals 1; everything else, including a
# missing answer, scores 0 through the `TRUE ~ 0` fallback.
WFP_Chad_HDDS <- WFP_Chad_HDDS %>%
  mutate(
    HDDSStapCer_calc = case_when(HDDSStapCer == 1 ~ 1, TRUE ~ 0),
    HDDSStapRoot_calc = case_when(HDDSStapRoot == 1 ~ 1, TRUE ~ 0),
    HDDSVeg_calc = case_when(
      HDDSVegOrg == 1 | HDDSVegGre == 1 | HDDSVegOth == 1 ~ 1,
      TRUE ~ 0
    ),
    HDDSFruit_calc = case_when(
      HDDSFruitOrg == 1 | HDDSFruitOth == 1 ~ 1,
      TRUE ~ 0
    ),
    HDDSPrMeat_calc = case_when(
      HDDSPrMeatF == 1 | HDDSPrMeatO == 1 ~ 1,
      TRUE ~ 0
    ),
    HDDSPrEgg_calc = case_when(HDDSPrEgg == 1 ~ 1, TRUE ~ 0),
    HDDSPrFish_calc = case_when(HDDSPrFish == 1 ~ 1, TRUE ~ 0),
    HDDSPulse_calc = case_when(HDDSPulse == 1 ~ 1, TRUE ~ 0),
    HDDSDairy_calc = case_when(HDDSDairy == 1 ~ 1, TRUE ~ 0),
    HDDSFat_calc = case_when(HDDSFat == 1 ~ 1, TRUE ~ 0),
    HDDSSugar_calc = case_when(HDDSSugar == 1 ~ 1, TRUE ~ 0),
    HDDSCond_calc = case_when(HDDSCond == 1 ~ 1, TRUE ~ 0)
  )

# HDDS is the number of food groups scored 1 (0 to 12). It is stored as
# `HDDS_hema`, next to the `HDDS` column delivered with the data. No Cadre
# Harmonise phase is derived in this section.
WFP_Chad_HDDS <- WFP_Chad_HDDS %>%
  mutate(
    HDDS_hema = HDDSStapCer_calc + HDDSStapRoot_calc + HDDSVeg_calc +
      HDDSFruit_calc + HDDSPrMeat_calc + HDDSPrEgg_calc + HDDSPrFish_calc +
      HDDSPulse_calc + HDDSDairy_calc + HDDSFat_calc + HDDSSugar_calc +
      HDDSCond_calc
  )

# Variable label written to the exported files (typo "Hosehold" corrected).
var_label(WFP_Chad_HDDS$HDDS_hema) <- "Household Dietary Diversity Score"

13.6 Export all data

# Save the stacked HDDS table to the output folder in SPSS (.sav) and
# Stata (.dta) format.
sav_file <- paste0(dir_output_data, "/WFP_Chad_HDDS.sav")
dta_file <- paste0(dir_output_data, "/WFP_Chad_HDDS.dta")

haven::write_sav(data = WFP_Chad_HDDS, path = sav_file)
haven::write_dta(data = WFP_Chad_HDDS, path = dta_file)