9 Asset benefit indicator (ABI)

In this section, we harmonize the variables needed to compute the Asset Benefit Indicator (ABI).

library(haven)
library(labelled) # for general functions to work with labelled data
library(tidyverse) # general wrangling
library(dplyr)
library(Hmisc)
library(gtsummary) # to demonstrate automatic use of variable labels in summary tables
library(readxl)
library(foreign)
library(sjPlot)
library(sjmisc)
library(sjlabelled) # for example efc data set with variable labels
library(stringr)

# Remove every object from the current environment before starting.
rm(list = ls())

# Folders holding the Chad input files and receiving the outputs.
dir_input_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/input_data/Chad"
dir_output_data <- "C:/Users/AHema/OneDrive - CGIAR/Desktop/WFP Resilience dataset/data/output_data/Chad"

# Sheet "variables_harmonization" of the harmonization workbook.
Chad_Harmonization_variables <- dir_input_data %>%
  paste0("/Chad_Harmonization.xlsx") %>%
  read_excel(sheet = "variables_harmonization")
# View(Chad_Harmonization_variables)

# Sheet "description" of the same workbook.
Chad_Harmonization_description <- dir_input_data %>%
  paste0("/Chad_Harmonization.xlsx") %>%
  read_excel(sheet = "description")
# View(Chad_Harmonization_description)

# `Data` is used below as the file names to read, `Name` as the names of the
# objects the files are stored in.
lst_data <- Chad_Harmonization_description$Data
lst_test <- Chad_Harmonization_description$Name

# Read each SPSS file listed in `lst_data` from the input folder and store it
# in the current environment under the matching name in `lst_test`.
# seq_along() makes the loop a no-op when the list is empty, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(lst_data)) {
  assign(
    lst_test[i],
    read_sav(paste0(dir_input_data, "/", lst_data[i]))
  )
}

# Harmonize every loaded dataset: for each row of the harmonization table,
# create the column `NewVariable_Name` from the source column named in that
# dataset's mapping column, or fill it with NA when no source column is mapped.
# Each dataset is then reduced to the harmonized columns and stored back under
# its original name.
for (j in seq_along(lst_test)) {
  dataset_name <- lst_test[j]
  df <- get(dataset_name, envir = .GlobalEnv)

  for (i in seq_len(nrow(Chad_Harmonization_variables))) {
    new_name <- Chad_Harmonization_variables$NewVariable_Name[i]
    source_name <- Chad_Harmonization_variables[[dataset_name]][i]

    # A scalar test needs if/else: ifelse() shapes its result like the test,
    # so it is the wrong tool for choosing between NA and a whole column.
    if (is.na(source_name)) {
      df[[new_name]] <- NA
    } else {
      if (!source_name %in% names(df)) {
        stop(
          "Column '", source_name, "' not found in ", dataset_name,
          call. = FALSE
        )
      }
      df[[new_name]] <- df[[source_name]]
    }
  }

  # Keep only the harmonized variables, in the order of the mapping table.
  df <- df %>%
    dplyr::select(all_of(Chad_Harmonization_variables$NewVariable_Name))
  assign(dataset_name, df)
  # write_sav(df, paste0(dir_output_data,"/",lst_test[j],".sav"))
  # write_dta(df, paste0(dir_output_data,"/",lst_test[j],".dta"))
}

# Chad_pdm_2022 gets a sequential row number as its ID.
# seq_len() stays valid for a zero-row table, unlike 1:nrow().
Chad_pdm_2022$ID <- seq_len(nrow(Chad_pdm_2022))

# ABI variables: every baseline column whose name starts with "ABI", plus
# three additional variables listed by name.
abi_variables <- Chad_baseline_2018 %>%
  dplyr::select(dplyr::starts_with("ABI")) %>%
  names()
abi_variables <- c(
  abi_variables,
  "ActifCreationEmploi",
  "BeneficieEmploi",
  "TRavailMaintienActif"
)

# ABISexparticipant is excluded from the ABI variable list.
abi_variables <- abi_variables[!abi_variables %in% c("ABISexparticipant")]

9.1 EA 2019

# Keep the ID and the ABI variables of the 2019 EA round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_2019 <- Chad_ea_2019 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_2019 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_2019$ABIProteger)

# Convert to character, then recode to numeric codes.
# NOTE(review): "NA" = 888 matches the literal string "NA", not missing
# values - confirm this is what the 2019 data contains.
df_2019 <- df_2019 %>%
  dplyr::mutate(across(all_of(abi_variables_tmp), ~ as.character(.))) %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(., "Non" = 0, "Oui" = 1, "NA" = 888)
  ))

# Attach the common value labels.
df_2019 <- df_2019 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_2019$ABIProteger)
# haven::write_dta(df_2019,paste0(dir_output_data,"/","df_2019.dta"))

9.2 EA 2020

# Keep the ID and the ABI variables of the 2020 EA round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_2020 <- Chad_ea_2020 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_2020 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_2020$ABIProteger)

# Convert to character, recode "0"/"1" to numeric, then set every remaining
# missing value of the retained ABI variables to 888.
df_2020 <- df_2020 %>%
  dplyr::mutate(across(all_of(abi_variables_tmp), ~ as.character(.))) %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(., "0" = 0, "1" = 1)
  )) %>%
  tidyr::replace_na(
    setNames(as.list(rep(888, length(abi_variables_tmp))), abi_variables_tmp)
  )

# Attach the common value labels.
df_2020 <- df_2020 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_2020$ABIProteger)
# haven::write_dta(df_2020,paste0(dir_output_data,"/","df_2020.dta"))

9.3 EA 2021

# Keep the ID and the ABI variables of the 2021 EA round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_2021 <- Chad_ea_2021 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_2021 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_2021$ABIProteger)

# Recode 1 -> 0, 2 -> 1 and 3 -> 888. recode() is namespaced like in the
# other sections so that another attached package cannot mask it.
df_2021 <- df_2021 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(.,
      "1" = 0,
      "2" = 1,
      "3" = 888
    )
  ))

# Attach the common value labels.
df_2021 <- df_2021 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_2021$ABIProteger)

9.4 EA 2022

# Keep the ID and the ABI variables of the 2022 EA round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_2022 <- Chad_ea_2022 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_2022 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_2022$ABIProteger)

# Recode 1 -> 0, 2 -> 1 and 3 -> 888. recode() is namespaced like in the
# other sections so that another attached package cannot mask it.
df_2022 <- df_2022 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(.,
      "1" = 0,
      "2" = 1,
      "3" = 888
    )
  ))

# Attach the common value labels.
df_2022 <- df_2022 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_2022$ABIProteger)

9.5 EA 2023

# Keep the ID and the ABI variables of the 2023 EA round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_2023 <- Chad_ea_2023 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# labelling step below.
all_missing_cols <- df_2023 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before labelling (printed for inspection).
expss::val_lab(df_2023$ABIProteger)

# No recoding is applied to this round; the recode below is kept disabled.
# NOTE(review): presumably the 2023 values are already coded 0/1/888 - confirm.
# df_2023 <- df_2023 %>%
#   dplyr::mutate(across(abi_variables_tmp,
#                        ~recode(.,
#                                "1" = 0,
#                                "2" = 1,
#                                "3" = 888
#                        )
#   )
#   )

# Attach the common value labels.
df_2023 <- df_2023 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after labelling (printed for inspection).
expss::val_lab(df_2023$ABIProteger)

9.6 PDM 2020

# Keep the ID and the ABI variables of the 2020 PDM round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_pdm_2020 <- Chad_pdm_2020 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_pdm_2020 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_pdm_2020$ABIProteger)

# Convert to character, then recode "0"/"1" to numeric codes.
df_pdm_2020 <- df_pdm_2020 %>%
  dplyr::mutate(across(all_of(abi_variables_tmp), ~ as.character(.))) %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(., "0" = 0, "1" = 1)
  ))

# Attach the common value labels.
df_pdm_2020 <- df_pdm_2020 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_pdm_2020$ABIProteger)

9.7 PDM 2021

# Keep the ID and the ABI variables of the 2021 PDM round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_pdm_2021 <- Chad_pdm_2021 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_pdm_2021 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_pdm_2021$ABIProteger)

# Convert to character, then recode "0" -> 0, "1" -> 1 and "2" -> 888.
df_pdm_2021 <- df_pdm_2021 %>%
  dplyr::mutate(across(all_of(abi_variables_tmp), ~ as.character(.))) %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(., "0" = 0, "1" = 1, "2" = 888)
  ))

# Attach the common value labels.
df_pdm_2021 <- df_pdm_2021 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_pdm_2021$ABIProteger)

9.8 PDM 2022

# Convert the labelled columns of the 2022 PDM round to factors, then keep
# the ID and the ABI variables.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_pdm_2022 <- Chad_pdm_2022 %>%
  labelled::to_factor() %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# recoding and labelling steps below.
all_missing_cols <- df_pdm_2022 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# Value labels before recoding (printed for inspection).
expss::val_lab(df_pdm_2022$ABIProteger)

# Convert the factors to character, then recode the text answers to numeric
# codes.
df_pdm_2022 <- df_pdm_2022 %>%
  dplyr::mutate(across(all_of(abi_variables_tmp), ~ as.character(.))) %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ dplyr::recode(., "Non" = 0, "Oui" = 1, "Ne sait pas" = 888)
  ))

# Attach the common value labels.
df_pdm_2022 <- df_pdm_2022 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after recoding (printed for inspection).
expss::val_lab(df_pdm_2022$ABIProteger)

9.9 PDM 2023

# Keep the ID and the ABI variables of the 2023 PDM round.
# all_of() is required for external character vectors; passing them bare to
# select()/across() is deprecated in tidyselect.
df_pdm_2023 <- Chad_pdm_2023 %>%
  dplyr::select(ID, all_of(abi_variables))

# Columns that are entirely missing in this round are skipped by the
# labelling step below.
all_missing_cols <- df_pdm_2023 %>%
  dplyr::select(where(~ all(is.na(.)))) %>%
  names()

# all_missing_cols

abi_variables_tmp <- abi_variables[!abi_variables %in% all_missing_cols]

# No recoding is applied to this round; only the common value labels are
# attached.
df_pdm_2023 <- df_pdm_2023 %>%
  dplyr::mutate(across(
    all_of(abi_variables_tmp),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

# Value labels after labelling (printed for inspection).
expss::val_lab(df_pdm_2023$ABIProteger)

9.10 Merging all data

# Stack all EA and PDM rounds; rbind.fill() fills columns absent from a
# round with NA.
df_Chad <- plyr::rbind.fill(
  df_2019,
  df_2020,
  df_2021,
  df_2022,
  df_2023,
  df_pdm_2020,
  df_pdm_2021,
  df_pdm_2022,
  df_pdm_2023
)

# Value labels after stacking (printed for inspection).
expss::val_lab(df_Chad$ABIProteger)

# Variables that get the common value labels re-applied after stacking.
# NOTE(review): presumably these lose their labels in rbind.fill() - confirm.
not_ok <- c(
  "ABIParticipation", "ABItransferts", "ABITensions",
  "ActifCreationEmploi", "BeneficieEmploi", "TRavailMaintienActif"
)

# all_of() is required for an external character vector; passing it bare to
# across() is deprecated in tidyselect.
df_Chad <- df_Chad %>%
  dplyr::mutate(across(
    all_of(not_ok),
    ~ labelled(., labels = c(
      "Non" = 0,
      "Oui" = 1,
      "Ne sait pas" = 888
    ))
  ))

expss::val_lab(df_Chad$ABIProteger)

# Copy the ABI variables into HH* columns (appended in this order), then
# give the seven asset-benefit items English value labels.
df_Chad <- df_Chad %>%
  dplyr::mutate(
    HHFFAPart = ABIParticipation,
    HHAssetProtect = ABIProteger,
    HHAssetProduct = ABIProduction,
    HHAssetDecHardship = ABIdifficultes,
    HHAssetAccess = ABIMarches,
    HHTrainingAsset = ABIGererActifs,
    HHAssetEnv = ABIEnvironnement,
    HHWorkAsset = ABIutiliseractifs
  ) %>%
  dplyr::mutate(across(
    all_of(c(
      "HHAssetProtect", "HHAssetProduct", "HHAssetDecHardship",
      "HHAssetAccess", "HHTrainingAsset", "HHAssetEnv", "HHWorkAsset"
    )),
    function(x) {
      labelled(x, labels = c(
        "No" = 0,
        "Yes" = 1,
        "Don't know" = 888,
        "Not applicable" = 999
      ))
    }
  ))



# Recode 888 and 999 to 0 so that only 1 adds to the score.
# HHAssetProtect:HHWorkAsset is a positional range, so it relies on these
# seven columns being adjacent in df_Chad.
df_Chad <- df_Chad %>%
  dplyr::mutate(across(
    HHAssetProtect:HHWorkAsset,
    ~ dplyr::recode(.x, "0" = 0, "1" = 1, "999" = 0, "888" = 0)
  ))

# Count the non-missing ABI source items per row.
df_Chad$NonMissingCount <- rowSums(!is.na(df_Chad[, c(
  "ABIProteger", "ABIProduction",
  "ABIdifficultes", "ABIMarches",
  "ABIGererActifs", "ABIEnvironnement",
  "ABIutiliseractifs"
)]))

# ABI score: row sum of the recoded items, NA when every source item is
# missing. TRUE is spelled out because T can be reassigned.
df_Chad <- df_Chad %>%
  dplyr::mutate(ABIScore = ifelse(
    NonMissingCount > 0,
    rowSums(across(c(HHAssetProtect:HHWorkAsset)), na.rm = TRUE),
    NA
  ))

# Attach the variable labels to the HH* items and to the ABI score in one
# call, using a named list (column name -> label).
labelled::var_label(df_Chad) <- list(
  HHFFAPart = "Have you or any of your household member participated in the asset creation activities and received a food assistance transfer?",
  HHAssetProtect = "Do you think that the assets that were built or rehabilitated in your community are better protecting your household, its belongings and its production capacities (fields, equipment, etc.) from floods / drought / landslides / mudslides?",
  HHAssetProduct = "Do you think that the assets that were built or rehabilitated in your community have allowed your household to increase or diversify its production (agriculture / livestock / other)?",
  HHAssetDecHardship = "Do you think that the assets that were built or rehabilitated in your community have decreased the day-to-day hardship and released time for any of your family members (including women and children)?",
  HHAssetAccess = "Do you think that the assets that were built or rehabilitated in your community have improved the ability of any of your household member to access markets and/or basic services (water, sanitation, health, education, etc)?",
  HHTrainingAsset = "Do you think that the trainings and other support provided in your community have improved your household’s ability to manage and maintain assets?",
  HHAssetEnv = "Do you think that the assets that were built or rehabilitated in your community have improved your natural environment (for example more vegetal cover, water table increased, less erosion, etc.)?",
  HHWorkAsset = "Do you think that the works undertaken in your community have restored your ability to access and/or use basic asset functionalities?",
  ABIScore = "ASSET BENEFIT INDICATOR SCORE"
)

9.11 Export all data

# Drop exact duplicate rows before exporting.
# NOTE(review): df_Chad carries no survey/round column, so identical rows
# coming from different rounds are collapsed too - confirm this is intended.
df_Chad <- dplyr::distinct(df_Chad)

# Write the stacked ABI data to the output folder in Stata format.
haven::write_dta(df_Chad, paste0(dir_output_data, "/", "WFP_Chad_ABI.dta"))