RQ_4

Joshua Rosenberg

12/19/2017

RQ #4: Is engagement in STEM activities across several weeks associated with changes in: individual (sustained) interest in STEM and future goals and aspirations related to STEM?

This is what we know:

  • Predicted engagement (via a BLUP) strongly predicts changes in interest and future goals and plans
  • Males are associated with changes in future goals and plans
  • Overall program rating is associated with changes in interest (but may not be very robust)

This is what we still need to figure out:

  • How will these findings hold up when we use the predictions for relevance and challenge (the BLUPs) in different ways (in a bivariate model that accounts for the standard error)

How this document is organized

  • Two different outcomes, Interest and future goals, controlling on interest and future goals and gender, with three models each:
    • Basic model (described above)
    • Addition of categorical overall program rating
    • Addition of continuous overall program rating
library(tidyverse)
library(lme4)
library(corrr)
library(jmRtools)
library(sjPlot)
library(magrittr)
esm <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-esm.csv")
pre_survey_data_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pre-survey.csv")
post_survey_data_partially_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-post-survey.csv")
video <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-video.csv")
pqa <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pqa.csv")
attendance <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-attendance.csv")
class_data <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-class-video.csv")
demographics <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-demographics.csv")
pm <- read_csv("/Volumes/SCHMIDTLAB/PSE/Data/STEM-IE/STEM-IE-program-match.csv")
act <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-program-by-activity.csv")
loc <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-program-by-location.csv")
attendance <- rename(attendance, participant_ID = ParticipantID)
attendance <- mutate(attendance, prop_attend = DaysAttended / DaysScheduled, 
                     participant_ID = as.integer(participant_ID))
attendance <- select(attendance, participant_ID, prop_attend)

demographics <- filter(demographics, participant_ID!= 7187)
demographics <- left_join(demographics, attendance)

esm$overall_engagement <- jmRtools::composite_mean_maker(esm, hard_working, concentrating, enjoy, interest)
df <- left_join(esm, pre_survey_data_processed, by = "participant_ID") # df & post-survey
df <- left_join(df, video, by = c("program_ID", "response_date", "sociedad_class", "signal_number")) # df & video
df <- left_join(df, demographics, by = c("participant_ID", "program_ID")) # df and demographics
pqa <- mutate(pqa, 
              active = active_part_1 + active_part_2,
              ho_thinking = ho_thinking_1 + ho_thinking_2 + ho_thinking_3,
              belonging = belonging_1 + belonging_2,
              agency = agency_1 + agency_2 + agency_3 + agency_4,
              youth_development_overall = active_part_1 + active_part_2 + ho_thinking_1 + ho_thinking_2 + ho_thinking_3 + belonging_1 + belonging_2 + agency_1 + agency_2 + agency_3 + agency_4,
              making_observations = stem_sb_8,
              data_modeling = stem_sb_2 + stem_sb_3 + stem_sb_9,
              interpreting_communicating = stem_sb_6,
              generating_data = stem_sb_4,
              asking_questions = stem_sb_1)

# pqa <- rename(pqa, sixth_math_sociedad = sixth_math)
# pqa <- rename(pqa, seventh_math_sociedad = seventh_math)
# pqa <- rename(pqa, eighth_math_sociedad = eighth_math)
# pqa <- rename(pqa, dance_sociedad = dance)
# pqa <- rename(pqa, robotics_sociedad = robotics)

pqa$sociedad_class <- ifelse(pqa$eighth_math == 1, "8th Math",
                             ifelse(pqa$seventh_math == 1, "7th Math",
                                    ifelse(pqa$sixth_math == 1, "6th Math",
                                           ifelse(pqa$robotics == 1, "Robotics",
                                                  ifelse(pqa$dance == 1, "Dance", NA)))))

pqa <- rename(pqa, 
              program_ID = SiteIDNumeric,
              response_date = resp_date,
              signal_number = signal)

pqa$program_ID <- as.character(pqa$program_ID)

df <- left_join(df, pqa, by = c("response_date", "program_ID", "signal_number", "sociedad_class"))
x <- lmer(challenge ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

chall_df <- lmer(challenge ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_challenge = `(Intercept)`) %>%
    mutate(pred_challenge = pred_challenge + x[1, 1]) %>% 
    tbl_df()

x <- lmer(learning ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

learning_df <- lmer(learning ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_learning = `(Intercept)`) %>% 
    mutate(pred_learning = pred_learning + x[1, 1]) %>% 
    tbl_df()

x <- lmer(positive_affect ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

positive_affect_df <- lmer(positive_affect ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_positive_affect = `(Intercept)`) %>% 
    mutate(pred_positive_affect = pred_positive_affect + x[1, 1]) %>% 
    tbl_df()

x <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

relevance_df <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_relevance = `(Intercept)`) %>% 
    mutate(pred_relevance = pred_relevance + x[1, 1]) %>% 
    tbl_df()

x <- lmer(interest ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

interest_df <- lmer(relevance ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_interest = `(Intercept)`) %>% 
    mutate(pred_interest = pred_interest + x[1, 1]) %>% 
    tbl_df()

x <- lmer(overall_engagement ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    broom::tidy() %>% 
    filter(term == "(Intercept)") %>% 
    select(estimate)

engagement_df <- lmer(overall_engagement ~ (1|program_ID) + (1|participant_ID) + (1|beep_ID_new), data = df) %>% 
    ranef() %>% 
    extract2("participant_ID") %>% 
    rownames_to_column(var = "participant_ID") %>% 
    rename(pred_overall_engagement = `(Intercept)`) %>% 
    mutate(pred_overall_engagement = pred_overall_engagement + x[1, 1]) %>% 
    tbl_df()
pred_var_df <- chall_df %>% 
    left_join(learning_df, by = "participant_ID") %>% 
    left_join(positive_affect_df, by = "participant_ID") %>% 
    left_join(relevance_df, by = "participant_ID") %>% 
    left_join(interest_df, by = "participant_ID") %>% 
    left_join(engagement_df, by = "participant_ID")

pre_survey_data_processed$participant_ID <- as.character(pre_survey_data_processed$participant_ID)
post_survey_data_partially_processed$participant_ID <- as.character(post_survey_data_partially_processed$participant_ID)
demographics$participant_ID <- as.character(demographics$participant_ID)

fix_missing <- function(x) {
    x[is.na(x)] <- 0
    x
}

names(act)[2:7] <- paste0("ACT_", names(act)[2:7])
act <- mutate_if(act, is.double, fix_missing)
names(loc)[2:7] <- paste0("LOC_", names(loc)[2:7])
loc <- mutate_if(loc, is.double, fix_missing)

mod_df <- left_join(pred_var_df, pre_survey_data_processed, by = "participant_ID") %>% 
    left_join(post_survey_data_partially_processed, by = "participant_ID") %>% 
    left_join(demographics, by = "participant_ID") %>% 
    left_join(pm, by = "program_ID")

mod_df <- mod_df %>% 
    left_join(act) %>%
    left_join(loc) %>% 
    rename(lab = `ACT_Lab Activity`,
           create = `ACT_Creating Product`,
           not_focused = `ACT_Not Focused`,
           basic = `ACT_Basic Skills Activity`,
           psl = `ACT_Program Staff Led`,
           fts = `ACT_Field Trip Speaker`,
           class_space = `LOC_Classroom Space`,
           in_comm = `LOC_Outdoors - Community`) %>% 
    mutate(doing = lab + create)

mod_df$post_future_goals_plans <- jmRtools::composite_mean_maker(mod_df,
                                                                 post_future_job_become_STEM,
                                                                 post_future_job_use_science_math,
                                                                 post_future_job_work_science_computer)

mod_df$pre_future_goals_plans <- jmRtools::composite_mean_maker(mod_df,
                                                                pre_future_job_become_STEM,
                                                                pre_future_job_use_science_math,
                                                                pre_future_job_work_science_computer)
mod_df <- mutate(mod_df,
                 program_name = ifelse(program_name == "Providence -  RWP Zoo", "Providence - RWP Zoo", program_name),
                 overall_program_rating = case_when(
                     program_name == "Boston - Dorchester House" ~ "inconsistent-quality",
                     program_name == "Boston - Mathpower" ~ "inconsistent-quality",
                     program_name == "Boston - Sociedad Latina" ~ "inconsistent-quality",
                     program_name == "Boston - Thompson Island" ~ "consistent-quality",
                     program_name == "Providence - RWP Zoo" ~ "inconsistent-quality",
                     program_name == "Providence - Biomes" ~ "consistent-quality",
                     program_name == "Providence - Crazy Machines" ~ "consistent-quality",
                     program_name == "Providence - Down City Design" ~ "inconsistent-quality",
                     program_name == "Providence - Explore the Bay" ~ "consistent-quality",
                     TRUE ~ "missing"
                 ),
                 overall_program_continuous_rating = case_when(
                     program_name == "Boston - Dorchester House" ~ 151.2782,
                     program_name == "Boston - Mathpower" ~ 112.4517,
                     program_name == "Boston - Sociedad Latina" ~ 133.9675,
                     program_name == "Boston - Thompson Island" ~ 94.2545,
                     program_name == "Providence - RWP Zoo" ~ 136.8659,
                     program_name == "Providence - Biomes" ~ 92.6211,
                     program_name == "Providence - Crazy Machines" ~ 99.3313,
                     program_name == "Providence - Down City Design" ~ 117.7022,
                     program_name == "Providence - Explore the Bay" ~ 88.5104,
                     TRUE ~ -99
                 )
)

mod_df$overall_program_rating <- fct_relevel(mod_df$overall_program_rating,
                                             "inconsistent-quality")

Correlations

mod_df %>% 
    select(overall_pre_interest, overall_post_interest, pre_future_goals_plans, post_future_goals_plans, pred_challenge, pred_relevance, overall_program_continuous_rating) %>% 
    correlate() %>% 
    shave() %>% 
    fashion()
##                             rowname overall_pre_interest
## 1              overall_pre_interest                     
## 2             overall_post_interest                  .59
## 3            pre_future_goals_plans                  .39
## 4           post_future_goals_plans                  .20
## 5                    pred_challenge                  .00
## 6                    pred_relevance                  .11
## 7 overall_program_continuous_rating                 -.12
##   overall_post_interest pre_future_goals_plans post_future_goals_plans
## 1                                                                     
## 2                                                                     
## 3                   .33                                               
## 4                   .47                    .47                        
## 5                  -.06                    .05                     .08
## 6                   .17                    .28                     .30
## 7                  -.13                   -.16                    -.00
##   pred_challenge pred_relevance overall_program_continuous_rating
## 1                                                                
## 2                                                                
## 3                                                                
## 4                                                                
## 5                                                                
## 6            .58                                                 
## 7            .04            .10

1. Changes in engagement

1A: Basic model

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement +
              
              overall_pre_interest + 
              
              gender + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    overall_post_interest
    B std. Error p
Fixed Parts
(Intercept)   0.10 0.37 .784
pred_overall_engagement   0.46 0.11 <.001
overall_pre_interest   0.53 0.07 <.001
gender (M)   0.11 0.12 .345
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.051
Observations   141
R2 / Ω02   .465 / .465

With overall program rating added

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement + 
              
              overall_pre_interest + 
              
              gender + 
              
              overall_program_rating + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    overall_post_interest
    B std. Error p
Fixed Parts
(Intercept)   -0.03 0.36 .925
pred_overall_engagement   0.45 0.11 <.001
overall_pre_interest   0.54 0.07 <.001
gender (M)   0.12 0.12 .299
overall_program_rating (consistent-quality)   0.26 0.13 .045
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.010
Observations   141
R2 / Ω02   .454 / .454

With overall program rating (continuous) added

m <- lmer(overall_post_interest ~ 
              
              pred_overall_engagement + 
               
              overall_pre_interest + 
               
              gender + 
               
              scale(overall_program_continuous_rating) + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    overall_post_interest
    B std. Error p
Fixed Parts
(Intercept)   0.11 0.37 .758
pred_overall_engagement   0.46 0.11 <.001
overall_pre_interest   0.53 0.07 <.001
gender (M)   0.12 0.12 .319
scale(overall_program_continuous_rating)   -0.06 0.08 .443
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.054
Observations   141
R2 / Ω02   .467 / .466

2. Changes in engagement

2A: Basic model

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement +
              
              pre_future_goals_plans +
              
              gender + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    post_future_goals_plans
    B std. Error p
Fixed Parts
(Intercept)   0.72 0.39 .064
pred_overall_engagement   0.30 0.13 .026
pre_future_goals_plans   0.38 0.09 <.001
gender (M)   0.39 0.14 .007
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.000
Observations   133
R2 / Ω02   .293 / .293

2B: With overall program rating added

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement + 
              
              pre_future_goals_plans +
              
              gender + 
              
              overall_program_rating + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    post_future_goals_plans
    B std. Error p
Fixed Parts
(Intercept)   0.73 0.39 .063
pred_overall_engagement   0.29 0.13 .027
pre_future_goals_plans   0.39 0.09 <.001
gender (M)   0.39 0.14 .007
overall_program_rating (consistent-quality)   -0.04 0.14 .786
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.000
Observations   133
R2 / Ω02   .293 / .293

2C: With overall program rating (continuous) added

m <- lmer(post_future_goals_plans ~ 
              
              pred_overall_engagement + 
               
              pre_future_goals_plans +
               
              gender + 
               
              scale(overall_program_continuous_rating) + 

              (1|program_ID), 
          data = mod_df)

sjPlot::sjt.lmer(m, p.kr = T, show.re.var = F, show.ci = F, show.se = T)
    post_future_goals_plans
    B std. Error p
Fixed Parts
(Intercept)   0.72 0.39 .065
pred_overall_engagement   0.30 0.13 .026
pre_future_goals_plans   0.38 0.09 <.001
gender (M)   0.39 0.14 .007
scale(overall_program_continuous_rating)   -0.00 0.07 .949
Random Parts
Nprogram_ID   9
ICCprogram_ID   0.000
Observations   133
R2 / Ω02   .293 / .293