Workspace doc

https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

Next steps

  • look at the measurement of interest and utility value
  • add a dummy variable for the intervention

Overall takeaways

  • UV and interest seem to need to be in separate models, along with perceived competence
  • A combined task value factor (UV + interest) fits worse than keeping UV, interest, and perceived competence separate (see the CFA in 3.5)
  • Using just the control data, it also seems important to run the models separately

Items

In general…

  1. I think this course is an interesting subject. (Int)
  2. What I am learning in this class is relevant to my life. (UV)
  3. I consider this topic to be one of my best subjects. (PC)
  4. I am not interested in this course. (Int - Rev)
  5. I think I will like learning about this topic. (Int)
  6. I think what we are studying in this course is useful for me to know. (UV)
  7. I don’t feel comfortable when it comes to answering questions in this area. (PC - Rev)
  8. I think this subject is interesting. (Int)
  9. I find the content of this course to be personally meaningful. (UV)
  10. I’ve always wanted to learn more about this subject. (Int)

Int: 1, 4, 5, 8, 10
UV: 2, 6, 9
PC: 3, 7
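For later reference, the same key encoded as an R object (a convenience sketch; the pre-processing below hard-codes the column names instead):

# Subscale key (items 4 and 7 are reverse-coded, per the item list above)
item_key <- list(
    int = c(1, 4, 5, 8, 10),
    uv  = c(2, 6, 9),
    pc  = c(3, 7)
)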

1. Pre-processing (for semesters 1 and 2)

# install.packages("devtools")
# devtools::install_github("jrosen48/jmRtools")
Sys.setenv(TZ='America/Detroit')

library(jmRtools)
library(readxl)
library(tidyverse)
library(lubridate)
RR_Course_Data <- read_csv("RR_Course_Data.csv")

CS1 <- read_csv("data/CS1.csv") # this is the pre-survey for the Fall, 2015 and Spring, 2016 semesters
bad_usernames <- c("_49147_1", "_93993_1", "_80624_1", "@X@user.pk_string@X@", "")
CS1_ss <- dplyr::filter(CS1,
                        !is.na(Q1MaincellgroupRow1),
                        !opdata_username %in% bad_usernames,
                        opdata_CourseID != "@X@course.course_id@X@") # must revisit

ps12 <- dplyr::arrange(CS1_ss, opdata_username, opdata_CourseID, StartDate)

ps12$Q1MaincellgroupRow4_rc <- car::recode(ps12$Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2")
ps12$Q1MaincellgroupRow7_rc <- car::recode(ps12$Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

ps12 <- ps12 %>% 
    mutate(q1 = Q1MaincellgroupRow1,
           q2 = Q1MaincellgroupRow2,
           q3 = Q1MaincellgroupRow3,
           q4 = Q1MaincellgroupRow4_rc, 
           q5 = Q1MaincellgroupRow5,
           q6 = Q1MaincellgroupRow6,
           q7 = Q1MaincellgroupRow7_rc,
           q8 = Q1MaincellgroupRow8,
           q9 = Q1MaincellgroupRow9,
           q10 = Q1MaincellgroupRow10)

ps12$int <- (ps12$Q1MaincellgroupRow1 + ps12$Q1MaincellgroupRow4_rc + ps12$Q1MaincellgroupRow8 + ps12$Q1MaincellgroupRow10 + ps12$Q1MaincellgroupRow5) / 5
ps12$uv <- (ps12$Q1MaincellgroupRow2 + ps12$Q1MaincellgroupRow6 + ps12$Q1MaincellgroupRow9) / 3
ps12$percomp <- (ps12$Q1MaincellgroupRow3 + ps12$Q1MaincellgroupRow7_rc) / 2
ps12$tv <- (ps12$Q1MaincellgroupRow1 + ps12$Q1MaincellgroupRow8 + ps12$Q1MaincellgroupRow10 + ps12$Q1MaincellgroupRow5 + ps12$Q1MaincellgroupRow2 + ps12$Q1MaincellgroupRow6 + ps12$Q1MaincellgroupRow9) / 7 # note: omits reverse-coded item 4
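The sums above return NA whenever any item is missing. A sketch of an NA-tolerant alternative using the q* columns created above (the *_alt names are hypothetical; na.rm = TRUE averages over whichever items a student answered):

# NA-tolerant composites via rowMeans (sketch; q4 and q7 are already recoded)
ps12 <- ps12 %>%
    mutate(int_alt = rowMeans(cbind(q1, q4, q5, q8, q10), na.rm = TRUE),
           uv_alt  = rowMeans(cbind(q2, q6, q9), na.rm = TRUE),
           percomp_alt = rowMeans(cbind(q3, q7), na.rm = TRUE))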

x <- str_split(ps12$opdata_CourseID, "-")

ps12_f <- mutate(ps12,
                 subject = map_chr(x, ~ .[1]),
                 semester = map_chr(x, ~ .[2]),
                 section = map_chr(x, ~ .[3]))

ps12_f <- select(ps12_f,
                 student_ID = opdata_username,
                 course_ID = opdata_CourseID,
                 subject, semester, section,
                 int, uv, percomp, tv, 
                 q1:q10)

ps12_f <- mutate(ps12_f, student_ID = str_sub(student_ID, start = 2L, end = -3L)) # e.g., "_49147_1" -> "49147"
ps12_f <- arrange(ps12_f, student_ID)

CS2 <- read_csv("data/CS2.csv") # this is the pre-survey for the Fall, 2015 and Spring, 2016 semesters

CS2$Q1MaincellgroupRow4_rc <- car::recode(CS2$Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2")
CS2$Q1MaincellgroupRow7_rc <- car::recode(CS2$Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")

CS2$post_int <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5) / 4 # reverse-coded item 4 dropped
CS2$post_uv <- (CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 3 # all three UV items; the earlier "dropped 7" note appears to refer to item 4 above

CS2$post_tv <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10+ CS2$Q1MaincellgroupRow5 + CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6+ CS2$Q1MaincellgroupRow9) / 7

CS2$post_percomp <- (CS2$Q1MaincellgroupRow3 + CS2$Q1MaincellgroupRow7_rc) / 2
CS2$date <- lubridate::ymd_hm(CS2$CompletedDate, tz = "America/Detroit")
CS2 <- arrange(CS2, date)

CS2 <- CS2 %>% 
    mutate(student_ID = str_sub(opdata_username, start = 2L, end = -3L)) %>% 
    select(student_ID, contains("post"), date)

CS2 <- CS2[complete.cases(CS2), ]

CS2 <- filter(CS2, 
              student_ID != "49147",
              student_ID != "93993",
              student_ID != "80624",
              student_ID != "@X@user.pk_string@X@",
              student_ID != "@X@course.course_id@X@",
              student_ID != "")

CS2 <- distinct(CS2, student_ID, .keep_all = T) # keeps each student's earliest post-survey (rows were arranged by date above)
CS2 <- select(CS2, -date)
CS2 <- arrange(CS2, student_ID)

ps12_f <- left_join(ps12_f, CS2, by = "student_ID")

2. Pre-processing (for semester 3)

ps3 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_1_7_13_17.xls")

ps3$Q1MaincellgroupRow31_rc <- car::recode(ps3$Q1MaincellgroupRow31, "1=5; 2=4; 5=1; 4=2")
ps3$Q1MaincellgroupRow61_rc <- car::recode(ps3$Q1MaincellgroupRow61, "1=5; 2=4; 5=1; 4=2")

# ps3$int <- (ps3$Q1MaincellgroupRow01 + ps3$Q1MaincellgroupRow71 + ps3$Q1MaincellgroupRow91+ ps3$Q1MaincellgroupRow41) / 4
# ps3$uv <- (ps3$Q1MaincellgroupRow11 + ps3$Q1MaincellgroupRow51+ ps3$Q1MaincellgroupRow81) / 3 # dropped 7
# ps3$percomp <- (ps3$Q1MaincellgroupRow21 + ps3$Q1MaincellgroupRow61_rc) / 2

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

ps3 <- ps3 %>% 
    mutate(int = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91),
           uv = composite_mean_maker(ps3, Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
           percomp = composite_mean_maker(ps3, Q1MaincellgroupRow21, Q1MaincellgroupRow61_rc),
           tv = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91, Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81), # note: unlike the semester 1-2 tv, this includes reverse-coded item 4
           q1 = Q1MaincellgroupRow01,
           q2 = Q1MaincellgroupRow11,
           q3 = Q1MaincellgroupRow21,
           q4 = Q1MaincellgroupRow31_rc, 
           q5 = Q1MaincellgroupRow41,
           q6 = Q1MaincellgroupRow51,
           q7 = Q1MaincellgroupRow61_rc,
           q8 = Q1MaincellgroupRow71,
           q9 = Q1MaincellgroupRow81,
           q10 = Q1MaincellgroupRow91
    ) %>% 
    filter(opdata_CourseID != "@X@course.course_id@X@") %>% 
    separate(opdata_CourseID, c("subject", "semester", "section"), sep = "-", remove = F) 
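composite_mean_maker() comes from jmRtools. If that package isn't installed, a minimal stand-in, under the assumption that it simply row-averages the supplied item columns:

# Minimal stand-in for jmRtools::composite_mean_maker() (an assumption about
# its behavior: the per-row mean across the supplied columns)
composite_mean_maker <- function(df, ...) {
    rowMeans(dplyr::select(df, ...), na.rm = TRUE)
}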

ps3_f <- select(ps3,
                student_ID = opdata_username,
                course_ID = opdata_CourseID,
                subject, semester, section,
                int, uv, percomp, tv,
                q1:q10)

df2 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_2_7_13_17.xls")
df2$post_int <- (df2$Q2MaincellgroupRow01 + df2$Q2MaincellgroupRow71 + df2$Q2MaincellgroupRow91 + df2$Q2MaincellgroupRow41) / 4 # reverse-coded item 4 dropped
df2$post_uv <- (df2$Q2MaincellgroupRow11 + df2$Q2MaincellgroupRow51 + df2$Q2MaincellgroupRow81) / 3 # all three UV items
df2$post_percomp <- df2$Q2MaincellgroupRow21 # note: only item 3 is used here; reverse-coded item 7 is not
df2$post_tv <- (df2$Q2MaincellgroupRow01 + df2$Q2MaincellgroupRow71 + df2$Q2MaincellgroupRow91 + df2$Q2MaincellgroupRow41 + df2$Q2MaincellgroupRow11 + df2$Q2MaincellgroupRow51 + df2$Q2MaincellgroupRow81) / 7

df2 <- mutate(df2, date = lubridate::mdy_hm(CompletedDate, tz = "America/Detroit"))

df2 <- arrange(df2, date)

df2 <- select(df2, student_ID = opdata_username, contains("post"), date)
df2 <- distinct(df2) # note: unlike CS2 above, this removes only fully duplicated rows, not one row per student
df2 <- select(df2, -date)

ps3_f <- left_join(ps3_f, df2, by = "student_ID")
ps3_f <- mutate(ps3_f,
                student_ID = str_sub(student_ID, start = 2, end = -3))

3. Merging and processing merged data

ps12s <- dplyr::select(ps12_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)
ps3s <- dplyr::select(ps3_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)

x <- bind_rows(ps12s, ps3s)
x <- as_tibble(x)

d <- bind_rows(ps12_f, ps3_f)

# treatment vs. control for sems 1 and 2
# https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

d <- mutate(d,
            intervention_dummy = case_when(
                # Fall 15
                course_ID == "AnPhA-S116-01" ~ 1,
                course_ID == "AnPhA-S116-02" ~ 0,
                course_ID == "BioA-S116-01" ~ 1,
                course_ID == "BioA-T116-01" ~ 0,
                course_ID == "FrScA-S116-01" ~ 1,
                course_ID == "FrScA-S116-02" ~ 0,
                course_ID == "FrScA-S116-03" ~ 1,
                course_ID == "FrScA-S116-04" ~ 0,
                course_ID == "FrScA-T116-01" ~ 0,
                course_ID == "OcnA-S116-01" ~ 1,
                course_ID == "OcnA-S116-01" ~ 0,
                course_ID == "OcnA-S116-03" ~ 1,
                course_ID == "OcnA-T116-01" ~ 0,
                course_ID == "PhysA-S116-01" ~ 1,
                course_ID == "PhysA-T116-01" ~ 0,
                
                # Spring 16
                course_ID == "AnPhA-S216-01" ~ 0,
                course_ID == "AnPhA-S216-02" ~ 1,
                course_ID == "BioA-S216-01" ~ 0,
                course_ID == "FrScA-S216-01" ~ 0,
                course_ID == "FrScA-S216-02" ~ 1,
                course_ID == "FrScA-S216-03" ~ 0,
                course_ID == "FrScA-S216-04" ~ 1,
                course_ID == "OcnA-S216-01" ~ 0,
                course_ID == "OcnA-S216-02" ~ 1,
                course_ID == "PhysA-S216-01" ~ 0,
                
                # Spring 17
                course_ID == "AnPhA-S217-01" ~ 1,
                course_ID == "AnPhA-S217-01" ~ 0,
                course_ID == "Bio-S217-01" ~ 1,
                course_ID == "FrScA-S217-01" ~ 1,
                course_ID == "FrScA-S217-02" ~ 0,
                course_ID == "FrScA-S217-02." ~ 0,
                course_ID == "FrScA-S217-03" ~ 1,
                course_ID == "OcnA-S217-01" ~ 0,
                course_ID == "OcnA-S217-02" ~ 1,
                course_ID == "OcnA-S217-03" ~ 1,
                course_ID == "PhysA-S217-01" ~ 0,
                TRUE ~ 0 # any course_ID not listed above is coded as control
            ))
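Given the duplicated course IDs flagged above, a lookup-table join may be less error-prone than a long case_when(); a sketch (only the first two sections shown; unmatched course IDs then surface as NA instead of being silently coded 0):

# Sketch: encode condition assignment as a table and join it in
condition_table <- tibble::tribble(
    ~course_ID,      ~intervention_dummy,
    "AnPhA-S116-01", 1,
    "AnPhA-S116-02", 0
    # ... remaining course sections as above
)
# d <- left_join(select(d, -intervention_dummy), condition_table, by = "course_ID")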

d <- rename(d, pre_int = int, pre_uv = uv, pre_percomp = percomp, pre_tv = tv)

3.5. CFA

BIC is lower for m1, the three-factor model (15004.8 vs. 15419.9 for the two-factor m2), and the chi-square difference test below also favors m1 (Δχ² = 428.34, df = 2, p < .001).

library(lavaan)

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

m1 <- '
    int =~ q1 + q4 + q5 + q8 + q10
    uv =~ q2 + q6 + q9
    pc =~ q3 + q7
'

out1 <- sem(m1, data = d)
summary(out1, fit.measures = T)
## lavaan (0.5-23.1097) converged normally after  40 iterations
## 
##                                                   Used       Total
##   Number of observations                           774         809
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              170.044
##   Degrees of freedom                                32
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             3701.760
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.962
##   Tucker-Lewis Index (TLI)                       0.947
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -7425.925
##   Loglikelihood unrestricted model (H1)      -7340.903
## 
##   Number of free parameters                         23
##   Akaike (AIC)                               14897.850
##   Bayesian (BIC)                             15004.836
##   Sample-size adjusted Bayesian (BIC)        14931.800
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.075
##   90 Percent Confidence Interval          0.064  0.086
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.037
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   int =~                                             
##     q1                1.000                           
##     q4                0.904    0.049   18.561    0.000
##     q5                0.977    0.039   25.032    0.000
##     q8                1.036    0.038   27.460    0.000
##     q10               1.230    0.052   23.799    0.000
##   uv =~                                              
##     q2                1.000                           
##     q6                0.839    0.041   20.327    0.000
##     q9                0.956    0.048   19.945    0.000
##   pc =~                                               
##     q3                1.000                           
##     q7                0.671    0.064   10.505    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   int ~~                                             
##     uv                0.256    0.021   12.000    0.000
##     pc                0.260    0.021   12.506    0.000
##   uv ~~                                              
##     pc                0.335    0.029   11.573    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .q1                0.143    0.009   15.299    0.000
##    .q4                0.359    0.020   18.277    0.000
##    .q5                0.159    0.010   15.931    0.000
##    .q8                0.115    0.008   13.814    0.000
##    .q10               0.307    0.018   16.609    0.000
##    .q2                0.338    0.025   13.289    0.000
##    .q6                0.253    0.019   13.658    0.000
##    .q9                0.369    0.026   14.344    0.000
##    .q3                0.418    0.037   11.309    0.000
##    .q7                0.580    0.032   17.857    0.000
##     int               0.295    0.022   13.492    0.000
##     uv                0.546    0.046   11.983    0.000
##     pc                0.319    0.043    7.384    0.000
m2 <- '
    tv =~ q1 + q4 + q5 + q8 + q10 + q2 + q6 + q9
    pc =~ q3 + q7
'

out2 <- sem(m2, data = d)
summary(out2, fit.measures = T)
## lavaan (0.5-23.1097) converged normally after  32 iterations
## 
##                                                   Used       Total
##   Number of observations                           774         809
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              598.387
##   Degrees of freedom                                34
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             3701.760
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.846
##   Tucker-Lewis Index (TLI)                       0.796
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -7640.096
##   Loglikelihood unrestricted model (H1)      -7340.903
## 
##   Number of free parameters                         21
##   Akaike (AIC)                               15322.193
##   Bayesian (BIC)                             15419.876
##   Sample-size adjusted Bayesian (BIC)        15353.191
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.146
##   90 Percent Confidence Interval          0.136  0.157
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.078
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   tv =~                                               
##     q1                1.000                           
##     q4                0.910    0.050   18.161    0.000
##     q5                0.977    0.041   24.076    0.000
##     q8                1.016    0.040   25.717    0.000
##     q10               1.258    0.053   23.599    0.000
##     q2                0.929    0.063   14.868    0.000
##     q6                0.897    0.052   17.222    0.000
##     q9                1.052    0.061   17.320    0.000
##   pc =~                                               
##     q3                1.000                           
##     q7                0.688    0.066   10.404    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   tv ~~                                               
##     pc                0.269    0.021   12.828    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .q1                0.153    0.010   15.938    0.000
##    .q4                0.364    0.020   18.387    0.000
##    .q5                0.168    0.010   16.437    0.000
##    .q8                0.137    0.009   15.348    0.000
##    .q10               0.302    0.018   16.684    0.000
##    .q2                0.638    0.034   18.909    0.000
##    .q6                0.408    0.022   18.560    0.000
##    .q9                0.554    0.030   18.543    0.000
##    .q3                0.426    0.037   11.489    0.000
##    .q7                0.576    0.033   17.696    0.000
##     tv                0.285    0.022   13.184    0.000
##     pc                0.312    0.043    7.259    0.000
anova(out1, out2)
## Chi Square Difference Test
## 
##      Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## out1 32 14898 15005 170.04                                  
## out2 34 15322 15420 598.39     428.34       2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
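The information criteria can also be pulled directly from the fitted objects (a quick check; lower BIC favors the three-factor model):

sapply(list(three_factor = out1, two_factor = out2), BIC)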

4. Processing all gradebook data

x <- read_csv("RR_S3.csv")
x <- select(x, course_ID = Course_ID, student_ID = CU_Pk1, Item_Position:last_access_date)
x <- rename(x, Grade_Category = Grade_Catagory) # fix misspelled source column name

RR_Course_Data <- select(RR_Course_Data, course_ID = CourseSectionOrigID, student_ID = Bb_UserPK, Gradebook_Item:last_access_date)

xx <- bind_rows(RR_Course_Data, x)
# write_csv(RR_Course_Data, "s12_gradebook_data.csv")

5. Merging self-report and gradebook data (not run yet)

d$student_ID <- as.character(d$student_ID)
xx$student_ID <- as.character(xx$student_ID)
df <- left_join(d, xx, by = "student_ID")
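Since xx has one row per gradebook item, this join is one-to-many and expands d accordingly; a quick sanity check (sketch) before relying on the merged data:

# How many gradebook rows per student? (the join above is one-to-many)
xx %>%
    dplyr::count(student_ID, sort = TRUE) %>%
    head()
nrow(d); nrow(df) # df should have many more rows than d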

5A. Processing trace data

library(readxl)
ts_12 <- read_csv("RR_Minutes.csv")

td_12 <- read_csv("RR_Course_Data.csv")
td_3 <- read_excel("Ranelluci Study Data Pull Request.xlsx")

td_12 <- td_12 %>% 
    select(student_ID = Bb_UserPK, course_ID = CourseSectionOrigID,
           gender = Gender, enrollment_reason = EnrollmentReason, 
           enrollment_status = EnrollmentStatus,
           final_grade = FinalGradeCEMS) %>% 
    distinct()

ts_12 <- ts_12 %>% 
    select(student_ID = Bb_UserPK,
           course_ID = CourseSectionOrigID,
           time_spent = TimeSpent)

td_12 <- left_join(td_12, ts_12, by = c("student_ID", "course_ID"))

td_3 <- td_3 %>% 
    select(student_ID = CEMS_Bb_UserPK, 
           course_ID = Section_ID,
           gender = Gender,
           enrollment_reason = EnrollmentReason,
           enrollment_status = EnrollmentStatus,
           final_grade = Final_Grade,
           time_spent = `Sum of time spent in course`) %>% 
    mutate(final_grade = as.numeric(final_grade))

trace_data <- bind_rows(td_12, td_3)

5B. Merging trace data with other data

d$student_ID <- as.integer(d$student_ID)
d <- left_join(d, trace_data, by = c("student_ID", "course_ID"))

6. Pre-post analysis

Using multi-level models by course.

6.0. Looking first at n’s

d %>% 
    count(intervention_dummy)
## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0   380
## 2                  1   429
d[complete.cases(d), ] %>% # note: listwise deletion leaves very few control students (n = 8 below)
    count(intervention_dummy)
## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0     8
## 2                  1   107

6A. Just looking at pre-post changes in interest, UV, and grades

sjPlot::sjt.lmer(lme4::lmer(post_int ~ intervention_dummy + (1 | course_ID), data = d))
post_int:
  (Intercept): B = 4.51, CI [3.71, 5.31], p < .001
  intervention_dummy: B = -0.85, CI [-1.80, 0.11], p = .096
  Random: σ² = 0.705; τ00 (course_ID) = 0.370; N (course_ID) = 12; ICC = 0.344
  Observations = 121; R² / Ω₀² = .262 / .252
sjPlot::sjt.lmer(lme4::lmer(post_uv ~ intervention_dummy + (1 | course_ID), data = d))
post_uv:
  (Intercept): B = 3.86, CI [3.03, 4.70], p < .001
  intervention_dummy: B = -0.72, CI [-1.70, 0.27], p = .166
  Random: σ² = 0.853; τ00 (course_ID) = 0.354; N (course_ID) = 12; ICC = 0.293
  Observations = 121; R² / Ω₀² = .205 / .187
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ intervention_dummy + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 76.32, CI [73.16, 79.49], p < .001
  intervention_dummy: B = 1.57, CI [-2.78, 5.91], p = .486
  Random: σ² = 429.987; τ00 (course_ID) = 15.339; N (course_ID) = 36; ICC = 0.034
  Observations = 776; R² / Ω₀² = .060 / .051

6B. With pre-values added (pre perceived competence for final grades)

sjPlot::sjt.lmer(lme4::lmer(post_int ~ pre_int + intervention_dummy + (1 | course_ID), data = d))
post_int:
  (Intercept): B = 1.25, CI [0.09, 2.42], p = .038
  pre_int: B = 0.72, CI [0.50, 0.95], p < .001
  intervention_dummy: B = -0.45, CI [-1.10, 0.20], p = .177
  Random: σ² = 0.556; τ00 (course_ID) = 0.078; N (course_ID) = 12; ICC = 0.124
  Observations = 119; R² / Ω₀² = .400 / .399
sjPlot::sjt.lmer(lme4::lmer(post_uv ~ pre_uv + intervention_dummy + (1 | course_ID), data = d))
post_uv:
  (Intercept): B = 0.75, CI [-0.20, 1.71], p = .126
  pre_uv: B = 0.80, CI [0.61, 0.98], p < .001
  intervention_dummy: B = -0.22, CI [-0.91, 0.46], p = .525
  Random: σ² = 0.556; τ00 (course_ID) = 0.115; N (course_ID) = 12; ICC = 0.172
  Observations = 118; R² / Ω₀² = .457 / .456
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_percomp + intervention_dummy + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 66.67, CI [58.03, 75.31], p < .001
  pre_percomp: B = 2.68, CI [0.49, 4.87], p = .017
  intervention_dummy: B = 1.28, CI [-3.08, 5.64], p = .565
  Random: σ² = 431.258; τ00 (course_ID) = 14.988; N (course_ID) = 36; ICC = 0.034
  Observations = 759; R² / Ω₀² = .064 / .057

6C. With pre-values added + interactions with pre perceived competence

sjPlot::sjt.lmer(lme4::lmer(post_int ~ pre_int + intervention_dummy*pre_percomp + (1 | course_ID), data = d))
post_int:
  (Intercept): B = 1.22, CI [-2.45, 4.89], p = .518
  pre_int: B = 0.71, CI [0.43, 1.00], p < .001
  intervention_dummy: B = -0.43, CI [-4.21, 3.36], p = .826
  pre_percomp: B = 0.02, CI [-0.90, 0.94], p = .965
  intervention_dummy:pre_percomp: B = -0.00, CI [-0.92, 0.91], p = .992
  Random: σ² = 0.564; τ00 (course_ID) = 0.086; N (course_ID) = 12; ICC = 0.133
  Observations = 119; R² / Ω₀² = .402 / .400
sjPlot::sjt.lmer(lme4::lmer(post_uv ~ pre_uv + intervention_dummy*pre_percomp + (1 | course_ID), data = d))
post_uv:
  (Intercept): B = 0.72, CI [-3.12, 4.56], p = .715
  pre_uv: B = 0.77, CI [0.56, 0.99], p < .001
  intervention_dummy: B = -0.32, CI [-4.23, 3.59], p = .872
  pre_percomp: B = 0.03, CI [-0.89, 0.95], p = .948
  intervention_dummy:pre_percomp: B = 0.03, CI [-0.92, 0.98], p = .953
  Random: σ² = 0.564; τ00 (course_ID) = 0.125; N (course_ID) = 12; ICC = 0.181
  Observations = 118; R² / Ω₀² = .459 / .458
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_uv + intervention_dummy*pre_percomp + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 64.75, CI [51.94, 77.55], p < .001
  pre_uv: B = -0.43, CI [-2.85, 1.98], p = .725
  intervention_dummy: B = 7.72, CI [-8.98, 24.42], p = .365
  pre_percomp: B = 3.66, CI [0.30, 7.03], p = .033
  intervention_dummy:pre_percomp: B = -1.69, CI [-6.09, 2.70], p = .450
  Random: σ² = 429.382; τ00 (course_ID) = 15.350; N (course_ID) = 36; ICC = 0.035
  Observations = 755; R² / Ω₀² = .065 / .059

6D. With all variables added

sjPlot::sjt.lmer(lme4::lmer(post_uv ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID), data = d))
post_uv:
  (Intercept): B = 0.53, CI [-0.75, 1.80], p = .419
  pre_uv: B = 0.76, CI [0.53, 1.00], p < .001
  pre_int: B = 0.07, CI [-0.24, 0.39], p = .644
  pre_percomp: B = 0.01, CI [-0.29, 0.30], p = .972
  intervention_dummy: B = -0.21, CI [-0.90, 0.48], p = .559
  Random: σ² = 0.568; τ00 (course_ID) = 0.106; N (course_ID) = 12; ICC = 0.157
  Observations = 117; R² / Ω₀² = .456 / .455
sjPlot::sjt.lmer(lme4::lmer(post_int ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID), data = d))
post_int:
  (Intercept): B = 0.97, CI [-0.26, 2.20], p = .125
  pre_uv: B = 0.24, CI [0.01, 0.47], p = .043
  pre_int: B = 0.63, CI [0.32, 0.93], p < .001
  pre_percomp: B = -0.05, CI [-0.34, 0.24], p = .724
  intervention_dummy: B = -0.46, CI [-1.11, 0.18], p = .162
  Random: σ² = 0.541; τ00 (course_ID) = 0.077; N (course_ID) = 12; ICC = 0.125
  Observations = 117; R² / Ω₀² = .435 / .434
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 54.78, CI [42.72, 66.84], p < .001
  pre_uv: B = -2.38, CI [-5.01, 0.25], p = .077
  pre_int: B = 6.46, CI [3.00, 9.92], p < .001
  pre_percomp: B = 0.78, CI [-1.97, 3.53], p = .579
  intervention_dummy: B = 1.42, CI [-2.98, 5.82], p = .528
  Random: σ² = 425.604; τ00 (course_ID) = 15.350; N (course_ID) = 36; ICC = 0.035
  Observations = 746; R² / Ω₀² = .079 / .074

7. Other analyses

# corrr::correlate(select(d, pre_uv, pre_int))

sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_uv + pre_int + pre_percomp + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 55.57, CI [43.77, 67.37], p < .001
  pre_uv: B = -2.44, CI [-5.06, 0.18], p = .068
  pre_int: B = 6.50, CI [3.04, 9.95], p < .001
  pre_percomp: B = 0.79, CI [-1.96, 3.54], p = .574
  Random: σ² = 425.193; τ00 (course_ID) = 15.489; N (course_ID) = 36; ICC = 0.035
  Observations = 746; R² / Ω₀² = .079 / .075
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ I(time_spent/60) + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 64.66, CI [61.38, 67.93], p < .001
  I(time_spent/60): B = 0.38, CI [0.32, 0.44], p < .001
  Random: σ² = 359.037; τ00 (course_ID) = 31.031; N (course_ID) = 36; ICC = 0.080
  Observations = 776; R² / Ω₀² = .216 / .214
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_uv + pre_int + pre_percomp + I(time_spent/60) + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 46.40, CI [35.22, 57.58], p < .001
  pre_uv: B = -2.98, CI [-5.45, -0.50], p = .019
  pre_int: B = 5.81, CI [2.57, 9.05], p < .001
  pre_percomp: B = 1.30, CI [-1.22, 3.83], p = .313
  I(time_spent/60): B = 0.37, CI [0.31, 0.43], p < .001
  Random: σ² = 355.764; τ00 (course_ID) = 29.811; N (course_ID) = 36; ICC = 0.077
  Observations = 746; R² / Ω₀² = .234 / .232
sjPlot::sjt.lmer(lme4::lmer(time_spent ~ pre_uv + pre_int + pre_percomp + (1 | course_ID), data = d))
time_spent:
  (Intercept): B = 1316.44, CI [556.19, 2076.70], p < .001
  pre_uv: B = 52.34, CI [-116.82, 221.50], p = .544
  pre_int: B = 167.32, CI [-54.11, 388.74], p = .139
  pre_percomp: B = -83.53, CI [-254.87, 87.82], p = .340
  Random: σ² = 1737378.550; τ00 (course_ID) = 154352.339; N (course_ID) = 36; ICC = 0.082
  Observations = 774; R² / Ω₀² = .124 / .118

7A. With gender added

sjPlot::sjt.lmer(lme4::lmer(final_grade ~ gender + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 79.33, CI [76.86, 81.79], p < .001
  gender (M): B = -7.37, CI [-10.61, -4.14], p < .001
  Random: σ² = 417.869; τ00 (course_ID) = 18.957; N (course_ID) = 36; ICC = 0.043
  Observations = 776; R² / Ω₀² = .086 / .080
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ I(time_spent/60) + gender + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 66.77, CI [63.27, 70.26], p < .001
  I(time_spent/60): B = 0.37, CI [0.31, 0.43], p < .001
  gender (M): B = -6.09, CI [-9.09, -3.09], p < .001
  Random: σ² = 351.239; τ00 (course_ID) = 34.669; N (course_ID) = 36; ICC = 0.090
  Observations = 776; R² / Ω₀² = .235 / .233
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_uv + pre_int + pre_percomp + gender + I(time_spent/60) + (1 | course_ID), data = d))
final_grade:
  (Intercept): B = 49.95, CI [38.55, 61.35], p < .001
  pre_uv: B = -2.82, CI [-5.29, -0.35], p = .025
  pre_int: B = 5.07, CI [1.81, 8.34], p = .002
  pre_percomp: B = 1.48, CI [-1.03, 3.99], p = .248
  gender (M): B = -4.82, CI [-7.92, -1.73], p = .002
  I(time_spent/60): B = 0.37, CI [0.30, 0.43], p < .001
  Random: σ² = 350.795; τ00 (course_ID) = 33.423; N (course_ID) = 36; ICC = 0.087
  Observations = 746; R² / Ω₀² = .247 / .245

7B. Mediation analyses (not working quite right, I think)

library(mediation)

m1 <- lme4::lmer(time_spent ~ pre_uv + pre_int + pre_percomp + (1 | course_ID), data = d)
m2 <- lme4::lmer(final_grade ~ time_spent + pre_uv + pre_int + pre_percomp + (1 | course_ID), data = d)

med.outa <- mediate(m1, m2, treat = "pre_uv", mediator = "time_spent", dropobs = T, sims = 500) # dropobs = T refits both models on the shared complete cases
med.outb <- mediate(m1, m2, treat = "pre_int", mediator = "time_spent", dropobs = T, sims = 500)

summary(med.outa)
summary(med.outb)

7C. SEM (but not accounting for nesting)

Indirect effect of UV on final grade, via time spent.

library(lavaan)

d$ts_60 <- d$time_spent / 60
d$male_dummy <- ifelse(d$gender == "M", 1, 0)

m1 <- '
  # regressions
    ts_60 ~ pre_uv + pre_int + pre_percomp + male_dummy
    final_grade ~ ts_60 + pre_uv + pre_int + pre_percomp 
'

out1 <- sem(m1, data = d)
summary(out1)
## lavaan (0.5-23.1097) converged normally after  87 iterations
## 
##                                                   Used       Total
##   Number of observations                           746         809
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                4.462
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.035
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv            4.015    1.351    2.972    0.003
##     pre_int           0.611    1.855    0.330    0.742
##     pre_percomp      -1.361    1.524   -0.893    0.372
##     male_dummy       -4.644    1.841   -2.522    0.012
##   final_grade ~                                       
##     ts_60             0.321    0.032   10.074    0.000
##     pre_uv           -3.709    1.186   -3.126    0.002
##     pre_int           5.768    1.590    3.627    0.000
##     pre_percomp       1.332    1.331    1.000    0.317
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           505.264   26.162   19.313    0.000
##    .final_grade     385.969   19.985   19.313    0.000
m2 <- '
  # regressions
    ts_60 ~ a*pre_uv + pre_percomp + male_dummy
    final_grade ~ b*ts_60 + c*pre_uv + pre_percomp 
    # indirect effect (a*b)
    ab := a*b
    # total effect
    total := c + (a*b)
'

out2 <- sem(m2, data = d)
summary(out2, fit.measures = T)
## lavaan (0.5-23.1097) converged normally after  65 iterations
## 
##                                                   Used       Total
##   Number of observations                           755         809
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                7.895
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.005
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              129.067
##   Degrees of freedom                                 7
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.944
##   Tucker-Lewis Index (TLI)                       0.605
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -8750.019
##   Loglikelihood unrestricted model (H1)      -8746.071
## 
##   Number of free parameters                          8
##   Akaike (AIC)                               17516.037
##   Bayesian (BIC)                             17553.051
##   Sample-size adjusted Bayesian (BIC)        17527.648
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.096
##   90 Percent Confidence Interval          0.042  0.162
##   P-value RMSEA <= 0.05                          0.076
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.024
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv     (a)    4.111    1.258    3.267    0.001
##     pre_percmp       -1.104    1.383   -0.798    0.425
##     male_dummy       -4.872    1.797   -2.710    0.007
##   final_grade ~                                       
##     ts_60      (b)    0.322    0.032   10.125    0.000
##     pre_uv     (c)   -2.176    1.114   -1.953    0.051
##     pre_percmp        3.279    1.216    2.696    0.007
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           504.517   25.967   19.429    0.000
##    .final_grade     390.084   20.077   19.429    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.326    0.426    3.109    0.002
##     total            -0.850    1.178   -0.721    0.471
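The ab estimate above uses lavaan's default delta-method standard error; a bootstrap version is a common check (a sketch, not run here):

# Sketch: bootstrap the indirect effect
out2_boot <- sem(m2, data = d, se = "bootstrap", bootstrap = 1000)
parameterEstimates(out2_boot, boot.ci.type = "perc")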

7D. Let’s run the same two models, accounting for clustering

library(survey)
library(lavaan.survey)

group <- svydesign(ids = ~course_ID, data = d) # one-stage cluster design: students nested in course sections
out1_c <- lavaan.survey(out1, group)
summary(out1_c)
## lavaan (0.5-23.1097) converged normally after  88 iterations
## 
##   Number of observations                           746
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic                4.462       0.118
##   Degrees of freedom                                 1           1
##   P-value (Chi-square)                           0.035       0.731
##   Scaling correction factor                                 37.681
##     for the Satorra-Bentler correction
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv            4.015    1.652    2.430    0.015
##     pre_int           0.611    2.137    0.286    0.775
##     pre_percomp      -1.361    2.331   -0.584    0.559
##     male_dummy       -4.644    2.367   -1.962    0.050
##   final_grade ~                                       
##     ts_60             0.321    0.029   11.014    0.000
##     pre_uv           -3.709    1.440   -2.576    0.010
##     pre_int           5.768    2.668    2.162    0.031
##     pre_percomp       1.332    1.703    0.782    0.434
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60            20.497    5.752    3.563    0.000
##    .final_grade      51.535    8.221    6.269    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           505.211   72.545    6.964    0.000
##    .final_grade     385.929   28.507   13.538    0.000
out2_c <- lavaan.survey(out2, group)
summary(out2_c)
## lavaan (0.5-23.1097) converged normally after  66 iterations
## 
##   Number of observations                           755
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic                7.895       0.336
##   Degrees of freedom                                 1           1
##   P-value (Chi-square)                           0.005       0.562
##   Scaling correction factor                                 23.531
##     for the Satorra-Bentler correction
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv     (a)    4.111    1.590    2.586    0.010
##     pre_percmp       -1.104    2.035   -0.542    0.588
##     male_dummy       -4.872    2.411   -2.021    0.043
##   final_grade ~                                       
##     ts_60      (b)    0.322    0.031   10.353    0.000
##     pre_uv     (c)   -2.176    1.227   -1.773    0.076
##     pre_percmp        3.279    1.390    2.359    0.018
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60            21.962    5.113    4.295    0.000
##    .final_grade      63.545    5.815   10.928    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           504.473   72.086    6.998    0.000
##    .final_grade     390.049   30.818   12.657    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.326    0.550    2.410    0.016
##     total            -0.850    1.333   -0.637    0.524

8. Should we filter out the treatment students?

dd <- filter(d, intervention_dummy == 0)

m1 <- '
  # regressions
    ts_60 ~ pre_uv + pre_int + pre_percomp + male_dummy
    final_grade ~ ts_60 + pre_uv + pre_int + pre_percomp 
'

out1 <- sem(m1, data = dd)
summary(out1)
## lavaan (0.5-23.1097) converged normally after  76 iterations
## 
##                                                   Used       Total
##   Number of observations                           349         380
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               10.940
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.001
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv            3.935    2.094    1.880    0.060
##     pre_int          -0.147    2.952   -0.050    0.960
##     pre_percomp       1.064    2.297    0.463    0.643
##     male_dummy       -3.596    2.870   -1.253    0.210
##   final_grade ~                                       
##     ts_60             0.348    0.045    7.765    0.000
##     pre_uv           -4.613    1.762   -2.619    0.009
##     pre_int           8.297    2.446    3.392    0.001
##     pre_percomp       0.663    1.930    0.344    0.731
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           560.546   42.434   13.210    0.000
##    .final_grade     395.359   29.929   13.210    0.000
m2 <- '
  # regressions
    ts_60 ~ a*pre_uv + pre_percomp + male_dummy
    final_grade ~ b*ts_60 + c*pre_uv + pre_percomp  + male_dummy
    # indirect effect (a*b)
    ab := a*b
    # total effect
    total := c + (a*b)
'

out2 <- sem(m2, data = dd)
summary(out2, fit.measures = T)
## lavaan (0.5-23.1097) converged normally after  75 iterations
## 
##                                                   Used       Total
##   Number of observations                           354         380
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                0.000
##   Degrees of freedom                                 0
##   Minimum Function Value               0.0000000000000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic               82.201
##   Degrees of freedom                                 7
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4128.172
##   Loglikelihood unrestricted model (H1)      -4128.172
## 
##   Number of free parameters                          9
##   Akaike (AIC)                                8274.345
##   Bayesian (BIC)                              8309.168
##   Sample-size adjusted Bayesian (BIC)         8280.617
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent Confidence Interval          0.000  0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv     (a)    3.779    1.925    1.963    0.050
##     pre_percmp        1.100    2.099    0.524    0.600
##     male_dummy       -3.757    2.830   -1.327    0.184
##   final_grade ~                                       
##     ts_60      (b)    0.332    0.044    7.520    0.000
##     pre_uv     (c)   -2.082    1.611   -1.292    0.196
##     pre_percmp        2.713    1.747    1.553    0.120
##     male_dummy       -9.198    2.360   -3.897    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           562.993   42.317   13.304    0.000
##    .final_grade     389.660   29.289   13.304    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.257    0.662    1.899    0.058
##     total            -0.825    1.725   -0.478    0.632
group <- svydesign(ids = ~course_ID, data = dd)
out1_c <- lavaan.survey(out1, group)
summary(out1_c)
## lavaan (0.5-23.1097) converged normally after  76 iterations
## 
##   Number of observations                           349
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic               10.940       0.338
##   Degrees of freedom                                 1           1
##   P-value (Chi-square)                           0.001       0.561
##   Scaling correction factor                                 32.331
##     for the Satorra-Bentler correction
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv            3.935    1.779    2.212    0.027
##     pre_int          -0.147    3.837   -0.038    0.970
##     pre_percomp       1.064    2.801    0.380    0.704
##     male_dummy       -3.596    4.142   -0.868    0.385
##   final_grade ~                                       
##     ts_60             0.348    0.024   14.292    0.000
##     pre_uv           -4.613    1.746   -2.643    0.008
##     pre_int           8.297    2.075    3.998    0.000
##     pre_percomp       0.663    2.308    0.287    0.774
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60            16.195    9.706    1.669    0.095
##    .final_grade      43.952    9.430    4.661    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           560.415  111.732    5.016    0.000
##    .final_grade     395.267   26.375   14.986    0.000
out2_c <- lavaan.survey(out2, group)
summary(out2_c)
## lavaan (0.5-23.1097) converged normally after  76 iterations
## 
##   Number of observations                           354
## 
##   Estimator                                         ML      Robust
##   Minimum Function Test Statistic                0.000       0.000
##   Degrees of freedom                                 0           0
##   Scaling correction factor                                     NA
##     for the Satorra-Bentler correction
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_uv     (a)    3.779    1.668    2.266    0.023
##     pre_percmp        1.100    2.190    0.502    0.615
##     male_dummy       -3.757    4.185   -0.898    0.369
##   final_grade ~                                       
##     ts_60      (b)    0.332    0.031   10.653    0.000
##     pre_uv     (c)   -2.082    1.465   -1.420    0.155
##     pre_percmp        2.713    1.910    1.420    0.156
##     male_dummy       -9.198    2.189   -4.202    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60            16.211    6.559    2.472    0.013
##    .final_grade      65.908    9.759    6.753    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           562.884  111.413    5.052    0.000
##    .final_grade     389.584   30.408   12.812    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.257    0.568    2.214    0.027
##     total            -0.825    1.783   -0.463    0.644

9. Task value and gender with the filtered (control-only) data, plus task value and perceived competence on their own

m1 <- '
  # regressions
    ts_60 ~ pre_tv + pre_percomp + male_dummy
    final_grade ~ ts_60 + pre_tv + pre_percomp
'

out1 <- sem(m1, data = dd)

summary(out1, fit.measures = T)
## lavaan (0.5-23.1097) converged normally after  66 iterations
## 
##                                                   Used       Total
##   Number of observations                           351         380
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               14.186
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic               77.752
##   Degrees of freedom                                 7
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.814
##   Tucker-Lewis Index (TLI)                      -0.305
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3985.715
##   Loglikelihood unrestricted model (H1)      -3978.622
## 
##   Number of free parameters                          8
##   Akaike (AIC)                                7987.430
##   Bayesian (BIC)                              8018.316
##   Sample-size adjusted Bayesian (BIC)         7992.937
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.194
##   90 Percent Confidence Interval          0.113  0.289
##   P-value RMSEA <= 0.05                          0.002
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.047
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_tv            4.121    2.720    1.515    0.130
##     pre_percomp       1.071    2.279    0.470    0.638
##     male_dummy       -3.313    2.848   -1.163    0.245
##   final_grade ~                                       
##     ts_60             0.336    0.045    7.411    0.000
##     pre_tv            1.361    2.317    0.587    0.557
##     pre_percomp       1.421    1.939    0.733    0.464
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           565.794   42.709   13.248    0.000
##    .final_grade     409.647   30.922   13.248    0.000
m2 <- '
  # regressions
    ts_60 ~ a*pre_tv + pre_percomp + male_dummy
    final_grade ~ b*ts_60 + c*pre_tv + pre_percomp
    # indirect effect (a*b)
    ab := a*b
    # total effect
    total := c + (a*b)
'

out2 <- sem(m2, data = d) # note: fit to the full data (d), not the control-only subset (dd)
semPlot::semPaths(out2, residuals = FALSE, structural = FALSE, layout = "tree2", style = "mx",
                  whatLabels = "hide", rotation = 2, curve = FALSE, sizeMan = 6, sizeMan2 = 2,
                  sizeLat = 5, sizeLat2 = 3, shapeMan = "rectangle")

summary(out2)
## lavaan (0.5-23.1097) converged normally after  66 iterations
## 
##                                                   Used       Total
##   Number of observations                           750         809
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic                7.215
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.007
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_tv     (a)    5.224    1.779    2.936    0.003
##     pre_percmp       -1.525    1.503   -1.015    0.310
##     male_dummy       -4.287    1.820   -2.355    0.019
##   final_grade ~                                       
##     ts_60      (b)    0.313    0.032    9.769    0.000
##     pre_tv     (c)    0.581    1.564    0.372    0.710
##     pre_percmp        1.808    1.323    1.366    0.172
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           506.956   26.179   19.365    0.000
##    .final_grade     393.369   20.313   19.365    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.635    0.582    2.812    0.005
##     total             2.217    1.650    1.344    0.179
m3 <- '
  # regressions
    ts_60 ~ a*pre_tv + male_dummy
    final_grade ~ b*ts_60 + c*pre_tv
    # indirect effect (a*b)
    ab := a*b
    # total effect
    total := c + (a*b)
'

out3 <- sem(m3, data = dd)
summary(out3)
## lavaan (0.5-23.1097) converged normally after  46 iterations
## 
##                                                   Used       Total
##   Number of observations                           352         380
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               15.402
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_tv     (a)    4.887    2.142    2.282    0.022
##     male_dummy       -3.340    2.836   -1.178    0.239
##   final_grade ~                                       
##     ts_60      (b)    0.337    0.046    7.384    0.000
##     pre_tv     (c)    2.879    1.840    1.565    0.118
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           564.548   42.554   13.266    0.000
##    .final_grade     415.544   31.323   13.266    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.647    0.755    2.180    0.029
##     total             4.526    1.962    2.306    0.021
m4 <- '
  # regressions
    ts_60 ~ a*pre_percomp + male_dummy
    final_grade ~ b*ts_60 + c*pre_percomp
    # indirect effect (a*b)
    ab := a*b
    # total effect
    total := c + (a*b)
'

out4 <- sem(m4, data = dd)
summary(out4)
## lavaan (0.5-23.1097) converged normally after  44 iterations
## 
##                                                   Used       Total
##   Number of observations                           354         380
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic               14.962
##   Degrees of freedom                                 1
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   ts_60 ~                                             
##     pre_percmp (a)    3.224    1.808    1.783    0.075
##     male_dummy       -3.707    2.845   -1.303    0.193
##   final_grade ~                                       
##     ts_60      (b)    0.338    0.045    7.536    0.000
##     pre_percmp (c)    2.044    1.534    1.333    0.183
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .ts_60           569.120   42.778   13.304    0.000
##    .final_grade     408.401   30.697   13.304    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                1.091    0.629    1.735    0.083
##     total             3.135    1.644    1.907    0.057

10. Starting to look at subject and gender differences (task value; control students only)

sjPlot::sjt.lmer(lme4::lmer(ts_60 ~ pre_tv + male_dummy + (1 | course_ID), data = dd))
ts_60:
  (Intercept): B = 21.25, CI [3.06, 39.43], p = .023
  pre_tv: B = 3.26, CI [-0.97, 7.50], p = .132
  male_dummy: B = -3.71, CI [-9.07, 1.64], p = .175
  Random: σ² = 535.607; τ00 (course_ID) = 42.223; N (course_ID) = 20; ICC = 0.073
  Observations = 369; R² / Ω₀² = .130 / .125
sjPlot::sjt.lmer(lme4::lmer(ts_60 ~ pre_percomp + male_dummy + (1 | course_ID), data = dd))
ts_60:
  (Intercept): B = 25.77, CI [12.22, 39.31], p < .001
  pre_percomp: B = 2.49, CI [-0.95, 5.92], p = .157
  male_dummy: B = -3.97, CI [-9.31, 1.37], p = .146
  Random: σ² = 534.401; τ00 (course_ID) = 46.026; N (course_ID) = 20; ICC = 0.079
  Observations = 371; R² / Ω₀² = .135 / .130
sjPlot::sjt.lmer(lme4::lmer(ts_60 ~ pre_tv + pre_percomp + male_dummy + (1 | course_ID), data = dd))
ts_60:
  (Intercept): B = 20.25, CI [1.70, 38.80], p = .033
  pre_tv: B = 2.19, CI [-3.03, 7.42], p = .411
  pre_percomp: B = 1.47, CI [-2.76, 5.69], p = .497
  male_dummy: B = -3.64, CI [-9.02, 1.74], p = .186
  Random: σ² = 537.876; τ00 (course_ID) = 42.303; N (course_ID) = 20; ICC = 0.073
  Observations = 368; R² / Ω₀² = .131 / .126
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ ts_60 + (1 | course_ID), data = dd))
final_grade:
  (Intercept): B = 62.02, CI [57.30, 66.74], p < .001
  ts_60: B = 0.41, CI [0.32, 0.50], p < .001
  Random: σ² = 379.188; τ00 (course_ID) = 26.811; N (course_ID) = 20; ICC = 0.066
  Observations = 361; R² / Ω₀² = .234 / .232
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_tv + male_dummy + (1 | course_ID), data = dd))
final_grade:
  (Intercept): B = 62.02, CI [45.54, 78.50], p < .001
  pre_tv: B = 4.15, CI [0.28, 8.02], p = .037
  male_dummy: B = -10.72, CI [-15.72, -5.72], p < .001
  Random: σ² = 449.986; τ00 (course_ID) = 11.022; N (course_ID) = 20; ICC = 0.024
  Observations = 352; R² / Ω₀² = .097 / .096
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_percomp + male_dummy + (1 | course_ID), data = dd))
final_grade:
  (Intercept): B = 68.82, CI [56.60, 81.04], p < .001
  pre_percomp: B = 2.86, CI [-0.32, 6.04], p = .078
  male_dummy: B = -10.64, CI [-15.61, -5.67], p < .001
  Random: σ² = 445.816; τ00 (course_ID) = 9.914; N (course_ID) = 20; ICC = 0.022
  Observations = 354; R² / Ω₀² = .089 / .088
sjPlot::sjt.lmer(lme4::lmer(final_grade ~ pre_tv + pre_percomp + male_dummy + (1 | course_ID), data = dd))
final_grade:
  (Intercept): B = 62.40, CI [45.70, 79.10], p < .001
  pre_tv: B = 2.61, CI [-2.23, 7.45], p = .291
  pre_percomp: B = 1.62, CI [-2.36, 5.60], p = .426
  male_dummy: B = -10.32, CI [-15.32, -5.31], p < .001
  Random: σ² = 448.084; τ00 (course_ID) = 9.982; N (course_ID) = 20; ICC = 0.022
  Observations = 351; R² / Ω₀² = .091 / .089